Repository: spreadsheetlab/XLParser
Branch: master
Commit: 8b7d1c684b37
Files: 177
Total size: 25.9 MB
Directory structure:
gitextract_hbenmd2r/
├── .gitattributes
├── .gitignore
├── CHANGELOG.md
├── LICENSE.md
├── README.md
├── app/
│ ├── XLParser apps.sln
│ ├── XLParser.Web/
│ │ ├── .gitignore
│ │ ├── Default.aspx
│ │ ├── Default.aspx.cs
│ │ ├── Default.aspx.designer.cs
│ │ ├── Parse.cs
│ │ ├── Properties/
│ │ │ ├── AssemblyInfo.cs
│ │ │ ├── Resources.Designer.cs
│ │ │ └── Resources.resx
│ │ ├── Resources/
│ │ │ ├── ExcelBuiltinFunctionList-v120.txt
│ │ │ ├── ExcelBuiltinFunctionList-v139.txt
│ │ │ ├── ExcelBuiltinFunctionList-v141.txt
│ │ │ ├── ExcelBuiltinFunctionList-v142.txt
│ │ │ ├── ExcelBuiltinFunctionList-v150.txt
│ │ │ ├── ExcelBuiltinFunctionList-v151.txt
│ │ │ ├── ExcelBuiltinFunctionList-v152.txt
│ │ │ ├── ExcelBuiltinFunctionList-v160.txt
│ │ │ ├── ExcelBuiltinFunctionList-v161.txt
│ │ │ ├── ExcelBuiltinFunctionList-v162.txt
│ │ │ ├── ExcelBuiltinFunctionList-v163.txt
│ │ │ ├── ExcelBuiltinFunctionList-v170.txt
│ │ │ ├── ExcelBuiltinFunctionList-v171.txt
│ │ │ ├── ExcelBuiltinFunctionList-v172.txt
│ │ │ ├── ExcelBuiltinFunctionList-v173.txt
│ │ │ ├── ExcelBuiltinFunctionList-v174.txt
│ │ │ └── ExcelBuiltinFunctionList-v175.txt
│ │ ├── Web.Debug.config
│ │ ├── Web.Release.config
│ │ ├── Web.config
│ │ ├── XLParser Web.csproj
│ │ ├── XLParserVersions/
│ │ │ ├── v100/
│ │ │ │ ├── ExcelFormulaGrammar.cs
│ │ │ │ └── ExcelFormulaParser.cs
│ │ │ ├── v114/
│ │ │ │ ├── ExcelFormulaGrammar.cs
│ │ │ │ └── ExcelFormulaParser.cs
│ │ │ ├── v120/
│ │ │ │ ├── ExcelFormulaGrammar.cs
│ │ │ │ ├── ExcelFormulaParser.cs
│ │ │ │ └── PrefixInfo.cs
│ │ │ ├── v139/
│ │ │ │ ├── ExcelFormulaGrammar.cs
│ │ │ │ ├── ExcelFormulaParser.cs
│ │ │ │ ├── ParserReference.cs
│ │ │ │ └── PrefixInfo.cs
│ │ │ ├── v141/
│ │ │ │ ├── ExcelFormulaGrammar.cs
│ │ │ │ ├── ExcelFormulaParser.cs
│ │ │ │ ├── ParserReference.cs
│ │ │ │ └── PrefixInfo.cs
│ │ │ ├── v142/
│ │ │ │ ├── ExcelFormulaGrammar.cs
│ │ │ │ ├── ExcelFormulaParser.cs
│ │ │ │ ├── ParserReference.cs
│ │ │ │ └── PrefixInfo.cs
│ │ │ ├── v150/
│ │ │ │ ├── ExcelFormulaGrammar.cs
│ │ │ │ ├── ExcelFormulaParser.cs
│ │ │ │ ├── ParserReference.cs
│ │ │ │ └── PrefixInfo.cs
│ │ │ ├── v151/
│ │ │ │ ├── ExcelFormulaGrammar.cs
│ │ │ │ ├── ExcelFormulaParser.cs
│ │ │ │ ├── ParserReference.cs
│ │ │ │ └── PrefixInfo.cs
│ │ │ ├── v152/
│ │ │ │ ├── ExcelFormulaGrammar.cs
│ │ │ │ ├── ExcelFormulaParser.cs
│ │ │ │ ├── ParserReference.cs
│ │ │ │ └── PrefixInfo.cs
│ │ │ ├── v160/
│ │ │ │ ├── ExcelFormulaGrammar.cs
│ │ │ │ ├── ExcelFormulaParser.cs
│ │ │ │ ├── ParserReference.cs
│ │ │ │ └── PrefixInfo.cs
│ │ │ ├── v161/
│ │ │ │ ├── ExcelFormulaGrammar.cs
│ │ │ │ ├── ExcelFormulaParser.cs
│ │ │ │ ├── ParserReference.cs
│ │ │ │ └── PrefixInfo.cs
│ │ │ ├── v162/
│ │ │ │ ├── ExcelFormulaGrammar.cs
│ │ │ │ ├── ExcelFormulaParser.cs
│ │ │ │ ├── ParserReference.cs
│ │ │ │ ├── PrefixInfo.cs
│ │ │ │ └── WordsTerminal.cs
│ │ │ ├── v163/
│ │ │ │ ├── ExcelFormulaGrammar.cs
│ │ │ │ ├── ExcelFormulaParser.cs
│ │ │ │ ├── ParserReference.cs
│ │ │ │ ├── PrefixInfo.cs
│ │ │ │ └── WordsTerminal.cs
│ │ │ ├── v170/
│ │ │ │ ├── ExcelFormulaGrammar.cs
│ │ │ │ ├── ExcelFormulaParser.cs
│ │ │ │ ├── ParserReference.cs
│ │ │ │ ├── PrefixInfo.cs
│ │ │ │ └── WordsTerminal.cs
│ │ │ ├── v171/
│ │ │ │ ├── ExcelFormulaGrammar.cs
│ │ │ │ ├── ExcelFormulaParser.cs
│ │ │ │ ├── ParserReference.cs
│ │ │ │ ├── PrefixInfo.cs
│ │ │ │ └── WordsTerminal.cs
│ │ │ ├── v172/
│ │ │ │ ├── ExcelFormulaGrammar.cs
│ │ │ │ ├── ExcelFormulaParser.cs
│ │ │ │ ├── ParserReference.cs
│ │ │ │ ├── PrefixInfo.cs
│ │ │ │ └── WordsTerminal.cs
│ │ │ ├── v173/
│ │ │ │ ├── ExcelFormulaGrammar.cs
│ │ │ │ ├── ExcelFormulaParser.cs
│ │ │ │ ├── ParserReference.cs
│ │ │ │ ├── PrefixInfo.cs
│ │ │ │ └── WordsTerminal.cs
│ │ │ ├── v174/
│ │ │ │ ├── ExcelFormulaGrammar.cs
│ │ │ │ ├── ExcelFormulaParser.cs
│ │ │ │ ├── ParserReference.cs
│ │ │ │ ├── PrefixInfo.cs
│ │ │ │ └── WordsTerminal.cs
│ │ │ └── v175/
│ │ │ ├── ExcelFormulaGrammar.cs
│ │ │ ├── ExcelFormulaParser.cs
│ │ │ ├── ParserReference.cs
│ │ │ ├── PrefixInfo.cs
│ │ │ └── WordsTerminal.cs
│ │ ├── d3vizsvg.js
│ │ ├── packages.config
│ │ └── xlparser-web.css
│ └── lib/
│ └── .gitignore
├── doc/
│ ├── README.md
│ └── api/
│ ├── Doxyfile
│ └── README.MD
├── lib/
│ └── IronyExplorer/
│ ├── .gitignore
│ ├── IronyExplorer.sln
│ ├── LICENSE
│ ├── README.md
│ ├── libs/
│ │ └── FastColoredTextBox/
│ │ ├── FastColoredTextBox.XML
│ │ └── about.txt
│ └── src/
│ └── IronyExplorer.GrammarExplorer/
│ ├── App.config
│ ├── GrammarItemList.cs
│ ├── GrammarLoader.cs
│ ├── Highlighter/
│ │ ├── AboutCodeHighlighter.txt
│ │ ├── EditorAdapter.cs
│ │ ├── EditorViewAdapter.cs
│ │ ├── FastColoredTextBoxHighlighter.cs
│ │ ├── RichTextBoxHighlighter.cs
│ │ └── WavyLineStyle.cs
│ ├── IronyExplorer.GrammarExplorer.csproj
│ ├── Program.cs
│ ├── Properties/
│ │ ├── AssemblyInfo.cs
│ │ ├── Resources.Designer.cs
│ │ ├── Resources.resx
│ │ ├── Settings.Designer.cs
│ │ └── Settings.settings
│ ├── fmGrammarExplorer.Designer.cs
│ ├── fmGrammarExplorer.cs
│ ├── fmGrammarExplorer.resx
│ ├── fmSelectGrammars.Designer.cs
│ ├── fmSelectGrammars.cs
│ ├── fmSelectGrammars.resx
│ ├── fmShowException.Designer.cs
│ ├── fmShowException.cs
│ ├── fmShowException.resx
│ └── packages.config
└── src/
├── XLParser/
│ ├── ExcelFormulaGrammar.cs
│ ├── ExcelFormulaParser.cs
│ ├── FormulaAnalyzer.cs
│ ├── ParserReference.cs
│ ├── PrefixInfo.cs
│ ├── Resources/
│ │ └── ExcelBuiltinFunctionList.txt
│ ├── WordsTerminal.cs
│ ├── XLParser.csproj
│ └── XLParser.nuspec
├── XLParser.Tests/
│ ├── DatasetTests.cs
│ ├── FormulaAnalysisTest.cs
│ ├── ParserTests.cs
│ ├── PrefixInfoTests.cs
│ ├── PrintTests.cs
│ ├── WordsTerminalTests.cs
│ ├── XLParser.Tests.csproj
│ └── data/
│ ├── enron/
│ │ ├── formulas.txt
│ │ └── knownfails.txt
│ ├── euses/
│ │ ├── formulas.txt
│ │ └── knownfails.txt
│ └── testformulas/
│ ├── structured_references.txt
│ └── user_contributed.txt
├── XLParser.sln
└── signing.snk
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitattributes
================================================
###############################################################################
# Set default behavior to automatically normalize line endings.
###############################################################################
* text=auto
*.cs eol=crlf
*.fs eol=crlf
*.rc eol=crlf
*.vcproj eol=crlf
*.sln eol=crlf
*.nuspec eol=crlf
*.config eol=crlf
*.md eol=lf
*.html eol=lf
*.css eol=lf
###############################################################################
# Set default behavior for command prompt diff.
#
# This is needed for earlier builds of msysgit that do not have it on by
# default for csharp files.
# Note: This is only used by command line
###############################################################################
#*.cs diff=csharp
###############################################################################
# Set the merge driver for project and solution files
#
# Merging from the command prompt will add diff markers to the files if there
# are conflicts (Merging from VS is not affected by the settings below, in VS
# the diff markers are never inserted). Diff markers may cause the following
# file extensions to fail to load in VS. An alternative would be to treat
# these files as binary and thus will always conflict and require user
# intervention with every merge. To do so, just uncomment the entries below
###############################################################################
#*.sln merge=binary
#*.csproj merge=binary
#*.vbproj merge=binary
#*.vcxproj merge=binary
#*.vcproj merge=binary
#*.dbproj merge=binary
#*.fsproj merge=binary
#*.lsproj merge=binary
#*.wixproj merge=binary
#*.modelproj merge=binary
#*.sqlproj merge=binary
#*.wwaproj merge=binary
###############################################################################
# behavior for image files
#
# image files are treated as binary by default.
###############################################################################
#*.jpg binary
#*.png binary
#*.gif binary
###############################################################################
# diff behavior for common document formats
#
# Convert binary document formats to text before diffing them. This feature
# is only available from the command line. Turn it on by uncommenting the
# entries below.
###############################################################################
#*.doc diff=astextplain
#*.DOC diff=astextplain
#*.docx diff=astextplain
#*.DOCX diff=astextplain
#*.dot diff=astextplain
#*.DOT diff=astextplain
#*.pdf diff=astextplain
#*.PDF diff=astextplain
#*.rtf diff=astextplain
#*.RTF diff=astextplain
================================================
FILE: .gitignore
================================================
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
# User-specific files
*.suo
*.user
*.userosscache
*.sln.docstates
# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
build/
bld/
[Bb]in/
[Oo]bj/
# Visual Studio 2015 cache/options directory
.vs/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
# NUNIT
*.VisualState.xml
TestResult.xml
# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c
*_i.c
*_p.c
*_i.h
*.ilk
*.meta
*.obj
*.pch
*.pdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc
# Chutzpah Test files
_Chutzpah*
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opensdf
*.sdf
*.cachefile
# Visual Studio profiler
*.psess
*.vsp
*.vspx
# TFS 2012 Local Workspace
$tf/
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user
# JustCode is a .NET coding add-in
.JustCode
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# NCrunch
_NCrunch_*
.*crunch*.local.xml
# MightyMoose
*.mm.*
AutoTest.Net/
# Web workbench (sass)
.sass-cache/
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# TODO: Comment the next line if you want to checkin your web deploy settings
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj
# NuGet Packages
*.nupkg
# The packages folder can be ignored because of Package Restore
**/packages/*
# except build/, which is used as an MSBuild target.
!**/packages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/packages/repositories.config
# Windows Azure Build Output
csx/
*.build.csdef
# Windows Store app package directory
AppPackages/
# Others
*.[Cc]ache
ClientBin/
[Ss]tyle[Cc]op.*
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.pfx
*.publishsettings
node_modules/
bower_components/
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
# SQL Server files
*.mdf
*.ldf
# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings
# Microsoft Fakes
FakesAssemblies/
# Node.js Tools for Visual Studio
.ntvs_analysis.dat
# Visual Studio 6 build log
*.plg
# Visual Studio 6 workspace options file
*.opt
# Private file
/nuget-sign.bat
================================================
FILE: CHANGELOG.md
================================================
# Changelog
## 1.3.0
* Build for .NET 4.5.2, 4.6.1 and standard 1.6, thanks [igitur](https://github.com/spreadsheetlab/XLParser/pull/61).
* Remove embedded Irony dependency in favor of [daxnet](https://github.com/daxnet)'s [updated fork](https://github.com/daxnet/irony).
## 1.2.4
Reference implementation of the Excel grammar published in the Journal of Systems and Software SCAM special issue paper "A Grammar for Spreadsheet Formulas Evaluated on Two Large Datasets" by E. Aivaloglou, D. Hoepelman and F. Hermans.
* Fixed several errors in which names/named ranges were allowed
* Question marks are now allowed
* Can now start with all unicode letters (e.g. `=äbc`)
* Corrected characters which are allowed if the name starts with a cell name or TRUE/FALSE (e.g. `=A1.MYNAME`)
* Allow for whitespace-only sheetnames (e.g. `=' '!A1`), although they will always be returned as `" "` by `PrefixInfo`
* Made some corrections in how multiple sheet references (`=Sheet1:Sheet3!A1`) are parsed
* Removed escape sequences in strings (e.g. `"Line1\nLine2"`) as these are not part of the Excel formula language
* Added support for structured references to a complete table (e.g. `=MyTable[]`)
## 1.2.3
* Adds support for special characters in structured references.
## 1.2.2
* Adds equality to `PrefixInfo` class
* Fixes parse error if external reference file path contains a space (`='C:\My Dir\[file.xlsx]Sheet'!A1`)
* `ExcelFormulaParser.SkipToRelevant` no longer skips references without a prefix.
This is a breaking change, but the old behavior is arguably a bug. An argument is added to restore old behavior, defaults to new behavior.
## 1.2.1
* Adds `GetReferenceNodes` method to `ExcelFormulaParser`
## 1.2
Fixes [#16](https://github.com/PerfectXL/XLParser/issues/16), [#17](https://github.com/PerfectXL/XLParser/issues/17), [#19](https://github.com/PerfectXL/XLParser/issues/3)
* Made it easier to modify the grammar in your own class by extending the grammar class
* Can now parse non-numeric filenames (`=[file]Sheet!A1`)
* Parsing of the `Prefix` nonterminal is changed and is now a little bit more uniform. `ExcelFormulaParser.GetPrefixInfo` gives prefix information in an easy to use format.
* Can now parse [Structured References](https://support.office.com/en-us/article/Using-structured-references-with-Excel-tables-f5ed2452-2337-4f71-bed3-c8ae6d2b276e). See [#16](https://github.com/PerfectXL/XLParser/issues/16) for caveats.
* You can now select the XLParser version to use in the web demo
## 1.1.4
* Added some missing methods that test for specific types of operators
* Added tests and fixes if necessary for methods that were missing tests
## 1.1.3
Reference implementation of the Excel grammar published in the paper "A Grammar for Spreadsheet Formulas Evaluated on Two Large Datasets" by E. Aivaloglou, D. Hoepelman and F. Hermans.
* Added all formulas from EUSES and Enron datasets and tests to check if they all parse
* Made parser thread safe
* Fixed [#9](https://github.com/PerfectXL/XLParser/issues/9): some tokens which would not accept all unicode characters (like UDF) now do so
* `'Sheet1:Sheet5'` will now correctly parse as `MULTIPLESHEETS` instead of a single sheet
## 1.1.2
Fixed [#1](https://github.com/PerfectXL/XLParser/issues/1), [#2](https://github.com/PerfectXL/XLParser/issues/2), [#4](https://github.com/PerfectXL/XLParser/issues/4).
* Added a web demo in app/XLParser.Web which generates parse tree images
* All UDFs now use the same nonterminal
* Non-Prefixed UDFs can now be part of a reference expression
* IF and CHOOSE functions can now be part of a reference expression
* Reference functions INDEX,OFFSET and INDIRECT can no longer have a prefix
* Operator precedence for reference operators (: , and intersection) is now correct
* Fixed printing of reference operators
## 1.0.0
First public release.
Corresponds to pre-print/reviewer version of the paper
================================================
FILE: LICENSE.md
================================================
Mozilla Public License, version 2.0
1. Definitions
1.1. "Contributor"
means each individual or legal entity that creates, contributes to the
creation of, or owns Covered Software.
1.2. "Contributor Version"
means the combination of the Contributions of others (if any) used by a
Contributor and that particular Contributor's Contribution.
1.3. "Contribution"
means Covered Software of a particular Contributor.
1.4. "Covered Software"
means Source Code Form to which the initial Contributor has attached the
notice in Exhibit A, the Executable Form of such Source Code Form, and
Modifications of such Source Code Form, in each case including portions
thereof.
1.5. "Incompatible With Secondary Licenses"
means
a. that the initial Contributor has attached the notice described in
Exhibit B to the Covered Software; or
b. that the Covered Software was made available under the terms of
version 1.1 or earlier of the License, but not also under the terms of
a Secondary License.
1.6. "Executable Form"
means any form of the work other than Source Code Form.
1.7. "Larger Work"
means a work that combines Covered Software with other material, in a
separate file or files, that is not Covered Software.
1.8. "License"
means this document.
1.9. "Licensable"
means having the right to grant, to the maximum extent possible, whether
at the time of the initial grant or subsequently, any and all of the
rights conveyed by this License.
1.10. "Modifications"
means any of the following:
a. any file in Source Code Form that results from an addition to,
deletion from, or modification of the contents of Covered Software; or
b. any new file in Source Code Form that contains any Covered Software.
1.11. "Patent Claims" of a Contributor
means any patent claim(s), including without limitation, method,
process, and apparatus claims, in any patent Licensable by such
Contributor that would be infringed, but for the grant of the License,
by the making, using, selling, offering for sale, having made, import,
or transfer of either its Contributions or its Contributor Version.
1.12. "Secondary License"
means either the GNU General Public License, Version 2.0, the GNU Lesser
General Public License, Version 2.1, the GNU Affero General Public
License, Version 3.0, or any later versions of those licenses.
1.13. "Source Code Form"
means the form of the work preferred for making modifications.
1.14. "You" (or "Your")
means an individual or a legal entity exercising rights under this
License. For legal entities, "You" includes any entity that controls, is
controlled by, or is under common control with You. For purposes of this
definition, "control" means (a) the power, direct or indirect, to cause
the direction or management of such entity, whether by contract or
otherwise, or (b) ownership of more than fifty percent (50%) of the
outstanding shares or beneficial ownership of such entity.
2. License Grants and Conditions
2.1. Grants
Each Contributor hereby grants You a world-wide, royalty-free,
non-exclusive license:
a. under intellectual property rights (other than patent or trademark)
Licensable by such Contributor to use, reproduce, make available,
modify, display, perform, distribute, and otherwise exploit its
Contributions, either on an unmodified basis, with Modifications, or
as part of a Larger Work; and
b. under Patent Claims of such Contributor to make, use, sell, offer for
sale, have made, import, and otherwise transfer either its
Contributions or its Contributor Version.
2.2. Effective Date
The licenses granted in Section 2.1 with respect to any Contribution
become effective for each Contribution on the date the Contributor first
distributes such Contribution.
2.3. Limitations on Grant Scope
The licenses granted in this Section 2 are the only rights granted under
this License. No additional rights or licenses will be implied from the
distribution or licensing of Covered Software under this License.
Notwithstanding Section 2.1(b) above, no patent license is granted by a
Contributor:
a. for any code that a Contributor has removed from Covered Software; or
b. for infringements caused by: (i) Your and any other third party's
modifications of Covered Software, or (ii) the combination of its
Contributions with other software (except as part of its Contributor
Version); or
c. under Patent Claims infringed by Covered Software in the absence of
its Contributions.
This License does not grant any rights in the trademarks, service marks,
or logos of any Contributor (except as may be necessary to comply with
the notice requirements in Section 3.4).
2.4. Subsequent Licenses
No Contributor makes additional grants as a result of Your choice to
distribute the Covered Software under a subsequent version of this
License (see Section 10.2) or under the terms of a Secondary License (if
permitted under the terms of Section 3.3).
2.5. Representation
Each Contributor represents that the Contributor believes its
Contributions are its original creation(s) or it has sufficient rights to
grant the rights to its Contributions conveyed by this License.
2.6. Fair Use
This License is not intended to limit any rights You have under
applicable copyright doctrines of fair use, fair dealing, or other
equivalents.
2.7. Conditions
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in
Section 2.1.
3. Responsibilities
3.1. Distribution of Source Form
All distribution of Covered Software in Source Code Form, including any
Modifications that You create or to which You contribute, must be under
the terms of this License. You must inform recipients that the Source
Code Form of the Covered Software is governed by the terms of this
License, and how they can obtain a copy of this License. You may not
attempt to alter or restrict the recipients' rights in the Source Code
Form.
3.2. Distribution of Executable Form
If You distribute Covered Software in Executable Form then:
a. such Covered Software must also be made available in Source Code Form,
as described in Section 3.1, and You must inform recipients of the
Executable Form how they can obtain a copy of such Source Code Form by
reasonable means in a timely manner, at a charge no more than the cost
of distribution to the recipient; and
b. You may distribute such Executable Form under the terms of this
License, or sublicense it under different terms, provided that the
license for the Executable Form does not attempt to limit or alter the
recipients' rights in the Source Code Form under this License.
3.3. Distribution of a Larger Work
You may create and distribute a Larger Work under terms of Your choice,
provided that You also comply with the requirements of this License for
the Covered Software. If the Larger Work is a combination of Covered
Software with a work governed by one or more Secondary Licenses, and the
Covered Software is not Incompatible With Secondary Licenses, this
License permits You to additionally distribute such Covered Software
under the terms of such Secondary License(s), so that the recipient of
the Larger Work may, at their option, further distribute the Covered
Software under the terms of either this License or such Secondary
License(s).
3.4. Notices
You may not remove or alter the substance of any license notices
(including copyright notices, patent notices, disclaimers of warranty, or
limitations of liability) contained within the Source Code Form of the
Covered Software, except that You may alter any license notices to the
extent required to remedy known factual inaccuracies.
3.5. Application of Additional Terms
You may choose to offer, and to charge a fee for, warranty, support,
indemnity or liability obligations to one or more recipients of Covered
Software. However, You may do so only on Your own behalf, and not on
behalf of any Contributor. You must make it absolutely clear that any
such warranty, support, indemnity, or liability obligation is offered by
You alone, and You hereby agree to indemnify every Contributor for any
liability incurred by such Contributor as a result of warranty, support,
indemnity or liability terms You offer. You may include additional
disclaimers of warranty and limitations of liability specific to any
jurisdiction.
4. Inability to Comply Due to Statute or Regulation
If it is impossible for You to comply with any of the terms of this License
with respect to some or all of the Covered Software due to statute,
judicial order, or regulation then You must: (a) comply with the terms of
this License to the maximum extent possible; and (b) describe the
limitations and the code they affect. Such description must be placed in a
text file included with all distributions of the Covered Software under
this License. Except to the extent prohibited by statute or regulation,
such description must be sufficiently detailed for a recipient of ordinary
skill to be able to understand it.
5. Termination
5.1. The rights granted under this License will terminate automatically if You
fail to comply with any of its terms. However, if You become compliant,
then the rights granted under this License from a particular Contributor
are reinstated (a) provisionally, unless and until such Contributor
explicitly and finally terminates Your grants, and (b) on an ongoing
basis, if such Contributor fails to notify You of the non-compliance by
some reasonable means prior to 60 days after You have come back into
compliance. Moreover, Your grants from a particular Contributor are
reinstated on an ongoing basis if such Contributor notifies You of the
non-compliance by some reasonable means, this is the first time You have
received notice of non-compliance with this License from such
Contributor, and You become compliant prior to 30 days after Your receipt
of the notice.
5.2. If You initiate litigation against any entity by asserting a patent
infringement claim (excluding declaratory judgment actions,
counter-claims, and cross-claims) alleging that a Contributor Version
directly or indirectly infringes any patent, then the rights granted to
You by any and all Contributors for the Covered Software under Section
2.1 of this License shall terminate.
5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user
license agreements (excluding distributors and resellers) which have been
validly granted by You or Your distributors under this License prior to
termination shall survive termination.
6. Disclaimer of Warranty
Covered Software is provided under this License on an "as is" basis,
without warranty of any kind, either expressed, implied, or statutory,
including, without limitation, warranties that the Covered Software is free
of defects, merchantable, fit for a particular purpose or non-infringing.
The entire risk as to the quality and performance of the Covered Software
is with You. Should any Covered Software prove defective in any respect,
You (not any Contributor) assume the cost of any necessary servicing,
repair, or correction. This disclaimer of warranty constitutes an essential
part of this License. No use of any Covered Software is authorized under
this License except under this disclaimer.
7. Limitation of Liability
Under no circumstances and under no legal theory, whether tort (including
negligence), contract, or otherwise, shall any Contributor, or anyone who
distributes Covered Software as permitted above, be liable to You for any
direct, indirect, special, incidental, or consequential damages of any
character including, without limitation, damages for lost profits, loss of
goodwill, work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses, even if such party shall have been
informed of the possibility of such damages. This limitation of liability
shall not apply to liability for death or personal injury resulting from
such party's negligence to the extent applicable law prohibits such
limitation. Some jurisdictions do not allow the exclusion or limitation of
incidental or consequential damages, so this exclusion and limitation may
not apply to You.
8. Litigation
Any litigation relating to this License may be brought only in the courts
of a jurisdiction where the defendant maintains its principal place of
business and such litigation shall be governed by laws of that
jurisdiction, without reference to its conflict-of-law provisions. Nothing
in this Section shall prevent a party's ability to bring cross-claims or
counter-claims.
9. Miscellaneous
This License represents the complete agreement concerning the subject
matter hereof. If any provision of this License is held to be
unenforceable, such provision shall be reformed only to the extent
necessary to make it enforceable. Any law or regulation which provides that
the language of a contract shall be construed against the drafter shall not
be used to construe this License against a Contributor.
10. Versions of the License
10.1. New Versions
Mozilla Foundation is the license steward. Except as provided in Section
10.3, no one other than the license steward has the right to modify or
publish new versions of this License. Each version will be given a
distinguishing version number.
10.2. Effect of New Versions
You may distribute the Covered Software under the terms of the version
of the License under which You originally received the Covered Software,
or under the terms of any subsequent version published by the license
steward.
10.3. Modified Versions
If you create software not governed by this License, and you want to
create a new license for such software, you may create and use a
modified version of this License if you rename the license and remove
any references to the name of the license steward (except to note that
such modified license differs from this License).
10.4. Distributing Source Code Form that is Incompatible With Secondary
Licenses If You choose to distribute Source Code Form that is
Incompatible With Secondary Licenses under the terms of this version of
the License, the notice described in Exhibit B of this License must be
attached.
Exhibit A - Source Code Form License Notice
This Source Code Form is subject to the
terms of the Mozilla Public License, v.
2.0. If a copy of the MPL was not
distributed with this file, You can
obtain one at
http://mozilla.org/MPL/2.0/.
If it is not possible or desirable to put the notice in a particular file,
then You may include the notice in a location (such as a LICENSE file in a
relevant directory) where a recipient would be likely to look for such a
notice.
You may add additional accurate notices of copyright ownership.
================================================
FILE: README.md
================================================
# XLParser
A C# Excel formula parser with the following properties:
* **High compatibility**
XLParser has been tested on over a million real-world formulas and has a 99.9% successful parse rate.
* **Compact parse trees**
XLParser was designed for and is used in research of Excel spreadsheets and refactoring, all of which are easier if parse trees are smaller
* **Compact grammar**
[Our grammar](https://github.com/PerfectXL/XLParser/blob/master/src/XLParser/ExcelFormulaGrammar.cs) contains fewer than 100 tokens and production rules, and is thus easy to implement in another language or parser generator.
### Quickstart
You can parse formulas and view parse trees in [the online demo](https://xlparser.perfectxl.nl/demo).
**or**
Use [the NuGet package](https://www.nuget.org/packages/XLParser/).
**or**
1. Download the [latest release](https://github.com/PerfectXL/XLParser/releases/latest)
2. Extract somewhere convenient
3. Build Irony GrammarExplorer, see the Debugging section below, and open it
4. Click on the `...` button at the top and select `Add Grammar`
5. Point to the `XLParser.dll` file in the folder you extracted and click ok
6. You can now parse formulas and see the trees in the `Test` tab
## Background
XLParser is the reference implementation of the Excel grammar published in the paper ["A Grammar for Spreadsheet Formulas Evaluated on Two Large Datasets" by Efthimia Aivaloglou, David Hoepelman and Felienne Hermans](https://fenia266781730.files.wordpress.com/2019/01/07335408.pdf).
XLParser can parse Excel formulas and is intended to facilitate the analysis of spreadsheet formulas, and for that purpose produces compact parse trees.
XLParser has a 99.99% success rate on the [Enron](http://www.felienne.com/archives/3634) and [EUSES](http://eusesconsortium.org/resources.php) datasets.
Note, however, that XLParser is not very restrictive and thus might parse formulas that Excel would reject as invalid; keep this in mind when parsing user input with XLParser.
XLParser is based on the C# [Irony parser framework](https://github.com/IronyProject/Irony).
## Using XLParser
### Building
Open the `XLParser.sln` file in `src/` in Visual Studio and press build. The dependencies are already included in compiled form in this repository.
### Using in your project
The easiest way to add the dependency to your project is with [NuGet](https://www.nuget.org/packages/XLParser/).
The `ExcelFormulaParser` class is your main entry point. You can parse a formula through `ExcelFormulaParser.Parse("yourformula")`.
`ExcelFormulaParser` has several useful methods that operate directly on the parse tree like `AllNodes` to traverse the whole tree or `GetFunction` to get the function name of a node that represents a function call. You can `Print` any node.
`FormulaAnalyzer` provides functionality for analyzing the parse tree. For example, `FormulaAnalyzer.ParserReferences()` returns all references (to cells, cell ranges, named ranges, horizontal or vertical ranges) that the formula contains.
### Debugging
Irony, the parser framework XLParser uses, includes a tool called the "grammar explorer". This is a great way to play around with the grammar and parse trees.
To use this tool, you first need to build it once by opening the IronyExplorer solution (`lib/IronyExplorer/IronyExplorer.sln`) and building it with release configuration. After that you can use the binary in `lib/IronyExplorer/src/IronyExplorer.GrammarExplorer/bin/Release/IronyExplorer.GrammarExplorer.exe`.
To load the XLParser grammar, first make sure you have built XLParser. Then open the GrammarExplorer and add the grammar (`...` button) from `src/XLParser/bin/Debug/XLParser.dll`.
In Visual Studio you can see the printed version of any node during debugging by adding `yournode.Print(),ac` in the watch window.
## Documentation
Browse the [API documentation](http://spreadsheetlab.github.io/XLParser/api/index.html)
See [the doc folder](doc) for further documentation.
## License
All files of XLParser are released under the [Mozilla Public License 2.0](LICENSE.md).
================================================
FILE: app/XLParser apps.sln
================================================
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14
VisualStudioVersion = 14.0.23107.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "XLParser Web", "XLParser.Web\XLParser Web.csproj", "{A24BD58E-5958-4990-8836-958C52A6AA02}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{A24BD58E-5958-4990-8836-958C52A6AA02}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A24BD58E-5958-4990-8836-958C52A6AA02}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A24BD58E-5958-4990-8836-958C52A6AA02}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A24BD58E-5958-4990-8836-958C52A6AA02}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal
================================================
FILE: app/XLParser.Web/.gitignore
================================================
deploy
================================================
FILE: app/XLParser.Web/Default.aspx
================================================
<%@ Page Language="C#" AutoEventWireup="true" CodeBehind="Default.aspx.cs" Inherits="XLParser.Web.Default" %>
XLParser web demo
Formula:
Version:
1.7.5
1.7.4
1.7.3
1.7.2
1.7.1
1.7.0
1.6.3
1.6.2
1.6.1
1.6.0
1.5.2
1.5.1
1.5.0
1.4.2
1.4.1
1.3.9
1.2.0
1.1.4
1.0.0
Parse
Found a bug?
Great!
Please report it as a Github issue!
If the bug is with a specific formula/excel file, please include that too.
Generally bugs in XLParser are one of the following, please include this type in the report:
The parser can't parse a formula that Excel accepts
The parser parses a formula that Excel doesn't accept.
The parser interprets a formula wrong, that is it produces a parse tree that doesn't correspond with how Excel behaves.
There is a "normal" bug in the code around the core parser.
================================================
FILE: app/XLParser.Web/Default.aspx.cs
================================================
using System;
using System.Web.UI;
namespace XLParser.Web
{
    /// <summary>
    /// Code-behind class for the web demo page (Default.aspx).
    /// </summary>
    public partial class Default : Page
    {
        /// <summary>
        /// Page load handler. It currently performs no work.
        /// </summary>
        /// <param name="sender">The object raising the event.</param>
        /// <param name="e">Event data (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
        }
    }
}
================================================
FILE: app/XLParser.Web/Default.aspx.designer.cs
================================================
//------------------------------------------------------------------------------
//
// This code was generated by a tool.
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
//
//------------------------------------------------------------------------------
namespace XLParser.Web {
public partial class Default {
}
}
================================================
FILE: app/XLParser.Web/Parse.cs
================================================
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Linq;
using System.Net;
using System.Text.RegularExpressions;
using System.Web;
using Irony.Parsing;
using Newtonsoft.Json;
using XLParser.Web.XLParserVersions.v100;
namespace XLParser.Web
{
public class Parse : IHttpHandler
{
// Context of the request being handled; assigned at the start of ProcessRequest
// and used by the helper methods to write the response.
private HttpContext _httpContext;
/// <summary>
/// Writes the given text to the response of the request currently being handled.
/// </summary>
/// <param name="s">The text to append to the response body.</param>
private void WriteResponse(string s) => _httpContext.Response.Write(s);
// Whether HTTP caching headers are suppressed: always in DEBUG builds, never otherwise.
#if DEBUG
private static readonly bool DisableCache = true;
#else
private static readonly bool DisableCache = false;
#endif
// Version key used when the request carries no "version" parameter.
// It has to match one of the cases handled by LoadXlParserVersion.
private const string LatestVersion = "175";
/// <summary>
/// Handles a single parse request.
/// Selects the XLParser version named by the "version" request parameter (defaulting to
/// <see cref="LatestVersion"/>), reads the "formula" parameter and writes the parse result
/// in the format implied by the extension of the requested path.
/// </summary>
/// <param name="context">The HTTP context of the request to handle.</param>
public void ProcessRequest(HttpContext context)
{
    _httpContext = context;

    // We want to actually give meaningful HTTP error codes and not have IIS interfere.
    // This is set before any validation so that the 400/404 responses below are covered as well.
    context.Response.TrySkipIisCustomErrors = true;

    if (!DisableCache && context.Request.Params["nocache"] != "true")
    {
        // Use one lifetime for both headers so Expires and max-age cannot disagree.
        // NOTE(review): the original sent Expires = now + 5 minutes but max-age = 5 seconds;
        // 5 minutes is assumed to be the intended value.
        var cacheLifetime = TimeSpan.FromMinutes(5);
        context.Response.Cache.SetCacheability(HttpCacheability.Public);
        context.Response.Cache.SetExpires(DateTime.Now.Add(cacheLifetime));
        context.Response.Cache.SetMaxAge(cacheLifetime);
    }

    context.Response.AddHeader("Access-Control-Allow-Origin", "*");

    // Dynamically load a library version
    var xlParserVersion = context.Request.Params["version"] ?? LatestVersion;
    if (!Regex.IsMatch(xlParserVersion, @"^[0-9]{3,4}[\-a-z0-9]*$"))
    {
        context.Response.StatusCode = (int) HttpStatusCode.BadRequest;
        context.Response.ContentType = "text/plain";
        WriteResponse("Invalid version");
        context.Response.End();
        return;
    }

    try
    {
        LoadXlParserVersion(xlParserVersion);
    }
    catch (ArgumentException)
    {
        // LoadXlParserVersion throws ArgumentException for a version it has no case for.
        context.Response.StatusCode = (int) HttpStatusCode.NotFound;
        context.Response.ContentType = "text/plain";
        WriteResponse("Version doesn't exist");
        context.Response.End();
        return;
    }

    // Check the file extension for the format. Path.GetExtension returns an empty string
    // (not null) when the path has no extension, so both cases fall back to JSON.
    var extension = Path.GetExtension(context.Request.FilePath);
    var format = string.IsNullOrEmpty(extension) ? "json" : extension.TrimStart('.');

    // Read the formula without request validation.
    var formula = context.Request.Unvalidated["formula"];

    switch (format)
    {
        case "json":
            ParseToJson(formula);
            break;
        default:
            // 415 - Unsupported Media Type
            context.Response.StatusCode = 415;
            context.Response.ContentType = "text/plain";
            WriteResponse($"Format '{format}' not supported.");
            context.Response.End();
            break;
    }
}
/// <summary>
/// Parses <paramref name="formula"/> with the currently selected parser version and writes
/// the outcome to the response as JSON.
/// Responds with status 400 when no formula was supplied and with status 422, including the
/// first parser message, when the parser throws an <see cref="ArgumentException"/>.
/// </summary>
/// <param name="formula">The formula text to parse; may be null.</param>
private void ParseToJson(string formula)
{
    var response = _httpContext.Response;
    response.ContentType = "application/json";

    if (formula == null)
    {
        response.StatusCode = 400;
        var missingFormula = new {error = "no formula supplied"};
        WriteResponse(JsonConvert.SerializeObject(missingFormula));
        response.End();
        return;
    }

    ParseTreeNode root;
    try
    {
        root = _parse(formula);
    }
    catch (ArgumentException)
    {
        // Parse error, return 422 - Unprocessable Entity
        response.StatusCode = 422;

        // Parse again with a fresh grammar instance to get hold of the parser messages.
        var grammar = (Grammar) Activator.CreateInstance(_grammar);
        ParseTree failedTree = new Parser(grammar).Parse(formula);
        var firstMessage = failedTree.ParserMessages
            .Select(m => new
            {
                level = m.Level.ToString(),
                // Reported positions are shifted by one relative to the parser's location values.
                line = m.Location.Line + 1,
                column = m.Location.Column + 1,
                msg = m.Message
            })
            .FirstOrDefault();

        var failure = new
        {
            error = "Parse error",
            formula,
            message = firstMessage
        };
        WriteResponse(JsonConvert.SerializeObject(failure));
        response.End();
        return;
    }

    // Null members are left out of the output.
    var settings = new JsonSerializerSettings
    {
        NullValueHandling = NullValueHandling.Ignore
    };
    WriteResponse(JsonConvert.SerializeObject(ToJson(root), Formatting.Indented, settings));
    response.End();
}
/// <summary>
/// Converts a parse tree node and, recursively, its children into the serializable
/// <see cref="JsonNode"/> shape.
/// </summary>
/// <param name="node">The parse tree node to convert.</param>
/// <returns>A node named after <paramref name="node"/>; children stay null when the node has none.</returns>
private JsonNode ToJson(ParseTreeNode node)
{
    var result = new JsonNode
    {
        name = NodeText(node)
    };

    if (node.ChildNodes.Count != 0)
    {
        // Deferred projection: the children are converted when the sequence is enumerated.
        result.children = node.ChildNodes.Select(ToJson);
    }

    return result;
}
/// <summary>
/// Serializable shape of one parse tree node: its display name and its child nodes.
/// Member names are lower-case because they are serialized as-is.
/// </summary>
[SuppressMessage("ReSharper", "InconsistentNaming")]
private class JsonNode
{
    // Typed as the generic sequence produced by ChildNodes.Select(ToJson); the non-generic
    // IEnumerable is not in scope with this file's using directives. Null when there are no children.
    public IEnumerable<JsonNode> children;
    public string name;
}
/// <summary>
/// Produces the display name of a parse tree node.
/// </summary>
/// <param name="node">The node to describe.</param>
/// <returns>
/// The term name for non-terminals; the printed node for terminals whose name is at most two
/// characters long; otherwise the term name followed by the printed node in brackets and quotes.
/// </returns>
private string NodeText(ParseTreeNode node)
{
    var term = node.Term;
    if (term is NonTerminal)
    {
        return term.Name;
    }

    if (term.Name.Length <= 2)
    {
        // These are simple terminals like + or =, just print them
        return _print(node);
    }

    // For other terminals, print the terminal name + contents
    return $"{term.Name}[\"{_print(node)}\"]";
}
// Entry points of the parser version selected by LoadXlParserVersion.
// The delegate type arguments are spelled out: a bare Func is not a valid type.

// Parses a formula string and returns the root node of its parse tree.
private Func<string, ParseTreeNode> _parse;
// Prints a parse tree node as text.
private Func<ParseTreeNode, string> _print;
// Grammar type of the selected version; instantiated to collect parser messages after a failed parse.
private Type _grammar;
// Yes, this hand-written dispatch is ugly. Better solutions were tried (dynamically loading through
// reflection, extern alias and separate AppDomains) but failed, mainly because .NET is very picky
// about loading multiple versions of libraries with the same name.
/// <summary>
/// Points the parse/print delegates and the grammar type at the requested XLParser version.
/// </summary>
/// <param name="version">Version key such as "175".</param>
/// <exception cref="ArgumentException">Thrown when there is no case for <paramref name="version"/>.</exception>
private void LoadXlParserVersion(string version)
{
    switch (version)
    {
        case "100":
            UseVersion(ExcelFormulaParser.Parse, ExcelFormulaParser.Print, typeof(ExcelFormulaGrammar));
            break;
        case "114":
            UseVersion(XLParserVersions.v114.ExcelFormulaParser.Parse, XLParserVersions.v114.ExcelFormulaParser.Print, typeof(XLParserVersions.v114.ExcelFormulaGrammar));
            break;
        case "120":
            UseVersion(XLParserVersions.v120.ExcelFormulaParser.Parse, XLParserVersions.v120.ExcelFormulaParser.Print, typeof(XLParserVersions.v120.ExcelFormulaGrammar));
            break;
        case "139":
            UseVersion(XLParserVersions.v139.ExcelFormulaParser.Parse, XLParserVersions.v139.ExcelFormulaParser.Print, typeof(XLParserVersions.v139.ExcelFormulaGrammar));
            break;
        case "141":
            UseVersion(XLParserVersions.v141.ExcelFormulaParser.Parse, XLParserVersions.v141.ExcelFormulaParser.Print, typeof(XLParserVersions.v141.ExcelFormulaGrammar));
            break;
        case "142":
            UseVersion(XLParserVersions.v142.ExcelFormulaParser.Parse, XLParserVersions.v142.ExcelFormulaParser.Print, typeof(XLParserVersions.v142.ExcelFormulaGrammar));
            break;
        case "150":
            UseVersion(XLParserVersions.v150.ExcelFormulaParser.Parse, XLParserVersions.v150.ExcelFormulaParser.Print, typeof(XLParserVersions.v150.ExcelFormulaGrammar));
            break;
        case "151":
            UseVersion(XLParserVersions.v151.ExcelFormulaParser.Parse, XLParserVersions.v151.ExcelFormulaParser.Print, typeof(XLParserVersions.v151.ExcelFormulaGrammar));
            break;
        case "152":
            UseVersion(XLParserVersions.v152.ExcelFormulaParser.Parse, XLParserVersions.v152.ExcelFormulaParser.Print, typeof(XLParserVersions.v152.ExcelFormulaGrammar));
            break;
        case "160":
            UseVersion(XLParserVersions.v160.ExcelFormulaParser.Parse, XLParserVersions.v160.ExcelFormulaParser.Print, typeof(XLParserVersions.v160.ExcelFormulaGrammar));
            break;
        case "161":
            UseVersion(XLParserVersions.v161.ExcelFormulaParser.Parse, XLParserVersions.v161.ExcelFormulaParser.Print, typeof(XLParserVersions.v161.ExcelFormulaGrammar));
            break;
        case "162":
            UseVersion(XLParserVersions.v162.ExcelFormulaParser.Parse, XLParserVersions.v162.ExcelFormulaParser.Print, typeof(XLParserVersions.v162.ExcelFormulaGrammar));
            break;
        case "163":
            UseVersion(XLParserVersions.v163.ExcelFormulaParser.Parse, XLParserVersions.v163.ExcelFormulaParser.Print, typeof(XLParserVersions.v163.ExcelFormulaGrammar));
            break;
        case "170":
            UseVersion(XLParserVersions.v170.ExcelFormulaParser.Parse, XLParserVersions.v170.ExcelFormulaParser.Print, typeof(XLParserVersions.v170.ExcelFormulaGrammar));
            break;
        case "171":
            UseVersion(XLParserVersions.v171.ExcelFormulaParser.Parse, XLParserVersions.v171.ExcelFormulaParser.Print, typeof(XLParserVersions.v171.ExcelFormulaGrammar));
            break;
        case "172":
            UseVersion(XLParserVersions.v172.ExcelFormulaParser.Parse, XLParserVersions.v172.ExcelFormulaParser.Print, typeof(XLParserVersions.v172.ExcelFormulaGrammar));
            break;
        case "173":
            UseVersion(XLParserVersions.v173.ExcelFormulaParser.Parse, XLParserVersions.v173.ExcelFormulaParser.Print, typeof(XLParserVersions.v173.ExcelFormulaGrammar));
            break;
        case "174":
            UseVersion(XLParserVersions.v174.ExcelFormulaParser.Parse, XLParserVersions.v174.ExcelFormulaParser.Print, typeof(XLParserVersions.v174.ExcelFormulaGrammar));
            break;
        case "175":
            UseVersion(XLParserVersions.v175.ExcelFormulaParser.Parse, XLParserVersions.v175.ExcelFormulaParser.Print, typeof(XLParserVersions.v175.ExcelFormulaGrammar));
            break;
        default:
            throw new ArgumentException($"Version {version} doesn't exist");
    }
}

// Stores the entry points of one parser version in the handler's fields.
private void UseVersion(Func<string, ParseTreeNode> parse, Func<ParseTreeNode, string> print, Type grammar)
{
    _parse = parse;
    _print = print;
    _grammar = grammar;
}
/// <summary>
/// Reports to ASP.NET that this handler instance may be used for further requests.
/// </summary>
// NOTE(review): the handler keeps per-request state in instance fields (_httpContext, _parse,
// _print, _grammar). Confirm that an instance is never shared by concurrent requests;
// if it can be, this should return false.
public bool IsReusable
{
    get { return true; }
}
}
}
================================================
FILE: app/XLParser.Web/Properties/AssemblyInfo.cs
================================================
using System.Reflection;
using System.Runtime.InteropServices;
[assembly: ComVisible(false)]
[assembly: Guid("ffb6e35e-f708-485b-996e-f1f0a54fffb7")]
[assembly: AssemblyVersion("1.7.5.0")]
================================================
FILE: app/XLParser.Web/Properties/Resources.Designer.cs
================================================
//------------------------------------------------------------------------------
//
// This code was generated by a tool.
// Runtime Version:4.0.30319.42000
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
//
//------------------------------------------------------------------------------
namespace XLParser.Web.Properties {
using System;
///
/// A strongly-typed resource class, for looking up localized strings, etc.
///
// This class was auto-generated by the StronglyTypedResourceBuilder
// class via a tool like ResGen or Visual Studio.
// To add or remove a member, edit your .ResX file then rerun ResGen
// with the /str option, or rebuild your VS project.
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "17.0.0.0")]
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
internal class Resources {
private static global::System.Resources.ResourceManager resourceMan;
private static global::System.Globalization.CultureInfo resourceCulture;
[global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")]
internal Resources() {
}
///
/// Returns the cached ResourceManager instance used by this class.
///
[global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
internal static global::System.Resources.ResourceManager ResourceManager {
get {
if (object.ReferenceEquals(resourceMan, null)) {
global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("XLParser.Web.Properties.Resources", typeof(Resources).Assembly);
resourceMan = temp;
}
return resourceMan;
}
}
///
/// Overrides the current thread's CurrentUICulture property for all
/// resource lookups using this strongly typed resource class.
///
[global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
internal static global::System.Globalization.CultureInfo Culture {
get {
return resourceCulture;
}
set {
resourceCulture = value;
}
}
///
/// Looks up a localized string similar to ABS
///ACCRINT
///ACCRINTM
///ACOS
///ACOSH
///ADDRESS
///AMORDEGRC
///AMORLINC
///AND
///AREAS
///ASC
///ASIN
///ASINH
///ATAN
///ATAN2
///ATANH
///AVEDEV
///AVERAGE
///AVERAGEA
///AVERAGEIF
///AVERAGEIFS
///BAHTTEXT
///BESSELI
///BESSELJ
///BESSELK
///BESSELY
///BETADIST
///BETAINV
///BIN2DEC
///BIN2HEX
///BIN2OCT
///BINOMDIST
///CALL
///CEILING
///CELL
///CHAR
///CHIDIST
///CHIINV
///CHITEST
///CLEAN
///CODE
///COLUMN
///COLUMNS
///COMBIN
///COMPLEX
///CONCATENATE
///CONFIDENCE
///CONVERT
///CORREL
///COS
///COSH
///COUNT
///COUNTA
///COUNTBLANK
///COUNTIF
///COUNTIFS
///COUPDAYBS
///COUPDAYS
///COUPDAYSNC
///COUPNCD
///C [rest of string was truncated]";.
///
internal static string ExcelBuiltinFunctionList_v120 {
get {
return ResourceManager.GetString("ExcelBuiltinFunctionList-v120", resourceCulture);
}
}
///
/// Looks up a localized string similar to ABS
///ACCRINT
///ACCRINTM
///ACOS
///ACOSH
///ACOT
///ACOTH
///ADDRESS
///AGGREGATE
///AMORDEGRC
///AMORLINC
///AND
///ARABIC
///AREAS
///ASC
///ASIN
///ASINH
///ATAN
///ATAN2
///ATANH
///AVEDEV
///AVERAGE
///AVERAGEA
///AVERAGEIF
///AVERAGEIFS
///BAHTTEXT
///BASE
///BESSELI
///BESSELJ
///BESSELK
///BESSELY
///BETA.DIST
///BETA.INV
///BETADIST
///BETAINV
///BIN2DEC
///BIN2HEX
///BIN2OCT
///BINOM.DIST
///BINOM.DIST.RANGE
///BINOM.INV
///BINOMDIST
///BITAND
///BITLSHIFT
///BITOR
///BITRSHIFT
///BITXOR
///CALL
///CEILING
///CEILING.MATH
///CEILING.PRECISE
///CELL
///CHAR
///CHIDIST
///CHIINV
///CHISQ.DIST
///CHISQ.DIST [rest of string was truncated]";.
///
internal static string ExcelBuiltinFunctionList_v139 {
get {
return ResourceManager.GetString("ExcelBuiltinFunctionList-v139", resourceCulture);
}
}
///
/// Looks up a localized string similar to ABS
///ACCRINT
///ACCRINTM
///ACOS
///ACOSH
///ACOT
///ACOTH
///ADDRESS
///AGGREGATE
///AMORDEGRC
///AMORLINC
///AND
///ARABIC
///AREAS
///ASC
///ASIN
///ASINH
///ATAN
///ATAN2
///ATANH
///AVEDEV
///AVERAGE
///AVERAGEA
///AVERAGEIF
///AVERAGEIFS
///BAHTTEXT
///BASE
///BESSELI
///BESSELJ
///BESSELK
///BESSELY
///BETA.DIST
///BETA.INV
///BETADIST
///BETAINV
///BIN2DEC
///BIN2HEX
///BIN2OCT
///BINOM.DIST
///BINOM.DIST.RANGE
///BINOM.INV
///BINOMDIST
///BITAND
///BITLSHIFT
///BITOR
///BITRSHIFT
///BITXOR
///CALL
///CEILING
///CEILING.MATH
///CEILING.PRECISE
///CELL
///CHAR
///CHIDIST
///CHIINV
///CHISQ.DIST
///CHISQ.DIST [rest of string was truncated]";.
///
internal static string ExcelBuiltinFunctionList_v141 {
get {
return ResourceManager.GetString("ExcelBuiltinFunctionList_v141", resourceCulture);
}
}
///
/// Looks up a localized string similar to ABS
///ACCRINT
///ACCRINTM
///ACOS
///ACOSH
///ACOT
///ACOTH
///ADDRESS
///AGGREGATE
///AMORDEGRC
///AMORLINC
///AND
///ARABIC
///AREAS
///ASC
///ASIN
///ASINH
///ATAN
///ATAN2
///ATANH
///AVEDEV
///AVERAGE
///AVERAGEA
///AVERAGEIF
///AVERAGEIFS
///BAHTTEXT
///BASE
///BESSELI
///BESSELJ
///BESSELK
///BESSELY
///BETA.DIST
///BETA.INV
///BETADIST
///BETAINV
///BIN2DEC
///BIN2HEX
///BIN2OCT
///BINOM.DIST
///BINOM.DIST.RANGE
///BINOM.INV
///BINOMDIST
///BITAND
///BITLSHIFT
///BITOR
///BITRSHIFT
///BITXOR
///CALL
///CEILING
///CEILING.MATH
///CEILING.PRECISE
///CELL
///CHAR
///CHIDIST
///CHIINV
///CHISQ.DIST
///CHISQ.DIST [rest of string was truncated]";.
///
internal static string ExcelBuiltinFunctionList_v142 {
get {
return ResourceManager.GetString("ExcelBuiltinFunctionList_v142", resourceCulture);
}
}
///
/// Looks up a localized string similar to ABS
///ACCRINT
///ACCRINTM
///ACOS
///ACOSH
///ACOT
///ACOTH
///ADDRESS
///AGGREGATE
///AMORDEGRC
///AMORLINC
///AND
///ARABIC
///AREAS
///ARRAYTOTEXT
///ASC
///ASIN
///ASINH
///ATAN
///ATAN2
///ATANH
///AVEDEV
///AVERAGE
///AVERAGEA
///AVERAGEIF
///AVERAGEIFS
///BAHTTEXT
///BASE
///BESSELI
///BESSELJ
///BESSELK
///BESSELY
///BETA.DIST
///BETA.INV
///BETADIST
///BETAINV
///BIN2DEC
///BIN2HEX
///BIN2OCT
///BINOM.DIST
///BINOM.DIST.RANGE
///BINOM.INV
///BINOMDIST
///BITAND
///BITLSHIFT
///BITOR
///BITRSHIFT
///BITXOR
///CALL
///CEILING
///CEILING.MATH
///CEILING.PRECISE
///CELL
///CHAR
///CHIDIST
///CHIINV
///CHISQ.DIS [rest of string was truncated]";.
///
internal static string ExcelBuiltinFunctionList_v150 {
get {
return ResourceManager.GetString("ExcelBuiltinFunctionList_v150", resourceCulture);
}
}
///
/// Looks up a localized string similar to ABS
///ACCRINT
///ACCRINTM
///ACOS
///ACOSH
///ACOT
///ACOTH
///ADDRESS
///AGGREGATE
///AMORDEGRC
///AMORLINC
///AND
///ARABIC
///AREAS
///ARRAYTOTEXT
///ASC
///ASIN
///ASINH
///ATAN
///ATAN2
///ATANH
///AVEDEV
///AVERAGE
///AVERAGEA
///AVERAGEIF
///AVERAGEIFS
///BAHTTEXT
///BASE
///BESSELI
///BESSELJ
///BESSELK
///BESSELY
///BETA.DIST
///BETA.INV
///BETADIST
///BETAINV
///BIN2DEC
///BIN2HEX
///BIN2OCT
///BINOM.DIST
///BINOM.DIST.RANGE
///BINOM.INV
///BINOMDIST
///BITAND
///BITLSHIFT
///BITOR
///BITRSHIFT
///BITXOR
///CALL
///CEILING
///CEILING.MATH
///CEILING.PRECISE
///CELL
///CHAR
///CHIDIST
///CHIINV
///CHISQ.DIS [rest of string was truncated]";.
///
internal static string ExcelBuiltinFunctionList_v151 {
get {
return ResourceManager.GetString("ExcelBuiltinFunctionList_v151", resourceCulture);
}
}
///
/// Looks up a localized string similar to ABS
///ACCRINT
///ACCRINTM
///ACOS
///ACOSH
///ACOT
///ACOTH
///ADDRESS
///AGGREGATE
///AMORDEGRC
///AMORLINC
///AND
///ARABIC
///AREAS
///ARRAYTOTEXT
///ASC
///ASIN
///ASINH
///ATAN
///ATAN2
///ATANH
///AVEDEV
///AVERAGE
///AVERAGEA
///AVERAGEIF
///AVERAGEIFS
///BAHTTEXT
///BASE
///BESSELI
///BESSELJ
///BESSELK
///BESSELY
///BETA.DIST
///BETA.INV
///BETADIST
///BETAINV
///BIN2DEC
///BIN2HEX
///BIN2OCT
///BINOM.DIST
///BINOM.DIST.RANGE
///BINOM.INV
///BINOMDIST
///BITAND
///BITLSHIFT
///BITOR
///BITRSHIFT
///BITXOR
///CALL
///CEILING
///CEILING.MATH
///CEILING.PRECISE
///CELL
///CHAR
///CHIDIST
///CHIINV
///CHISQ.DIS [rest of string was truncated]";.
///
internal static string ExcelBuiltinFunctionList_v152 {
get {
return ResourceManager.GetString("ExcelBuiltinFunctionList_v152", resourceCulture);
}
}
///
/// Looks up a localized string similar to ABS
///ACCRINT
///ACCRINTM
///ACOS
///ACOSH
///ACOT
///ACOTH
///ADDRESS
///AGGREGATE
///AMORDEGRC
///AMORLINC
///AND
///ARABIC
///AREAS
///ARRAYTOTEXT
///ASC
///ASIN
///ASINH
///ATAN
///ATAN2
///ATANH
///AVEDEV
///AVERAGE
///AVERAGEA
///AVERAGEIF
///AVERAGEIFS
///BAHTTEXT
///BASE
///BESSELI
///BESSELJ
///BESSELK
///BESSELY
///BETA.DIST
///BETA.INV
///BETADIST
///BETAINV
///BIN2DEC
///BIN2HEX
///BIN2OCT
///BINOM.DIST
///BINOM.DIST.RANGE
///BINOM.INV
///BINOMDIST
///BITAND
///BITLSHIFT
///BITOR
///BITRSHIFT
///BITXOR
///CALL
///CEILING
///CEILING.MATH
///CEILING.PRECISE
///CELL
///CHAR
///CHIDIST
///CHIINV
///CHISQ.DIS [rest of string was truncated]";.
///
internal static string ExcelBuiltinFunctionList_v160 {
get {
return ResourceManager.GetString("ExcelBuiltinFunctionList_v160", resourceCulture);
}
}
///
/// Looks up a localized string similar to ABS
///ACCRINT
///ACCRINTM
///ACOS
///ACOSH
///ACOT
///ACOTH
///ADDRESS
///AGGREGATE
///AMORDEGRC
///AMORLINC
///AND
///ARABIC
///AREAS
///ARRAYTOTEXT
///ASC
///ASIN
///ASINH
///ATAN
///ATAN2
///ATANH
///AVEDEV
///AVERAGE
///AVERAGEA
///AVERAGEIF
///AVERAGEIFS
///BAHTTEXT
///BASE
///BESSELI
///BESSELJ
///BESSELK
///BESSELY
///BETA.DIST
///BETA.INV
///BETADIST
///BETAINV
///BIN2DEC
///BIN2HEX
///BIN2OCT
///BINOM.DIST
///BINOM.DIST.RANGE
///BINOM.INV
///BINOMDIST
///BITAND
///BITLSHIFT
///BITOR
///BITRSHIFT
///BITXOR
///CALL
///CEILING
///CEILING.MATH
///CEILING.PRECISE
///CELL
///CHAR
///CHIDIST
///CHIINV
///CHISQ.DIS [rest of string was truncated]";.
///
internal static string ExcelBuiltinFunctionList_v161 {
get {
return ResourceManager.GetString("ExcelBuiltinFunctionList_v161", resourceCulture);
}
}
///
/// Looks up a localized string similar to ABS
///ACCRINT
///ACCRINTM
///ACOS
///ACOSH
///ACOT
///ACOTH
///ADDRESS
///AGGREGATE
///AMORDEGRC
///AMORLINC
///AND
///ARABIC
///AREAS
///ARRAYTOTEXT
///ASC
///ASIN
///ASINH
///ATAN
///ATAN2
///ATANH
///AVEDEV
///AVERAGE
///AVERAGEA
///AVERAGEIF
///AVERAGEIFS
///BAHTTEXT
///BASE
///BESSELI
///BESSELJ
///BESSELK
///BESSELY
///BETA.DIST
///BETA.INV
///BETADIST
///BETAINV
///BIN2DEC
///BIN2HEX
///BIN2OCT
///BINOM.DIST
///BINOM.DIST.RANGE
///BINOM.INV
///BINOMDIST
///BITAND
///BITLSHIFT
///BITOR
///BITRSHIFT
///BITXOR
///CALL
///CEILING
///CEILING.MATH
///CEILING.PRECISE
///CELL
///CHAR
///CHIDIST
///CHIINV
///CHISQ.DIS [rest of string was truncated]";.
///
internal static string ExcelBuiltinFunctionList_v162 {
get {
return ResourceManager.GetString("ExcelBuiltinFunctionList_v162", resourceCulture);
}
}
///
/// Looks up a localized string similar to ABS
///ACCRINT
///ACCRINTM
///ACOS
///ACOSH
///ACOT
///ACOTH
///ADDRESS
///AGGREGATE
///AMORDEGRC
///AMORLINC
///AND
///ARABIC
///AREAS
///ARRAYTOTEXT
///ASC
///ASIN
///ASINH
///ATAN
///ATAN2
///ATANH
///AVEDEV
///AVERAGE
///AVERAGEA
///AVERAGEIF
///AVERAGEIFS
///BAHTTEXT
///BASE
///BESSELI
///BESSELJ
///BESSELK
///BESSELY
///BETA.DIST
///BETA.INV
///BETADIST
///BETAINV
///BIN2DEC
///BIN2HEX
///BIN2OCT
///BINOM.DIST
///BINOM.DIST.RANGE
///BINOM.INV
///BINOMDIST
///BITAND
///BITLSHIFT
///BITOR
///BITRSHIFT
///BITXOR
///CALL
///CEILING
///CEILING.MATH
///CEILING.PRECISE
///CELL
///CHAR
///CHIDIST
///CHIINV
///CHISQ.DIS [rest of string was truncated]";.
///
internal static string ExcelBuiltinFunctionList_v163 {
get {
return ResourceManager.GetString("ExcelBuiltinFunctionList_v163", resourceCulture);
}
}
///
/// Looks up a localized string similar to ABS
///ACCRINT
///ACCRINTM
///ACOS
///ACOSH
///ACOT
///ACOTH
///ADDRESS
///AGGREGATE
///AMORDEGRC
///AMORLINC
///AND
///ARABIC
///AREAS
///ARRAYTOTEXT
///ASC
///ASIN
///ASINH
///ATAN
///ATAN2
///ATANH
///AVEDEV
///AVERAGE
///AVERAGEA
///AVERAGEIF
///AVERAGEIFS
///BAHTTEXT
///BASE
///BESSELI
///BESSELJ
///BESSELK
///BESSELY
///BETA.DIST
///BETA.INV
///BETADIST
///BETAINV
///BIN2DEC
///BIN2HEX
///BIN2OCT
///BINOM.DIST
///BINOM.DIST.RANGE
///BINOM.INV
///BINOMDIST
///BITAND
///BITLSHIFT
///BITOR
///BITRSHIFT
///BITXOR
///CALL
///CEILING
///CEILING.MATH
///CEILING.PRECISE
///CELL
///CHAR
///CHIDIST
///CHIINV
///CHISQ.DIS [rest of string was truncated]";.
///
internal static string ExcelBuiltinFunctionList_v170 {
get {
return ResourceManager.GetString("ExcelBuiltinFunctionList_v170", resourceCulture);
}
}
///
/// Looks up a localized string similar to ABS
///ACCRINT
///ACCRINTM
///ACOS
///ACOSH
///ACOT
///ACOTH
///ADDRESS
///AGGREGATE
///AMORDEGRC
///AMORLINC
///AND
///ARABIC
///AREAS
///ARRAYTOTEXT
///ASC
///ASIN
///ASINH
///ATAN
///ATAN2
///ATANH
///AVEDEV
///AVERAGE
///AVERAGEA
///AVERAGEIF
///AVERAGEIFS
///BAHTTEXT
///BASE
///BESSELI
///BESSELJ
///BESSELK
///BESSELY
///BETA.DIST
///BETA.INV
///BETADIST
///BETAINV
///BIN2DEC
///BIN2HEX
///BIN2OCT
///BINOM.DIST
///BINOM.DIST.RANGE
///BINOM.INV
///BINOMDIST
///BITAND
///BITLSHIFT
///BITOR
///BITRSHIFT
///BITXOR
///CALL
///CEILING
///CEILING.MATH
///CEILING.PRECISE
///CELL
///CHAR
///CHIDIST
///CHIINV
///CHISQ.DIS [rest of string was truncated]";.
///
internal static string ExcelBuiltinFunctionList_v171 {
get {
return ResourceManager.GetString("ExcelBuiltinFunctionList_v171", resourceCulture);
}
}
///
/// Looks up a localized string similar to ABS
///ACCRINT
///ACCRINTM
///ACOS
///ACOSH
///ACOT
///ACOTH
///ADDRESS
///AGGREGATE
///AMORDEGRC
///AMORLINC
///AND
///ARABIC
///AREAS
///ARRAYTOTEXT
///ASC
///ASIN
///ASINH
///ATAN
///ATAN2
///ATANH
///AVEDEV
///AVERAGE
///AVERAGEA
///AVERAGEIF
///AVERAGEIFS
///BAHTTEXT
///BASE
///BESSELI
///BESSELJ
///BESSELK
///BESSELY
///BETA.DIST
///BETA.INV
///BETADIST
///BETAINV
///BIN2DEC
///BIN2HEX
///BIN2OCT
///BINOM.DIST
///BINOM.DIST.RANGE
///BINOM.INV
///BINOMDIST
///BITAND
///BITLSHIFT
///BITOR
///BITRSHIFT
///BITXOR
///CALL
///CEILING
///CEILING.MATH
///CEILING.PRECISE
///CELL
///CHAR
///CHIDIST
///CHIINV
///CHISQ.DIS [rest of string was truncated]";.
///
internal static string ExcelBuiltinFunctionList_v172 {
get {
return ResourceManager.GetString("ExcelBuiltinFunctionList_v172", resourceCulture);
}
}
///
/// Looks up a localized string similar to ABS
///ACCRINT
///ACCRINTM
///ACOS
///ACOSH
///ACOT
///ACOTH
///ADDRESS
///AGGREGATE
///AMORDEGRC
///AMORLINC
///AND
///ARABIC
///AREAS
///ARRAYTOTEXT
///ASC
///ASIN
///ASINH
///ATAN
///ATAN2
///ATANH
///AVEDEV
///AVERAGE
///AVERAGEA
///AVERAGEIF
///AVERAGEIFS
///BAHTTEXT
///BASE
///BESSELI
///BESSELJ
///BESSELK
///BESSELY
///BETA.DIST
///BETA.INV
///BETADIST
///BETAINV
///BIN2DEC
///BIN2HEX
///BIN2OCT
///BINOM.DIST
///BINOM.DIST.RANGE
///BINOM.INV
///BINOMDIST
///BITAND
///BITLSHIFT
///BITOR
///BITRSHIFT
///BITXOR
///CALL
///CEILING
///CEILING.MATH
///CEILING.PRECISE
///CELL
///CHAR
///CHIDIST
///CHIINV
///CHISQ.DIS [rest of string was truncated]";.
///
internal static string ExcelBuiltinFunctionList_v173 {
get {
return ResourceManager.GetString("ExcelBuiltinFunctionList_v173", resourceCulture);
}
}
///
/// Looks up a localized string similar to ABS
///ACCRINT
///ACCRINTM
///ACOS
///ACOSH
///ACOT
///ACOTH
///ADDRESS
///AGGREGATE
///AMORDEGRC
///AMORLINC
///AND
///ARABIC
///AREAS
///ARRAYTOTEXT
///ASC
///ASIN
///ASINH
///ATAN
///ATAN2
///ATANH
///AVEDEV
///AVERAGE
///AVERAGEA
///AVERAGEIF
///AVERAGEIFS
///BAHTTEXT
///BASE
///BESSELI
///BESSELJ
///BESSELK
///BESSELY
///BETA.DIST
///BETA.INV
///BETADIST
///BETAINV
///BIN2DEC
///BIN2HEX
///BIN2OCT
///BINOM.DIST
///BINOM.DIST.RANGE
///BINOM.INV
///BINOMDIST
///BITAND
///BITLSHIFT
///BITOR
///BITRSHIFT
///BITXOR
///CALL
///CEILING
///CEILING.MATH
///CEILING.PRECISE
///CELL
///CHAR
///CHIDIST
///CHIINV
///CHISQ.DIS [rest of string was truncated]";.
///
internal static string ExcelBuiltinFunctionList_v174 {
get {
return ResourceManager.GetString("ExcelBuiltinFunctionList_v174", resourceCulture);
}
}
/// <summary>
///   Looks up the localized string resource "ExcelBuiltinFunctionList_v175": a list of
///   Excel built-in function names, one per line, beginning ABS, ACCRINT, ACCRINTM, ACOS, ...
/// </summary>
internal static string ExcelBuiltinFunctionList_v175 {
    get {
        // resourceCulture is handed to the lookup as its culture argument.
        string functionList = ResourceManager.GetString("ExcelBuiltinFunctionList_v175", resourceCulture);
        return functionList;
    }
}
}
}
================================================
FILE: app/XLParser.Web/Properties/Resources.resx
================================================
text/microsoft-resx
2.0
System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
..\Resources\ExcelBuiltinFunctionList-v120.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252
..\Resources\ExcelBuiltinFunctionList-v139.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252
..\Resources\ExcelBuiltinFunctionList-v141.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252
..\Resources\ExcelBuiltinFunctionList-v142.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252
..\Resources\ExcelBuiltinFunctionList-v150.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252
..\Resources\ExcelBuiltinFunctionList-v151.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252
..\Resources\ExcelBuiltinFunctionList-v152.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252
..\Resources\ExcelBuiltinFunctionList-v160.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252
..\Resources\ExcelBuiltinFunctionList-v161.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252
..\Resources\ExcelBuiltinFunctionList-v162.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;iso-8859-1
..\Resources\ExcelBuiltinFunctionList-v163.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252
..\Resources\ExcelBuiltinFunctionList-v170.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252
..\Resources\ExcelBuiltinFunctionList-v171.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252
..\Resources\ExcelBuiltinFunctionList-v172.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252
..\Resources\ExcelBuiltinFunctionList-v173.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252
..\Resources\ExcelBuiltinFunctionList-v174.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252
..\Resources\ExcelBuiltinFunctionList-v175.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252
================================================
FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v120.txt
================================================
ABS
ACCRINT
ACCRINTM
ACOS
ACOSH
ADDRESS
AMORDEGRC
AMORLINC
AND
AREAS
ASC
ASIN
ASINH
ATAN
ATAN2
ATANH
AVEDEV
AVERAGE
AVERAGEA
AVERAGEIF
AVERAGEIFS
BAHTTEXT
BESSELI
BESSELJ
BESSELK
BESSELY
BETADIST
BETAINV
BIN2DEC
BIN2HEX
BIN2OCT
BINOMDIST
CALL
CEILING
CELL
CHAR
CHIDIST
CHIINV
CHITEST
CLEAN
CODE
COLUMN
COLUMNS
COMBIN
COMPLEX
CONCATENATE
CONFIDENCE
CONVERT
CORREL
COS
COSH
COUNT
COUNTA
COUNTBLANK
COUNTIF
COUNTIFS
COUPDAYBS
COUPDAYS
COUPDAYSNC
COUPNCD
COUPNUM
COUPPCD
COVAR
CRITBINOM
CUBEKPIMEMBER
CUBEMEMBER
CUBEMEMBERPROPERTY
CUBERANKEDMEMBER
CUBESET
CUBESETCOUNT
CUBEVALUE
CUMIPMT
CUMPRINC
DATE
DATEVALUE
DAVERAGE
DAY
DAYS360
DB
DCOUNT
DCOUNTA
DDB
DEC2BIN
DEC2HEX
DEC2OCT
DEGREES
DELTA
DEVSQ
DGET
DISC
DMAX
DMIN
DOLLAR
DOLLARDE
DOLLARFR
DPRODUCT
DSTDEV
DSTDEVP
DSUM
DURATION
DVAR
DVARP
EDATE
EFFECT
EOMONTH
ERF
ERFC
ERROR.TYPE
EUROCONVERT
EVEN
EXACT
EXP
EXPONDIST
FACT
FACTDOUBLE
FALSE
FDIST
FIND
FINV
FISHER
FISHERINV
FIXED
FLOOR
FORECAST
FREQUENCY
FTEST
FV
FVSCHEDULE
GAMMADIST
GAMMAINV
GAMMALN
GCD
GEOMEAN
GESTEP
GETPIVOTDATA
GROWTH
HARMEAN
HEX2BIN
HEX2DEC
HEX2OCT
HLOOKUP
HOUR
HYPERLINK
HYPGEOMDIST
ISBLANK
IFERROR
IMABS
IMAGINARY
IMARGUMENT
IMCONJUGATE
IMCOS
IMDIV
IMEXP
IMLN
IMLOG10
IMLOG2
IMPOWER
IMPRODUCT
IMREAL
IMSIN
IMSQRT
IMSUB
IMSUM
INFO
INT
INTERCEPT
INTRATE
IPMT
IRR
IS
ISB
ISERROR
ISNA
ISNUMBER
ISPMT
JIS
KURT
LARGE
LCM
LEFT
LEFTB
LEN
LENB
LINEST
LN
LOG
LOG10
LOGEST
LOGINV
LOGNORMDIST
LOOKUP
LOWER
MATCH
MAX
MAXA
MDETERM
MDURATION
MEDIAN
MID
MIDB
MIN
MINA
MINUTE
MINVERSE
MIRR
MMULT
MOD
MODE
MONTH
MROUND
MULTINOMIAL
N
NA
NEGBINOMDIST
NETWORKDAYS
NOMINAL
NORMDIST
NORMINV
NORMSDIST
NORMSINV
NOT
NOW
NPER
NPV
OCT2BIN
OCT2DEC
OCT2HEX
ODD
ODDFPRICE
ODDFYIELD
ODDLPRICE
ODDLYIELD
OR
PEARSON
PERCENTILE
PERCENTRANK
PERMUT
PHONETIC
PI
PMT
POISSON
POWER
PPMT
PRICE
PRICEDISC
PRICEMAT
PROB
PRODUCT
PROPER
PV
QUARTILE
QUOTIENT
RADIANS
RAND
RANDBETWEEN
RANK
RATE
RECEIVED
REGISTER.ID
REPLACE
REPLACEB
REPT
RIGHT
RIGHTB
ROMAN
ROUND
ROUNDDOWN
ROUNDUP
ROW
ROWS
RSQ
RTD
SEARCH
SEARCHB
SECOND
SERIESSUM
SIGN
SIN
SINH
SKEW
SLN
SLOPE
SMALL
SQL.REQUEST
SQRT
SQRTPI
STANDARDIZE
STDEV
STDEVA
STDEVP
STDEVPA
STEYX
SUBSTITUTE
SUBTOTAL
SUM
SUMIF
SUMIFS
SUMPRODUCT
SUMSQ
SUMX2MY2
SUMX2PY2
SUMXMY2
SYD
T
TAN
TANH
TBILLEQ
TBILLPRICE
TBILLYIELD
TDIST
TEXT
TIME
TIMEVALUE
TINV
TODAY
TRANSPOSE
TREND
TRIM
TRIMMEAN
TRUE
TRUNC
TTEST
TYPE
UPPER
VALUE
VAR
VARA
VARP
VARPA
VDB
VLOOKUP
WEEKDAY
WEEKNUM
WEIBULL
WORKDAY
XIRR
XNPV
YEAR
YEARFRAC
YIELD
YIELDDISC
YIELDMAT
ZTEST
================================================
FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v139.txt
================================================
ABS
ACCRINT
ACCRINTM
ACOS
ACOSH
ACOT
ACOTH
ADDRESS
AGGREGATE
AMORDEGRC
AMORLINC
AND
ARABIC
AREAS
ASC
ASIN
ASINH
ATAN
ATAN2
ATANH
AVEDEV
AVERAGE
AVERAGEA
AVERAGEIF
AVERAGEIFS
BAHTTEXT
BASE
BESSELI
BESSELJ
BESSELK
BESSELY
BETA.DIST
BETA.INV
BETADIST
BETAINV
BIN2DEC
BIN2HEX
BIN2OCT
BINOM.DIST
BINOM.DIST.RANGE
BINOM.INV
BINOMDIST
BITAND
BITLSHIFT
BITOR
BITRSHIFT
BITXOR
CALL
CEILING
CEILING.MATH
CEILING.PRECISE
CELL
CHAR
CHIDIST
CHIINV
CHISQ.DIST
CHISQ.DIST.RT
CHISQ.INV
CHISQ.INV.RT
CHISQ.TEST
CHITEST
CLEAN
CODE
COLUMN
COLUMNS
COMBIN
COMBINA
COMPLEX
CONCAT
CONCATENATE
CONFIDENCE
CONFIDENCE.NORM
CONFIDENCE.T
CONVERT
CORREL
COS
COSH
COT
COTH
COUNT
COUNTA
COUNTBLANK
COUNTIF
COUNTIFS
COUPDAYBS
COUPDAYS
COUPDAYSNC
COUPNCD
COUPNUM
COUPPCD
COVAR
COVARIANCE.P
COVARIANCE.S
CRITBINOM
CSC
CSCH
CUBEKPIMEMBER
CUBEMEMBER
CUBEMEMBERPROPERTY
CUBERANKEDMEMBER
CUBESET
CUBESETCOUNT
CUBEVALUE
CUMIPMT
CUMPRINC
DATE
DATEDIF
DATEVALUE
DAVERAGE
DAY
DAYS
DAYS360
DB
DBCS
DCOUNT
DCOUNTA
DDB
DEC2BIN
DEC2HEX
DEC2OCT
DECIMAL
DEGREES
DELTA
DEVSQ
DGET
DISC
DMAX
DMIN
DOLLAR
DOLLARDE
DOLLARFR
DPRODUCT
DSTDEV
DSTDEVP
DSUM
DURATION
DVAR
DVARP
EDATE
EFFECT
ENCODEURL
EOMONTH
ERF
ERF.PRECISE
ERFC
ERFC.PRECISE
ERROR.TYPE
EUROCONVERT
EVEN
EXACT
EXP
EXPON.DIST
EXPONDIST
F.DIST
F.DIST.RT
F.INV
F.INV.RT
F.TEST
FACT
FACTDOUBLE
FALSE
FDIST
FILTERXML
FIND
FINDB
FINV
FISHER
FISHERINV
FIXED
FLOOR
FLOOR.MATH
FLOOR.PRECISE
FORECAST
FORECAST.ETS
FORECAST.ETS.CONFINT
FORECAST.ETS.SEASONALITY
FORECAST.ETS.STAT
FORECAST.LINEAR
FORMULATEXT
FREQUENCY
FTEST
FV
FVSCHEDULE
GAMMA
GAMMA.DIST
GAMMA.INV
GAMMADIST
GAMMAINV
GAMMALN
GAMMALN.PRECISE
GAUSS
GCD
GEOMEAN
GESTEP
GETPIVOTDATA
GROWTH
HARMEAN
HEX2BIN
HEX2DEC
HEX2OCT
HLOOKUP
HOUR
HYPERLINK
HYPGEOM.DIST
HYPGEOMDIST
ISBLANK
IFERROR
IFNA
IFS
IMABS
IMAGINARY
IMARGUMENT
IMCONJUGATE
IMCOS
IMCOSH
IMCOT
IMCSC
IMCSCH
IMDIV
IMEXP
IMLN
IMLOG10
IMLOG2
IMPOWER
IMPRODUCT
IMREAL
IMSEC
IMSECH
IMSIN
IMSINH
IMSQRT
IMSUB
IMSUM
IMTAN
INFO
INT
INTERCEPT
INTRATE
IPMT
IRR
IS
ISB
ISERR
ISERROR
ISEVEN
ISFORMULA
ISLOGICAL
ISNA
ISNONTEXT
ISNUMBER
ISO.CEILING
ISODD
ISOWEEKNUM
ISPMT
ISREF
ISTEXT
JIS
KURT
LARGE
LCM
LEFT
LEFTB
LEN
LENB
LINEST
LN
LOG
LOG10
LOGEST
LOGINV
LOGNORM.DIST
LOGNORM.INV
LOGNORMDIST
LOOKUP
LOWER
MATCH
MAX
MAXA
MAXIFS
MDETERM
MDURATION
MEDIAN
MID
MIDB
MIN
MINA
MINIFS
MINUTE
MINVERSE
MIRR
MMULT
MOD
MODE
MODE.MULT
MODE.SNGL
MONTH
MROUND
MULTINOMIAL
MUNIT
N
NA
NEGBINOM.DIST
NEGBINOMDIST
NETWORKDAYS
NETWORKDAYS.INTL
NOMINAL
NORM.DIST
NORM.INV
NORM.S.DIST
NORM.S.INV
NORMDIST
NORMINV
NORMSDIST
NORMSINV
NOT
NOW
NPER
NPV
NUMBERVALUE
OCT2BIN
OCT2DEC
OCT2HEX
ODD
ODDFPRICE
ODDFYIELD
ODDLPRICE
ODDLYIELD
OR
PDURATION
PEARSON
PERCENTILE
PERCENTILE.EXC
PERCENTILE.INC
PERCENTRANK
PERCENTRANK.EXC
PERCENTRANK.INC
PERMUT
PERMUTATIONA
PHI
PHONETIC
PI
PMT
POISSON
POISSON.DIST
POWER
PPMT
PRICE
PRICEDISC
PRICEMAT
PROB
PRODUCT
PROPER
PV
QUARTILE
QUARTILE.EXC
QUARTILE.INC
QUOTIENT
RADIANS
RAND
RANDBETWEEN
RANK
RANK.AVG
RANK.EQ
RATE
RECEIVED
REGISTER.ID
REPLACE
REPLACEB
REPT
RIGHT
RIGHTB
ROMAN
ROUND
ROUNDDOWN
ROUNDUP
ROW
ROWS
RRI
RSQ
RTD
SEARCH
SEARCHB
SEC
SECH
SECOND
SERIESSUM
SHEET
SHEETS
SIGN
SIN
SINH
SKEW
SKEW.P
SLN
SLOPE
SMALL
SQL.REQUEST
SQRT
SQRTPI
STANDARDIZE
STDEV
STDEV.P
STDEV.S
STDEVA
STDEVP
STDEVPA
STEYX
SUBSTITUTE
SUBTOTAL
SUM
SUMIF
SUMIFS
SUMPRODUCT
SUMSQ
SUMX2MY2
SUMX2PY2
SUMXMY2
SWITCH
SYD
T
T.DIST
T.DIST.2T
T.DIST.RT
T.INV
T.INV.2T
T.TEST
TAN
TANH
TBILLEQ
TBILLPRICE
TBILLYIELD
TDIST
TEXT
TEXTJOIN
TIME
TIMEVALUE
TINV
TODAY
TRANSPOSE
TREND
TRIM
TRIMMEAN
TRUE
TRUNC
TTEST
TYPE
UNICHAR
UNICODE
UPPER
VALUE
VAR
VAR.P
VAR.S
VARA
VARP
VARPA
VDB
VLOOKUP
WEBSERVICE
WEEKDAY
WEEKNUM
WEIBULL
WEIBULL.DIST
WORKDAY
WORKDAY.INTL
XIRR
XNPV
XOR
YEAR
YEARFRAC
YIELD
YIELDDISC
YIELDMAT
Z.TEST
ZTEST
================================================
FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v141.txt
================================================
ABS
ACCRINT
ACCRINTM
ACOS
ACOSH
ACOT
ACOTH
ADDRESS
AGGREGATE
AMORDEGRC
AMORLINC
AND
ARABIC
AREAS
ASC
ASIN
ASINH
ATAN
ATAN2
ATANH
AVEDEV
AVERAGE
AVERAGEA
AVERAGEIF
AVERAGEIFS
BAHTTEXT
BASE
BESSELI
BESSELJ
BESSELK
BESSELY
BETA.DIST
BETA.INV
BETADIST
BETAINV
BIN2DEC
BIN2HEX
BIN2OCT
BINOM.DIST
BINOM.DIST.RANGE
BINOM.INV
BINOMDIST
BITAND
BITLSHIFT
BITOR
BITRSHIFT
BITXOR
CALL
CEILING
CEILING.MATH
CEILING.PRECISE
CELL
CHAR
CHIDIST
CHIINV
CHISQ.DIST
CHISQ.DIST.RT
CHISQ.INV
CHISQ.INV.RT
CHISQ.TEST
CHITEST
CLEAN
CODE
COLUMN
COLUMNS
COMBIN
COMBINA
COMPLEX
CONCAT
CONCATENATE
CONFIDENCE
CONFIDENCE.NORM
CONFIDENCE.T
CONVERT
CORREL
COS
COSH
COT
COTH
COUNT
COUNTA
COUNTBLANK
COUNTIF
COUNTIFS
COUPDAYBS
COUPDAYS
COUPDAYSNC
COUPNCD
COUPNUM
COUPPCD
COVAR
COVARIANCE.P
COVARIANCE.S
CRITBINOM
CSC
CSCH
CUBEKPIMEMBER
CUBEMEMBER
CUBEMEMBERPROPERTY
CUBERANKEDMEMBER
CUBESET
CUBESETCOUNT
CUBEVALUE
CUMIPMT
CUMPRINC
DATE
DATEDIF
DATEVALUE
DAVERAGE
DAY
DAYS
DAYS360
DB
DBCS
DCOUNT
DCOUNTA
DDB
DEC2BIN
DEC2HEX
DEC2OCT
DECIMAL
DEGREES
DELTA
DEVSQ
DGET
DISC
DMAX
DMIN
DOLLAR
DOLLARDE
DOLLARFR
DPRODUCT
DSTDEV
DSTDEVP
DSUM
DURATION
DVAR
DVARP
EDATE
EFFECT
ENCODEURL
EOMONTH
ERF
ERF.PRECISE
ERFC
ERFC.PRECISE
ERROR.TYPE
EUROCONVERT
EVEN
EXACT
EXP
EXPON.DIST
EXPONDIST
F.DIST
F.DIST.RT
F.INV
F.INV.RT
F.TEST
FACT
FACTDOUBLE
FALSE
FDIST
FILTERXML
FIND
FINDB
FINV
FISHER
FISHERINV
FIXED
FLOOR
FLOOR.MATH
FLOOR.PRECISE
FORECAST
FORECAST.ETS
FORECAST.ETS.CONFINT
FORECAST.ETS.SEASONALITY
FORECAST.ETS.STAT
FORECAST.LINEAR
FORMULATEXT
FREQUENCY
FTEST
FV
FVSCHEDULE
GAMMA
GAMMA.DIST
GAMMA.INV
GAMMADIST
GAMMAINV
GAMMALN
GAMMALN.PRECISE
GAUSS
GCD
GEOMEAN
GESTEP
GETPIVOTDATA
GROWTH
HARMEAN
HEX2BIN
HEX2DEC
HEX2OCT
HLOOKUP
HOUR
HYPERLINK
HYPGEOM.DIST
HYPGEOMDIST
ISBLANK
IFERROR
IFNA
IFS
IMABS
IMAGINARY
IMARGUMENT
IMCONJUGATE
IMCOS
IMCOSH
IMCOT
IMCSC
IMCSCH
IMDIV
IMEXP
IMLN
IMLOG10
IMLOG2
IMPOWER
IMPRODUCT
IMREAL
IMSEC
IMSECH
IMSIN
IMSINH
IMSQRT
IMSUB
IMSUM
IMTAN
INFO
INT
INTERCEPT
INTRATE
IPMT
IRR
IS
ISB
ISERR
ISERROR
ISEVEN
ISFORMULA
ISLOGICAL
ISNA
ISNONTEXT
ISNUMBER
ISO.CEILING
ISODD
ISOWEEKNUM
ISPMT
ISREF
ISTEXT
JIS
KURT
LARGE
LCM
LEFT
LEFTB
LEN
LENB
LINEST
LN
LOG
LOG10
LOGEST
LOGINV
LOGNORM.DIST
LOGNORM.INV
LOGNORMDIST
LOOKUP
LOWER
MATCH
MAX
MAXA
MAXIFS
MDETERM
MDURATION
MEDIAN
MID
MIDB
MIN
MINA
MINIFS
MINUTE
MINVERSE
MIRR
MMULT
MOD
MODE
MODE.MULT
MODE.SNGL
MONTH
MROUND
MULTINOMIAL
MUNIT
N
NA
NEGBINOM.DIST
NEGBINOMDIST
NETWORKDAYS
NETWORKDAYS.INTL
NOMINAL
NORM.DIST
NORM.INV
NORM.S.DIST
NORM.S.INV
NORMDIST
NORMINV
NORMSDIST
NORMSINV
NOT
NOW
NPER
NPV
NUMBERVALUE
OCT2BIN
OCT2DEC
OCT2HEX
ODD
ODDFPRICE
ODDFYIELD
ODDLPRICE
ODDLYIELD
OR
PDURATION
PEARSON
PERCENTILE
PERCENTILE.EXC
PERCENTILE.INC
PERCENTRANK
PERCENTRANK.EXC
PERCENTRANK.INC
PERMUT
PERMUTATIONA
PHI
PHONETIC
PI
PMT
POISSON
POISSON.DIST
POWER
PPMT
PRICE
PRICEDISC
PRICEMAT
PROB
PRODUCT
PROPER
PV
QUARTILE
QUARTILE.EXC
QUARTILE.INC
QUOTIENT
RADIANS
RAND
RANDBETWEEN
RANK
RANK.AVG
RANK.EQ
RATE
RECEIVED
REGISTER.ID
REPLACE
REPLACEB
REPT
RIGHT
RIGHTB
ROMAN
ROUND
ROUNDDOWN
ROUNDUP
ROW
ROWS
RRI
RSQ
RTD
SEARCH
SEARCHB
SEC
SECH
SECOND
SERIESSUM
SHEET
SHEETS
SIGN
SIN
SINH
SKEW
SKEW.P
SLN
SLOPE
SMALL
SQL.REQUEST
SQRT
SQRTPI
STANDARDIZE
STDEV
STDEV.P
STDEV.S
STDEVA
STDEVP
STDEVPA
STEYX
SUBSTITUTE
SUBTOTAL
SUM
SUMIF
SUMIFS
SUMPRODUCT
SUMSQ
SUMX2MY2
SUMX2PY2
SUMXMY2
SWITCH
SYD
T
T.DIST
T.DIST.2T
T.DIST.RT
T.INV
T.INV.2T
T.TEST
TAN
TANH
TBILLEQ
TBILLPRICE
TBILLYIELD
TDIST
TEXT
TEXTJOIN
TIME
TIMEVALUE
TINV
TODAY
TRANSPOSE
TREND
TRIM
TRIMMEAN
TRUE
TRUNC
TTEST
TYPE
UNICHAR
UNICODE
UPPER
VALUE
VAR
VAR.P
VAR.S
VARA
VARP
VARPA
VDB
VLOOKUP
WEBSERVICE
WEEKDAY
WEEKNUM
WEIBULL
WEIBULL.DIST
WORKDAY
WORKDAY.INTL
XIRR
XLOOKUP
XMATCH
XNPV
XOR
YEAR
YEARFRAC
YIELD
YIELDDISC
YIELDMAT
Z.TEST
ZTEST
================================================
FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v142.txt
================================================
ABS
ACCRINT
ACCRINTM
ACOS
ACOSH
ACOT
ACOTH
ADDRESS
AGGREGATE
AMORDEGRC
AMORLINC
AND
ARABIC
AREAS
ASC
ASIN
ASINH
ATAN
ATAN2
ATANH
AVEDEV
AVERAGE
AVERAGEA
AVERAGEIF
AVERAGEIFS
BAHTTEXT
BASE
BESSELI
BESSELJ
BESSELK
BESSELY
BETA.DIST
BETA.INV
BETADIST
BETAINV
BIN2DEC
BIN2HEX
BIN2OCT
BINOM.DIST
BINOM.DIST.RANGE
BINOM.INV
BINOMDIST
BITAND
BITLSHIFT
BITOR
BITRSHIFT
BITXOR
CALL
CEILING
CEILING.MATH
CEILING.PRECISE
CELL
CHAR
CHIDIST
CHIINV
CHISQ.DIST
CHISQ.DIST.RT
CHISQ.INV
CHISQ.INV.RT
CHISQ.TEST
CHITEST
CLEAN
CODE
COLUMN
COLUMNS
COMBIN
COMBINA
COMPLEX
CONCAT
CONCATENATE
CONFIDENCE
CONFIDENCE.NORM
CONFIDENCE.T
CONVERT
CORREL
COS
COSH
COT
COTH
COUNT
COUNTA
COUNTBLANK
COUNTIF
COUNTIFS
COUPDAYBS
COUPDAYS
COUPDAYSNC
COUPNCD
COUPNUM
COUPPCD
COVAR
COVARIANCE.P
COVARIANCE.S
CRITBINOM
CSC
CSCH
CUBEKPIMEMBER
CUBEMEMBER
CUBEMEMBERPROPERTY
CUBERANKEDMEMBER
CUBESET
CUBESETCOUNT
CUBEVALUE
CUMIPMT
CUMPRINC
DATE
DATEDIF
DATEVALUE
DAVERAGE
DAY
DAYS
DAYS360
DB
DBCS
DCOUNT
DCOUNTA
DDB
DEC2BIN
DEC2HEX
DEC2OCT
DECIMAL
DEGREES
DELTA
DEVSQ
DGET
DISC
DMAX
DMIN
DOLLAR
DOLLARDE
DOLLARFR
DPRODUCT
DSTDEV
DSTDEVP
DSUM
DURATION
DVAR
DVARP
EDATE
EFFECT
ENCODEURL
EOMONTH
ERF
ERF.PRECISE
ERFC
ERFC.PRECISE
ERROR.TYPE
EUROCONVERT
EVEN
EXACT
EXP
EXPON.DIST
EXPONDIST
F.DIST
F.DIST.RT
F.INV
F.INV.RT
F.TEST
FACT
FACTDOUBLE
FALSE
FDIST
FILTERXML
FIND
FINDB
FINV
FISHER
FISHERINV
FIXED
FLOOR
FLOOR.MATH
FLOOR.PRECISE
FORECAST
FORECAST.ETS
FORECAST.ETS.CONFINT
FORECAST.ETS.SEASONALITY
FORECAST.ETS.STAT
FORECAST.LINEAR
FORMULATEXT
FREQUENCY
FTEST
FV
FVSCHEDULE
GAMMA
GAMMA.DIST
GAMMA.INV
GAMMADIST
GAMMAINV
GAMMALN
GAMMALN.PRECISE
GAUSS
GCD
GEOMEAN
GESTEP
GETPIVOTDATA
GROWTH
HARMEAN
HEX2BIN
HEX2DEC
HEX2OCT
HLOOKUP
HOUR
HYPERLINK
HYPGEOM.DIST
HYPGEOMDIST
ISBLANK
IFERROR
IFNA
IFS
IMABS
IMAGINARY
IMARGUMENT
IMCONJUGATE
IMCOS
IMCOSH
IMCOT
IMCSC
IMCSCH
IMDIV
IMEXP
IMLN
IMLOG10
IMLOG2
IMPOWER
IMPRODUCT
IMREAL
IMSEC
IMSECH
IMSIN
IMSINH
IMSQRT
IMSUB
IMSUM
IMTAN
INFO
INT
INTERCEPT
INTRATE
IPMT
IRR
IS
ISB
ISERR
ISERROR
ISEVEN
ISFORMULA
ISLOGICAL
ISNA
ISNONTEXT
ISNUMBER
ISO.CEILING
ISODD
ISOWEEKNUM
ISPMT
ISREF
ISTEXT
JIS
KURT
LARGE
LCM
LEFT
LEFTB
LEN
LENB
LINEST
LN
LOG
LOG10
LOGEST
LOGINV
LOGNORM.DIST
LOGNORM.INV
LOGNORMDIST
LOOKUP
LOWER
MATCH
MAX
MAXA
MAXIFS
MDETERM
MDURATION
MEDIAN
MID
MIDB
MIN
MINA
MINIFS
MINUTE
MINVERSE
MIRR
MMULT
MOD
MODE
MODE.MULT
MODE.SNGL
MONTH
MROUND
MULTINOMIAL
MUNIT
N
NA
NEGBINOM.DIST
NEGBINOMDIST
NETWORKDAYS
NETWORKDAYS.INTL
NOMINAL
NORM.DIST
NORM.INV
NORM.S.DIST
NORM.S.INV
NORMDIST
NORMINV
NORMSDIST
NORMSINV
NOT
NOW
NPER
NPV
NUMBERVALUE
OCT2BIN
OCT2DEC
OCT2HEX
ODD
ODDFPRICE
ODDFYIELD
ODDLPRICE
ODDLYIELD
OR
PDURATION
PEARSON
PERCENTILE
PERCENTILE.EXC
PERCENTILE.INC
PERCENTRANK
PERCENTRANK.EXC
PERCENTRANK.INC
PERMUT
PERMUTATIONA
PHI
PHONETIC
PI
PMT
POISSON
POISSON.DIST
POWER
PPMT
PRICE
PRICEDISC
PRICEMAT
PROB
PRODUCT
PROPER
PV
QUARTILE
QUARTILE.EXC
QUARTILE.INC
QUOTIENT
RADIANS
RAND
RANDBETWEEN
RANK
RANK.AVG
RANK.EQ
RATE
RECEIVED
REGISTER.ID
REPLACE
REPLACEB
REPT
RIGHT
RIGHTB
ROMAN
ROUND
ROUNDDOWN
ROUNDUP
ROW
ROWS
RRI
RSQ
RTD
SEARCH
SEARCHB
SEC
SECH
SECOND
SERIESSUM
SHEET
SHEETS
SIGN
SIN
SINH
SKEW
SKEW.P
SLN
SLOPE
SMALL
SQL.REQUEST
SQRT
SQRTPI
STANDARDIZE
STDEV
STDEV.P
STDEV.S
STDEVA
STDEVP
STDEVPA
STEYX
SUBSTITUTE
SUBTOTAL
SUM
SUMIF
SUMIFS
SUMPRODUCT
SUMSQ
SUMX2MY2
SUMX2PY2
SUMXMY2
SWITCH
SYD
T
T.DIST
T.DIST.2T
T.DIST.RT
T.INV
T.INV.2T
T.TEST
TAN
TANH
TBILLEQ
TBILLPRICE
TBILLYIELD
TDIST
TEXT
TEXTJOIN
TIME
TIMEVALUE
TINV
TODAY
TRANSPOSE
TREND
TRIM
TRIMMEAN
TRUE
TRUNC
TTEST
TYPE
UNICHAR
UNICODE
UPPER
VALUE
VAR
VAR.P
VAR.S
VARA
VARP
VARPA
VDB
VLOOKUP
WEBSERVICE
WEEKDAY
WEEKNUM
WEIBULL
WEIBULL.DIST
WORKDAY
WORKDAY.INTL
XIRR
XLOOKUP
XMATCH
XNPV
XOR
YEAR
YEARFRAC
YIELD
YIELDDISC
YIELDMAT
Z.TEST
ZTEST
================================================
FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v150.txt
================================================
ABS
ACCRINT
ACCRINTM
ACOS
ACOSH
ACOT
ACOTH
ADDRESS
AGGREGATE
AMORDEGRC
AMORLINC
AND
ARABIC
AREAS
ARRAYTOTEXT
ASC
ASIN
ASINH
ATAN
ATAN2
ATANH
AVEDEV
AVERAGE
AVERAGEA
AVERAGEIF
AVERAGEIFS
BAHTTEXT
BASE
BESSELI
BESSELJ
BESSELK
BESSELY
BETA.DIST
BETA.INV
BETADIST
BETAINV
BIN2DEC
BIN2HEX
BIN2OCT
BINOM.DIST
BINOM.DIST.RANGE
BINOM.INV
BINOMDIST
BITAND
BITLSHIFT
BITOR
BITRSHIFT
BITXOR
CALL
CEILING
CEILING.MATH
CEILING.PRECISE
CELL
CHAR
CHIDIST
CHIINV
CHISQ.DIST
CHISQ.DIST.RT
CHISQ.INV
CHISQ.INV.RT
CHISQ.TEST
CHITEST
CLEAN
CODE
COLUMN
COLUMNS
COMBIN
COMBINA
COMPLEX
CONCAT
CONCATENATE
CONFIDENCE
CONFIDENCE.NORM
CONFIDENCE.T
CONVERT
CORREL
COS
COSH
COT
COTH
COUNT
COUNTA
COUNTBLANK
COUNTIF
COUNTIFS
COUPDAYBS
COUPDAYS
COUPDAYSNC
COUPNCD
COUPNUM
COUPPCD
COVAR
COVARIANCE.P
COVARIANCE.S
CRITBINOM
CSC
CSCH
CUBEKPIMEMBER
CUBEMEMBER
CUBEMEMBERPROPERTY
CUBERANKEDMEMBER
CUBESET
CUBESETCOUNT
CUBEVALUE
CUMIPMT
CUMPRINC
DATE
DATEDIF
DATEVALUE
DAVERAGE
DAY
DAYS
DAYS360
DB
DBCS
DCOUNT
DCOUNTA
DDB
DEC2BIN
DEC2HEX
DEC2OCT
DECIMAL
DEGREES
DELTA
DEVSQ
DGET
DISC
DMAX
DMIN
DOLLAR
DOLLARDE
DOLLARFR
DPRODUCT
DSTDEV
DSTDEVP
DSUM
DURATION
DVAR
DVARP
EDATE
EFFECT
ENCODEURL
EOMONTH
ERF
ERF.PRECISE
ERFC
ERFC.PRECISE
ERROR.TYPE
EUROCONVERT
EVEN
EXACT
EXP
EXPON.DIST
EXPONDIST
F.DIST
F.DIST.RT
F.INV
F.INV.RT
F.TEST
FACT
FACTDOUBLE
FALSE
FDIST
FILTER
FILTERXML
FIND
FINDB
FINV
FISHER
FISHERINV
FIXED
FLOOR
FLOOR.MATH
FLOOR.PRECISE
FORECAST
FORECAST.ETS
FORECAST.ETS.CONFINT
FORECAST.ETS.SEASONALITY
FORECAST.ETS.STAT
FORECAST.LINEAR
FORMULATEXT
FREQUENCY
FTEST
FV
FVSCHEDULE
GAMMA
GAMMA.DIST
GAMMA.INV
GAMMADIST
GAMMAINV
GAMMALN
GAMMALN.PRECISE
GAUSS
GCD
GEOMEAN
GESTEP
GETPIVOTDATA
GROWTH
HARMEAN
HEX2BIN
HEX2DEC
HEX2OCT
HLOOKUP
HOUR
HYPERLINK
HYPGEOM.DIST
HYPGEOMDIST
IFERROR
IFNA
IFS
IMABS
IMAGINARY
IMARGUMENT
IMCONJUGATE
IMCOS
IMCOSH
IMCOT
IMCSC
IMCSCH
IMDIV
IMEXP
IMLN
IMLOG10
IMLOG2
IMPOWER
IMPRODUCT
IMREAL
IMSEC
IMSECH
IMSIN
IMSINH
IMSQRT
IMSUB
IMSUM
IMTAN
INFO
INT
INTERCEPT
INTRATE
IPMT
IRR
ISBLANK
ISERR
ISERROR
ISEVEN
ISFORMULA
ISLOGICAL
ISNA
ISNONTEXT
ISNUMBER
ISO.CEILING
ISODD
ISOWEEKNUM
ISPMT
ISREF
ISTEXT
JIS
KURT
LARGE
LCM
LEFT
LEFTB
LEN
LENB
LET
LINEST
LN
LOG
LOG10
LOGEST
LOGINV
LOGNORM.DIST
LOGNORM.INV
LOGNORMDIST
LOOKUP
LOWER
MATCH
MAX
MAXA
MAXIFS
MDETERM
MDURATION
MEDIAN
MID
MIDB
MIN
MINA
MINIFS
MINUTE
MINVERSE
MIRR
MMULT
MOD
MODE
MODE.MULT
MODE.SNGL
MONTH
MROUND
MULTINOMIAL
MUNIT
N
NA
NEGBINOM.DIST
NEGBINOMDIST
NETWORKDAYS
NETWORKDAYS.INTL
NOMINAL
NORM.DIST
NORM.INV
NORM.S.DIST
NORM.S.INV
NORMDIST
NORMINV
NORMSDIST
NORMSINV
NOT
NOW
NPER
NPV
NUMBERVALUE
OCT2BIN
OCT2DEC
OCT2HEX
ODD
ODDFPRICE
ODDFYIELD
ODDLPRICE
ODDLYIELD
OR
PDURATION
PEARSON
PERCENTILE
PERCENTILE.EXC
PERCENTILE.INC
PERCENTRANK
PERCENTRANK.EXC
PERCENTRANK.INC
PERMUT
PERMUTATIONA
PHI
PHONETIC
PI
PMT
POISSON
POISSON.DIST
POWER
PPMT
PRICE
PRICEDISC
PRICEMAT
PROB
PRODUCT
PROPER
PV
QUARTILE
QUARTILE.EXC
QUARTILE.INC
QUOTIENT
RADIANS
RAND
RANDARRAY
RANDBETWEEN
RANK
RANK.AVG
RANK.EQ
RATE
RECEIVED
REGISTER.ID
REPLACE
REPLACEB
REPT
RIGHT
RIGHTB
ROMAN
ROUND
ROUNDDOWN
ROUNDUP
ROW
ROWS
RRI
RSQ
RTD
SEARCH
SEARCHB
SEC
SECH
SECOND
SEQUENCE
SERIESSUM
SHEET
SHEETS
SIGN
SIN
SINH
SKEW
SKEW.P
SLN
SLOPE
SMALL
SORT
SORTBY
SQRT
SQRTPI
STANDARDIZE
STDEV
STDEV.P
STDEV.S
STDEVA
STDEVP
STDEVPA
STEYX
SUBSTITUTE
SUBTOTAL
SUM
SUMIF
SUMIFS
SUMPRODUCT
SUMSQ
SUMX2MY2
SUMX2PY2
SUMXMY2
SWITCH
SYD
T
T.DIST
T.DIST.2T
T.DIST.RT
T.INV
T.INV.2T
T.TEST
TAN
TANH
TBILLEQ
TBILLPRICE
TBILLYIELD
TDIST
TEXT
TEXTJOIN
TIME
TIMEVALUE
TINV
TODAY
TRANSPOSE
TREND
TRIM
TRIMMEAN
TRUE
TRUNC
TTEST
TYPE
UNICHAR
UNICODE
UNIQUE
UPPER
VALUE
VALUETOTEXT
VAR
VAR.P
VAR.S
VARA
VARP
VARPA
VDB
VLOOKUP
WEBSERVICE
WEEKDAY
WEEKNUM
WEIBULL
WEIBULL.DIST
WORKDAY
WORKDAY.INTL
XIRR
XLOOKUP
XMATCH
XNPV
XOR
YEAR
YEARFRAC
YIELD
YIELDDISC
YIELDMAT
Z.TEST
ZTEST
================================================
FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v151.txt
================================================
ABS
ACCRINT
ACCRINTM
ACOS
ACOSH
ACOT
ACOTH
ADDRESS
AGGREGATE
AMORDEGRC
AMORLINC
AND
ARABIC
AREAS
ARRAYTOTEXT
ASC
ASIN
ASINH
ATAN
ATAN2
ATANH
AVEDEV
AVERAGE
AVERAGEA
AVERAGEIF
AVERAGEIFS
BAHTTEXT
BASE
BESSELI
BESSELJ
BESSELK
BESSELY
BETA.DIST
BETA.INV
BETADIST
BETAINV
BIN2DEC
BIN2HEX
BIN2OCT
BINOM.DIST
BINOM.DIST.RANGE
BINOM.INV
BINOMDIST
BITAND
BITLSHIFT
BITOR
BITRSHIFT
BITXOR
CALL
CEILING
CEILING.MATH
CEILING.PRECISE
CELL
CHAR
CHIDIST
CHIINV
CHISQ.DIST
CHISQ.DIST.RT
CHISQ.INV
CHISQ.INV.RT
CHISQ.TEST
CHITEST
CLEAN
CODE
COLUMN
COLUMNS
COMBIN
COMBINA
COMPLEX
CONCAT
CONCATENATE
CONFIDENCE
CONFIDENCE.NORM
CONFIDENCE.T
CONVERT
CORREL
COS
COSH
COT
COTH
COUNT
COUNTA
COUNTBLANK
COUNTIF
COUNTIFS
COUPDAYBS
COUPDAYS
COUPDAYSNC
COUPNCD
COUPNUM
COUPPCD
COVAR
COVARIANCE.P
COVARIANCE.S
CRITBINOM
CSC
CSCH
CUBEKPIMEMBER
CUBEMEMBER
CUBEMEMBERPROPERTY
CUBERANKEDMEMBER
CUBESET
CUBESETCOUNT
CUBEVALUE
CUMIPMT
CUMPRINC
DATE
DATEDIF
DATEVALUE
DAVERAGE
DAY
DAYS
DAYS360
DB
DBCS
DCOUNT
DCOUNTA
DDB
DEC2BIN
DEC2HEX
DEC2OCT
DECIMAL
DEGREES
DELTA
DEVSQ
DGET
DISC
DMAX
DMIN
DOLLAR
DOLLARDE
DOLLARFR
DPRODUCT
DSTDEV
DSTDEVP
DSUM
DURATION
DVAR
DVARP
EDATE
EFFECT
ENCODEURL
EOMONTH
ERF
ERF.PRECISE
ERFC
ERFC.PRECISE
ERROR.TYPE
EUROCONVERT
EVEN
EXACT
EXP
EXPON.DIST
EXPONDIST
F.DIST
F.DIST.RT
F.INV
F.INV.RT
F.TEST
FACT
FACTDOUBLE
FALSE
FDIST
FILTER
FILTERXML
FIND
FINDB
FINV
FISHER
FISHERINV
FIXED
FLOOR
FLOOR.MATH
FLOOR.PRECISE
FORECAST
FORECAST.ETS
FORECAST.ETS.CONFINT
FORECAST.ETS.SEASONALITY
FORECAST.ETS.STAT
FORECAST.LINEAR
FORMULATEXT
FREQUENCY
FTEST
FV
FVSCHEDULE
GAMMA
GAMMA.DIST
GAMMA.INV
GAMMADIST
GAMMAINV
GAMMALN
GAMMALN.PRECISE
GAUSS
GCD
GEOMEAN
GESTEP
GETPIVOTDATA
GROWTH
HARMEAN
HEX2BIN
HEX2DEC
HEX2OCT
HLOOKUP
HOUR
HYPERLINK
HYPGEOM.DIST
HYPGEOMDIST
IFERROR
IFNA
IFS
IMABS
IMAGINARY
IMARGUMENT
IMCONJUGATE
IMCOS
IMCOSH
IMCOT
IMCSC
IMCSCH
IMDIV
IMEXP
IMLN
IMLOG10
IMLOG2
IMPOWER
IMPRODUCT
IMREAL
IMSEC
IMSECH
IMSIN
IMSINH
IMSQRT
IMSUB
IMSUM
IMTAN
INFO
INT
INTERCEPT
INTRATE
IPMT
IRR
ISBLANK
ISERR
ISERROR
ISEVEN
ISFORMULA
ISLOGICAL
ISNA
ISNONTEXT
ISNUMBER
ISO.CEILING
ISODD
ISOWEEKNUM
ISPMT
ISREF
ISTEXT
JIS
KURT
LARGE
LCM
LEFT
LEFTB
LEN
LENB
LET
LINEST
LN
LOG
LOG10
LOGEST
LOGINV
LOGNORM.DIST
LOGNORM.INV
LOGNORMDIST
LOOKUP
LOWER
MATCH
MAX
MAXA
MAXIFS
MDETERM
MDURATION
MEDIAN
MID
MIDB
MIN
MINA
MINIFS
MINUTE
MINVERSE
MIRR
MMULT
MOD
MODE
MODE.MULT
MODE.SNGL
MONTH
MROUND
MULTINOMIAL
MUNIT
N
NA
NEGBINOM.DIST
NEGBINOMDIST
NETWORKDAYS
NETWORKDAYS.INTL
NOMINAL
NORM.DIST
NORM.INV
NORM.S.DIST
NORM.S.INV
NORMDIST
NORMINV
NORMSDIST
NORMSINV
NOT
NOW
NPER
NPV
NUMBERVALUE
OCT2BIN
OCT2DEC
OCT2HEX
ODD
ODDFPRICE
ODDFYIELD
ODDLPRICE
ODDLYIELD
OR
PDURATION
PEARSON
PERCENTILE
PERCENTILE.EXC
PERCENTILE.INC
PERCENTRANK
PERCENTRANK.EXC
PERCENTRANK.INC
PERMUT
PERMUTATIONA
PHI
PHONETIC
PI
PMT
POISSON
POISSON.DIST
POWER
PPMT
PRICE
PRICEDISC
PRICEMAT
PROB
PRODUCT
PROPER
PV
QUARTILE
QUARTILE.EXC
QUARTILE.INC
QUOTIENT
RADIANS
RAND
RANDARRAY
RANDBETWEEN
RANK
RANK.AVG
RANK.EQ
RATE
RECEIVED
REGISTER.ID
REPLACE
REPLACEB
REPT
RIGHT
RIGHTB
ROMAN
ROUND
ROUNDDOWN
ROUNDUP
ROW
ROWS
RRI
RSQ
RTD
SEARCH
SEARCHB
SEC
SECH
SECOND
SEQUENCE
SERIESSUM
SHEET
SHEETS
SIGN
SIN
SINH
SKEW
SKEW.P
SLN
SLOPE
SMALL
SORT
SORTBY
SQRT
SQRTPI
STANDARDIZE
STDEV
STDEV.P
STDEV.S
STDEVA
STDEVP
STDEVPA
STEYX
SUBSTITUTE
SUBTOTAL
SUM
SUMIF
SUMIFS
SUMPRODUCT
SUMSQ
SUMX2MY2
SUMX2PY2
SUMXMY2
SWITCH
SYD
T
T.DIST
T.DIST.2T
T.DIST.RT
T.INV
T.INV.2T
T.TEST
TAN
TANH
TBILLEQ
TBILLPRICE
TBILLYIELD
TDIST
TEXT
TEXTJOIN
TIME
TIMEVALUE
TINV
TODAY
TRANSPOSE
TREND
TRIM
TRIMMEAN
TRUE
TRUNC
TTEST
TYPE
UNICHAR
UNICODE
UNIQUE
UPPER
VALUE
VALUETOTEXT
VAR
VAR.P
VAR.S
VARA
VARP
VARPA
VDB
VLOOKUP
WEBSERVICE
WEEKDAY
WEEKNUM
WEIBULL
WEIBULL.DIST
WORKDAY
WORKDAY.INTL
XIRR
XLOOKUP
XMATCH
XNPV
XOR
YEAR
YEARFRAC
YIELD
YIELDDISC
YIELDMAT
Z.TEST
ZTEST
================================================
FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v152.txt
================================================
ABS
ACCRINT
ACCRINTM
ACOS
ACOSH
ACOT
ACOTH
ADDRESS
AGGREGATE
AMORDEGRC
AMORLINC
AND
ARABIC
AREAS
ARRAYTOTEXT
ASC
ASIN
ASINH
ATAN
ATAN2
ATANH
AVEDEV
AVERAGE
AVERAGEA
AVERAGEIF
AVERAGEIFS
BAHTTEXT
BASE
BESSELI
BESSELJ
BESSELK
BESSELY
BETA.DIST
BETA.INV
BETADIST
BETAINV
BIN2DEC
BIN2HEX
BIN2OCT
BINOM.DIST
BINOM.DIST.RANGE
BINOM.INV
BINOMDIST
BITAND
BITLSHIFT
BITOR
BITRSHIFT
BITXOR
CALL
CEILING
CEILING.MATH
CEILING.PRECISE
CELL
CHAR
CHIDIST
CHIINV
CHISQ.DIST
CHISQ.DIST.RT
CHISQ.INV
CHISQ.INV.RT
CHISQ.TEST
CHITEST
CLEAN
CODE
COLUMN
COLUMNS
COMBIN
COMBINA
COMPLEX
CONCAT
CONCATENATE
CONFIDENCE
CONFIDENCE.NORM
CONFIDENCE.T
CONVERT
CORREL
COS
COSH
COT
COTH
COUNT
COUNTA
COUNTBLANK
COUNTIF
COUNTIFS
COUPDAYBS
COUPDAYS
COUPDAYSNC
COUPNCD
COUPNUM
COUPPCD
COVAR
COVARIANCE.P
COVARIANCE.S
CRITBINOM
CSC
CSCH
CUBEKPIMEMBER
CUBEMEMBER
CUBEMEMBERPROPERTY
CUBERANKEDMEMBER
CUBESET
CUBESETCOUNT
CUBEVALUE
CUMIPMT
CUMPRINC
DATE
DATEDIF
DATEVALUE
DAVERAGE
DAY
DAYS
DAYS360
DB
DBCS
DCOUNT
DCOUNTA
DDB
DEC2BIN
DEC2HEX
DEC2OCT
DECIMAL
DEGREES
DELTA
DEVSQ
DGET
DISC
DMAX
DMIN
DOLLAR
DOLLARDE
DOLLARFR
DPRODUCT
DSTDEV
DSTDEVP
DSUM
DURATION
DVAR
DVARP
EDATE
EFFECT
ENCODEURL
EOMONTH
ERF
ERF.PRECISE
ERFC
ERFC.PRECISE
ERROR.TYPE
EUROCONVERT
EVEN
EXACT
EXP
EXPON.DIST
EXPONDIST
F.DIST
F.DIST.RT
F.INV
F.INV.RT
F.TEST
FACT
FACTDOUBLE
FALSE
FDIST
FILTER
FILTERXML
FIND
FINDB
FINV
FISHER
FISHERINV
FIXED
FLOOR
FLOOR.MATH
FLOOR.PRECISE
FORECAST
FORECAST.ETS
FORECAST.ETS.CONFINT
FORECAST.ETS.SEASONALITY
FORECAST.ETS.STAT
FORECAST.LINEAR
FORMULATEXT
FREQUENCY
FTEST
FV
FVSCHEDULE
GAMMA
GAMMA.DIST
GAMMA.INV
GAMMADIST
GAMMAINV
GAMMALN
GAMMALN.PRECISE
GAUSS
GCD
GEOMEAN
GESTEP
GETPIVOTDATA
GROWTH
HARMEAN
HEX2BIN
HEX2DEC
HEX2OCT
HLOOKUP
HOUR
HYPERLINK
HYPGEOM.DIST
HYPGEOMDIST
IFERROR
IFNA
IFS
IMABS
IMAGINARY
IMARGUMENT
IMCONJUGATE
IMCOS
IMCOSH
IMCOT
IMCSC
IMCSCH
IMDIV
IMEXP
IMLN
IMLOG10
IMLOG2
IMPOWER
IMPRODUCT
IMREAL
IMSEC
IMSECH
IMSIN
IMSINH
IMSQRT
IMSUB
IMSUM
IMTAN
INFO
INT
INTERCEPT
INTRATE
IPMT
IRR
ISBLANK
ISERR
ISERROR
ISEVEN
ISFORMULA
ISLOGICAL
ISNA
ISNONTEXT
ISNUMBER
ISO.CEILING
ISODD
ISOWEEKNUM
ISPMT
ISREF
ISTEXT
JIS
KURT
LARGE
LCM
LEFT
LEFTB
LEN
LENB
LET
LINEST
LN
LOG
LOG10
LOGEST
LOGINV
LOGNORM.DIST
LOGNORM.INV
LOGNORMDIST
LOOKUP
LOWER
MATCH
MAX
MAXA
MAXIFS
MDETERM
MDURATION
MEDIAN
MID
MIDB
MIN
MINA
MINIFS
MINUTE
MINVERSE
MIRR
MMULT
MOD
MODE
MODE.MULT
MODE.SNGL
MONTH
MROUND
MULTINOMIAL
MUNIT
N
NA
NEGBINOM.DIST
NEGBINOMDIST
NETWORKDAYS
NETWORKDAYS.INTL
NOMINAL
NORM.DIST
NORM.INV
NORM.S.DIST
NORM.S.INV
NORMDIST
NORMINV
NORMSDIST
NORMSINV
NOT
NOW
NPER
NPV
NUMBERVALUE
OCT2BIN
OCT2DEC
OCT2HEX
ODD
ODDFPRICE
ODDFYIELD
ODDLPRICE
ODDLYIELD
OR
PDURATION
PEARSON
PERCENTILE
PERCENTILE.EXC
PERCENTILE.INC
PERCENTRANK
PERCENTRANK.EXC
PERCENTRANK.INC
PERMUT
PERMUTATIONA
PHI
PHONETIC
PI
PMT
POISSON
POISSON.DIST
POWER
PPMT
PRICE
PRICEDISC
PRICEMAT
PROB
PRODUCT
PROPER
PV
QUARTILE
QUARTILE.EXC
QUARTILE.INC
QUOTIENT
RADIANS
RAND
RANDARRAY
RANDBETWEEN
RANK
RANK.AVG
RANK.EQ
RATE
RECEIVED
REGISTER.ID
REPLACE
REPLACEB
REPT
RIGHT
RIGHTB
ROMAN
ROUND
ROUNDDOWN
ROUNDUP
ROW
ROWS
RRI
RSQ
RTD
SEARCH
SEARCHB
SEC
SECH
SECOND
SEQUENCE
SERIESSUM
SHEET
SHEETS
SIGN
SIN
SINH
SKEW
SKEW.P
SLN
SLOPE
SMALL
SORT
SORTBY
SQRT
SQRTPI
STANDARDIZE
STDEV
STDEV.P
STDEV.S
STDEVA
STDEVP
STDEVPA
STEYX
SUBSTITUTE
SUBTOTAL
SUM
SUMIF
SUMIFS
SUMPRODUCT
SUMSQ
SUMX2MY2
SUMX2PY2
SUMXMY2
SWITCH
SYD
T
T.DIST
T.DIST.2T
T.DIST.RT
T.INV
T.INV.2T
T.TEST
TAN
TANH
TBILLEQ
TBILLPRICE
TBILLYIELD
TDIST
TEXT
TEXTJOIN
TIME
TIMEVALUE
TINV
TODAY
TRANSPOSE
TREND
TRIM
TRIMMEAN
TRUE
TRUNC
TTEST
TYPE
UNICHAR
UNICODE
UNIQUE
UPPER
VALUE
VALUETOTEXT
VAR
VAR.P
VAR.S
VARA
VARP
VARPA
VDB
VLOOKUP
WEBSERVICE
WEEKDAY
WEEKNUM
WEIBULL
WEIBULL.DIST
WORKDAY
WORKDAY.INTL
XIRR
XLOOKUP
XMATCH
XNPV
XOR
YEAR
YEARFRAC
YIELD
YIELDDISC
YIELDMAT
Z.TEST
ZTEST
================================================
FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v160.txt
================================================
ABS
ACCRINT
ACCRINTM
ACOS
ACOSH
ACOT
ACOTH
ADDRESS
AGGREGATE
AMORDEGRC
AMORLINC
AND
ARABIC
AREAS
ARRAYTOTEXT
ASC
ASIN
ASINH
ATAN
ATAN2
ATANH
AVEDEV
AVERAGE
AVERAGEA
AVERAGEIF
AVERAGEIFS
BAHTTEXT
BASE
BESSELI
BESSELJ
BESSELK
BESSELY
BETA.DIST
BETA.INV
BETADIST
BETAINV
BIN2DEC
BIN2HEX
BIN2OCT
BINOM.DIST
BINOM.DIST.RANGE
BINOM.INV
BINOMDIST
BITAND
BITLSHIFT
BITOR
BITRSHIFT
BITXOR
CALL
CEILING
CEILING.MATH
CEILING.PRECISE
CELL
CHAR
CHIDIST
CHIINV
CHISQ.DIST
CHISQ.DIST.RT
CHISQ.INV
CHISQ.INV.RT
CHISQ.TEST
CHITEST
CLEAN
CODE
COLUMN
COLUMNS
COMBIN
COMBINA
COMPLEX
CONCAT
CONCATENATE
CONFIDENCE
CONFIDENCE.NORM
CONFIDENCE.T
CONVERT
CORREL
COS
COSH
COT
COTH
COUNT
COUNTA
COUNTBLANK
COUNTIF
COUNTIFS
COUPDAYBS
COUPDAYS
COUPDAYSNC
COUPNCD
COUPNUM
COUPPCD
COVAR
COVARIANCE.P
COVARIANCE.S
CRITBINOM
CSC
CSCH
CUBEKPIMEMBER
CUBEMEMBER
CUBEMEMBERPROPERTY
CUBERANKEDMEMBER
CUBESET
CUBESETCOUNT
CUBEVALUE
CUMIPMT
CUMPRINC
DATE
DATEDIF
DATEVALUE
DAVERAGE
DAY
DAYS
DAYS360
DB
DBCS
DCOUNT
DCOUNTA
DDB
DEC2BIN
DEC2HEX
DEC2OCT
DECIMAL
DEGREES
DELTA
DEVSQ
DGET
DISC
DMAX
DMIN
DOLLAR
DOLLARDE
DOLLARFR
DPRODUCT
DSTDEV
DSTDEVP
DSUM
DURATION
DVAR
DVARP
EDATE
EFFECT
ENCODEURL
EOMONTH
ERF
ERF.PRECISE
ERFC
ERFC.PRECISE
ERROR.TYPE
EUROCONVERT
EVEN
EXACT
EXP
EXPON.DIST
EXPONDIST
F.DIST
F.DIST.RT
F.INV
F.INV.RT
F.TEST
FACT
FACTDOUBLE
FALSE
FDIST
FILTER
FILTERXML
FIND
FINDB
FINV
FISHER
FISHERINV
FIXED
FLOOR
FLOOR.MATH
FLOOR.PRECISE
FORECAST
FORECAST.ETS
FORECAST.ETS.CONFINT
FORECAST.ETS.SEASONALITY
FORECAST.ETS.STAT
FORECAST.LINEAR
FORMULATEXT
FREQUENCY
FTEST
FV
FVSCHEDULE
GAMMA
GAMMA.DIST
GAMMA.INV
GAMMADIST
GAMMAINV
GAMMALN
GAMMALN.PRECISE
GAUSS
GCD
GEOMEAN
GESTEP
GETPIVOTDATA
GROWTH
HARMEAN
HEX2BIN
HEX2DEC
HEX2OCT
HLOOKUP
HOUR
HYPERLINK
HYPGEOM.DIST
HYPGEOMDIST
IFERROR
IFNA
IFS
IMABS
IMAGINARY
IMARGUMENT
IMCONJUGATE
IMCOS
IMCOSH
IMCOT
IMCSC
IMCSCH
IMDIV
IMEXP
IMLN
IMLOG10
IMLOG2
IMPOWER
IMPRODUCT
IMREAL
IMSEC
IMSECH
IMSIN
IMSINH
IMSQRT
IMSUB
IMSUM
IMTAN
INFO
INT
INTERCEPT
INTRATE
IPMT
IRR
ISBLANK
ISERR
ISERROR
ISEVEN
ISFORMULA
ISLOGICAL
ISNA
ISNONTEXT
ISNUMBER
ISO.CEILING
ISODD
ISOWEEKNUM
ISPMT
ISREF
ISTEXT
JIS
KURT
LARGE
LCM
LEFT
LEFTB
LEN
LENB
LET
LINEST
LN
LOG
LOG10
LOGEST
LOGINV
LOGNORM.DIST
LOGNORM.INV
LOGNORMDIST
LOOKUP
LOWER
MATCH
MAX
MAXA
MAXIFS
MDETERM
MDURATION
MEDIAN
MID
MIDB
MIN
MINA
MINIFS
MINUTE
MINVERSE
MIRR
MMULT
MOD
MODE
MODE.MULT
MODE.SNGL
MONTH
MROUND
MULTINOMIAL
MUNIT
N
NA
NEGBINOM.DIST
NEGBINOMDIST
NETWORKDAYS
NETWORKDAYS.INTL
NOMINAL
NORM.DIST
NORM.INV
NORM.S.DIST
NORM.S.INV
NORMDIST
NORMINV
NORMSDIST
NORMSINV
NOT
NOW
NPER
NPV
NUMBERVALUE
OCT2BIN
OCT2DEC
OCT2HEX
ODD
ODDFPRICE
ODDFYIELD
ODDLPRICE
ODDLYIELD
OR
PDURATION
PEARSON
PERCENTILE
PERCENTILE.EXC
PERCENTILE.INC
PERCENTRANK
PERCENTRANK.EXC
PERCENTRANK.INC
PERMUT
PERMUTATIONA
PHI
PHONETIC
PI
PMT
POISSON
POISSON.DIST
POWER
PPMT
PRICE
PRICEDISC
PRICEMAT
PROB
PRODUCT
PROPER
PV
QUARTILE
QUARTILE.EXC
QUARTILE.INC
QUOTIENT
RADIANS
RAND
RANDARRAY
RANDBETWEEN
RANK
RANK.AVG
RANK.EQ
RATE
RECEIVED
REGISTER.ID
REPLACE
REPLACEB
REPT
RIGHT
RIGHTB
ROMAN
ROUND
ROUNDDOWN
ROUNDUP
ROW
ROWS
RRI
RSQ
RTD
SEARCH
SEARCHB
SEC
SECH
SECOND
SEQUENCE
SERIESSUM
SHEET
SHEETS
SIGN
SIN
SINH
SKEW
SKEW.P
SLN
SLOPE
SMALL
SORT
SORTBY
SQRT
SQRTPI
STANDARDIZE
STDEV
STDEV.P
STDEV.S
STDEVA
STDEVP
STDEVPA
STEYX
SUBSTITUTE
SUBTOTAL
SUM
SUMIF
SUMIFS
SUMPRODUCT
SUMSQ
SUMX2MY2
SUMX2PY2
SUMXMY2
SWITCH
SYD
T
T.DIST
T.DIST.2T
T.DIST.RT
T.INV
T.INV.2T
T.TEST
TAN
TANH
TBILLEQ
TBILLPRICE
TBILLYIELD
TDIST
TEXT
TEXTJOIN
TIME
TIMEVALUE
TINV
TODAY
TRANSPOSE
TREND
TRIM
TRIMMEAN
TRUE
TRUNC
TTEST
TYPE
UNICHAR
UNICODE
UNIQUE
UPPER
VALUE
VALUETOTEXT
VAR
VAR.P
VAR.S
VARA
VARP
VARPA
VDB
VLOOKUP
WEBSERVICE
WEEKDAY
WEEKNUM
WEIBULL
WEIBULL.DIST
WORKDAY
WORKDAY.INTL
XIRR
XLOOKUP
XMATCH
XNPV
XOR
YEAR
YEARFRAC
YIELD
YIELDDISC
YIELDMAT
Z.TEST
ZTEST
================================================
FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v161.txt
================================================
ABS
ACCRINT
ACCRINTM
ACOS
ACOSH
ACOT
ACOTH
ADDRESS
AGGREGATE
AMORDEGRC
AMORLINC
AND
ARABIC
AREAS
ARRAYTOTEXT
ASC
ASIN
ASINH
ATAN
ATAN2
ATANH
AVEDEV
AVERAGE
AVERAGEA
AVERAGEIF
AVERAGEIFS
BAHTTEXT
BASE
BESSELI
BESSELJ
BESSELK
BESSELY
BETA.DIST
BETA.INV
BETADIST
BETAINV
BIN2DEC
BIN2HEX
BIN2OCT
BINOM.DIST
BINOM.DIST.RANGE
BINOM.INV
BINOMDIST
BITAND
BITLSHIFT
BITOR
BITRSHIFT
BITXOR
CALL
CEILING
CEILING.MATH
CEILING.PRECISE
CELL
CHAR
CHIDIST
CHIINV
CHISQ.DIST
CHISQ.DIST.RT
CHISQ.INV
CHISQ.INV.RT
CHISQ.TEST
CHITEST
CLEAN
CODE
COLUMN
COLUMNS
COMBIN
COMBINA
COMPLEX
CONCAT
CONCATENATE
CONFIDENCE
CONFIDENCE.NORM
CONFIDENCE.T
CONVERT
CORREL
COS
COSH
COT
COTH
COUNT
COUNTA
COUNTBLANK
COUNTIF
COUNTIFS
COUPDAYBS
COUPDAYS
COUPDAYSNC
COUPNCD
COUPNUM
COUPPCD
COVAR
COVARIANCE.P
COVARIANCE.S
CRITBINOM
CSC
CSCH
CUBEKPIMEMBER
CUBEMEMBER
CUBEMEMBERPROPERTY
CUBERANKEDMEMBER
CUBESET
CUBESETCOUNT
CUBEVALUE
CUMIPMT
CUMPRINC
DATE
DATEDIF
DATEVALUE
DAVERAGE
DAY
DAYS
DAYS360
DB
DBCS
DCOUNT
DCOUNTA
DDB
DEC2BIN
DEC2HEX
DEC2OCT
DECIMAL
DEGREES
DELTA
DEVSQ
DGET
DISC
DMAX
DMIN
DOLLAR
DOLLARDE
DOLLARFR
DPRODUCT
DSTDEV
DSTDEVP
DSUM
DURATION
DVAR
DVARP
EDATE
EFFECT
ENCODEURL
EOMONTH
ERF
ERF.PRECISE
ERFC
ERFC.PRECISE
ERROR.TYPE
EUROCONVERT
EVEN
EXACT
EXP
EXPON.DIST
EXPONDIST
F.DIST
F.DIST.RT
F.INV
F.INV.RT
F.TEST
FACT
FACTDOUBLE
FALSE
FDIST
FILTER
FILTERXML
FIND
FINDB
FINV
FISHER
FISHERINV
FIXED
FLOOR
FLOOR.MATH
FLOOR.PRECISE
FORECAST
FORECAST.ETS
FORECAST.ETS.CONFINT
FORECAST.ETS.SEASONALITY
FORECAST.ETS.STAT
FORECAST.LINEAR
FORMULATEXT
FREQUENCY
FTEST
FV
FVSCHEDULE
GAMMA
GAMMA.DIST
GAMMA.INV
GAMMADIST
GAMMAINV
GAMMALN
GAMMALN.PRECISE
GAUSS
GCD
GEOMEAN
GESTEP
GETPIVOTDATA
GROWTH
HARMEAN
HEX2BIN
HEX2DEC
HEX2OCT
HLOOKUP
HOUR
HYPERLINK
HYPGEOM.DIST
HYPGEOMDIST
IFERROR
IFNA
IFS
IMABS
IMAGINARY
IMARGUMENT
IMCONJUGATE
IMCOS
IMCOSH
IMCOT
IMCSC
IMCSCH
IMDIV
IMEXP
IMLN
IMLOG10
IMLOG2
IMPOWER
IMPRODUCT
IMREAL
IMSEC
IMSECH
IMSIN
IMSINH
IMSQRT
IMSUB
IMSUM
IMTAN
INFO
INT
INTERCEPT
INTRATE
IPMT
IRR
ISBLANK
ISERR
ISERROR
ISEVEN
ISFORMULA
ISLOGICAL
ISNA
ISNONTEXT
ISNUMBER
ISO.CEILING
ISODD
ISOWEEKNUM
ISPMT
ISREF
ISTEXT
JIS
KURT
LARGE
LCM
LEFT
LEFTB
LEN
LENB
LET
LINEST
LN
LOG
LOG10
LOGEST
LOGINV
LOGNORM.DIST
LOGNORM.INV
LOGNORMDIST
LOOKUP
LOWER
MATCH
MAX
MAXA
MAXIFS
MDETERM
MDURATION
MEDIAN
MID
MIDB
MIN
MINA
MINIFS
MINUTE
MINVERSE
MIRR
MMULT
MOD
MODE
MODE.MULT
MODE.SNGL
MONTH
MROUND
MULTINOMIAL
MUNIT
N
NA
NEGBINOM.DIST
NEGBINOMDIST
NETWORKDAYS
NETWORKDAYS.INTL
NOMINAL
NORM.DIST
NORM.INV
NORM.S.DIST
NORM.S.INV
NORMDIST
NORMINV
NORMSDIST
NORMSINV
NOT
NOW
NPER
NPV
NUMBERVALUE
OCT2BIN
OCT2DEC
OCT2HEX
ODD
ODDFPRICE
ODDFYIELD
ODDLPRICE
ODDLYIELD
OR
PDURATION
PEARSON
PERCENTILE
PERCENTILE.EXC
PERCENTILE.INC
PERCENTRANK
PERCENTRANK.EXC
PERCENTRANK.INC
PERMUT
PERMUTATIONA
PHI
PHONETIC
PI
PMT
POISSON
POISSON.DIST
POWER
PPMT
PRICE
PRICEDISC
PRICEMAT
PROB
PRODUCT
PROPER
PV
QUARTILE
QUARTILE.EXC
QUARTILE.INC
QUOTIENT
RADIANS
RAND
RANDARRAY
RANDBETWEEN
RANK
RANK.AVG
RANK.EQ
RATE
RECEIVED
REGISTER.ID
REPLACE
REPLACEB
REPT
RIGHT
RIGHTB
ROMAN
ROUND
ROUNDDOWN
ROUNDUP
ROW
ROWS
RRI
RSQ
RTD
SEARCH
SEARCHB
SEC
SECH
SECOND
SEQUENCE
SERIESSUM
SHEET
SHEETS
SIGN
SIN
SINH
SKEW
SKEW.P
SLN
SLOPE
SMALL
SORT
SORTBY
SQRT
SQRTPI
STANDARDIZE
STDEV
STDEV.P
STDEV.S
STDEVA
STDEVP
STDEVPA
STEYX
SUBSTITUTE
SUBTOTAL
SUM
SUMIF
SUMIFS
SUMPRODUCT
SUMSQ
SUMX2MY2
SUMX2PY2
SUMXMY2
SWITCH
SYD
T
T.DIST
T.DIST.2T
T.DIST.RT
T.INV
T.INV.2T
T.TEST
TAN
TANH
TBILLEQ
TBILLPRICE
TBILLYIELD
TDIST
TEXT
TEXTJOIN
TIME
TIMEVALUE
TINV
TODAY
TRANSPOSE
TREND
TRIM
TRIMMEAN
TRUE
TRUNC
TTEST
TYPE
UNICHAR
UNICODE
UNIQUE
UPPER
VALUE
VALUETOTEXT
VAR
VAR.P
VAR.S
VARA
VARP
VARPA
VDB
VLOOKUP
WEBSERVICE
WEEKDAY
WEEKNUM
WEIBULL
WEIBULL.DIST
WORKDAY
WORKDAY.INTL
XIRR
XLOOKUP
XMATCH
XNPV
XOR
YEAR
YEARFRAC
YIELD
YIELDDISC
YIELDMAT
Z.TEST
ZTEST
================================================
FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v162.txt
================================================
ABS
ACCRINT
ACCRINTM
ACOS
ACOSH
ACOT
ACOTH
ADDRESS
AGGREGATE
AMORDEGRC
AMORLINC
AND
ARABIC
AREAS
ARRAYTOTEXT
ASC
ASIN
ASINH
ATAN
ATAN2
ATANH
AVEDEV
AVERAGE
AVERAGEA
AVERAGEIF
AVERAGEIFS
BAHTTEXT
BASE
BESSELI
BESSELJ
BESSELK
BESSELY
BETA.DIST
BETA.INV
BETADIST
BETAINV
BIN2DEC
BIN2HEX
BIN2OCT
BINOM.DIST
BINOM.DIST.RANGE
BINOM.INV
BINOMDIST
BITAND
BITLSHIFT
BITOR
BITRSHIFT
BITXOR
CALL
CEILING
CEILING.MATH
CEILING.PRECISE
CELL
CHAR
CHIDIST
CHIINV
CHISQ.DIST
CHISQ.DIST.RT
CHISQ.INV
CHISQ.INV.RT
CHISQ.TEST
CHITEST
CLEAN
CODE
COLUMN
COLUMNS
COMBIN
COMBINA
COMPLEX
CONCAT
CONCATENATE
CONFIDENCE
CONFIDENCE.NORM
CONFIDENCE.T
CONVERT
CORREL
COS
COSH
COT
COTH
COUNT
COUNTA
COUNTBLANK
COUNTIF
COUNTIFS
COUPDAYBS
COUPDAYS
COUPDAYSNC
COUPNCD
COUPNUM
COUPPCD
COVAR
COVARIANCE.P
COVARIANCE.S
CRITBINOM
CSC
CSCH
CUBEKPIMEMBER
CUBEMEMBER
CUBEMEMBERPROPERTY
CUBERANKEDMEMBER
CUBESET
CUBESETCOUNT
CUBEVALUE
CUMIPMT
CUMPRINC
DATE
DATEDIF
DATEVALUE
DAVERAGE
DAY
DAYS
DAYS360
DB
DBCS
DCOUNT
DCOUNTA
DDB
DEC2BIN
DEC2HEX
DEC2OCT
DECIMAL
DEGREES
DELTA
DEVSQ
DGET
DISC
DMAX
DMIN
DOLLAR
DOLLARDE
DOLLARFR
DPRODUCT
DSTDEV
DSTDEVP
DSUM
DURATION
DVAR
DVARP
EDATE
EFFECT
ENCODEURL
EOMONTH
ERF
ERF.PRECISE
ERFC
ERFC.PRECISE
ERROR.TYPE
EUROCONVERT
EVEN
EXACT
EXP
EXPON.DIST
EXPONDIST
F.DIST
F.DIST.RT
F.INV
F.INV.RT
F.TEST
FACT
FACTDOUBLE
FALSE
FDIST
FILTER
FILTERXML
FIND
FINDB
FINV
FISHER
FISHERINV
FIXED
FLOOR
FLOOR.MATH
FLOOR.PRECISE
FORECAST
FORECAST.ETS
FORECAST.ETS.CONFINT
FORECAST.ETS.SEASONALITY
FORECAST.ETS.STAT
FORECAST.LINEAR
FORMULATEXT
FREQUENCY
FTEST
FV
FVSCHEDULE
GAMMA
GAMMA.DIST
GAMMA.INV
GAMMADIST
GAMMAINV
GAMMALN
GAMMALN.PRECISE
GAUSS
GCD
GEOMEAN
GESTEP
GETPIVOTDATA
GROWTH
HARMEAN
HEX2BIN
HEX2DEC
HEX2OCT
HLOOKUP
HOUR
HYPERLINK
HYPGEOM.DIST
HYPGEOMDIST
IFERROR
IFNA
IFS
IMABS
IMAGINARY
IMARGUMENT
IMCONJUGATE
IMCOS
IMCOSH
IMCOT
IMCSC
IMCSCH
IMDIV
IMEXP
IMLN
IMLOG10
IMLOG2
IMPOWER
IMPRODUCT
IMREAL
IMSEC
IMSECH
IMSIN
IMSINH
IMSQRT
IMSUB
IMSUM
IMTAN
INFO
INT
INTERCEPT
INTRATE
IPMT
IRR
ISBLANK
ISERR
ISERROR
ISEVEN
ISFORMULA
ISLOGICAL
ISNA
ISNONTEXT
ISNUMBER
ISO.CEILING
ISODD
ISOWEEKNUM
ISPMT
ISREF
ISTEXT
JIS
KURT
LARGE
LCM
LEFT
LEFTB
LEN
LENB
LET
LINEST
LN
LOG
LOG10
LOGEST
LOGINV
LOGNORM.DIST
LOGNORM.INV
LOGNORMDIST
LOOKUP
LOWER
MATCH
MAX
MAXA
MAXIFS
MDETERM
MDURATION
MEDIAN
MID
MIDB
MIN
MINA
MINIFS
MINUTE
MINVERSE
MIRR
MMULT
MOD
MODE
MODE.MULT
MODE.SNGL
MONTH
MROUND
MULTINOMIAL
MUNIT
N
NA
NEGBINOM.DIST
NEGBINOMDIST
NETWORKDAYS
NETWORKDAYS.INTL
NOMINAL
NORM.DIST
NORM.INV
NORM.S.DIST
NORM.S.INV
NORMDIST
NORMINV
NORMSDIST
NORMSINV
NOT
NOW
NPER
NPV
NUMBERVALUE
OCT2BIN
OCT2DEC
OCT2HEX
ODD
ODDFPRICE
ODDFYIELD
ODDLPRICE
ODDLYIELD
OR
PDURATION
PEARSON
PERCENTILE
PERCENTILE.EXC
PERCENTILE.INC
PERCENTRANK
PERCENTRANK.EXC
PERCENTRANK.INC
PERMUT
PERMUTATIONA
PHI
PHONETIC
PI
PMT
POISSON
POISSON.DIST
POWER
PPMT
PRICE
PRICEDISC
PRICEMAT
PROB
PRODUCT
PROPER
PV
QUARTILE
QUARTILE.EXC
QUARTILE.INC
QUOTIENT
RADIANS
RAND
RANDARRAY
RANDBETWEEN
RANK
RANK.AVG
RANK.EQ
RATE
RECEIVED
REGISTER.ID
REPLACE
REPLACEB
REPT
RIGHT
RIGHTB
ROMAN
ROUND
ROUNDDOWN
ROUNDUP
ROW
ROWS
RRI
RSQ
RTD
SEARCH
SEARCHB
SEC
SECH
SECOND
SEQUENCE
SERIESSUM
SHEET
SHEETS
SIGN
SIN
SINH
SKEW
SKEW.P
SLN
SLOPE
SMALL
SORT
SORTBY
SQRT
SQRTPI
STANDARDIZE
STDEV
STDEV.P
STDEV.S
STDEVA
STDEVP
STDEVPA
STEYX
SUBSTITUTE
SUBTOTAL
SUM
SUMIF
SUMIFS
SUMPRODUCT
SUMSQ
SUMX2MY2
SUMX2PY2
SUMXMY2
SWITCH
SYD
T
T.DIST
T.DIST.2T
T.DIST.RT
T.INV
T.INV.2T
T.TEST
TAN
TANH
TBILLEQ
TBILLPRICE
TBILLYIELD
TDIST
TEXT
TEXTJOIN
TIME
TIMEVALUE
TINV
TODAY
TRANSPOSE
TREND
TRIM
TRIMMEAN
TRUE
TRUNC
TTEST
TYPE
UNICHAR
UNICODE
UNIQUE
UPPER
VALUE
VALUETOTEXT
VAR
VAR.P
VAR.S
VARA
VARP
VARPA
VDB
VLOOKUP
WEBSERVICE
WEEKDAY
WEEKNUM
WEIBULL
WEIBULL.DIST
WORKDAY
WORKDAY.INTL
XIRR
XLOOKUP
XMATCH
XNPV
XOR
YEAR
YEARFRAC
YIELD
YIELDDISC
YIELDMAT
Z.TEST
ZTEST
================================================
FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v163.txt
================================================
ABS
ACCRINT
ACCRINTM
ACOS
ACOSH
ACOT
ACOTH
ADDRESS
AGGREGATE
AMORDEGRC
AMORLINC
AND
ARABIC
AREAS
ARRAYTOTEXT
ASC
ASIN
ASINH
ATAN
ATAN2
ATANH
AVEDEV
AVERAGE
AVERAGEA
AVERAGEIF
AVERAGEIFS
BAHTTEXT
BASE
BESSELI
BESSELJ
BESSELK
BESSELY
BETA.DIST
BETA.INV
BETADIST
BETAINV
BIN2DEC
BIN2HEX
BIN2OCT
BINOM.DIST
BINOM.DIST.RANGE
BINOM.INV
BINOMDIST
BITAND
BITLSHIFT
BITOR
BITRSHIFT
BITXOR
CALL
CEILING
CEILING.MATH
CEILING.PRECISE
CELL
CHAR
CHIDIST
CHIINV
CHISQ.DIST
CHISQ.DIST.RT
CHISQ.INV
CHISQ.INV.RT
CHISQ.TEST
CHITEST
CLEAN
CODE
COLUMN
COLUMNS
COMBIN
COMBINA
COMPLEX
CONCAT
CONCATENATE
CONFIDENCE
CONFIDENCE.NORM
CONFIDENCE.T
CONVERT
CORREL
COS
COSH
COT
COTH
COUNT
COUNTA
COUNTBLANK
COUNTIF
COUNTIFS
COUPDAYBS
COUPDAYS
COUPDAYSNC
COUPNCD
COUPNUM
COUPPCD
COVAR
COVARIANCE.P
COVARIANCE.S
CRITBINOM
CSC
CSCH
CUBEKPIMEMBER
CUBEMEMBER
CUBEMEMBERPROPERTY
CUBERANKEDMEMBER
CUBESET
CUBESETCOUNT
CUBEVALUE
CUMIPMT
CUMPRINC
DATE
DATEDIF
DATEVALUE
DAVERAGE
DAY
DAYS
DAYS360
DB
DBCS
DCOUNT
DCOUNTA
DDB
DEC2BIN
DEC2HEX
DEC2OCT
DECIMAL
DEGREES
DELTA
DEVSQ
DGET
DISC
DMAX
DMIN
DOLLAR
DOLLARDE
DOLLARFR
DPRODUCT
DSTDEV
DSTDEVP
DSUM
DURATION
DVAR
DVARP
EDATE
EFFECT
ENCODEURL
EOMONTH
ERF
ERF.PRECISE
ERFC
ERFC.PRECISE
ERROR.TYPE
EUROCONVERT
EVEN
EXACT
EXP
EXPON.DIST
EXPONDIST
F.DIST
F.DIST.RT
F.INV
F.INV.RT
F.TEST
FACT
FACTDOUBLE
FALSE
FDIST
FILTER
FILTERXML
FIND
FINDB
FINV
FISHER
FISHERINV
FIXED
FLOOR
FLOOR.MATH
FLOOR.PRECISE
FORECAST
FORECAST.ETS
FORECAST.ETS.CONFINT
FORECAST.ETS.SEASONALITY
FORECAST.ETS.STAT
FORECAST.LINEAR
FORMULATEXT
FREQUENCY
FTEST
FV
FVSCHEDULE
GAMMA
GAMMA.DIST
GAMMA.INV
GAMMADIST
GAMMAINV
GAMMALN
GAMMALN.PRECISE
GAUSS
GCD
GEOMEAN
GESTEP
GETPIVOTDATA
GROWTH
HARMEAN
HEX2BIN
HEX2DEC
HEX2OCT
HLOOKUP
HOUR
HYPERLINK
HYPGEOM.DIST
HYPGEOMDIST
IFERROR
IFNA
IFS
IMABS
IMAGINARY
IMARGUMENT
IMCONJUGATE
IMCOS
IMCOSH
IMCOT
IMCSC
IMCSCH
IMDIV
IMEXP
IMLN
IMLOG10
IMLOG2
IMPOWER
IMPRODUCT
IMREAL
IMSEC
IMSECH
IMSIN
IMSINH
IMSQRT
IMSUB
IMSUM
IMTAN
INFO
INT
INTERCEPT
INTRATE
IPMT
IRR
ISBLANK
ISERR
ISERROR
ISEVEN
ISFORMULA
ISLOGICAL
ISNA
ISNONTEXT
ISNUMBER
ISO.CEILING
ISODD
ISOWEEKNUM
ISPMT
ISREF
ISTEXT
JIS
KURT
LARGE
LCM
LEFT
LEFTB
LEN
LENB
LET
LINEST
LN
LOG
LOG10
LOGEST
LOGINV
LOGNORM.DIST
LOGNORM.INV
LOGNORMDIST
LOOKUP
LOWER
MATCH
MAX
MAXA
MAXIFS
MDETERM
MDURATION
MEDIAN
MID
MIDB
MIN
MINA
MINIFS
MINUTE
MINVERSE
MIRR
MMULT
MOD
MODE
MODE.MULT
MODE.SNGL
MONTH
MROUND
MULTINOMIAL
MUNIT
N
NA
NEGBINOM.DIST
NEGBINOMDIST
NETWORKDAYS
NETWORKDAYS.INTL
NOMINAL
NORM.DIST
NORM.INV
NORM.S.DIST
NORM.S.INV
NORMDIST
NORMINV
NORMSDIST
NORMSINV
NOT
NOW
NPER
NPV
NUMBERVALUE
OCT2BIN
OCT2DEC
OCT2HEX
ODD
ODDFPRICE
ODDFYIELD
ODDLPRICE
ODDLYIELD
OR
PDURATION
PEARSON
PERCENTILE
PERCENTILE.EXC
PERCENTILE.INC
PERCENTRANK
PERCENTRANK.EXC
PERCENTRANK.INC
PERMUT
PERMUTATIONA
PHI
PHONETIC
PI
PMT
POISSON
POISSON.DIST
POWER
PPMT
PRICE
PRICEDISC
PRICEMAT
PROB
PRODUCT
PROPER
PV
QUARTILE
QUARTILE.EXC
QUARTILE.INC
QUOTIENT
RADIANS
RAND
RANDARRAY
RANDBETWEEN
RANK
RANK.AVG
RANK.EQ
RATE
RECEIVED
REGISTER.ID
REPLACE
REPLACEB
REPT
RIGHT
RIGHTB
ROMAN
ROUND
ROUNDDOWN
ROUNDUP
ROW
ROWS
RRI
RSQ
RTD
SEARCH
SEARCHB
SEC
SECH
SECOND
SEQUENCE
SERIESSUM
SHEET
SHEETS
SIGN
SIN
SINH
SKEW
SKEW.P
SLN
SLOPE
SMALL
SORT
SORTBY
SQRT
SQRTPI
STANDARDIZE
STDEV
STDEV.P
STDEV.S
STDEVA
STDEVP
STDEVPA
STEYX
SUBSTITUTE
SUBTOTAL
SUM
SUMIF
SUMIFS
SUMPRODUCT
SUMSQ
SUMX2MY2
SUMX2PY2
SUMXMY2
SWITCH
SYD
T
T.DIST
T.DIST.2T
T.DIST.RT
T.INV
T.INV.2T
T.TEST
TAN
TANH
TBILLEQ
TBILLPRICE
TBILLYIELD
TDIST
TEXT
TEXTJOIN
TIME
TIMEVALUE
TINV
TODAY
TRANSPOSE
TREND
TRIM
TRIMMEAN
TRUE
TRUNC
TTEST
TYPE
UNICHAR
UNICODE
UNIQUE
UPPER
VALUE
VALUETOTEXT
VAR
VAR.P
VAR.S
VARA
VARP
VARPA
VDB
VLOOKUP
WEBSERVICE
WEEKDAY
WEEKNUM
WEIBULL
WEIBULL.DIST
WORKDAY
WORKDAY.INTL
XIRR
XLOOKUP
XMATCH
XNPV
XOR
YEAR
YEARFRAC
YIELD
YIELDDISC
YIELDMAT
Z.TEST
ZTEST
================================================
FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v170.txt
================================================
ABS
ACCRINT
ACCRINTM
ACOS
ACOSH
ACOT
ACOTH
ADDRESS
AGGREGATE
AMORDEGRC
AMORLINC
AND
ARABIC
AREAS
ARRAYTOTEXT
ASC
ASIN
ASINH
ATAN
ATAN2
ATANH
AVEDEV
AVERAGE
AVERAGEA
AVERAGEIF
AVERAGEIFS
BAHTTEXT
BASE
BESSELI
BESSELJ
BESSELK
BESSELY
BETA.DIST
BETA.INV
BETADIST
BETAINV
BIN2DEC
BIN2HEX
BIN2OCT
BINOM.DIST
BINOM.DIST.RANGE
BINOM.INV
BINOMDIST
BITAND
BITLSHIFT
BITOR
BITRSHIFT
BITXOR
CALL
CEILING
CEILING.MATH
CEILING.PRECISE
CELL
CHAR
CHIDIST
CHIINV
CHISQ.DIST
CHISQ.DIST.RT
CHISQ.INV
CHISQ.INV.RT
CHISQ.TEST
CHITEST
CLEAN
CODE
COLUMN
COLUMNS
COMBIN
COMBINA
COMPLEX
CONCAT
CONCATENATE
CONFIDENCE
CONFIDENCE.NORM
CONFIDENCE.T
CONVERT
CORREL
COS
COSH
COT
COTH
COUNT
COUNTA
COUNTBLANK
COUNTIF
COUNTIFS
COUPDAYBS
COUPDAYS
COUPDAYSNC
COUPNCD
COUPNUM
COUPPCD
COVAR
COVARIANCE.P
COVARIANCE.S
CRITBINOM
CSC
CSCH
CUBEKPIMEMBER
CUBEMEMBER
CUBEMEMBERPROPERTY
CUBERANKEDMEMBER
CUBESET
CUBESETCOUNT
CUBEVALUE
CUMIPMT
CUMPRINC
DATE
DATEDIF
DATEVALUE
DAVERAGE
DAY
DAYS
DAYS360
DB
DBCS
DCOUNT
DCOUNTA
DDB
DEC2BIN
DEC2HEX
DEC2OCT
DECIMAL
DEGREES
DELTA
DEVSQ
DGET
DISC
DMAX
DMIN
DOLLAR
DOLLARDE
DOLLARFR
DPRODUCT
DSTDEV
DSTDEVP
DSUM
DURATION
DVAR
DVARP
EDATE
EFFECT
ENCODEURL
EOMONTH
ERF
ERF.PRECISE
ERFC
ERFC.PRECISE
ERROR.TYPE
EUROCONVERT
EVEN
EXACT
EXP
EXPON.DIST
EXPONDIST
F.DIST
F.DIST.RT
F.INV
F.INV.RT
F.TEST
FACT
FACTDOUBLE
FALSE
FDIST
FILTER
FILTERXML
FIND
FINDB
FINV
FISHER
FISHERINV
FIXED
FLOOR
FLOOR.MATH
FLOOR.PRECISE
FORECAST
FORECAST.ETS
FORECAST.ETS.CONFINT
FORECAST.ETS.SEASONALITY
FORECAST.ETS.STAT
FORECAST.LINEAR
FORMULATEXT
FREQUENCY
FTEST
FV
FVSCHEDULE
GAMMA
GAMMA.DIST
GAMMA.INV
GAMMADIST
GAMMAINV
GAMMALN
GAMMALN.PRECISE
GAUSS
GCD
GEOMEAN
GESTEP
GETPIVOTDATA
GROWTH
HARMEAN
HEX2BIN
HEX2DEC
HEX2OCT
HLOOKUP
HOUR
HYPERLINK
HYPGEOM.DIST
HYPGEOMDIST
IFERROR
IFNA
IFS
IMABS
IMAGINARY
IMARGUMENT
IMCONJUGATE
IMCOS
IMCOSH
IMCOT
IMCSC
IMCSCH
IMDIV
IMEXP
IMLN
IMLOG10
IMLOG2
IMPOWER
IMPRODUCT
IMREAL
IMSEC
IMSECH
IMSIN
IMSINH
IMSQRT
IMSUB
IMSUM
IMTAN
INFO
INT
INTERCEPT
INTRATE
IPMT
IRR
ISBLANK
ISERR
ISERROR
ISEVEN
ISFORMULA
ISLOGICAL
ISNA
ISNONTEXT
ISNUMBER
ISO.CEILING
ISODD
ISOWEEKNUM
ISPMT
ISREF
ISTEXT
JIS
KURT
LARGE
LCM
LEFT
LEFTB
LEN
LENB
LET
LINEST
LN
LOG
LOG10
LOGEST
LOGINV
LOGNORM.DIST
LOGNORM.INV
LOGNORMDIST
LOOKUP
LOWER
MATCH
MAX
MAXA
MAXIFS
MDETERM
MDURATION
MEDIAN
MID
MIDB
MIN
MINA
MINIFS
MINUTE
MINVERSE
MIRR
MMULT
MOD
MODE
MODE.MULT
MODE.SNGL
MONTH
MROUND
MULTINOMIAL
MUNIT
N
NA
NEGBINOM.DIST
NEGBINOMDIST
NETWORKDAYS
NETWORKDAYS.INTL
NOMINAL
NORM.DIST
NORM.INV
NORM.S.DIST
NORM.S.INV
NORMDIST
NORMINV
NORMSDIST
NORMSINV
NOT
NOW
NPER
NPV
NUMBERVALUE
OCT2BIN
OCT2DEC
OCT2HEX
ODD
ODDFPRICE
ODDFYIELD
ODDLPRICE
ODDLYIELD
OR
PDURATION
PEARSON
PERCENTILE
PERCENTILE.EXC
PERCENTILE.INC
PERCENTRANK
PERCENTRANK.EXC
PERCENTRANK.INC
PERMUT
PERMUTATIONA
PHI
PHONETIC
PI
PMT
POISSON
POISSON.DIST
POWER
PPMT
PRICE
PRICEDISC
PRICEMAT
PROB
PRODUCT
PROPER
PV
QUARTILE
QUARTILE.EXC
QUARTILE.INC
QUOTIENT
RADIANS
RAND
RANDARRAY
RANDBETWEEN
RANK
RANK.AVG
RANK.EQ
RATE
RECEIVED
REGISTER.ID
REPLACE
REPLACEB
REPT
RIGHT
RIGHTB
ROMAN
ROUND
ROUNDDOWN
ROUNDUP
ROW
ROWS
RRI
RSQ
RTD
SEARCH
SEARCHB
SEC
SECH
SECOND
SEQUENCE
SERIESSUM
SHEET
SHEETS
SIGN
SIN
SINH
SKEW
SKEW.P
SLN
SLOPE
SMALL
SORT
SORTBY
SQRT
SQRTPI
STANDARDIZE
STDEV
STDEV.P
STDEV.S
STDEVA
STDEVP
STDEVPA
STEYX
SUBSTITUTE
SUBTOTAL
SUM
SUMIF
SUMIFS
SUMPRODUCT
SUMSQ
SUMX2MY2
SUMX2PY2
SUMXMY2
SWITCH
SYD
T
T.DIST
T.DIST.2T
T.DIST.RT
T.INV
T.INV.2T
T.TEST
TAN
TANH
TBILLEQ
TBILLPRICE
TBILLYIELD
TDIST
TEXT
TEXTJOIN
TIME
TIMEVALUE
TINV
TODAY
TRANSPOSE
TREND
TRIM
TRIMMEAN
TRUE
TRUNC
TTEST
TYPE
UNICHAR
UNICODE
UNIQUE
UPPER
VALUE
VALUETOTEXT
VAR
VAR.P
VAR.S
VARA
VARP
VARPA
VDB
VLOOKUP
WEBSERVICE
WEEKDAY
WEEKNUM
WEIBULL
WEIBULL.DIST
WORKDAY
WORKDAY.INTL
XIRR
XLOOKUP
XMATCH
XNPV
XOR
YEAR
YEARFRAC
YIELD
YIELDDISC
YIELDMAT
Z.TEST
ZTEST
================================================
FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v171.txt
================================================
ABS
ACCRINT
ACCRINTM
ACOS
ACOSH
ACOT
ACOTH
ADDRESS
AGGREGATE
AMORDEGRC
AMORLINC
AND
ARABIC
AREAS
ARRAYTOTEXT
ASC
ASIN
ASINH
ATAN
ATAN2
ATANH
AVEDEV
AVERAGE
AVERAGEA
AVERAGEIF
AVERAGEIFS
BAHTTEXT
BASE
BESSELI
BESSELJ
BESSELK
BESSELY
BETA.DIST
BETA.INV
BETADIST
BETAINV
BIN2DEC
BIN2HEX
BIN2OCT
BINOM.DIST
BINOM.DIST.RANGE
BINOM.INV
BINOMDIST
BITAND
BITLSHIFT
BITOR
BITRSHIFT
BITXOR
CALL
CEILING
CEILING.MATH
CEILING.PRECISE
CELL
CHAR
CHIDIST
CHIINV
CHISQ.DIST
CHISQ.DIST.RT
CHISQ.INV
CHISQ.INV.RT
CHISQ.TEST
CHITEST
CLEAN
CODE
COLUMN
COLUMNS
COMBIN
COMBINA
COMPLEX
CONCAT
CONCATENATE
CONFIDENCE
CONFIDENCE.NORM
CONFIDENCE.T
CONVERT
CORREL
COS
COSH
COT
COTH
COUNT
COUNTA
COUNTBLANK
COUNTIF
COUNTIFS
COUPDAYBS
COUPDAYS
COUPDAYSNC
COUPNCD
COUPNUM
COUPPCD
COVAR
COVARIANCE.P
COVARIANCE.S
CRITBINOM
CSC
CSCH
CUBEKPIMEMBER
CUBEMEMBER
CUBEMEMBERPROPERTY
CUBERANKEDMEMBER
CUBESET
CUBESETCOUNT
CUBEVALUE
CUMIPMT
CUMPRINC
DATE
DATEDIF
DATEVALUE
DAVERAGE
DAY
DAYS
DAYS360
DB
DBCS
DCOUNT
DCOUNTA
DDB
DEC2BIN
DEC2HEX
DEC2OCT
DECIMAL
DEGREES
DELTA
DEVSQ
DGET
DISC
DMAX
DMIN
DOLLAR
DOLLARDE
DOLLARFR
DPRODUCT
DSTDEV
DSTDEVP
DSUM
DURATION
DVAR
DVARP
EDATE
EFFECT
ENCODEURL
EOMONTH
ERF
ERF.PRECISE
ERFC
ERFC.PRECISE
ERROR.TYPE
EUROCONVERT
EVEN
EXACT
EXP
EXPON.DIST
EXPONDIST
F.DIST
F.DIST.RT
F.INV
F.INV.RT
F.TEST
FACT
FACTDOUBLE
FALSE
FDIST
FILTER
FILTERXML
FIND
FINDB
FINV
FISHER
FISHERINV
FIXED
FLOOR
FLOOR.MATH
FLOOR.PRECISE
FORECAST
FORECAST.ETS
FORECAST.ETS.CONFINT
FORECAST.ETS.SEASONALITY
FORECAST.ETS.STAT
FORECAST.LINEAR
FORMULATEXT
FREQUENCY
FTEST
FV
FVSCHEDULE
GAMMA
GAMMA.DIST
GAMMA.INV
GAMMADIST
GAMMAINV
GAMMALN
GAMMALN.PRECISE
GAUSS
GCD
GEOMEAN
GESTEP
GETPIVOTDATA
GROWTH
HARMEAN
HEX2BIN
HEX2DEC
HEX2OCT
HLOOKUP
HOUR
HYPERLINK
HYPGEOM.DIST
HYPGEOMDIST
IFERROR
IFNA
IFS
IMABS
IMAGINARY
IMARGUMENT
IMCONJUGATE
IMCOS
IMCOSH
IMCOT
IMCSC
IMCSCH
IMDIV
IMEXP
IMLN
IMLOG10
IMLOG2
IMPOWER
IMPRODUCT
IMREAL
IMSEC
IMSECH
IMSIN
IMSINH
IMSQRT
IMSUB
IMSUM
IMTAN
INFO
INT
INTERCEPT
INTRATE
IPMT
IRR
ISBLANK
ISERR
ISERROR
ISEVEN
ISFORMULA
ISLOGICAL
ISNA
ISNONTEXT
ISNUMBER
ISO.CEILING
ISODD
ISOWEEKNUM
ISPMT
ISREF
ISTEXT
JIS
KURT
LARGE
LCM
LEFT
LEFTB
LEN
LENB
LET
LINEST
LN
LOG
LOG10
LOGEST
LOGINV
LOGNORM.DIST
LOGNORM.INV
LOGNORMDIST
LOOKUP
LOWER
MATCH
MAX
MAXA
MAXIFS
MDETERM
MDURATION
MEDIAN
MID
MIDB
MIN
MINA
MINIFS
MINUTE
MINVERSE
MIRR
MMULT
MOD
MODE
MODE.MULT
MODE.SNGL
MONTH
MROUND
MULTINOMIAL
MUNIT
N
NA
NEGBINOM.DIST
NEGBINOMDIST
NETWORKDAYS
NETWORKDAYS.INTL
NOMINAL
NORM.DIST
NORM.INV
NORM.S.DIST
NORM.S.INV
NORMDIST
NORMINV
NORMSDIST
NORMSINV
NOT
NOW
NPER
NPV
NUMBERVALUE
OCT2BIN
OCT2DEC
OCT2HEX
ODD
ODDFPRICE
ODDFYIELD
ODDLPRICE
ODDLYIELD
OR
PDURATION
PEARSON
PERCENTILE
PERCENTILE.EXC
PERCENTILE.INC
PERCENTRANK
PERCENTRANK.EXC
PERCENTRANK.INC
PERMUT
PERMUTATIONA
PHI
PHONETIC
PI
PMT
POISSON
POISSON.DIST
POWER
PPMT
PRICE
PRICEDISC
PRICEMAT
PROB
PRODUCT
PROPER
PV
QUARTILE
QUARTILE.EXC
QUARTILE.INC
QUOTIENT
RADIANS
RAND
RANDARRAY
RANDBETWEEN
RANK
RANK.AVG
RANK.EQ
RATE
RECEIVED
REGISTER.ID
REPLACE
REPLACEB
REPT
RIGHT
RIGHTB
ROMAN
ROUND
ROUNDDOWN
ROUNDUP
ROW
ROWS
RRI
RSQ
RTD
SEARCH
SEARCHB
SEC
SECH
SECOND
SEQUENCE
SERIESSUM
SHEET
SHEETS
SIGN
SIN
SINH
SKEW
SKEW.P
SLN
SLOPE
SMALL
SORT
SORTBY
SQRT
SQRTPI
STANDARDIZE
STDEV
STDEV.P
STDEV.S
STDEVA
STDEVP
STDEVPA
STEYX
SUBSTITUTE
SUBTOTAL
SUM
SUMIF
SUMIFS
SUMPRODUCT
SUMSQ
SUMX2MY2
SUMX2PY2
SUMXMY2
SWITCH
SYD
T
T.DIST
T.DIST.2T
T.DIST.RT
T.INV
T.INV.2T
T.TEST
TAN
TANH
TBILLEQ
TBILLPRICE
TBILLYIELD
TDIST
TEXT
TEXTJOIN
TIME
TIMEVALUE
TINV
TODAY
TRANSPOSE
TREND
TRIM
TRIMMEAN
TRUE
TRUNC
TTEST
TYPE
UNICHAR
UNICODE
UNIQUE
UPPER
VALUE
VALUETOTEXT
VAR
VAR.P
VAR.S
VARA
VARP
VARPA
VDB
VLOOKUP
WEBSERVICE
WEEKDAY
WEEKNUM
WEIBULL
WEIBULL.DIST
WORKDAY
WORKDAY.INTL
XIRR
XLOOKUP
XMATCH
XNPV
XOR
YEAR
YEARFRAC
YIELD
YIELDDISC
YIELDMAT
Z.TEST
ZTEST
================================================
FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v172.txt
================================================
ABS
ACCRINT
ACCRINTM
ACOS
ACOSH
ACOT
ACOTH
ADDRESS
AGGREGATE
AMORDEGRC
AMORLINC
AND
ARABIC
AREAS
ARRAYTOTEXT
ASC
ASIN
ASINH
ATAN
ATAN2
ATANH
AVEDEV
AVERAGE
AVERAGEA
AVERAGEIF
AVERAGEIFS
BAHTTEXT
BASE
BESSELI
BESSELJ
BESSELK
BESSELY
BETA.DIST
BETA.INV
BETADIST
BETAINV
BIN2DEC
BIN2HEX
BIN2OCT
BINOM.DIST
BINOM.DIST.RANGE
BINOM.INV
BINOMDIST
BITAND
BITLSHIFT
BITOR
BITRSHIFT
BITXOR
CALL
CEILING
CEILING.MATH
CEILING.PRECISE
CELL
CHAR
CHIDIST
CHIINV
CHISQ.DIST
CHISQ.DIST.RT
CHISQ.INV
CHISQ.INV.RT
CHISQ.TEST
CHITEST
CLEAN
CODE
COLUMN
COLUMNS
COMBIN
COMBINA
COMPLEX
CONCAT
CONCATENATE
CONFIDENCE
CONFIDENCE.NORM
CONFIDENCE.T
CONVERT
CORREL
COS
COSH
COT
COTH
COUNT
COUNTA
COUNTBLANK
COUNTIF
COUNTIFS
COUPDAYBS
COUPDAYS
COUPDAYSNC
COUPNCD
COUPNUM
COUPPCD
COVAR
COVARIANCE.P
COVARIANCE.S
CRITBINOM
CSC
CSCH
CUBEKPIMEMBER
CUBEMEMBER
CUBEMEMBERPROPERTY
CUBERANKEDMEMBER
CUBESET
CUBESETCOUNT
CUBEVALUE
CUMIPMT
CUMPRINC
DATE
DATEDIF
DATEVALUE
DAVERAGE
DAY
DAYS
DAYS360
DB
DBCS
DCOUNT
DCOUNTA
DDB
DEC2BIN
DEC2HEX
DEC2OCT
DECIMAL
DEGREES
DELTA
DEVSQ
DGET
DISC
DMAX
DMIN
DOLLAR
DOLLARDE
DOLLARFR
DPRODUCT
DSTDEV
DSTDEVP
DSUM
DURATION
DVAR
DVARP
EDATE
EFFECT
ENCODEURL
EOMONTH
ERF
ERF.PRECISE
ERFC
ERFC.PRECISE
ERROR.TYPE
EUROCONVERT
EVEN
EXACT
EXP
EXPON.DIST
EXPONDIST
F.DIST
F.DIST.RT
F.INV
F.INV.RT
F.TEST
FACT
FACTDOUBLE
FALSE
FDIST
FILTER
FILTERXML
FIND
FINDB
FINV
FISHER
FISHERINV
FIXED
FLOOR
FLOOR.MATH
FLOOR.PRECISE
FORECAST
FORECAST.ETS
FORECAST.ETS.CONFINT
FORECAST.ETS.SEASONALITY
FORECAST.ETS.STAT
FORECAST.LINEAR
FORMULATEXT
FREQUENCY
FTEST
FV
FVSCHEDULE
GAMMA
GAMMA.DIST
GAMMA.INV
GAMMADIST
GAMMAINV
GAMMALN
GAMMALN.PRECISE
GAUSS
GCD
GEOMEAN
GESTEP
GETPIVOTDATA
GROWTH
HARMEAN
HEX2BIN
HEX2DEC
HEX2OCT
HLOOKUP
HOUR
HYPERLINK
HYPGEOM.DIST
HYPGEOMDIST
IFERROR
IFNA
IFS
IMABS
IMAGINARY
IMARGUMENT
IMCONJUGATE
IMCOS
IMCOSH
IMCOT
IMCSC
IMCSCH
IMDIV
IMEXP
IMLN
IMLOG10
IMLOG2
IMPOWER
IMPRODUCT
IMREAL
IMSEC
IMSECH
IMSIN
IMSINH
IMSQRT
IMSUB
IMSUM
IMTAN
INFO
INT
INTERCEPT
INTRATE
IPMT
IRR
ISBLANK
ISERR
ISERROR
ISEVEN
ISFORMULA
ISLOGICAL
ISNA
ISNONTEXT
ISNUMBER
ISO.CEILING
ISODD
ISOWEEKNUM
ISPMT
ISREF
ISTEXT
JIS
KURT
LARGE
LCM
LEFT
LEFTB
LEN
LENB
LET
LINEST
LN
LOG
LOG10
LOGEST
LOGINV
LOGNORM.DIST
LOGNORM.INV
LOGNORMDIST
LOOKUP
LOWER
MATCH
MAX
MAXA
MAXIFS
MDETERM
MDURATION
MEDIAN
MID
MIDB
MIN
MINA
MINIFS
MINUTE
MINVERSE
MIRR
MMULT
MOD
MODE
MODE.MULT
MODE.SNGL
MONTH
MROUND
MULTINOMIAL
MUNIT
N
NA
NEGBINOM.DIST
NEGBINOMDIST
NETWORKDAYS
NETWORKDAYS.INTL
NOMINAL
NORM.DIST
NORM.INV
NORM.S.DIST
NORM.S.INV
NORMDIST
NORMINV
NORMSDIST
NORMSINV
NOT
NOW
NPER
NPV
NUMBERVALUE
OCT2BIN
OCT2DEC
OCT2HEX
ODD
ODDFPRICE
ODDFYIELD
ODDLPRICE
ODDLYIELD
OR
PDURATION
PEARSON
PERCENTILE
PERCENTILE.EXC
PERCENTILE.INC
PERCENTRANK
PERCENTRANK.EXC
PERCENTRANK.INC
PERMUT
PERMUTATIONA
PHI
PHONETIC
PI
PMT
POISSON
POISSON.DIST
POWER
PPMT
PRICE
PRICEDISC
PRICEMAT
PROB
PRODUCT
PROPER
PV
QUARTILE
QUARTILE.EXC
QUARTILE.INC
QUOTIENT
RADIANS
RAND
RANDARRAY
RANDBETWEEN
RANK
RANK.AVG
RANK.EQ
RATE
RECEIVED
REGISTER.ID
REPLACE
REPLACEB
REPT
RIGHT
RIGHTB
ROMAN
ROUND
ROUNDDOWN
ROUNDUP
ROW
ROWS
RRI
RSQ
RTD
SEARCH
SEARCHB
SEC
SECH
SECOND
SEQUENCE
SERIESSUM
SHEET
SHEETS
SIGN
SIN
SINH
SKEW
SKEW.P
SLN
SLOPE
SMALL
SORT
SORTBY
SQRT
SQRTPI
STANDARDIZE
STDEV
STDEV.P
STDEV.S
STDEVA
STDEVP
STDEVPA
STEYX
SUBSTITUTE
SUBTOTAL
SUM
SUMIF
SUMIFS
SUMPRODUCT
SUMSQ
SUMX2MY2
SUMX2PY2
SUMXMY2
SWITCH
SYD
T
T.DIST
T.DIST.2T
T.DIST.RT
T.INV
T.INV.2T
T.TEST
TAN
TANH
TBILLEQ
TBILLPRICE
TBILLYIELD
TDIST
TEXT
TEXTJOIN
TIME
TIMEVALUE
TINV
TODAY
TRANSPOSE
TREND
TRIM
TRIMMEAN
TRUE
TRUNC
TTEST
TYPE
UNICHAR
UNICODE
UNIQUE
UPPER
VALUE
VALUETOTEXT
VAR
VAR.P
VAR.S
VARA
VARP
VARPA
VDB
VLOOKUP
WEBSERVICE
WEEKDAY
WEEKNUM
WEIBULL
WEIBULL.DIST
WORKDAY
WORKDAY.INTL
XIRR
XLOOKUP
XMATCH
XNPV
XOR
YEAR
YEARFRAC
YIELD
YIELDDISC
YIELDMAT
Z.TEST
ZTEST
================================================
FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v173.txt
================================================
ABS
ACCRINT
ACCRINTM
ACOS
ACOSH
ACOT
ACOTH
ADDRESS
AGGREGATE
AMORDEGRC
AMORLINC
AND
ARABIC
AREAS
ARRAYTOTEXT
ASC
ASIN
ASINH
ATAN
ATAN2
ATANH
AVEDEV
AVERAGE
AVERAGEA
AVERAGEIF
AVERAGEIFS
BAHTTEXT
BASE
BESSELI
BESSELJ
BESSELK
BESSELY
BETA.DIST
BETA.INV
BETADIST
BETAINV
BIN2DEC
BIN2HEX
BIN2OCT
BINOM.DIST
BINOM.DIST.RANGE
BINOM.INV
BINOMDIST
BITAND
BITLSHIFT
BITOR
BITRSHIFT
BITXOR
CALL
CEILING
CEILING.MATH
CEILING.PRECISE
CELL
CHAR
CHIDIST
CHIINV
CHISQ.DIST
CHISQ.DIST.RT
CHISQ.INV
CHISQ.INV.RT
CHISQ.TEST
CHITEST
CLEAN
CODE
COLUMN
COLUMNS
COMBIN
COMBINA
COMPLEX
CONCAT
CONCATENATE
CONFIDENCE
CONFIDENCE.NORM
CONFIDENCE.T
CONVERT
CORREL
COS
COSH
COT
COTH
COUNT
COUNTA
COUNTBLANK
COUNTIF
COUNTIFS
COUPDAYBS
COUPDAYS
COUPDAYSNC
COUPNCD
COUPNUM
COUPPCD
COVAR
COVARIANCE.P
COVARIANCE.S
CRITBINOM
CSC
CSCH
CUBEKPIMEMBER
CUBEMEMBER
CUBEMEMBERPROPERTY
CUBERANKEDMEMBER
CUBESET
CUBESETCOUNT
CUBEVALUE
CUMIPMT
CUMPRINC
DATE
DATEDIF
DATEVALUE
DAVERAGE
DAY
DAYS
DAYS360
DB
DBCS
DCOUNT
DCOUNTA
DDB
DEC2BIN
DEC2HEX
DEC2OCT
DECIMAL
DEGREES
DELTA
DEVSQ
DGET
DISC
DMAX
DMIN
DOLLAR
DOLLARDE
DOLLARFR
DPRODUCT
DSTDEV
DSTDEVP
DSUM
DURATION
DVAR
DVARP
EDATE
EFFECT
ENCODEURL
EOMONTH
ERF
ERF.PRECISE
ERFC
ERFC.PRECISE
ERROR.TYPE
EUROCONVERT
EVEN
EXACT
EXP
EXPON.DIST
EXPONDIST
F.DIST
F.DIST.RT
F.INV
F.INV.RT
F.TEST
FACT
FACTDOUBLE
FALSE
FDIST
FILTER
FILTERXML
FIND
FINDB
FINV
FISHER
FISHERINV
FIXED
FLOOR
FLOOR.MATH
FLOOR.PRECISE
FORECAST
FORECAST.ETS
FORECAST.ETS.CONFINT
FORECAST.ETS.SEASONALITY
FORECAST.ETS.STAT
FORECAST.LINEAR
FORMULATEXT
FREQUENCY
FTEST
FV
FVSCHEDULE
GAMMA
GAMMA.DIST
GAMMA.INV
GAMMADIST
GAMMAINV
GAMMALN
GAMMALN.PRECISE
GAUSS
GCD
GEOMEAN
GESTEP
GETPIVOTDATA
GROWTH
HARMEAN
HEX2BIN
HEX2DEC
HEX2OCT
HLOOKUP
HOUR
HYPERLINK
HYPGEOM.DIST
HYPGEOMDIST
IFERROR
IFNA
IFS
IMABS
IMAGINARY
IMARGUMENT
IMCONJUGATE
IMCOS
IMCOSH
IMCOT
IMCSC
IMCSCH
IMDIV
IMEXP
IMLN
IMLOG10
IMLOG2
IMPOWER
IMPRODUCT
IMREAL
IMSEC
IMSECH
IMSIN
IMSINH
IMSQRT
IMSUB
IMSUM
IMTAN
INFO
INT
INTERCEPT
INTRATE
IPMT
IRR
ISBLANK
ISERR
ISERROR
ISEVEN
ISFORMULA
ISLOGICAL
ISNA
ISNONTEXT
ISNUMBER
ISO.CEILING
ISODD
ISOWEEKNUM
ISPMT
ISREF
ISTEXT
JIS
KURT
LARGE
LCM
LEFT
LEFTB
LEN
LENB
LET
LINEST
LN
LOG
LOG10
LOGEST
LOGINV
LOGNORM.DIST
LOGNORM.INV
LOGNORMDIST
LOOKUP
LOWER
MATCH
MAX
MAXA
MAXIFS
MDETERM
MDURATION
MEDIAN
MID
MIDB
MIN
MINA
MINIFS
MINUTE
MINVERSE
MIRR
MMULT
MOD
MODE
MODE.MULT
MODE.SNGL
MONTH
MROUND
MULTINOMIAL
MUNIT
N
NA
NEGBINOM.DIST
NEGBINOMDIST
NETWORKDAYS
NETWORKDAYS.INTL
NOMINAL
NORM.DIST
NORM.INV
NORM.S.DIST
NORM.S.INV
NORMDIST
NORMINV
NORMSDIST
NORMSINV
NOT
NOW
NPER
NPV
NUMBERVALUE
OCT2BIN
OCT2DEC
OCT2HEX
ODD
ODDFPRICE
ODDFYIELD
ODDLPRICE
ODDLYIELD
OR
PDURATION
PEARSON
PERCENTILE
PERCENTILE.EXC
PERCENTILE.INC
PERCENTRANK
PERCENTRANK.EXC
PERCENTRANK.INC
PERMUT
PERMUTATIONA
PHI
PHONETIC
PI
PMT
POISSON
POISSON.DIST
POWER
PPMT
PRICE
PRICEDISC
PRICEMAT
PROB
PRODUCT
PROPER
PV
QUARTILE
QUARTILE.EXC
QUARTILE.INC
QUOTIENT
RADIANS
RAND
RANDARRAY
RANDBETWEEN
RANK
RANK.AVG
RANK.EQ
RATE
RECEIVED
REGISTER.ID
REPLACE
REPLACEB
REPT
RIGHT
RIGHTB
ROMAN
ROUND
ROUNDDOWN
ROUNDUP
ROW
ROWS
RRI
RSQ
RTD
SEARCH
SEARCHB
SEC
SECH
SECOND
SEQUENCE
SERIESSUM
SHEET
SHEETS
SIGN
SIN
SINH
SKEW
SKEW.P
SLN
SLOPE
SMALL
SORT
SORTBY
SQRT
SQRTPI
STANDARDIZE
STDEV
STDEV.P
STDEV.S
STDEVA
STDEVP
STDEVPA
STEYX
SUBSTITUTE
SUBTOTAL
SUM
SUMIF
SUMIFS
SUMPRODUCT
SUMSQ
SUMX2MY2
SUMX2PY2
SUMXMY2
SWITCH
SYD
T
T.DIST
T.DIST.2T
T.DIST.RT
T.INV
T.INV.2T
T.TEST
TAN
TANH
TBILLEQ
TBILLPRICE
TBILLYIELD
TDIST
TEXT
TEXTJOIN
TIME
TIMEVALUE
TINV
TODAY
TRANSPOSE
TREND
TRIM
TRIMMEAN
TRUE
TRUNC
TTEST
TYPE
UNICHAR
UNICODE
UNIQUE
UPPER
VALUE
VALUETOTEXT
VAR
VAR.P
VAR.S
VARA
VARP
VARPA
VDB
VLOOKUP
WEBSERVICE
WEEKDAY
WEEKNUM
WEIBULL
WEIBULL.DIST
WORKDAY
WORKDAY.INTL
XIRR
XLOOKUP
XMATCH
XNPV
XOR
YEAR
YEARFRAC
YIELD
YIELDDISC
YIELDMAT
Z.TEST
ZTEST
================================================
FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v174.txt
================================================
ABS
ACCRINT
ACCRINTM
ACOS
ACOSH
ACOT
ACOTH
ADDRESS
AGGREGATE
AMORDEGRC
AMORLINC
AND
ARABIC
AREAS
ARRAYTOTEXT
ASC
ASIN
ASINH
ATAN
ATAN2
ATANH
AVEDEV
AVERAGE
AVERAGEA
AVERAGEIF
AVERAGEIFS
BAHTTEXT
BASE
BESSELI
BESSELJ
BESSELK
BESSELY
BETA.DIST
BETA.INV
BETADIST
BETAINV
BIN2DEC
BIN2HEX
BIN2OCT
BINOM.DIST
BINOM.DIST.RANGE
BINOM.INV
BINOMDIST
BITAND
BITLSHIFT
BITOR
BITRSHIFT
BITXOR
CALL
CEILING
CEILING.MATH
CEILING.PRECISE
CELL
CHAR
CHIDIST
CHIINV
CHISQ.DIST
CHISQ.DIST.RT
CHISQ.INV
CHISQ.INV.RT
CHISQ.TEST
CHITEST
CLEAN
CODE
COLUMN
COLUMNS
COMBIN
COMBINA
COMPLEX
CONCAT
CONCATENATE
CONFIDENCE
CONFIDENCE.NORM
CONFIDENCE.T
CONVERT
CORREL
COS
COSH
COT
COTH
COUNT
COUNTA
COUNTBLANK
COUNTIF
COUNTIFS
COUPDAYBS
COUPDAYS
COUPDAYSNC
COUPNCD
COUPNUM
COUPPCD
COVAR
COVARIANCE.P
COVARIANCE.S
CRITBINOM
CSC
CSCH
CUBEKPIMEMBER
CUBEMEMBER
CUBEMEMBERPROPERTY
CUBERANKEDMEMBER
CUBESET
CUBESETCOUNT
CUBEVALUE
CUMIPMT
CUMPRINC
DATE
DATEDIF
DATEVALUE
DAVERAGE
DAY
DAYS
DAYS360
DB
DBCS
DCOUNT
DCOUNTA
DDB
DEC2BIN
DEC2HEX
DEC2OCT
DECIMAL
DEGREES
DELTA
DEVSQ
DGET
DISC
DMAX
DMIN
DOLLAR
DOLLARDE
DOLLARFR
DPRODUCT
DSTDEV
DSTDEVP
DSUM
DURATION
DVAR
DVARP
EDATE
EFFECT
ENCODEURL
EOMONTH
ERF
ERF.PRECISE
ERFC
ERFC.PRECISE
ERROR.TYPE
EUROCONVERT
EVEN
EXACT
EXP
EXPON.DIST
EXPONDIST
F.DIST
F.DIST.RT
F.INV
F.INV.RT
F.TEST
FACT
FACTDOUBLE
FALSE
FDIST
FILTER
FILTERXML
FIND
FINDB
FINV
FISHER
FISHERINV
FIXED
FLOOR
FLOOR.MATH
FLOOR.PRECISE
FORECAST
FORECAST.ETS
FORECAST.ETS.CONFINT
FORECAST.ETS.SEASONALITY
FORECAST.ETS.STAT
FORECAST.LINEAR
FORMULATEXT
FREQUENCY
FTEST
FV
FVSCHEDULE
GAMMA
GAMMA.DIST
GAMMA.INV
GAMMADIST
GAMMAINV
GAMMALN
GAMMALN.PRECISE
GAUSS
GCD
GEOMEAN
GESTEP
GETPIVOTDATA
GROWTH
HARMEAN
HEX2BIN
HEX2DEC
HEX2OCT
HLOOKUP
HOUR
HYPERLINK
HYPGEOM.DIST
HYPGEOMDIST
IFERROR
IFNA
IFS
IMABS
IMAGINARY
IMARGUMENT
IMCONJUGATE
IMCOS
IMCOSH
IMCOT
IMCSC
IMCSCH
IMDIV
IMEXP
IMLN
IMLOG10
IMLOG2
IMPOWER
IMPRODUCT
IMREAL
IMSEC
IMSECH
IMSIN
IMSINH
IMSQRT
IMSUB
IMSUM
IMTAN
INFO
INT
INTERCEPT
INTRATE
IPMT
IRR
ISBLANK
ISERR
ISERROR
ISEVEN
ISFORMULA
ISLOGICAL
ISNA
ISNONTEXT
ISNUMBER
ISO.CEILING
ISODD
ISOWEEKNUM
ISPMT
ISREF
ISTEXT
JIS
KURT
LARGE
LCM
LEFT
LEFTB
LEN
LENB
LET
LINEST
LN
LOG
LOG10
LOGEST
LOGINV
LOGNORM.DIST
LOGNORM.INV
LOGNORMDIST
LOOKUP
LOWER
MATCH
MAX
MAXA
MAXIFS
MDETERM
MDURATION
MEDIAN
MID
MIDB
MIN
MINA
MINIFS
MINUTE
MINVERSE
MIRR
MMULT
MOD
MODE
MODE.MULT
MODE.SNGL
MONTH
MROUND
MULTINOMIAL
MUNIT
N
NA
NEGBINOM.DIST
NEGBINOMDIST
NETWORKDAYS
NETWORKDAYS.INTL
NOMINAL
NORM.DIST
NORM.INV
NORM.S.DIST
NORM.S.INV
NORMDIST
NORMINV
NORMSDIST
NORMSINV
NOT
NOW
NPER
NPV
NUMBERVALUE
OCT2BIN
OCT2DEC
OCT2HEX
ODD
ODDFPRICE
ODDFYIELD
ODDLPRICE
ODDLYIELD
OR
PDURATION
PEARSON
PERCENTILE
PERCENTILE.EXC
PERCENTILE.INC
PERCENTRANK
PERCENTRANK.EXC
PERCENTRANK.INC
PERMUT
PERMUTATIONA
PHI
PHONETIC
PI
PMT
POISSON
POISSON.DIST
POWER
PPMT
PRICE
PRICEDISC
PRICEMAT
PROB
PRODUCT
PROPER
PV
QUARTILE
QUARTILE.EXC
QUARTILE.INC
QUOTIENT
RADIANS
RAND
RANDARRAY
RANDBETWEEN
RANK
RANK.AVG
RANK.EQ
RATE
RECEIVED
REGISTER.ID
REPLACE
REPLACEB
REPT
RIGHT
RIGHTB
ROMAN
ROUND
ROUNDDOWN
ROUNDUP
ROW
ROWS
RRI
RSQ
RTD
SEARCH
SEARCHB
SEC
SECH
SECOND
SEQUENCE
SERIESSUM
SHEET
SHEETS
SIGN
SIN
SINH
SKEW
SKEW.P
SLN
SLOPE
SMALL
SORT
SORTBY
SQRT
SQRTPI
STANDARDIZE
STDEV
STDEV.P
STDEV.S
STDEVA
STDEVP
STDEVPA
STEYX
SUBSTITUTE
SUBTOTAL
SUM
SUMIF
SUMIFS
SUMPRODUCT
SUMSQ
SUMX2MY2
SUMX2PY2
SUMXMY2
SWITCH
SYD
T
T.DIST
T.DIST.2T
T.DIST.RT
T.INV
T.INV.2T
T.TEST
TAN
TANH
TBILLEQ
TBILLPRICE
TBILLYIELD
TDIST
TEXT
TEXTJOIN
TIME
TIMEVALUE
TINV
TODAY
TRANSPOSE
TREND
TRIM
TRIMMEAN
TRUE
TRUNC
TTEST
TYPE
UNICHAR
UNICODE
UNIQUE
UPPER
VALUE
VALUETOTEXT
VAR
VAR.P
VAR.S
VARA
VARP
VARPA
VDB
VLOOKUP
WEBSERVICE
WEEKDAY
WEEKNUM
WEIBULL
WEIBULL.DIST
WORKDAY
WORKDAY.INTL
XIRR
XLOOKUP
XMATCH
XNPV
XOR
YEAR
YEARFRAC
YIELD
YIELDDISC
YIELDMAT
Z.TEST
ZTEST
================================================
FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v175.txt
================================================
ABS
ACCRINT
ACCRINTM
ACOS
ACOSH
ACOT
ACOTH
ADDRESS
AGGREGATE
AMORDEGRC
AMORLINC
AND
ARABIC
AREAS
ARRAYTOTEXT
ASC
ASIN
ASINH
ATAN
ATAN2
ATANH
AVEDEV
AVERAGE
AVERAGEA
AVERAGEIF
AVERAGEIFS
BAHTTEXT
BASE
BESSELI
BESSELJ
BESSELK
BESSELY
BETA.DIST
BETA.INV
BETADIST
BETAINV
BIN2DEC
BIN2HEX
BIN2OCT
BINOM.DIST
BINOM.DIST.RANGE
BINOM.INV
BINOMDIST
BITAND
BITLSHIFT
BITOR
BITRSHIFT
BITXOR
CALL
CEILING
CEILING.MATH
CEILING.PRECISE
CELL
CHAR
CHIDIST
CHIINV
CHISQ.DIST
CHISQ.DIST.RT
CHISQ.INV
CHISQ.INV.RT
CHISQ.TEST
CHITEST
CLEAN
CODE
COLUMN
COLUMNS
COMBIN
COMBINA
COMPLEX
CONCAT
CONCATENATE
CONFIDENCE
CONFIDENCE.NORM
CONFIDENCE.T
CONVERT
CORREL
COS
COSH
COT
COTH
COUNT
COUNTA
COUNTBLANK
COUNTIF
COUNTIFS
COUPDAYBS
COUPDAYS
COUPDAYSNC
COUPNCD
COUPNUM
COUPPCD
COVAR
COVARIANCE.P
COVARIANCE.S
CRITBINOM
CSC
CSCH
CUBEKPIMEMBER
CUBEMEMBER
CUBEMEMBERPROPERTY
CUBERANKEDMEMBER
CUBESET
CUBESETCOUNT
CUBEVALUE
CUMIPMT
CUMPRINC
DATE
DATEDIF
DATEVALUE
DAVERAGE
DAY
DAYS
DAYS360
DB
DBCS
DCOUNT
DCOUNTA
DDB
DEC2BIN
DEC2HEX
DEC2OCT
DECIMAL
DEGREES
DELTA
DEVSQ
DGET
DISC
DMAX
DMIN
DOLLAR
DOLLARDE
DOLLARFR
DPRODUCT
DSTDEV
DSTDEVP
DSUM
DURATION
DVAR
DVARP
EDATE
EFFECT
ENCODEURL
EOMONTH
ERF
ERF.PRECISE
ERFC
ERFC.PRECISE
ERROR.TYPE
EUROCONVERT
EVEN
EXACT
EXP
EXPON.DIST
EXPONDIST
F.DIST
F.DIST.RT
F.INV
F.INV.RT
F.TEST
FACT
FACTDOUBLE
FALSE
FDIST
FILTER
FILTERXML
FIND
FINDB
FINV
FISHER
FISHERINV
FIXED
FLOOR
FLOOR.MATH
FLOOR.PRECISE
FORECAST
FORECAST.ETS
FORECAST.ETS.CONFINT
FORECAST.ETS.SEASONALITY
FORECAST.ETS.STAT
FORECAST.LINEAR
FORMULATEXT
FREQUENCY
FTEST
FV
FVSCHEDULE
GAMMA
GAMMA.DIST
GAMMA.INV
GAMMADIST
GAMMAINV
GAMMALN
GAMMALN.PRECISE
GAUSS
GCD
GEOMEAN
GESTEP
GETPIVOTDATA
GROWTH
HARMEAN
HEX2BIN
HEX2DEC
HEX2OCT
HLOOKUP
HOUR
HYPERLINK
HYPGEOM.DIST
HYPGEOMDIST
IFERROR
IFNA
IFS
IMABS
IMAGINARY
IMARGUMENT
IMCONJUGATE
IMCOS
IMCOSH
IMCOT
IMCSC
IMCSCH
IMDIV
IMEXP
IMLN
IMLOG10
IMLOG2
IMPOWER
IMPRODUCT
IMREAL
IMSEC
IMSECH
IMSIN
IMSINH
IMSQRT
IMSUB
IMSUM
IMTAN
INFO
INT
INTERCEPT
INTRATE
IPMT
IRR
ISBLANK
ISERR
ISERROR
ISEVEN
ISFORMULA
ISLOGICAL
ISNA
ISNONTEXT
ISNUMBER
ISO.CEILING
ISODD
ISOWEEKNUM
ISPMT
ISREF
ISTEXT
JIS
KURT
LARGE
LCM
LEFT
LEFTB
LEN
LENB
LET
LINEST
LN
LOG
LOG10
LOGEST
LOGINV
LOGNORM.DIST
LOGNORM.INV
LOGNORMDIST
LOOKUP
LOWER
MATCH
MAX
MAXA
MAXIFS
MDETERM
MDURATION
MEDIAN
MID
MIDB
MIN
MINA
MINIFS
MINUTE
MINVERSE
MIRR
MMULT
MOD
MODE
MODE.MULT
MODE.SNGL
MONTH
MROUND
MULTINOMIAL
MUNIT
N
NA
NEGBINOM.DIST
NEGBINOMDIST
NETWORKDAYS
NETWORKDAYS.INTL
NOMINAL
NORM.DIST
NORM.INV
NORM.S.DIST
NORM.S.INV
NORMDIST
NORMINV
NORMSDIST
NORMSINV
NOT
NOW
NPER
NPV
NUMBERVALUE
OCT2BIN
OCT2DEC
OCT2HEX
ODD
ODDFPRICE
ODDFYIELD
ODDLPRICE
ODDLYIELD
OR
PDURATION
PEARSON
PERCENTILE
PERCENTILE.EXC
PERCENTILE.INC
PERCENTRANK
PERCENTRANK.EXC
PERCENTRANK.INC
PERMUT
PERMUTATIONA
PHI
PHONETIC
PI
PMT
POISSON
POISSON.DIST
POWER
PPMT
PRICE
PRICEDISC
PRICEMAT
PROB
PRODUCT
PROPER
PV
QUARTILE
QUARTILE.EXC
QUARTILE.INC
QUOTIENT
RADIANS
RAND
RANDARRAY
RANDBETWEEN
RANK
RANK.AVG
RANK.EQ
RATE
RECEIVED
REGISTER.ID
REPLACE
REPLACEB
REPT
RIGHT
RIGHTB
ROMAN
ROUND
ROUNDDOWN
ROUNDUP
ROW
ROWS
RRI
RSQ
RTD
SEARCH
SEARCHB
SEC
SECH
SECOND
SEQUENCE
SERIESSUM
SHEET
SHEETS
SIGN
SIN
SINH
SKEW
SKEW.P
SLN
SLOPE
SMALL
SORT
SORTBY
SQRT
SQRTPI
STANDARDIZE
STDEV
STDEV.P
STDEV.S
STDEVA
STDEVP
STDEVPA
STEYX
SUBSTITUTE
SUBTOTAL
SUM
SUMIF
SUMIFS
SUMPRODUCT
SUMSQ
SUMX2MY2
SUMX2PY2
SUMXMY2
SWITCH
SYD
T
T.DIST
T.DIST.2T
T.DIST.RT
T.INV
T.INV.2T
T.TEST
TAN
TANH
TBILLEQ
TBILLPRICE
TBILLYIELD
TDIST
TEXT
TEXTJOIN
TIME
TIMEVALUE
TINV
TODAY
TRANSPOSE
TREND
TRIM
TRIMMEAN
TRUE
TRUNC
TTEST
TYPE
UNICHAR
UNICODE
UNIQUE
UPPER
VALUE
VALUETOTEXT
VAR
VAR.P
VAR.S
VARA
VARP
VARPA
VDB
VLOOKUP
WEBSERVICE
WEEKDAY
WEEKNUM
WEIBULL
WEIBULL.DIST
WORKDAY
WORKDAY.INTL
XIRR
XLOOKUP
XMATCH
XNPV
XOR
YEAR
YEARFRAC
YIELD
YIELDDISC
YIELDMAT
Z.TEST
ZTEST
================================================
FILE: app/XLParser.Web/Web.Debug.config
================================================
================================================
FILE: app/XLParser.Web/Web.Release.config
================================================
================================================
FILE: app/XLParser.Web/Web.config
================================================
================================================
FILE: app/XLParser.Web/XLParser Web.csproj
================================================
Debug
AnyCPU
2.0
{A24BD58E-5958-4990-8836-958C52A6AA02}
{349c5851-65df-11da-9384-00065b846f21};{fae04ec0-301f-11d3-bf4b-00c04f79efbc}
Library
Properties
XLParser.Web
XLParser Web
v4.7.2
true
true
full
false
bin\
DEBUG;TRACE
prompt
4
pdbonly
true
bin\
TRACE
prompt
4
..\packages\Irony.1.2.0\lib\netstandard2.0\Irony.dll
..\packages\Newtonsoft.Json.12.0.3\lib\net45\Newtonsoft.Json.dll
Web.config
Web.config
Default.aspx
ASPXCodeBehind
Default.aspx
True
True
Resources.resx
ResXFileCodeGenerator
Resources.Designer.cs
10.0
$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)
True
True
39901
/
http://localhost:39901/
False
False
False
================================================
FILE: app/XLParser.Web/XLParserVersions/v100/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;
namespace XLParser.Web.XLParserVersions.v100
{
[Language("Excel Formulas", "1.0.0", "Grammar for Excel Formulas")]
public class ExcelFormulaGrammar : Grammar
{
/// <summary>
/// Builds the Excel formula grammar: declares the terminals (tokens), the non-terminals,
/// the production rules and finally the operator precedence table.
/// </summary>
// NOTE(review): the 'false' passed to the Irony Grammar base constructor is presumably its
// case-sensitivity flag (i.e. a case-insensitive grammar) - confirm against the Irony version in use.
public ExcelFormulaGrammar() : base(false)
{
#region 1-Terminals
#region Symbols and operators
var comma = ToTerm(",");
var colon = ToTerm(":");
var semicolon = ToTerm(";");
var OpenParen = ToTerm("(");
var CloseParen = ToTerm(")");
var CloseSquareParen = ToTerm("]");
var OpenSquareParen = ToTerm("[");
var exclamationMark = ToTerm("!");
var CloseCurlyParen = ToTerm("}");
var OpenCurlyParen = ToTerm("{");
var mulop = ToTerm("*");
var plusop = ToTerm("+");
var divop = ToTerm("/");
var minop = ToTerm("-");
var concatop = ToTerm("&");
var expop = ToTerm("^");
// The intersect operator is a single space, which cannot be parsed normally, so we need an ImpliedSymbolTerminal
// Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
var intersectop = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);
var percentop = ToTerm("%");
var gtop = ToTerm(">");
var eqop = ToTerm("=");
var ltop = ToTerm("<");
var neqop = ToTerm("<>");
var gteop = ToTerm(">=");
var lteop = ToTerm("<=");
#endregion
#region Literals
var BoolToken = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE");
BoolToken.Priority = TerminalPriority.Bool;
var NumberToken = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None);
NumberToken.DefaultIntTypes = new TypeCode[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt };
var TextToken = new StringLiteral(GrammarNames.TokenText, "\"", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak);
var ErrorToken = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A");
// #REF! is kept apart from the other error values because it is used as a reference item (see ReferenceItem below)
var RefErrorToken = ToTerm("#REF!", GrammarNames.TokenRefError);
#endregion
#region Functions
// All function tokens include the opening parenthesis, so e.g. "SUM(" is lexed as one token
var UDFToken = new RegexBasedTerminal(GrammarNames.TokenUDF, "(_xll\\.)?[a-zA-Z0-9_.]+\\(");
UDFToken.Priority = TerminalPriority.UDF;
var ExcelRefFunctionToken = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(");
ExcelRefFunctionToken.Priority = TerminalPriority.ExcelRefFunction;
// NOTE(review): the names are joined into the pattern without escaping, so the '.' in names such as
// "ERROR.TYPE" acts as a regex wildcard rather than a literal dot
var ExcelFunction = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + String.Join("|", excelFunctionList) +")\\(");
ExcelFunction.Priority = TerminalPriority.ExcelFunction;
// Using this instead of Empty allows for more accurate parse trees
var EmptyArgumentToken = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);
#endregion
#region References and names
var VRangeToken = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?[A-Z]{1,4}:[$]?[A-Z]{1,4}");
var HRangeToken = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*");
const string CellTokenRegex = "[$]?[A-Z]{1,4}[$]?[1-9][0-9]*";
var CellToken = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex);
CellToken.Priority = TerminalPriority.CellToken;
// Verbatim string: the "\\" inside the character class is a regex escape for one literal backslash,
// so a name may start with a letter, a backslash or an underscore
const string NamedRangeRegex = @"[A-Za-z\\_][\w\.]*";
var NamedRangeToken = new RegexBasedTerminal(GrammarNames.TokenNamedRange, NamedRangeRegex);
NamedRangeToken.Priority = TerminalPriority.NamedRange;
// To prevent e.g. "A1A1" being parsed as 2 celltokens
var NamedRangeCombinationToken = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, "(TRUE|FALSE|" + CellTokenRegex + ")" + NamedRangeRegex);
NamedRangeCombinationToken.Priority = TerminalPriority.NamedRangeCombination;
// Characters allowed between single quotes; a quote itself has to be doubled ('') in the patterns below
const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
// A sheet token is an unquoted or single-quoted sheet name followed by the '!' separator
const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
var SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, sheetRegEx);
SheetToken.Priority = TerminalPriority.SheetToken;
const string firstSheetName = "[a-zA-Z0-9]+:";
var MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, firstSheetName + sheetRegEx);
MultipleSheetsToken.Priority = TerminalPriority.MultipleSheetsToken;
var FileToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, "[0-9]+");
// NOTE(review): the second ';' below is a harmless empty statement
FileToken.Priority = TerminalPriority.FileToken;;
var QuotedFileSheetToken = new RegexBasedTerminal(GrammarNames.TokenFileSheetQuoted, @"'\[\d+\]([" + singleQuotedContent + @"]|'')+'!");
QuotedFileSheetToken.Priority = TerminalPriority.QuotedFileToken;
var ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+");
ReservedNameToken.Priority = TerminalPriority.ReservedName;
var DDEToken = new RegexBasedTerminal(GrammarNames.TokenDDE, @"'([\[\]" + singleQuotedContent + @"]|'')+'");
#endregion
#region Punctuation
MarkPunctuation(exclamationMark);
MarkPunctuation(OpenParen, CloseParen);
MarkPunctuation(OpenSquareParen, CloseSquareParen);
MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
#endregion
#endregion
#region 2-NonTerminals
// Most nonterminals are first defined here, so they can be used anywhere in the rules
// Otherwise you can only use nonterminals that have been defined previously
var Argument = new NonTerminal(GrammarNames.Argument);
var Arguments = new NonTerminal(GrammarNames.Arguments);
var ArrayColumns = new NonTerminal(GrammarNames.ArrayColumns);
var ArrayConstant = new NonTerminal(GrammarNames.ArrayConstant);
var ArrayFormula = new NonTerminal(GrammarNames.ArrayFormula);
var ArrayRows = new NonTerminal(GrammarNames.ArrayRows);
var Bool = new NonTerminal(GrammarNames.Bool);
var Cell = new NonTerminal(GrammarNames.Cell);
var Constant = new NonTerminal(GrammarNames.Constant);
var ConstantArray = new NonTerminal(GrammarNames.ConstantArray);
var DynamicDataExchange = new NonTerminal(GrammarNames.DynamicDataExchange);
var EmptyArgument = new NonTerminal(GrammarNames.EmptyArgument);
var Error = new NonTerminal(GrammarNames.Error);
var File = new NonTerminal(GrammarNames.File);
var Formula = new NonTerminal(GrammarNames.Formula);
var FormulaWithEq = new NonTerminal(GrammarNames.FormulaWithEq);
var Function = new NonTerminal(GrammarNames.Function);
var FunctionCall = new NonTerminal(GrammarNames.FunctionCall);
var HRange = new NonTerminal(GrammarNames.HorizontalRange);
var InfixOp = new NonTerminal(GrammarNames.TransientInfixOp);
var MultipleSheets = new NonTerminal(GrammarNames.MultipleSheets);
var NamedRange = new NonTerminal(GrammarNames.NamedRange);
var Number = new NonTerminal(GrammarNames.Number);
var PostfixOp = new NonTerminal(GrammarNames.TransientPostfixOp);
var Prefix = new NonTerminal(GrammarNames.Prefix);
var PrefixOp = new NonTerminal(GrammarNames.TransientPrefixOp);
var QuotedFileSheet = new NonTerminal(GrammarNames.QuotedFileSheet);
var Reference = new NonTerminal(GrammarNames.Reference);
var ReferenceFunction = new NonTerminal(GrammarNames.ReferenceFunction);
var ReferenceItem = new NonTerminal(GrammarNames.TransientReferenceItem);
var RefError = new NonTerminal(GrammarNames.RefError);
var ReservedName = new NonTerminal(GrammarNames.ReservedName);
var Sheet = new NonTerminal(GrammarNames.Sheet);
var Start = new NonTerminal(GrammarNames.TransientStart);
var Text = new NonTerminal(GrammarNames.Text);
var Union = new NonTerminal(GrammarNames.Union);
var VRange = new NonTerminal(GrammarNames.VerticalRange);
#endregion
#region 3-Rules
#region Base rules
Root = Start;
// A formula may be given with a leading '=', without it, or as an array formula "{=...}"
Start.Rule = FormulaWithEq
| Formula
| ArrayFormula
;
MarkTransient(Start);
ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;
FormulaWithEq.Rule = eqop + Formula;
Formula.Rule =
Reference
| Constant
| FunctionCall
| ConstantArray
| OpenParen + Formula + CloseParen
| ReservedName
;
//MarkTransient(Formula);
ReservedName.Rule = ReservedNameToken;
Constant.Rule = Number
| Text
| Bool
| Error
;
Text.Rule = TextToken;
Number.Rule = NumberToken;
Bool.Rule = BoolToken;
Error.Rule = ErrorToken;
RefError.Rule = RefErrorToken;
#endregion
#region Functions
// Operators are modelled as function calls too: prefix, postfix and infix operations all produce a FunctionCall node
FunctionCall.Rule =
Function + Arguments + CloseParen
| PrefixOp + Formula
| Formula + PostfixOp
| Formula + InfixOp + Formula
;
Function.Rule = ExcelFunction | UDFToken;
Arguments.Rule = MakeStarRule(Arguments, comma, Argument);
//Arguments.Rule = Argument | Argument + comma + Arguments;
EmptyArgument.Rule = EmptyArgumentToken;
Argument.Rule = Formula | EmptyArgument;
//MarkTransient(Argument);
PrefixOp.Rule =
ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop;
MarkTransient(PrefixOp);
InfixOp.Rule =
expop
| mulop
| divop
| plusop
| minop
| concatop
| gtop
| eqop
| ltop
| neqop
| gteop
| lteop;
MarkTransient(InfixOp);
//PostfixOp.Rule = ImplyPrecedenceHere(Precedence.UnaryPostFix) + percentop;
// ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
PostfixOp.Rule = PreferShiftHere() + percentop;
MarkTransient(PostfixOp);
#endregion
#region References
Reference.Rule = ReferenceItem
| Reference + colon + Reference
| Reference + intersectop + Reference
| OpenParen + Union + CloseParen
| OpenParen + Reference + PreferShiftHere() + CloseParen
| Prefix + ReferenceItem
| Prefix + UDFToken + Arguments + CloseParen
| DynamicDataExchange
;
Union.Rule = MakePlusRule(Union, comma, Reference);
ReferenceItem.Rule =
Cell
| NamedRange
| ReferenceFunction
| VRange
| HRange
| RefError
;
MarkTransient(ReferenceItem);
VRange.Rule = VRangeToken;
HRange.Rule = HRangeToken;
ReferenceFunction.Rule =
ExcelRefFunctionToken + Arguments + CloseParen;
QuotedFileSheet.Rule = QuotedFileSheetToken;
Sheet.Rule = SheetToken;
MultipleSheets.Rule = MultipleSheetsToken;
Cell.Rule = CellToken;
File.Rule = OpenSquareParen + FileToken + CloseSquareParen;
DynamicDataExchange.Rule = File + exclamationMark + DDEToken;
NamedRange.Rule = NamedRangeToken | NamedRangeCombinationToken;
// A prefix is whatever precedes the referenced item: a sheet, a file, or a combination of both
Prefix.Rule =
Sheet
| File + Sheet
| File + exclamationMark
| QuotedFileSheet
| MultipleSheets
| File + MultipleSheets;
#endregion
#region Arrays
// Array constants: rows of comma-separated values, the rows themselves separated by semicolons
ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;
ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);
ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
#endregion
#endregion
#region 5-Operator Precedence
// Some of these operators are neutral associative instead of left associative,
// but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
// structure of the parse tree, we like consistency.
RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop);
RegisterOperators(Precedence.Reference, Associativity.Left, intersectop, colon);
RegisterOperators(Precedence.Reference, Associativity.Left, comma);
//RegisterOperators(Precedence.ParameterSeparator, comma);
#endregion
}
#region Precedence and Priority constants
// Operator precedence levels, passed to RegisterOperators and ImplyPrecedenceHere in the grammar constructor.
// Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
// Could also be an enum, but this way you don't need int casts
// NOTE(review): values rise from Comparison to Reference, presumably meaning a larger value binds tighter - confirm against Irony.
private static class Precedence
{
// Don't use priority 0, Irony seems to view it as no priority set
public const int Comparison = 1;
public const int Concatenation = 2;
public const int Addition = 3;
public const int Multiplication = 4;
public const int Exponentiation = 5;
public const int UnaryPostFix = 6;
public const int UnaryPreFix = 7;
// Shared by the range (':'), intersection (space) and union (',') operators
public const int Reference = 8;
}
// Terminal priorities: they tell the lexer which token to pick when multiple tokens can match
// E.g. "A1" is both a CellToken and a NamedRange; CellToken is picked because it has a higher priority
// E.g. "A1Blah" is a CellToken + NamedRange, a NamedRange and a NamedRangeCombination; NamedRangeCombination is picked
private static class TerminalPriority
{
// Irony Low value
//public const int Low = -1000;
public const int NamedRange = -800;
public const int ReservedName = -700;
// Irony Normal value, default value
//public const int Normal = 0;
public const int Bool = 0;
public const int MultipleSheetsToken = 100;
// Irony High value
//public const int High = 1000;
public const int CellToken = 1000;
public const int NamedRangeCombination = 1100;
public const int UDF = 1150;
// The remaining terminals all share the highest priority used in this grammar
public const int ExcelFunction = 1200;
public const int ExcelRefFunction = 1200;
public const int FileToken = 1200;
public const int SheetToken = 1200;
public const int QuotedFileToken = 1200;
}
#endregion
#region Excel function list
// Names of the built-in Excel functions recognised by this grammar version.
// The grammar constructor joins these names with '|' to build the regular expression
// of the ExcelFunction terminal; any other function name is lexed as a user defined function.
// Fixes applied to this list:
//  * "EDATE" and "EFFECT" were fused into the single bogus entry "EDATEEFFECT",
//    so neither function was recognised as built-in.
//  * The generic type arguments were missing from the declaration.
private static readonly IList<string> excelFunctionList = new List<string>
{
    "ABS",
    "ACCRINT",
    "ACCRINTM",
    "ACOS",
    "ACOSH",
    "ADDRESS",
    "AMORDEGRC",
    "AMORLINC",
    "AND",
    "AREAS",
    "ASC",
    "ASIN",
    "ASINH",
    "ATAN",
    "ATAN2",
    "ATANH",
    "AVEDEV",
    "AVERAGE",
    "AVERAGEA",
    "AVERAGEIF",
    "AVERAGEIFS",
    "BAHTTEXT",
    "BESSELI",
    "BESSELJ",
    "BESSELK",
    "BESSELY",
    "BETADIST",
    "BETAINV",
    "BIN2DEC",
    "BIN2HEX",
    "BIN2OCT",
    "BINOMDIST",
    "CALL",
    "CEILING",
    "CELL",
    "CHAR",
    "CHIDIST",
    "CHIINV",
    "CHITEST",
    "CHOOSE",
    "CLEAN",
    "CODE",
    "COLUMN",
    "COLUMNS",
    "COMBIN",
    "COMPLEX",
    "CONCATENATE",
    "CONFIDENCE",
    "CONVERT",
    "CORREL",
    "COS",
    "COSH",
    "COUNT",
    "COUNTA",
    "COUNTBLANK",
    "COUNTIF",
    "COUNTIFS",
    "COUPDAYBS",
    "COUPDAYS",
    "COUPDAYSNC",
    "COUPNCD",
    "COUPNUM",
    "COUPPCD",
    "COVAR",
    "CRITBINOM",
    "CUBEKPIMEMBER",
    "CUBEMEMBER",
    "CUBEMEMBERPROPERTY",
    "CUBERANKEDMEMBER",
    "CUBESET",
    "CUBESETCOUNT",
    "CUBEVALUE",
    "CUMIPMT",
    "CUMPRINC",
    "DATE",
    "DATEVALUE",
    "DAVERAGE",
    "DAY",
    "DAYS360",
    "DB",
    "DCOUNT",
    "DCOUNTA",
    "DDB",
    "DEC2BIN",
    "DEC2HEX",
    "DEC2OCT",
    "DEGREES",
    "DELTA",
    "DEVSQ",
    "DGET",
    "DISC",
    "DMAX",
    "DMIN",
    "DOLLAR",
    "DOLLARDE",
    "DOLLARFR",
    "DPRODUCT",
    "DSTDEV",
    "DSTDEVP",
    "DSUM",
    "DURATION",
    "DVAR",
    "DVARP",
    "EDATE",
    "EFFECT",
    "EOMONTH",
    "ERF",
    "ERFC",
    "ERROR.TYPE",
    "EUROCONVERT",
    "EVEN",
    "EXACT",
    "EXP",
    "EXPONDIST",
    "FACT",
    "FACTDOUBLE",
    "FALSE",
    "FDIST",
    "FIND",
    "FINV",
    "FISHER",
    "FISHERINV",
    "FIXED",
    "FLOOR",
    "FORECAST",
    "FREQUENCY",
    "FTEST",
    "FV",
    "FVSCHEDULE",
    "GAMMADIST",
    "GAMMAINV",
    "GAMMALN",
    "GCD",
    "GEOMEAN",
    "GESTEP",
    "GETPIVOTDATA",
    "GROWTH",
    "HARMEAN",
    "HEX2BIN",
    "HEX2DEC",
    "HEX2OCT",
    "HLOOKUP",
    "HOUR",
    "HYPERLINK",
    "HYPGEOMDIST",
    "IF",
    "ISBLANK",
    "IFERROR",
    "IMABS",
    "IMAGINARY",
    "IMARGUMENT",
    "IMCONJUGATE",
    "IMCOS",
    "IMDIV",
    "IMEXP",
    "IMLN",
    "IMLOG10",
    "IMLOG2",
    "IMPOWER",
    "IMPRODUCT",
    "IMREAL",
    "IMSIN",
    "IMSQRT",
    "IMSUB",
    "IMSUM",
    "INFO",
    "INT",
    "INTERCEPT",
    "INTRATE",
    "IPMT",
    "IRR",
    // NOTE(review): "IS" and "ISB" look like truncated entries; the later function lists in this
    // repository contain ISERR, ISEVEN, ISLOGICAL, ISNONTEXT, ISODD, ISREF and ISTEXT instead.
    // Kept unchanged here because the intended names cannot be determined from this file.
    "IS",
    "ISB",
    "ISERROR",
    "ISNA",
    "ISNUMBER",
    "ISPMT",
    "JIS",
    "KURT",
    "LARGE",
    "LCM",
    "LEFT",
    "LEFTB",
    "LEN",
    "LENB",
    "LINEST",
    "LN",
    "LOG",
    "LOG10",
    "LOGEST",
    "LOGINV",
    "LOGNORMDIST",
    "LOOKUP",
    "LOWER",
    "MATCH",
    "MAX",
    "MAXA",
    "MDETERM",
    "MDURATION",
    "MEDIAN",
    "MID",
    "MIDB",
    "MIN",
    "MINA",
    "MINUTE",
    "MINVERSE",
    "MIRR",
    "MMULT",
    "MOD",
    "MODE",
    "MONTH",
    "MROUND",
    "MULTINOMIAL",
    "N",
    "NA",
    "NEGBINOMDIST",
    "NETWORKDAYS",
    "NOMINAL",
    "NORMDIST",
    "NORMINV",
    "NORMSDIST",
    "NORMSINV",
    "NOT",
    "NOW",
    "NPER",
    "NPV",
    "OCT2BIN",
    "OCT2DEC",
    "OCT2HEX",
    "ODD",
    "ODDFPRICE",
    "ODDFYIELD",
    "ODDLPRICE",
    "ODDLYIELD",
    "OR",
    "PEARSON",
    "PERCENTILE",
    "PERCENTRANK",
    "PERMUT",
    "PHONETIC",
    "PI",
    "PMT",
    "POISSON",
    "POWER",
    "PPMT",
    "PRICE",
    "PRICEDISC",
    "PRICEMAT",
    "PROB",
    "PRODUCT",
    "PROPER",
    "PV",
    "QUARTILE",
    "QUOTIENT",
    "RADIANS",
    "RAND",
    "RANDBETWEEN",
    "RANK",
    "RATE",
    "RECEIVED",
    "REGISTER.ID",
    "REPLACE",
    "REPLACEB",
    "REPT",
    "RIGHT",
    "RIGHTB",
    "ROMAN",
    "ROUND",
    "ROUNDDOWN",
    "ROUNDUP",
    "ROW",
    "ROWS",
    "RSQ",
    "RTD",
    "SEARCH",
    "SEARCHB",
    "SECOND",
    "SERIESSUM",
    "SIGN",
    "SIN",
    "SINH",
    "SKEW",
    "SLN",
    "SLOPE",
    "SMALL",
    "SQL.REQUEST",
    "SQRT",
    "SQRTPI",
    "STANDARDIZE",
    "STDEV",
    "STDEVA",
    "STDEVP",
    "STDEVPA",
    "STEYX",
    "SUBSTITUTE",
    "SUBTOTAL",
    "SUM",
    "SUMIF",
    "SUMIFS",
    "SUMPRODUCT",
    "SUMSQ",
    "SUMX2MY2",
    "SUMX2PY2",
    "SUMXMY2",
    "SYD",
    "T",
    "TAN",
    "TANH",
    "TBILLEQ",
    "TBILLPRICE",
    "TBILLYIELD",
    "TDIST",
    "TEXT",
    "TIME",
    "TIMEVALUE",
    "TINV",
    "TODAY",
    "TRANSPOSE",
    "TREND",
    "TRIM",
    "TRIMMEAN",
    "TRUE",
    "TRUNC",
    "TTEST",
    "TYPE",
    "UPPER",
    "VALUE",
    "VAR",
    "VARA",
    "VARP",
    "VARPA",
    "VDB",
    "VLOOKUP",
    "WEEKDAY",
    "WEEKNUM",
    "WEIBULL",
    "WORKDAY",
    "XIRR",
    "XNPV",
    "YEAR",
    "YEARFRAC",
    "YIELD",
    "YIELDDISC",
    "YIELDMAT",
    "ZTEST"
};
#endregion
}
#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
#region Non-Terminals
public const string Argument = "Argument";
public const string Arguments = "Arguments";
public const string ArrayColumns = "ArrayColumns";
public const string ArrayConstant = "ArrayConstant";
public const string ArrayFormula = "ArrayFormula";
public const string ArrayRows = "ArrayRows";
public const string Bool = "Bool";
public const string Cell = "Cell";
public const string Constant = "Constant";
public const string ConstantArray = "ConstantArray";
public const string DynamicDataExchange = "DynamicDataExchange";
public const string EmptyArgument = "EmptyArgument";
public const string Error = "Error";
// NOTE(review): listed under non-terminals, but the grammar uses this name for a RegexBasedTerminal
public const string ExcelFunction = "ExcelFunction";
public const string File = "File";
public const string Formula = "Formula";
public const string FormulaWithEq = "FormulaWithEq";
public const string Function = "Function";
public const string FunctionCall = "FunctionCall";
public const string HorizontalRange = "HRange";
public const string MultipleSheets = "MultipleSheets";
public const string NamedRange = "NamedRange";
public const string Number = "Number";
public const string Prefix = "Prefix";
public const string QuotedFileSheet = "QuotedFileSheet";
public const string Range = "Range";
public const string Reference = "Reference";
public const string ReferenceFunction = "ReferenceFunction";
public const string RefError = "RefError";
public const string ReservedName = "ReservedName";
public const string Sheet = "Sheet";
public const string Text = "Text";
public const string Union = "Union";
public const string VerticalRange = "VRange";
#endregion
#region Transient Non-Terminals
// The grammar marks these non-terminals as transient (MarkTransient)
public const string TransientStart = "Start";
public const string TransientInfixOp = "InfixOp";
public const string TransientPostfixOp = "PostfixOp";
public const string TransientPrefixOp = "PrefixOp";
public const string TransientReferenceItem = "ReferenceItem";
#endregion
#region Terminals
public const string TokenBool = "BoolToken";
public const string TokenCell = "CellToken";
public const string TokenDDE = "DDEToken";
public const string TokenEmptyArgument = "EmptyArgumentToken";
public const string TokenError = "ErrorToken";
public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
public const string TokenFileNameNumeric = "FileNameNumericToken";
public const string TokenFileSheetQuoted = "FileSheetQuotedToken";
public const string TokenHRange = "HRangeToken";
public const string TokenIntersect = "INTERSECT";
public const string TokenMultipleSheets = "MultipleSheetsToken";
public const string TokenNamedRange = "NamedRangeToken";
public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
public const string TokenNumber = "NumberToken";
public const string TokenRefError = "RefErrorToken";
public const string TokenReservedName = "ReservedNameToken";
public const string TokenSheet = "SheetNameToken";
public const string TokenText = "TextToken";
public const string TokenUDF = "UDFToken";
public const string TokenVRange = "VRangeToken";
#endregion
}
#endregion
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v100/ExcelFormulaParser.cs
================================================
using System;
using System.Collections;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Linq;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v100
{
///
/// Excel formula parser
/// Contains parser and utilities that operate directly on the parse tree, or makes working with the parse tree easier.
///
public static class ExcelFormulaParser
{
/// <summary>
/// Singleton parser instance, created once from a new <see cref="ExcelFormulaGrammar"/>
/// and used by every call to <see cref="ParseToTree"/>.
/// </summary>
private readonly static Parser p = new Parser(new ExcelFormulaGrammar());
/// <summary>
/// Parse a formula and return the root node of the resulting parse tree.
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If the formula could not be parsed.
/// </exception>
/// <returns>Parse tree root node.</returns>
public static ParseTreeNode Parse(string input)
{
    var tree = ParseToTree(input);
    return tree.Root;
}
/// <summary>
/// Parse a formula and return the complete parse tree.
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If the formula could not be parsed.
/// </exception>
/// <returns>Parse tree.</returns>
public static ParseTree ParseToTree(string input)
{
    ParseTree tree;
    // The parser instance is a process-wide singleton and an Irony Parser keeps mutable
    // state while parsing, so concurrent callers (e.g. parallel web requests) must not
    // run it at the same time. 'p' is private and readonly, which makes it a safe lock target.
    lock (p)
    {
        tree = p.Parse(input);
    }

    if (tree.HasErrors())
    {
        throw new ArgumentException("Failed parsing input <<" + input + ">>");
    }

    return tree;
}
/// <summary>
/// All nodes of the tree rooted at <paramref name="root"/> (the root included), in depth-first pre-order.
/// </summary>
/// <param name="root">Node to start the traversal from.</param>
/// <returns>Lazily evaluated sequence of nodes.</returns>
// inspiration taken from https://irony.codeplex.com/discussions/213938
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
{
    // A typed stack is required: a non-generic Stack would hand back plain objects from Pop()
    var stack = new Stack<ParseTreeNode>();
    stack.Push(root);
    while (stack.Count > 0)
    {
        var node = stack.Pop();
        yield return node;

        var children = node.ChildNodes;
        // Push children on in reverse order so that they will
        // be evaluated left -> right when popped.
        for (int i = children.Count - 1; i >= 0; i--)
        {
            stack.Push(children[i]);
        }
    }
}

/// <summary>
/// All nodes of a certain type in the tree rooted at <paramref name="root"/>, in depth-first pre-order.
/// </summary>
/// <param name="root">Node to start the traversal from.</param>
/// <param name="type">Node type (term name) to select, see <see cref="GrammarNames"/>.</param>
/// <returns>Lazily evaluated sequence of matching nodes.</returns>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
{
    return AllNodes(root.AllNodes(), type);
}

/// <summary>
/// Filters an already enumerated set of nodes down to the nodes of a certain type.
/// </summary>
/// <param name="allNodes">Nodes to filter.</param>
/// <param name="type">Node type (term name) to select.</param>
/// <returns>Lazily evaluated sequence of matching nodes, in the order of <paramref name="allNodes"/>.</returns>
internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
{
    return allNodes.Where(node => node.Is(type));
}
/// <summary>
/// Whether the tree rooted at <paramref name="root"/> contains at least one node of the given type.
/// </summary>
/// <param name="root">Node to start searching from.</param>
/// <param name="type">Node type (term name) to look for.</param>
public static bool Contains(this ParseTreeNode root, string type)
{
    var matches = root.AllNodes(type);
    return matches.Any();
}
/// <summary>
/// The type of a node, which is the name of the grammar term it was produced from.
/// </summary>
/// <param name="node">Node to inspect.</param>
public static string Type(this ParseTreeNode node)
{
    var term = node.Term;
    return term.Name;
}
/// <summary>
/// Check whether a node is of a particular type (term name).
/// </summary>
/// <param name="pt">Node to inspect.</param>
/// <param name="type">Expected type, see <see cref="GrammarNames"/>.</param>
public static bool Is(this ParseTreeNode pt, string type)
{
    var nodeType = pt.Type();
    return string.Equals(nodeType, type);
}
/// <summary>
/// Checks whether this node is a function: a named function call,
/// a binary operation or a unary operation.
/// </summary>
/// <param name="input">Node to inspect.</param>
public static Boolean IsFunction(this ParseTreeNode input)
{
    if (IsNamedFunction(input))
    {
        return true;
    }

    if (IsBinaryOperation(input))
    {
        return true;
    }

    return IsUnaryOperation(input);
}
/// <summary>
/// Whether or not this node represents parentheses "(_)".
/// </summary>
/// <remarks>
/// That is the case for a Formula whose only child is a Formula,
/// and for a Reference whose only child is a Reference.
/// </remarks>
/// <param name="input">Node to inspect.</param>
public static bool IsParentheses(this ParseTreeNode input)
{
    var kind = input.Type();
    if (kind != GrammarNames.Formula && kind != GrammarNames.Reference)
    {
        return false;
    }

    // The wrapped child has to be of the very same type as the wrapper
    return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(kind);
}
/// <summary>
/// Whether this node is a binary operation: a FunctionCall or Reference node
/// with exactly three children, the middle one being an operator.
/// </summary>
/// <param name="input">Node to inspect.</param>
public static bool IsBinaryOperation(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.FunctionCall) && !input.Is(GrammarNames.Reference))
    {
        return false;
    }

    if (input.ChildNodes.Count != 3)
    {
        return false;
    }

    var middle = input.ChildNodes[1];
    return middle.Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Whether this node is a unary operation, either prefix or postfix.
/// </summary>
/// <param name="input">Node to inspect.</param>
public static bool IsUnaryOperation(this ParseTreeNode input)
{
    if (IsUnaryPrefixOperation(input))
    {
        return true;
    }

    return IsUnaryPostfixOperation(input);
}
/// <summary>
/// Whether this node is a unary prefix operation: a FunctionCall node
/// with exactly two children, the first one being an operator.
/// </summary>
/// <param name="input">Node to inspect.</param>
public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.FunctionCall) || input.ChildNodes.Count != 2)
    {
        return false;
    }

    var leading = input.ChildNodes[0];
    return leading.Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Whether this node is a unary postfix operation: a FunctionCall node
/// with exactly two children, the second one being an operator.
/// </summary>
/// <param name="input">Node to inspect.</param>
public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.FunctionCall) || input.ChildNodes.Count != 2)
    {
        return false;
    }

    var trailing = input.ChildNodes[1];
    return trailing.Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Returns <paramref name="input"/> without its last character.
/// </summary>
/// <remarks>
/// Used to strip the opening parenthesis that is part of a function token, e.g. "SUM(".
/// </remarks>
/// <param name="input">Text to shorten; must contain at least one character.</param>
private static string RemoveFinalSymbol(string input)
{
    var lastIndex = input.Length - 1;
    return input.Remove(lastIndex);
}
/// <summary>
/// Get the function or operator name of this function call.
/// </summary>
/// <remarks>
/// Function names are returned in upper case and without the opening parenthesis
/// that is part of the function token. The upper-casing is culture-invariant:
/// function names are identifiers, so the result must not depend on the current
/// culture (a Turkish culture, for instance, would otherwise upper-case "i" to a dotted "İ").
/// </remarks>
/// <param name="input">Node representing a function call or an operation.</param>
/// <exception cref="ArgumentException">
/// If <paramref name="input"/> is not a function call.
/// </exception>
/// <returns>The operator text, the intersection token name, or the function name.</returns>
public static string GetFunction(this ParseTreeNode input)
{
    // The intersection operator is an implied token, so report its token name instead of its text
    if (IsIntersection(input))
    {
        return GrammarNames.TokenIntersect;
    }

    // Binary and postfix operations have the operator as their second child
    if (IsBinaryOperation(input) || IsUnaryPostfixOperation(input))
    {
        return input.ChildNodes[1].Print();
    }

    if (IsUnaryPrefixOperation(input))
    {
        return input.ChildNodes[0].Print();
    }

    if (input.Is(GrammarNames.ReferenceFunction) || input.Is(GrammarNames.FunctionCall))
    {
        return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
    }

    // User defined function with a prefix: Prefix + UDFToken + Arguments
    if (input.Is(GrammarNames.Reference)
        && input.ChildNodes.Count == 3
        && input.ChildNodes[2].Is(GrammarNames.Arguments))
    {
        return RemoveFinalSymbol(input.ChildNodes[1].Print()).ToUpperInvariant();
    }

    throw new ArgumentException("Not a function call", "input");
}
/// <summary>
/// Check whether this node is a function with a specific name.
/// </summary>
/// <param name="input">Node to inspect.</param>
/// <param name="functionName">Name to compare with the result of <see cref="GetFunction"/>.</param>
public static bool MatchFunction(this ParseTreeNode input, String functionName)
{
    if (!IsFunction(input))
    {
        return false;
    }

    return GetFunction(input) == functionName;
}
/// <summary>
/// Checks whether this node is a call of a built-in Excel function.
/// </summary>
/// <remarks>
/// Built-in functions show up in two shapes in the parse tree:
/// a ReferenceFunction node (INDEX, OFFSET, INDIRECT), or a FunctionCall node whose
/// Function child wraps an ExcelFunction token. The previous implementation tested the
/// call node itself for the ExcelFunction type, which a function call node can never
/// have, so functions such as SUM were never reported as built-in.
/// </remarks>
/// <param name="node">Node to inspect.</param>
public static bool IsBuiltinFunction(this ParseTreeNode node)
{
    if (node.Is(GrammarNames.ReferenceFunction))
    {
        return true;
    }

    if (!node.Is(GrammarNames.FunctionCall))
    {
        return false;
    }

    // FunctionCall -> Function -> (ExcelFunction | UDFToken); operations have no Function child
    var function = node.ChildNodes.Find(child => child.Is(GrammarNames.Function));
    return function != null
        && function.ChildNodes.Count > 0
        && function.ChildNodes[0].Is(GrammarNames.ExcelFunction);
}
/// <summary>
/// Whether or not this node represents an intersection.
/// </summary>
/// <remarks>
/// A node is an intersection when it is a binary operation whose operator token
/// was produced by the intersect terminal. Missing data is checked explicitly
/// instead of catching every exception, so real errors are no longer hidden.
/// </remarks>
/// <param name="input">Node to inspect.</param>
public static bool IsIntersection(this ParseTreeNode input)
{
    if (input == null || !IsBinaryOperation(input))
    {
        return false;
    }

    // Only nodes created from a token carry one; anything else cannot be the intersect operator
    var token = input.ChildNodes[1].Token;
    return token != null
        && token.Terminal != null
        && token.Terminal.Name == GrammarNames.TokenIntersect;
}
/// <summary>
/// Check whether a reference node is a union: a Reference whose only child is a Union node.
/// </summary>
/// <param name="input">Node to inspect.</param>
public static bool IsUnion(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.Reference))
    {
        return false;
    }

    var children = input.ChildNodes;
    return children.Count == 1 && children[0].Is(GrammarNames.Union);
}
/// <summary>
/// Checks whether this node is a function call with a name, and not just a unary or binary operation.
/// </summary>
/// <param name="input">Node to inspect.</param>
public static bool IsNamedFunction(this ParseTreeNode input)
{
    if (input.Is(GrammarNames.ReferenceFunction))
    {
        return true;
    }

    if (input.Is(GrammarNames.FunctionCall))
    {
        // Operations are FunctionCall nodes too, but only named calls have a Function child
        return input.ChildNodes.Exists(child => child.Term.Name == GrammarNames.Function);
    }

    // User defined function with prefix
    return input.Is(GrammarNames.Reference)
        && input.ChildNodes.Count == 3
        && input.ChildNodes[2].Is(GrammarNames.Arguments);
}
/// <summary>
/// True if this node is a unary prefix operation applied directly to a number constant
/// </summary>
public static bool IsNumberWithSign(this ParseTreeNode input)
{
    if (!IsUnaryPrefixOperation(input))
    {
        return false;
    }
    // Operand wrapper (second child) -> its first child must be a Constant holding a Number
    if (!input.ChildNodes[1].ChildNodes[0].Is(GrammarNames.Constant))
    {
        return false;
    }
    return input.ChildNodes[1].ChildNodes[0].ChildNodes[0].Is(GrammarNames.Number);
}
/// <summary>
/// Descends through first children until a node that is not a Formula is reached, and returns that node
/// </summary>
public static ParseTreeNode SkipFormula(this ParseTreeNode input)
{
    var current = input;
    while (true)
    {
        if (!current.Is(GrammarNames.Formula))
        {
            return current;
        }
        current = current.ChildNodes.First();
    }
}
/// <summary>
/// Go to the first "relevant" child node, i.e. skips wrapper nodes
/// </summary>
/// <remarks>
/// Skips:
/// * FormulaWithEq and ArrayFormula nodes
/// * Formula nodes
/// * Parentheses
/// * Reference nodes which are just wrappers
/// </remarks>
public static ParseTreeNode SkipToRelevant(this ParseTreeNode input)
{
    var type = input.Type();

    // "=formula" and "{=formula}": the formula itself is the second child
    if (type == GrammarNames.FormulaWithEq || type == GrammarNames.ArrayFormula)
    {
        return SkipToRelevant(input.ChildNodes[1]);
    }

    // Formula/Reference nodes with a single child are pure wrappers; this also catches parentheses
    var mayBeWrapper = type == GrammarNames.Formula || type == GrammarNames.Reference;
    if (mayBeWrapper && input.ChildNodes.Count == 1)
    {
        return SkipToRelevant(input.ChildNodes[0]);
    }

    return input;
}
/// <summary>
/// Pretty-print a parse tree to a string
/// </summary>
/// <param name="input">Root of the (sub)tree to print</param>
/// <returns>The printed text for the node and its children</returns>
/// <exception cref="ArgumentException">
/// If a non-terminal is encountered that has no printing rule and does not have exactly one child
/// </exception>
public static string Print(this ParseTreeNode input)
{
    // For terminals, just print the token text
    if (input.Term is Terminal)
    {
        return input.Token.Text;
    }

    // (Lazy) enumerable for printed childs; a child is only printed when a branch below enumerates it
    var childs = input.ChildNodes.Select(Print);
    // Concrete list when needed (indexed access to several children)
    List<string> childsL;

    // Switch on nonterminals
    switch (input.Term.Name)
    {
        case GrammarNames.Formula:
            // Check if these are brackets, otherwise print first child
            return IsParentheses(input) ? String.Format("({0})", childs.First()) : childs.First();

        case GrammarNames.FunctionCall:
            if (IsNamedFunction(input))
            {
                // The closing parenthesis is not among the printed children, so it is appended here
                return String.Join("", childs) + ")";
            }
            childsL = childs.ToList();
            if (IsBinaryOperation(input))
            {
                // Non-reference binary operators are printed surrounded by spaces
                return String.Format("{0} {1} {2}", childsL[0], childsL[1], childsL[2]);
            }
            // Unary function
            return String.Join("", childsL);

        case GrammarNames.Reference:
            if (IsParentheses(input) || IsUnion(input))
            {
                return String.Format("({0})", childs.First());
            }
            childsL = childs.ToList();
            if (IsIntersection(input))
            {
                // The intersection operator itself (middle child) is printed as a single space
                return String.Format("{0} {1}", childsL[0], childsL[2]);
            }
            if (IsBinaryOperation(input))
            {
                // Other reference operators are printed without surrounding spaces
                return String.Format("{0}{1}{2}", childsL[0], childsL[1], childsL[2]);
            }
            return String.Join("", childsL);

        case GrammarNames.ReferenceFunction:
            return String.Join("", childs) + ")";

        case GrammarNames.File:
            return String.Format("[{0}]", childs.First());

        case GrammarNames.Prefix:
            var ret = String.Join("", childs);
            // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
            if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
            {
                ret += "!";
            }
            return ret;

        case GrammarNames.ArrayFormula:
            // The formula is the second child; the braces and leading '=' are re-added here
            return "{=" + childs.ElementAt(1) + "}";

        // Terms for which to print all child nodes concatenated
        case GrammarNames.ArrayConstant:
        case GrammarNames.DynamicDataExchange:
        case GrammarNames.FormulaWithEq:
            return String.Join("", childs);

        // Terms for which we print the childs comma-separated
        case GrammarNames.Arguments:
        case GrammarNames.ArrayRows:
        case GrammarNames.Union:
            return String.Join(",", childs);

        case GrammarNames.ArrayColumns:
            return String.Join(";", childs);

        case GrammarNames.ConstantArray:
            return String.Format("{{{0}}}", childs.First());

        default:
            // If it is not defined above and the number of childs is exactly one, we want to just print the first child
            if (input.ChildNodes.Count == 1)
            {
                return childs.First();
            }
            throw new ArgumentException(String.Format("Could not print node of type '{0}'.\nThis probably means the excel grammar was modified without the print function being modified", input.Term.Name));
    }
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v114/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;
namespace XLParser.Web.XLParserVersions.v114
{
///
/// Contains the XLParser grammar
///
[Language("Excel Formulas", "1.1.3", "Grammar for Excel Formulas")]
public class ExcelFormulaGrammar : Grammar
{
/// <summary>
/// Builds the grammar: declares the terminals, the non-terminals, the production rules
/// and the operator precedences. <c>base(false)</c> is the Irony Grammar constructor argument
/// (presumably the case-sensitivity flag - confirm against the Irony documentation).
/// </summary>
public ExcelFormulaGrammar() : base(false)
{
    #region 1-Terminals

    #region Symbols and operators
    var comma = ToTerm(",");
    var colon = ToTerm(":");
    var semicolon = ToTerm(";");
    var OpenParen = ToTerm("(");
    var CloseParen = ToTerm(")");
    var CloseSquareParen = ToTerm("]");
    var OpenSquareParen = ToTerm("[");
    var exclamationMark = ToTerm("!");
    var CloseCurlyParen = ToTerm("}");
    var OpenCurlyParen = ToTerm("{");

    var mulop = ToTerm("*");
    var plusop = ToTerm("+");
    var divop = ToTerm("/");
    var minop = ToTerm("-");
    var concatop = ToTerm("&");
    var expop = ToTerm("^");
    // Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
    // Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
    var intersectop = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);

    var percentop = ToTerm("%");

    var gtop = ToTerm(">");
    var eqop = ToTerm("=");
    var ltop = ToTerm("<");
    var neqop = ToTerm("<>");
    var gteop = ToTerm(">=");
    var lteop = ToTerm("<=");
    #endregion

    #region Literals
    var BoolToken = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE");
    BoolToken.Priority = TerminalPriority.Bool;

    var NumberToken = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None);
    NumberToken.DefaultIntTypes = new TypeCode[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt };

    var TextToken = new StringLiteral(GrammarNames.TokenText, "\"", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak);

    var ErrorToken = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A");
    var RefErrorToken = ToTerm("#REF!", GrammarNames.TokenRefError);
    #endregion

    #region Functions
    var UDFToken = new RegexBasedTerminal(GrammarNames.TokenUDF, @"(_xll\.)?[\w\\.]+\(");
    UDFToken.Priority = TerminalPriority.UDF;

    var ExcelRefFunctionToken = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(");
    ExcelRefFunctionToken.Priority = TerminalPriority.ExcelRefFunction;

    var ExcelConditionalRefFunctionToken = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(");
    ExcelConditionalRefFunctionToken.Priority = TerminalPriority.ExcelRefFunction;

    // Escape the names before joining them into one alternation: several of them contain '.'
    // (e.g. ERROR.TYPE), which would otherwise act as a regex wildcard and match unrelated names.
    var escapedFunctionNames = new List<string>();
    foreach (string functionName in excelFunctionList)
    {
        escapedFunctionNames.Add(System.Text.RegularExpressions.Regex.Escape(functionName));
    }
    var ExcelFunction = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + String.Join("|", escapedFunctionNames) + ")\\(");
    ExcelFunction.Priority = TerminalPriority.ExcelFunction;

    // Using this instead of Empty allows more accurate trees
    var EmptyArgumentToken = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);
    #endregion

    #region References and names
    var VRangeToken = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?[A-Z]{1,4}:[$]?[A-Z]{1,4}");
    var HRangeToken = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*");

    const string CellTokenRegex = "[$]?[A-Z]{1,4}[$]?[1-9][0-9]*";
    var CellToken = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex);
    CellToken.Priority = TerminalPriority.CellToken;

    const string NamedRangeRegex = @"[A-Za-z\\_][\w\.]*";
    var NamedRangeToken = new RegexBasedTerminal(GrammarNames.TokenNamedRange, NamedRangeRegex);
    NamedRangeToken.Priority = TerminalPriority.NamedRange;

    // To prevent e.g. "A1A1" being parsed as 2 celltokens
    var NamedRangeCombinationToken = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, "(TRUE|FALSE|" + CellTokenRegex + ")" + NamedRangeRegex);
    NamedRangeCombinationToken.Priority = TerminalPriority.NamedRangeCombination;

    const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
    const string notSheetNameChars = @"'*\[\]\\:/?";
    //const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
    //const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
    const string normalSheetName = "[^" + notSheetNameChars + mustBeQuotedInSheetName + "]+";
    const string quotedSheetName = "([^" + notSheetNameChars + "]|'')+";
    const string sheetRegEx = "((" + normalSheetName + ")|('" + quotedSheetName + "'))!";

    var SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, sheetRegEx);
    SheetToken.Priority = TerminalPriority.SheetToken;

    var multiSheetRegex = String.Format("(({0}:{0})|('{1}:{1}'))!", normalSheetName, quotedSheetName);
    var MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex);
    MultipleSheetsToken.Priority = TerminalPriority.MultipleSheetsToken;

    var FileToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, "[0-9]+");
    FileToken.Priority = TerminalPriority.FileToken;

    const string quotedFileSheetRegex = @"'\[\d+\]" + quotedSheetName + "'!";

    var QuotedFileSheetToken = new RegexBasedTerminal(GrammarNames.TokenFileSheetQuoted, quotedFileSheetRegex);
    QuotedFileSheetToken.Priority = TerminalPriority.QuotedFileToken;

    var ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+");
    ReservedNameToken.Priority = TerminalPriority.ReservedName;

    var DDEToken = new RegexBasedTerminal(GrammarNames.TokenDDE, @"'([^']|'')+'");
    #endregion

    #region Punctuation
    MarkPunctuation(exclamationMark);
    MarkPunctuation(OpenParen, CloseParen);
    MarkPunctuation(OpenSquareParen, CloseSquareParen);
    MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
    #endregion

    #endregion

    #region 2-NonTerminals
    // Most nonterminals are first defined here, so they can be used anywhere in the rules
    // Otherwise you can only use nonterminals that have been defined previously
    var Argument = new NonTerminal(GrammarNames.Argument);
    var Arguments = new NonTerminal(GrammarNames.Arguments);
    var ArrayColumns = new NonTerminal(GrammarNames.ArrayColumns);
    var ArrayConstant = new NonTerminal(GrammarNames.ArrayConstant);
    var ArrayFormula = new NonTerminal(GrammarNames.ArrayFormula);
    var ArrayRows = new NonTerminal(GrammarNames.ArrayRows);
    var Bool = new NonTerminal(GrammarNames.Bool);
    var Cell = new NonTerminal(GrammarNames.Cell);
    var Constant = new NonTerminal(GrammarNames.Constant);
    var ConstantArray = new NonTerminal(GrammarNames.ConstantArray);
    var DynamicDataExchange = new NonTerminal(GrammarNames.DynamicDataExchange);
    var EmptyArgument = new NonTerminal(GrammarNames.EmptyArgument);
    var Error = new NonTerminal(GrammarNames.Error);
    var File = new NonTerminal(GrammarNames.File);
    var Formula = new NonTerminal(GrammarNames.Formula);
    var FormulaWithEq = new NonTerminal(GrammarNames.FormulaWithEq);
    var FunctionCall = new NonTerminal(GrammarNames.FunctionCall);
    var FunctionName = new NonTerminal(GrammarNames.FunctionName);
    var HRange = new NonTerminal(GrammarNames.HorizontalRange);
    var InfixOp = new NonTerminal(GrammarNames.TransientInfixOp);
    var MultipleSheets = new NonTerminal(GrammarNames.MultipleSheets);
    var NamedRange = new NonTerminal(GrammarNames.NamedRange);
    var Number = new NonTerminal(GrammarNames.Number);
    var PostfixOp = new NonTerminal(GrammarNames.TransientPostfixOp);
    var Prefix = new NonTerminal(GrammarNames.Prefix);
    var PrefixOp = new NonTerminal(GrammarNames.TransientPrefixOp);
    var QuotedFileSheet = new NonTerminal(GrammarNames.QuotedFileSheet);
    var Reference = new NonTerminal(GrammarNames.Reference);
    //var ReferenceFunction = new NonTerminal(GrammarNames.ReferenceFunction);
    var ReferenceItem = new NonTerminal(GrammarNames.TransientReferenceItem);
    var ReferenceFunctionCall = new NonTerminal(GrammarNames.ReferenceFunctionCall);
    var RefError = new NonTerminal(GrammarNames.RefError);
    var RefFunctionName = new NonTerminal(GrammarNames.RefFunctionName);
    var ReservedName = new NonTerminal(GrammarNames.ReservedName);
    var Sheet = new NonTerminal(GrammarNames.Sheet);
    var Start = new NonTerminal(GrammarNames.TransientStart);
    var Text = new NonTerminal(GrammarNames.Text);
    var UDFName = new NonTerminal(GrammarNames.UDFName);
    var UDFunctionCall = new NonTerminal(GrammarNames.UDFunctionCall);
    var Union = new NonTerminal(GrammarNames.Union);
    var VRange = new NonTerminal(GrammarNames.VerticalRange);
    #endregion

    #region 3-Rules

    #region Base rules
    Root = Start;

    Start.Rule = FormulaWithEq
                 | Formula
                 | ArrayFormula
                 ;
    MarkTransient(Start);

    ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;

    FormulaWithEq.Rule = eqop + Formula;

    Formula.Rule =
        Reference
        | Constant
        | FunctionCall
        | ConstantArray
        | OpenParen + Formula + CloseParen
        | ReservedName
        ;
    //MarkTransient(Formula);

    ReservedName.Rule = ReservedNameToken;

    Constant.Rule = Number
                    | Text
                    | Bool
                    | Error
                    ;

    Text.Rule = TextToken;
    Number.Rule = NumberToken;
    Bool.Rule = BoolToken;
    Error.Rule = ErrorToken;
    RefError.Rule = RefErrorToken;
    #endregion

    #region Functions
    FunctionCall.Rule =
        FunctionName + Arguments + CloseParen
        | PrefixOp + Formula
        | Formula + PostfixOp
        | Formula + InfixOp + Formula
        ;

    FunctionName.Rule = ExcelFunction;

    Arguments.Rule = MakeStarRule(Arguments, comma, Argument);
    //Arguments.Rule = Argument | Argument + comma + Arguments;

    EmptyArgument.Rule = EmptyArgumentToken;
    Argument.Rule = Formula | EmptyArgument;
    //MarkTransient(Argument);

    PrefixOp.Rule =
        ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
        | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop;
    MarkTransient(PrefixOp);

    InfixOp.Rule =
        expop
        | mulop
        | divop
        | plusop
        | minop
        | concatop
        | gtop
        | eqop
        | ltop
        | neqop
        | gteop
        | lteop;
    MarkTransient(InfixOp);

    //PostfixOp.Rule = ImplyPrecedenceHere(Precedence.UnaryPostFix) + percentop;
    // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
    PostfixOp.Rule = PreferShiftHere() + percentop;
    MarkTransient(PostfixOp);
    #endregion

    #region References
    Reference.Rule = ReferenceItem
                     | ReferenceFunctionCall
                     | OpenParen + Reference + PreferShiftHere() + CloseParen
                     | Prefix + ReferenceItem
                     | DynamicDataExchange
                     ;

    ReferenceFunctionCall.Rule =
        Reference + colon + Reference
        | Reference + intersectop + Reference
        | OpenParen + Union + CloseParen
        | RefFunctionName + Arguments + CloseParen
        //| ConditionalRefFunctionName + Arguments + CloseParen
        ;

    RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;

    Union.Rule = MakePlusRule(Union, comma, Reference);

    ReferenceItem.Rule =
        Cell
        | NamedRange
        | VRange
        | HRange
        | RefError
        | UDFunctionCall
        ;
    MarkTransient(ReferenceItem);

    UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
    UDFName.Rule = UDFToken;

    VRange.Rule = VRangeToken;
    HRange.Rule = HRangeToken;

    //ConditionalRefFunctionName.Rule = ExcelConditionalRefFunctionToken;

    QuotedFileSheet.Rule = QuotedFileSheetToken;
    Sheet.Rule = SheetToken;
    MultipleSheets.Rule = MultipleSheetsToken;

    Cell.Rule = CellToken;

    File.Rule = OpenSquareParen + FileToken + CloseSquareParen;

    DynamicDataExchange.Rule = File + exclamationMark + DDEToken;

    NamedRange.Rule = NamedRangeToken | NamedRangeCombinationToken;

    Prefix.Rule =
        Sheet
        | File + Sheet
        | File + exclamationMark
        | QuotedFileSheet
        | MultipleSheets
        | File + MultipleSheets;
    #endregion

    #region Arrays
    ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;

    ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
    ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);

    ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
    #endregion

    #endregion

    #region 5-Operator Precedence
    // Some of these operators are neutral associative instead of left associative,
    // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
    // structure of the parse tree, we like consistency.
    RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
    RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
    RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
    RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
    RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
    RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop);
    RegisterOperators(Precedence.Union, Associativity.Left, comma);
    RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
    RegisterOperators(Precedence.Range, Associativity.Left, colon);

    //RegisterOperators(Precedence.ParameterSeparator, comma);
    #endregion
}
#region Precedence and Priority constants
// Operator precedence levels passed to RegisterOperators / ImplyPrecedenceHere in the constructor.
// A higher number is a higher precedence level.
// Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
// Could also be an enum, but this way you don't need int casts
private static class Precedence
{
// Don't use priority 0, Irony seems to view it as no priority set
public const int Comparison = 1;
public const int Concatenation = 2;
public const int Addition = 3;
public const int Multiplication = 4;
public const int Exponentiation = 5;
public const int UnaryPostFix = 6;
public const int UnaryPreFix = 7;
// Level 8 is intentionally left unused (formerly reserved for Reference)
//public const int Reference = 8;
// Reference operators are assigned the highest levels
public const int Union = 9;
public const int Intersection = 10;
public const int Range = 11;
}
// Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
// E.g. "A1" is both a CellToken and NamedRange, pick celltoken because it has a higher priority
// E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
private static class TerminalPriority
{
// Irony Low value
//public const int Low = -1000;
// Name-like tokens sit below the default so that more specific tokens win
public const int NamedRange = -800;
public const int ReservedName = -700;
// Irony Normal value, default value
//public const int Normal = 0;
public const int Bool = 0;
public const int MultipleSheetsToken = 100;
// Irony High value
//public const int High = 1000;
public const int CellToken = 1000;
public const int NamedRangeCombination = 1100;
// UDF is below the built-in function tokens, so a built-in name is preferred when both match
public const int UDF = 1150;
public const int ExcelFunction = 1200;
public const int ExcelRefFunction = 1200;
public const int FileToken = 1200;
public const int SheetToken = 1200;
public const int QuotedFileToken = 1200;
}
#endregion
#region Excel function list
// Names of the built-in functions recognised by the ExcelFunction terminal; the constructor joins
// them into a single regex alternation. IF, CHOOSE, INDEX, OFFSET and INDIRECT are deliberately
// absent: they are matched by the dedicated reference-function terminals instead.
private static readonly IList<string> excelFunctionList = new List<string>
{
    "ABS",
    "ACCRINT",
    "ACCRINTM",
    "ACOS",
    "ACOSH",
    "ADDRESS",
    "AMORDEGRC",
    "AMORLINC",
    "AND",
    "AREAS",
    "ASC",
    "ASIN",
    "ASINH",
    "ATAN",
    "ATAN2",
    "ATANH",
    "AVEDEV",
    "AVERAGE",
    "AVERAGEA",
    "AVERAGEIF",
    "AVERAGEIFS",
    "BAHTTEXT",
    "BESSELI",
    "BESSELJ",
    "BESSELK",
    "BESSELY",
    "BETADIST",
    "BETAINV",
    "BIN2DEC",
    "BIN2HEX",
    "BIN2OCT",
    "BINOMDIST",
    "CALL",
    "CEILING",
    "CELL",
    "CHAR",
    "CHIDIST",
    "CHIINV",
    "CHITEST",
    //"CHOOSE",
    "CLEAN",
    "CODE",
    "COLUMN",
    "COLUMNS",
    "COMBIN",
    "COMPLEX",
    "CONCATENATE",
    "CONFIDENCE",
    "CONVERT",
    "CORREL",
    "COS",
    "COSH",
    "COUNT",
    "COUNTA",
    "COUNTBLANK",
    "COUNTIF",
    "COUNTIFS",
    "COUPDAYBS",
    "COUPDAYS",
    "COUPDAYSNC",
    "COUPNCD",
    "COUPNUM",
    "COUPPCD",
    "COVAR",
    "CRITBINOM",
    "CUBEKPIMEMBER",
    "CUBEMEMBER",
    "CUBEMEMBERPROPERTY",
    "CUBERANKEDMEMBER",
    "CUBESET",
    "CUBESETCOUNT",
    "CUBEVALUE",
    "CUMIPMT",
    "CUMPRINC",
    "DATE",
    "DATEVALUE",
    "DAVERAGE",
    "DAY",
    "DAYS360",
    "DB",
    "DCOUNT",
    "DCOUNTA",
    "DDB",
    "DEC2BIN",
    "DEC2HEX",
    "DEC2OCT",
    "DEGREES",
    "DELTA",
    "DEVSQ",
    "DGET",
    "DISC",
    "DMAX",
    "DMIN",
    "DOLLAR",
    "DOLLARDE",
    "DOLLARFR",
    "DPRODUCT",
    "DSTDEV",
    "DSTDEVP",
    "DSUM",
    "DURATION",
    "DVAR",
    "DVARP",
    // These two were previously fused into the single bogus entry "EDATEEFFECT",
    // so neither function was recognised as built-in
    "EDATE",
    "EFFECT",
    "EOMONTH",
    "ERF",
    "ERFC",
    "ERROR.TYPE",
    "EUROCONVERT",
    "EVEN",
    "EXACT",
    "EXP",
    "EXPONDIST",
    "FACT",
    "FACTDOUBLE",
    "FALSE",
    "FDIST",
    "FIND",
    "FINV",
    "FISHER",
    "FISHERINV",
    "FIXED",
    "FLOOR",
    "FORECAST",
    "FREQUENCY",
    "FTEST",
    "FV",
    "FVSCHEDULE",
    "GAMMADIST",
    "GAMMAINV",
    "GAMMALN",
    "GCD",
    "GEOMEAN",
    "GESTEP",
    "GETPIVOTDATA",
    "GROWTH",
    "HARMEAN",
    "HEX2BIN",
    "HEX2DEC",
    "HEX2OCT",
    "HLOOKUP",
    "HOUR",
    "HYPERLINK",
    "HYPGEOMDIST",
    //"IF",
    "ISBLANK",
    "IFERROR",
    "IMABS",
    "IMAGINARY",
    "IMARGUMENT",
    "IMCONJUGATE",
    "IMCOS",
    "IMDIV",
    "IMEXP",
    "IMLN",
    "IMLOG10",
    "IMLOG2",
    "IMPOWER",
    "IMPRODUCT",
    "IMREAL",
    "IMSIN",
    "IMSQRT",
    "IMSUB",
    "IMSUM",
    "INFO",
    "INT",
    "INTERCEPT",
    "INTRATE",
    "IPMT",
    "IRR",
    "IS",
    "ISB",
    "ISERROR",
    "ISNA",
    "ISNUMBER",
    "ISPMT",
    "JIS",
    "KURT",
    "LARGE",
    "LCM",
    "LEFT",
    "LEFTB",
    "LEN",
    "LENB",
    "LINEST",
    "LN",
    "LOG",
    "LOG10",
    "LOGEST",
    "LOGINV",
    "LOGNORMDIST",
    "LOOKUP",
    "LOWER",
    "MATCH",
    "MAX",
    "MAXA",
    "MDETERM",
    "MDURATION",
    "MEDIAN",
    "MID",
    "MIDB",
    "MIN",
    "MINA",
    "MINUTE",
    "MINVERSE",
    "MIRR",
    "MMULT",
    "MOD",
    "MODE",
    "MONTH",
    "MROUND",
    "MULTINOMIAL",
    "N",
    "NA",
    "NEGBINOMDIST",
    "NETWORKDAYS",
    "NOMINAL",
    "NORMDIST",
    "NORMINV",
    "NORMSDIST",
    "NORMSINV",
    "NOT",
    "NOW",
    "NPER",
    "NPV",
    "OCT2BIN",
    "OCT2DEC",
    "OCT2HEX",
    "ODD",
    "ODDFPRICE",
    "ODDFYIELD",
    "ODDLPRICE",
    "ODDLYIELD",
    "OR",
    "PEARSON",
    "PERCENTILE",
    "PERCENTRANK",
    "PERMUT",
    "PHONETIC",
    "PI",
    "PMT",
    "POISSON",
    "POWER",
    "PPMT",
    "PRICE",
    "PRICEDISC",
    "PRICEMAT",
    "PROB",
    "PRODUCT",
    "PROPER",
    "PV",
    "QUARTILE",
    "QUOTIENT",
    "RADIANS",
    "RAND",
    "RANDBETWEEN",
    "RANK",
    "RATE",
    "RECEIVED",
    "REGISTER.ID",
    "REPLACE",
    "REPLACEB",
    "REPT",
    "RIGHT",
    "RIGHTB",
    "ROMAN",
    "ROUND",
    "ROUNDDOWN",
    "ROUNDUP",
    "ROW",
    "ROWS",
    "RSQ",
    "RTD",
    "SEARCH",
    "SEARCHB",
    "SECOND",
    "SERIESSUM",
    "SIGN",
    "SIN",
    "SINH",
    "SKEW",
    "SLN",
    "SLOPE",
    "SMALL",
    "SQL.REQUEST",
    "SQRT",
    "SQRTPI",
    "STANDARDIZE",
    "STDEV",
    "STDEVA",
    "STDEVP",
    "STDEVPA",
    "STEYX",
    "SUBSTITUTE",
    "SUBTOTAL",
    "SUM",
    "SUMIF",
    "SUMIFS",
    "SUMPRODUCT",
    "SUMSQ",
    "SUMX2MY2",
    "SUMX2PY2",
    "SUMXMY2",
    "SYD",
    "T",
    "TAN",
    "TANH",
    "TBILLEQ",
    "TBILLPRICE",
    "TBILLYIELD",
    "TDIST",
    "TEXT",
    "TIME",
    "TIMEVALUE",
    "TINV",
    "TODAY",
    "TRANSPOSE",
    "TREND",
    "TRIM",
    "TRIMMEAN",
    "TRUE",
    "TRUNC",
    "TTEST",
    "TYPE",
    "UPPER",
    "VALUE",
    "VAR",
    "VARA",
    "VARP",
    "VARPA",
    "VDB",
    "VLOOKUP",
    "WEEKDAY",
    "WEEKNUM",
    "WEIBULL",
    "WORKDAY",
    "XIRR",
    "XNPV",
    "YEAR",
    "YEARFRAC",
    "YIELD",
    "YIELDDISC",
    "YIELDMAT",
    "ZTEST"
};
#endregion
}
#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
#region Non-Terminals
// Names of the non-terminals that remain visible as nodes in the parse tree
public const string Argument = "Argument";
public const string Arguments = "Arguments";
public const string ArrayColumns = "ArrayColumns";
public const string ArrayConstant = "ArrayConstant";
public const string ArrayFormula = "ArrayFormula";
public const string ArrayRows = "ArrayRows";
public const string Bool = "Bool";
public const string Cell = "Cell";
public const string Constant = "Constant";
public const string ConstantArray = "ConstantArray";
public const string DynamicDataExchange = "DynamicDataExchange";
public const string EmptyArgument = "EmptyArgument";
public const string Error = "Error";
public const string ExcelFunction = "ExcelFunction";
public const string File = "File";
public const string Formula = "Formula";
public const string FormulaWithEq = "FormulaWithEq";
public const string FunctionCall = "FunctionCall";
public const string FunctionName = "FunctionName";
public const string HorizontalRange = "HRange";
public const string MultipleSheets = "MultipleSheets";
public const string NamedRange = "NamedRange";
public const string Number = "Number";
public const string Prefix = "Prefix";
public const string QuotedFileSheet = "QuotedFileSheet";
public const string Range = "Range";
public const string Reference = "Reference";
//public const string ReferenceFunction = "ReferenceFunction";
public const string ReferenceFunctionCall = "ReferenceFunctionCall";
public const string RefError = "RefError";
public const string RefFunctionName = "RefFunctionName";
public const string ReservedName = "ReservedName";
public const string Sheet = "Sheet";
public const string Text = "Text";
public const string UDFName = "UDFName";
public const string UDFunctionCall = "UDFunctionCall";
public const string Union = "Union";
public const string VerticalRange = "VRange";
#endregion
#region Transient Non-Terminals
// Names of non-terminals the grammar declares under the Transient* constants
// (most are passed to MarkTransient in the grammar constructor)
public const string TransientStart = "Start";
public const string TransientInfixOp = "InfixOp";
public const string TransientPostfixOp = "PostfixOp";
public const string TransientPrefixOp = "PrefixOp";
public const string TransientReferenceItem = "ReferenceItem";
#endregion
#region Terminals
// Names of the terminals (tokens) produced by the lexer
public const string TokenBool = "BoolToken";
public const string TokenCell = "CellToken";
public const string TokenDDE = "DDEToken";
public const string TokenEmptyArgument = "EmptyArgumentToken";
public const string TokenError = "ErrorToken";
public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
public const string TokenFileNameNumeric = "FileNameNumericToken";
public const string TokenFileSheetQuoted = "FileSheetQuotedToken";
public const string TokenHRange = "HRangeToken";
public const string TokenIntersect = "INTERSECT";
public const string TokenMultipleSheets = "MultipleSheetsToken";
public const string TokenNamedRange = "NamedRangeToken";
public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
public const string TokenNumber = "NumberToken";
public const string TokenRefError = "RefErrorToken";
public const string TokenReservedName = "ReservedNameToken";
public const string TokenSheet = "SheetNameToken";
public const string TokenText = "TextToken";
public const string TokenUDF = "UDFToken";
public const string TokenUnionOperator = ",";
public const string TokenVRange = "VRangeToken";
#endregion
}
#endregion
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v114/ExcelFormulaParser.cs
================================================
using System;
using System.Collections;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Linq;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v114
{
///
/// Excel formula parser
/// Contains parser and utilities that operate directly on the parse tree, or makes working with the parse tree easier.
///
public static class ExcelFormulaParser
{
/// <summary>
/// Backing field for the parser; marked ThreadStatic so every thread has its own instance
/// </summary>
[ThreadStatic]
private static Parser _p;

/// <summary>
/// Parser for the current thread, created on first access
/// </summary>
private static Parser p
{
    get
    {
        if (_p == null)
        {
            _p = new Parser(new ExcelFormulaGrammar());
        }
        return _p;
    }
}
/// <summary>
/// Parse a formula and return the root node of the resulting tree
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed
/// </exception>
/// <returns>Parse tree root node</returns>
public static ParseTreeNode Parse(string input)
{
    var tree = ParseToTree(input);
    return tree.Root;
}
/// <summary>
/// Parse a formula and return the complete parse tree
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed
/// </exception>
/// <returns>Parse tree</returns>
public static ParseTree ParseToTree(string input)
{
    var tree = p.Parse(input);
    if (!tree.HasErrors())
    {
        return tree;
    }
    throw new ArgumentException("Failed parsing input <<" + input + ">>");
}
/// <summary>
/// All nodes of the tree in depth-first pre-order, with a conditional stop
/// </summary>
/// <param name="root">Node to start the traversal at; it is the first node returned</param>
/// <param name="stopAt">Don't process the children of a node matching this predicate</param>
/// <returns>A lazily evaluated sequence of nodes</returns>
// inspiration taken from https://irony.codeplex.com/discussions/213938
public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
{
    // Explicit stack instead of recursion
    var stack = new Stack<ParseTreeNode>();
    stack.Push(root);
    while (stack.Count > 0)
    {
        var node = stack.Pop();
        yield return node;

        // Check if we don't want to process the children of this node
        if (stopAt != null && stopAt(node))
        {
            continue;
        }

        var children = node.ChildNodes;
        // Push children on in reverse order so that they will
        // be evaluated left -> right when popped.
        for (int i = children.Count - 1; i >= 0; i--)
        {
            stack.Push(children[i]);
        }
    }
}

/// <summary>
/// All nodes of the tree in depth-first pre-order
/// </summary>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
{
    return AllNodesConditional(root);
}

/// <summary>
/// All nodes of a certain type in depth-first pre-order
/// </summary>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
{
    return AllNodes(root.AllNodes(), type);
}

/// <summary>
/// Filters an already enumerated node sequence down to the nodes of the given type
/// </summary>
internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
{
    return allNodes.Where(node => node.Is(type));
}
/// <summary>
/// Tests whether the tree rooted at this node has at least one node of the given type
/// </summary>
public static bool Contains(this ParseTreeNode root, string type)
{
    var matches = root.AllNodes(type);
    return matches.Any();
}
/// <summary>
/// The node type, i.e. the name of the grammar term this node was created for
/// </summary>
public static string Type(this ParseTreeNode node)
{
    var term = node.Term;
    return term.Name;
}
/// <summary>
/// Tests whether the node's type equals <paramref name="type"/>
/// </summary>
public static bool Is(this ParseTreeNode pt, string type)
{
    var actualType = pt.Type();
    return actualType == type;
}
/// <summary>
/// Tests whether this node is a function: a function call, reference function call,
/// user defined function call, or a two-child Reference whose second child is a function
/// </summary>
public static Boolean IsFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            return true;
        case GrammarNames.Reference:
            // This gives potential problems/duplication on external UDF's, but they are so rare that I think this is acceptable
            return input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction();
        default:
            return false;
    }
}
/// <summary>
/// Tests whether this node represents parentheses "(_)": a Formula or Reference
/// whose single child has the same type as the node itself
/// </summary>
public static bool IsParentheses(this ParseTreeNode input)
{
    var type = input.Type();
    if (type != GrammarNames.Formula && type != GrammarNames.Reference)
    {
        return false;
    }
    return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(type);
}
/// <summary>
/// Tests whether this node is a function with exactly three children of which the middle one is an operator
/// </summary>
public static bool IsBinaryOperation(this ParseTreeNode input)
{
    // Count property instead of the LINQ Count() extension, consistent with the other checks in this class
    return input.IsFunction()
           && input.ChildNodes.Count == 3
           && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Tests whether this node is a binary operation of type FunctionCall
/// </summary>
public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
{
    if (!input.IsBinaryOperation())
    {
        return false;
    }
    return input.Is(GrammarNames.FunctionCall);
}
/// <summary>
/// Tests whether this node is a binary operation of type ReferenceFunctionCall
/// </summary>
public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
{
    if (!input.IsBinaryOperation())
    {
        return false;
    }
    return input.Is(GrammarNames.ReferenceFunctionCall);
}
/// <summary>
/// Tests whether this node is a unary prefix or unary postfix operation
/// </summary>
public static bool IsUnaryOperation(this ParseTreeNode input)
{
    if (IsUnaryPrefixOperation(input))
    {
        return true;
    }
    return IsUnaryPostfixOperation(input);
}
/// <summary>
/// Tests whether this node is a function with exactly two children of which the first one is an operator
/// </summary>
public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
{
    // Count property instead of the LINQ Count() extension, consistent with IsUnaryPostfixOperation
    return input.IsFunction()
           && input.ChildNodes.Count == 2
           && input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Tests whether this node is a function with exactly two children of which the second one is an operator
/// </summary>
public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }
    var secondTerm = input.ChildNodes[1].Term;
    return secondTerm.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Returns the input without its last character
/// </summary>
private static string RemoveFinalSymbol(string input)
{
    var lastIndex = input.Length - 1;
    return input.Substring(0, lastIndex);
}
/// <summary>
/// Get the function or operator name of this function call
/// </summary>
/// <param name="input">A node for which IsFunction holds</param>
/// <returns>
/// The operator text for operations, "INTERSECT" for intersections, "," for unions,
/// the upper-cased name for named functions, or prefix + name for external user defined functions
/// </returns>
/// <exception cref="ArgumentException">If the node is none of the above</exception>
public static string GetFunction(this ParseTreeNode input)
{
    // Intersection and union have fixed names that are not taken from the printed tokens
    if (input.IsIntersection())
    {
        return GrammarNames.TokenIntersect;
    }
    if (input.IsUnion())
    {
        return GrammarNames.TokenUnionOperator;
    }
    // Binary and postfix operations: the operator is the second child
    if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
    {
        return input.ChildNodes[1].Print();
    }
    // Prefix operations: the operator is the first child
    if (input.IsUnaryPrefixOperation())
    {
        return input.ChildNodes[0].Print();
    }
    if (input.IsNamedFunction())
    {
        // Drop the last character of the printed name (the function tokens end in '(').
        // ToUpperInvariant keeps the result independent of the current culture (e.g. "if" must never
        // become "İF"), since callers such as MatchFunction compare the name with ==.
        return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
    }
    if (input.IsExternalUDFunction())
    {
        // Printed prefix followed by the name of the wrapped function
        return String.Format("{0}{1}", input.ChildNodes[0].Print(), GetFunction(input.ChildNodes[1]));
    }
    throw new ArgumentException("Not a function call", "input");
}
/// <summary>
/// Tests whether this node is a function whose name (as returned by GetFunction) equals <paramref name="functionName"/>
/// </summary>
public static bool MatchFunction(this ParseTreeNode input, String functionName)
{
    // Only ask for the name when the node is a function at all
    if (!IsFunction(input))
    {
        return false;
    }
    return GetFunction(input) == functionName;
}
/// <summary>
/// Tests whether this node is a function whose first child is a FunctionName or RefFunctionName
/// </summary>
public static bool IsBuiltinFunction(this ParseTreeNode node)
{
    if (!node.IsFunction())
    {
        return false;
    }
    if (node.ChildNodes[0].Is(GrammarNames.FunctionName))
    {
        return true;
    }
    return node.ChildNodes[0].Is(GrammarNames.RefFunctionName);
}
/// <summary>
/// Tests whether this node is a binary operation whose operator is the intersection token
/// </summary>
public static bool IsIntersection(this ParseTreeNode input)
{
    if (!IsBinaryOperation(input))
    {
        return false;
    }
    var operatorName = input.ChildNodes[1].Token.Terminal.Name;
    return operatorName == GrammarNames.TokenIntersect;
}
/// <summary>
/// Whether or not this node represents a union:
/// a ReferenceFunctionCall whose only child is a Union node
/// </summary>
public static bool IsUnion(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.ReferenceFunctionCall))
    {
        return false;
    }
    if (input.ChildNodes.Count != 1)
    {
        return false;
    }
    return input.ChildNodes[0].Is(GrammarNames.Union);
}
/// <summary>
/// Checks whether this node is a function call with name, and not just a unary or binary operation
/// </summary>
public static bool IsNamedFunction(this ParseTreeNode input)
{
    var isNamedCall = input.Is(GrammarNames.FunctionCall)
                      && input.ChildNodes[0].Is(GrammarNames.FunctionName);
    if (isNamedCall)
    {
        return true;
    }

    var isNamedReferenceCall = input.Is(GrammarNames.ReferenceFunctionCall)
                               && input.ChildNodes[0].Is(GrammarNames.RefFunctionName);
    if (isNamedReferenceCall)
    {
        return true;
    }

    // User defined function calls always carry a name
    return input.Is(GrammarNames.UDFunctionCall);
}
/// <summary>
/// Whether this node is a binary or a unary operation
/// </summary>
public static bool IsOperation(this ParseTreeNode input)
{
    if (input.IsBinaryOperation())
    {
        return true;
    }
    return input.IsUnaryOperation();
}
/// <summary>
/// Whether this node is a Reference with exactly two children of which the second is a named function
/// </summary>
public static bool IsExternalUDFunction(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.Reference))
    {
        return false;
    }
    if (input.ChildNodes.Count != 2)
    {
        return false;
    }
    return input.ChildNodes[1].IsNamedFunction();
}
/// <summary>
/// True if this node presents a number constant with a sign
/// </summary>
public static bool IsNumberWithSign(this ParseTreeNode input)
{
    if (!input.IsUnaryPrefixOperation())
    {
        return false;
    }

    // First child of the operand; must be a Constant that wraps a Number
    var operandContent = input.ChildNodes[1].ChildNodes[0];
    if (!operandContent.Is(GrammarNames.Constant))
    {
        return false;
    }
    return operandContent.ChildNodes[0].Is(GrammarNames.Number);
}
/// <summary>
/// Go to the first non-formula child node
/// </summary>
public static ParseTreeNode SkipFormula(this ParseTreeNode input)
{
    var current = input;
    while (true)
    {
        if (!current.Is(GrammarNames.Formula))
        {
            return current;
        }
        // Descend into the first child of the Formula node
        current = current.ChildNodes.First();
    }
}
/// <summary>
/// Go to the first "relevant" child node, i.e. skips wrapper nodes
/// </summary>
/// <remarks>
/// Skips:
///  * FormulaWithEq and ArrayFormula nodes
///  * Formula nodes
///  * Parentheses
///  * Reference nodes which are just wrappers
/// </remarks>
public static ParseTreeNode SkipToRelevant(this ParseTreeNode input)
{
    var current = input;
    while (true)
    {
        switch (current.Type())
        {
            case GrammarNames.FormulaWithEq:
            case GrammarNames.ArrayFormula:
                // The wrapped formula is the second child
                current = current.ChildNodes[1];
                break;
            case GrammarNames.Formula:
            case GrammarNames.Reference:
                // Only single-child nodes are wrappers; this also catches parentheses
                if (current.ChildNodes.Count != 1)
                {
                    return current;
                }
                current = current.ChildNodes[0];
                break;
            default:
                return current;
        }
    }
}
/// <summary>
/// Pretty-print a parse tree to a string
/// </summary>
/// <param name="input">Root node of the (sub)tree to print</param>
/// <returns>The printed text of the node and its children</returns>
/// <exception cref="ArgumentException">
/// If a function node is of an unknown kind, or if a non-terminal is not handled below
/// and does not have exactly one child
/// </exception>
public static string Print(this ParseTreeNode input)
{
// For terminals, just print the token text
if (input.Term is Terminal)
{
return input.Token.Text;
}
// (Lazy) enumerable for printed childs
var childs = input.ChildNodes.Select(Print);
// Concrete list when needed
List childsL;
// Switch on nonterminals
switch (input.Term.Name)
{
case GrammarNames.Formula:
// Check if these are brackets, otherwise print first child
return IsParentheses(input) ? String.Format("({0})", childs.First()) : childs.First();
case GrammarNames.FunctionCall:
case GrammarNames.ReferenceFunctionCall:
case GrammarNames.UDFunctionCall:
childsL = childs.ToList();
if (input.IsNamedFunction())
{
// The closing parenthesis is not among the printed children, so it is appended here
return String.Join("", childsL) + ")";
}
if (input.IsBinaryOperation())
{
// format string for "normal" binary operation
string format = "{0} {1} {2}";
if (input.IsIntersection())
{
// Intersections print only the two operands, separated by a single space
format = "{0} {2}";
}else if (input.IsBinaryReferenceOperation())
{
// Other reference operations are printed without spaces around the operator
format = "{0}{1}{2}";
}
return String.Format(format, childsL[0], childsL[1], childsL[2]);
}
if (input.IsUnion())
{
return String.Format("({0})", String.Join(",", childsL));
}
if (input.IsUnaryOperation())
{
return String.Join("", childsL);
}
throw new ArgumentException("Unknown function type.");
case GrammarNames.Reference:
/*if (IsParentheses(input) || IsUnion(input))
{
return String.Format("({0})", childs.First());
}
childsL = childs.ToList();
if (IsIntersection(input))
{
return String.Format("{0} {1}", childsL[0], childsL[2]);
}
if (IsBinaryOperation(input))
{
return String.Format("{0}{1}{2}", childsL[0], childsL[1], childsL[2]);
}*/
if (IsParentheses(input))
{
return String.Format("({0})", childs.First());
}
return String.Join("", childs);
case GrammarNames.File:
// Only the first child is printed, wrapped in square brackets
return String.Format("[{0}]", childs.First());
case GrammarNames.Prefix:
var ret = String.Join("", childs);
// The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
{
ret += "!";
}
return ret;
case GrammarNames.ArrayFormula:
// The formula is the second child; the braces and equals sign are written out literally
return "{=" + childs.ElementAt(1) + "}";
case GrammarNames.DynamicDataExchange:
childsL = childs.ToList();
return String.Format("{0}!{1}", childsL[0], childsL[1]);
// Terms for which to print all child nodes concatenated
case GrammarNames.ArrayConstant:
case GrammarNames.FormulaWithEq:
return String.Join("", childs);
// Terms for which we print the childs comma-separated
case GrammarNames.Arguments:
case GrammarNames.ArrayRows:
case GrammarNames.Union:
return String.Join(",", childs);
// Printed semicolon-separated
case GrammarNames.ArrayColumns:
return String.Join(";", childs);
case GrammarNames.ConstantArray:
// "{{" and "}}" are escaped braces: the first child is printed between literal curly braces
return String.Format("{{{0}}}", childs.First());
default:
// If it is not defined above and the number of childs is exactly one, we want to just print the first child
if (input.ChildNodes.Count == 1)
{
return childs.First();
}
throw new ArgumentException(String.Format("Could not print node of type '{0}'.\nThis probably means the excel grammar was modified without the print function being modified", input.Term.Name));
}
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v120/ExcelFormulaGrammar.cs
================================================
using System;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v120
{
/// <summary>
/// Contains the XLParser grammar
/// </summary>
[Language("Excel Formulas", "1.2.0", "Grammar for Excel Formulas")]
public class ExcelFormulaGrammar : Grammar
{
#region 1-Terminals
#region Symbols and operators
// Terminals for single symbols and operators. Each of these expression-bodied properties
// calls ToTerm with the literal symbol text whenever it is accessed.
public Terminal at => ToTerm("@");
public Terminal comma => ToTerm(",");
public Terminal colon => ToTerm(":");
public Terminal semicolon => ToTerm(";");
public Terminal OpenParen => ToTerm("(");
public Terminal CloseParen => ToTerm(")");
public Terminal CloseSquareParen => ToTerm("]");
public Terminal OpenSquareParen => ToTerm("[");
public Terminal exclamationMark => ToTerm("!");
public Terminal CloseCurlyParen => ToTerm("}");
public Terminal OpenCurlyParen => ToTerm("{");
public Terminal QuoteS => ToTerm("'");
public Terminal mulop => ToTerm("*");
public Terminal plusop => ToTerm("+");
public Terminal divop => ToTerm("/");
public Terminal minop => ToTerm("-");
public Terminal concatop => ToTerm("&");
public Terminal expop => ToTerm("^");
// Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
// Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);
public Terminal percentop => ToTerm("%");
public Terminal gtop => ToTerm(">");
public Terminal eqop => ToTerm("=");
public Terminal ltop => ToTerm("<");
public Terminal neqop => ToTerm("<>");
public Terminal gteop => ToTerm(">=");
public Terminal lteop => ToTerm("<=");
#endregion
#region Literals
// Boolean literal: the text TRUE or FALSE
public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE")
{
Priority = TerminalPriority.Bool
};
// Number literal; integers are typed as Int32, Int64 or big integer
public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None)
{
DefaultIntTypes = new[] {TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt}
};
// Double-quoted text; doubled quotes and line breaks are allowed inside
public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak);
// Single-quoted string with the same options; used by the DynamicDataExchange rule
public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak)
{ Priority = TerminalPriority.SingleQuotedString };
// All error literals except #REF!, which has its own terminal below
public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A");
public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError);
#endregion
#region Functions
// All function tokens below include the opening parenthesis that follows the function name.
// User defined function: optional "_xll." prefix, then word characters, backslashes or dots
public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, @"(_xll\.)?[\w\\.]+\(")
{ Priority = TerminalPriority.UDF };
// INDEX, OFFSET and INDIRECT; used by the RefFunctionName rule
public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(")
{ Priority = TerminalPriority.ExcelRefFunction };
// IF and CHOOSE; also used by the RefFunctionName rule
public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(")
{ Priority = TerminalPriority.ExcelRefFunction };
// Alternation of every name in excelFunctionList
public Terminal ExcelFunction { get; } = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + string.Join("|", excelFunctionList) + ")\\(")
{ Priority = TerminalPriority.ExcelFunction };
// Using this instead of Empty allows more accurate trees
public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);
#endregion
#region References and names
// Column range such as A:B: 1-4 letters on each side of the colon, each optionally preceded by '$'
public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?[A-Z]{1,4}:[$]?[A-Z]{1,4}");
// Row range such as 1:2: row numbers without leading zero, each optionally preceded by '$'
public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*");
// Cell such as A1 or $A$1: 1-4 letters followed by a row number, each part optionally preceded by '$'
private const string CellTokenRegex = "[$]?[A-Z]{1,4}[$]?[1-9][0-9]*";
public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex)
{ Priority = TerminalPriority.CellToken };
// Name: starts with a letter, backslash or underscore, followed by word characters or dots
private const string NamedRangeRegex = @"[A-Za-z\\_][\w\.]*";
public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NamedRangeRegex)
{ Priority = TerminalPriority.Name };
// To prevent e.g. "A1A1" being parsed as 2 celltokens
public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, "(TRUE|FALSE|" + CellTokenRegex + ")" + NamedRangeRegex)
{ Priority = TerminalPriority.NamedRangeCombination };
// Reserved name: "_xlnm." followed by letters or underscores
public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+")
{ Priority = TerminalPriority.ReservedName };
#region Structured References
//public Terminal SRTableNameToken = new RegexBasedTerminal(GrammarNames.TokenSRTableName, @"[\w\\.]+\[")
//{Priority = 0};
public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, @"[\w\\.]+")
{ Priority = TerminalPriority.SRColumn };
//public Terminal SREnclosedColumnToken = new RegexBasedTerminal(GrammarNames.TokenSREnclosedColumn, @"\[( )*[\w+\\.,:#'""{}$^&*+=->]+( )*\]")
//{Priority = 0};
#endregion
#region Prefixes
// Characters excluded from the unquoted sheet name pattern but accepted by the quoted one
private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
// Characters excluded from both sheet name patterns
private const string notSheetNameChars = @"'*\[\]\\:/?";
//const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
//const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
// Quoted sheet names may additionally contain doubled single quotes
private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')+";
//private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";
// Unquoted sheet name including the trailing exclamation mark
public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!")
{ Priority = TerminalPriority.SheetToken };
// Quoted sheet name with its closing quote and exclamation mark; the opening quote is matched
// separately (the Prefix rule puts QuoteS in front of this token)
public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!")
{ Priority = TerminalPriority.SheetQuotedToken };
// Two sheet names separated by a colon, either both unquoted or together inside one pair of quotes
private static readonly string multiSheetRegex = $"(({normalSheetName}:{normalSheetName})|('{quotedSheetName}:{quotedSheetName}'))!";
public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
// Digits enclosed in square brackets, e.g. [1]
private const string fileNameNumericRegex = @"\[[0-9]+\]";
public Terminal FileToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex)
{ Priority = TerminalPriority.FileNameNumericToken };
// Any bracket-enclosed text without the characters < > : " / | ? * [ ]
private const string fileNameRegex = @"\[[^<>:""/\|?*\[\]]+\]";
public Terminal EnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenEnclosedInBrackets, fileNameRegex)
{ Priority = TerminalPriority.FileName };
// Source: http://stackoverflow.com/a/6416209/572635
// Drive letter or UNC share, followed by zero or more directories, ending with a backslash
private const string filePathRegex = @"(?:[a-zA-Z]\:|\\\\[\w\.]+\\[\w.$]+)\\(?:[\w]+\\)*";
public Terminal FilePathWindowsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePathWindows, filePathRegex);
#endregion
#endregion
#endregion
#region 2-NonTerminals
// Most nonterminals are first defined here, so they can be used anywhere in the rules
// Otherwise you can only use nonterminals that have been defined previously
// Each non-terminal is named after the matching GrammarNames constant; the rules themselves
// are assigned in the constructor.
public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument);
public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments);
public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns);
public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant);
public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula);
public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows);
public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool);
public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell);
public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant);
public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray);
public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument);
public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error);
public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula);
public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq);
public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall);
public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName);
public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange);
public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp);
public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange);
public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number);
public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix);
public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
// NOTE(review): QuotedFileSheet and Sheet are declared but get no rule in the constructor
public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference);
//public NonTerminal ReferenceFunction{ get; } = new NonTerminal(GrammarNames.ReferenceFunction);
public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError);
public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName);
public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName);
public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet);
public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart);
public NonTerminal StructureReference { get; } = new NonTerminal(GrammarNames.StructureReference);
public NonTerminal StructureReferenceColumnOrKeyword { get; } = new NonTerminal(GrammarNames.StructureReferenceColumnOrKeyword);
public NonTerminal StructureReferenceExpression { get; } = new NonTerminal(GrammarNames.StructureReferenceExpression);
//public NonTerminal StructureReferenceKeyword { get; } = new NonTerminal(GrammarNames.StructureReferenceKeyword);
public NonTerminal StructureReferenceTable { get; } = new NonTerminal(GrammarNames.StructureReferenceTable);
public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);
#endregion
// Builds the grammar: marks punctuation, assigns the rule of every non-terminal
// and registers operator precedence and associativity.
// NOTE(review): the 'false' passed to the base constructor is presumably Irony's case-sensitivity flag - confirm
public ExcelFormulaGrammar() : base(false)
{
#region Punctuation
// Brackets of all three kinds are marked as punctuation
MarkPunctuation(OpenParen, CloseParen);
MarkPunctuation(OpenSquareParen, CloseSquareParen);
MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
//exclamationMark.SetFlag(TermFlags.IsDelimiter);
#endregion
#region Rules
#region Base rules
// The input is a formula, a formula with a leading '=', or an array formula
Root = Start;
Start.Rule = FormulaWithEq
| Formula
| ArrayFormula
;
MarkTransient(Start);
ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;
FormulaWithEq.Rule = eqop + Formula;
Formula.Rule =
Reference
| Constant
| FunctionCall
| ConstantArray
| OpenParen + Formula + CloseParen
| ReservedName
;
//MarkTransient(Formula);
ReservedName.Rule = ReservedNameToken;
Constant.Rule = Number
| Text
| Bool
| Error
;
// Each constant kind wraps exactly one token
Text.Rule = TextToken;
Number.Rule = NumberToken;
Bool.Rule = BoolToken;
Error.Rule = ErrorToken;
RefError.Rule = RefErrorToken;
#endregion
#region Functions
// A function call is a named built-in function or a prefix, postfix or infix operation
FunctionCall.Rule =
FunctionName + Arguments + CloseParen
| PrefixOp + Formula
| Formula + PostfixOp
| Formula + InfixOp + Formula
;
FunctionName.Rule = ExcelFunction;
// Zero or more comma-separated arguments
Arguments.Rule = MakeStarRule(Arguments, comma, Argument);
//Arguments.Rule = Argument | Argument + comma + Arguments;
EmptyArgument.Rule = EmptyArgumentToken;
Argument.Rule = Formula | EmptyArgument;
//MarkTransient(Argument);
PrefixOp.Rule =
ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop;
MarkTransient(PrefixOp);
InfixOp.Rule =
expop
| mulop
| divop
| plusop
| minop
| concatop
| gtop
| eqop
| ltop
| neqop
| gteop
| lteop;
MarkTransient(InfixOp);
//PostfixOp.Rule = ImplyPrecedenceHere(Precedence.UnaryPostFix) + percentop;
// ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
PostfixOp.Rule = PreferShiftHere() + percentop;
MarkTransient(PostfixOp);
#endregion
#region References
// A reference is a single item (optionally prefixed), a reference function call,
// a parenthesized reference or a dynamic data exchange link
Reference.Rule = ReferenceItem
| ReferenceFunctionCall
| OpenParen + Reference + PreferShiftHere() + CloseParen
| Prefix + ReferenceItem
| DynamicDataExchange
;
// Range (colon), intersection, parenthesized union, or a reference-returning function
ReferenceFunctionCall.Rule =
Reference + colon + Reference
| Reference + intersectop + Reference
| OpenParen + Union + CloseParen
| RefFunctionName + Arguments + CloseParen
//| ConditionalRefFunctionName + Arguments + CloseParen
;
RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;
// One or more comma-separated references
Union.Rule = MakePlusRule(Union, comma, Reference);
ReferenceItem.Rule =
Cell
| NamedRange
| VRange
| HRange
| RefError
| UDFunctionCall
| StructureReference
;
MarkTransient(ReferenceItem);
UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
UDFName.Rule = UDFToken;
VRange.Rule = VRangeToken;
HRange.Rule = HRangeToken;
Cell.Rule = CellToken;
// A file is a numeric index in brackets, a bracketed name, or a Windows path followed by a bracketed name
File.Rule = FileToken
| EnclosedInBracketsToken
| FilePathWindowsToken + EnclosedInBracketsToken
;
DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;
NamedRange.Rule = NameToken | NamedRangeCombinationToken;
// Sheet, file and multiple-sheet prefixes, quoted or unquoted
Prefix.Rule =
SheetToken
| QuoteS + SheetQuotedToken
| File + SheetToken
| QuoteS + File + SheetQuotedToken
| File + exclamationMark
| MultipleSheetsToken
| File + MultipleSheetsToken
;
StructureReferenceColumnOrKeyword.Rule =
OpenSquareParen + SRColumnToken + CloseSquareParen
| OpenSquareParen + NameToken + CloseSquareParen
| EnclosedInBracketsToken;
//StructureReferenceKeyword.Rule = EnclosedInBracketsToken;
StructureReferenceTable.Rule = NameToken;
// One to three comma-separated columns/keywords, the last of which may be a colon-separated range
StructureReferenceExpression.Rule =
StructureReferenceColumnOrKeyword
| StructureReferenceColumnOrKeyword + colon + StructureReferenceColumnOrKeyword
| StructureReferenceColumnOrKeyword + comma + StructureReferenceColumnOrKeyword
| StructureReferenceColumnOrKeyword + comma + StructureReferenceColumnOrKeyword + colon + StructureReferenceColumnOrKeyword
| StructureReferenceColumnOrKeyword + comma + StructureReferenceColumnOrKeyword + comma + StructureReferenceColumnOrKeyword
| StructureReferenceColumnOrKeyword + comma + StructureReferenceColumnOrKeyword + comma + StructureReferenceColumnOrKeyword + colon + StructureReferenceColumnOrKeyword
;
// The table name is optional
StructureReference.Rule =
StructureReferenceColumnOrKeyword
| OpenSquareParen + StructureReferenceExpression + CloseSquareParen
| StructureReferenceTable + StructureReferenceColumnOrKeyword
| StructureReferenceTable + OpenSquareParen + StructureReferenceExpression + CloseSquareParen
;
#endregion
#region Arrays
// Array constant: ArrayColumns is a semicolon-separated list of ArrayRows,
// and ArrayRows is a comma-separated list of constants
ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;
ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);
ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
#endregion
#endregion
#region 5-Operator Precedence
// Some of these operators are neutral associative instead of left associative,
// but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
// structure of the parse tree, we like consistency.
RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop);
RegisterOperators(Precedence.Union, Associativity.Left, comma);
RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
RegisterOperators(Precedence.Range, Associativity.Left, colon);
//RegisterOperators(Precedence.ParameterSeparator, comma);
#endregion
}
#region Precedence and Priority constants
// Operator precedence levels passed to RegisterOperators and ImplyPrecedenceHere.
// Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
// Could also be an enum, but this way you don't need int casts
private static class Precedence
{
// Don't use priority 0, Irony seems to view it as no priority set
public const int Comparison = 1;
public const int Concatenation = 2;
public const int Addition = 3;
public const int Multiplication = 4;
public const int Exponentiation = 5;
public const int UnaryPostFix = 6;
public const int UnaryPreFix = 7;
// 8 stays unused while the Reference level is commented out
//public const int Reference = 8;
public const int Union = 9;
public const int Intersection = 10;
public const int Range = 11;
}
// Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
// E.g. "A1" is both a CellToken and NamedRange, pick celltoken because it has a higher priority
// E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
private static class TerminalPriority
{
// Irony Low value
//public const int Low = -1000;
public const int SRColumn = -900;
public const int Name = -800;
public const int ReservedName = -700;
public const int FileName = -500;
public const int SingleQuotedString = -100;
// Irony Normal value, default value
//public const int Normal = 0;
public const int Bool = 0;
public const int MultipleSheetsToken = 100;
// Irony High value
//public const int High = 1000;
public const int CellToken = 1000;
public const int NamedRangeCombination = 1100;
public const int UDF = 1150;
// The remaining terminals all share the highest priority used in this grammar
public const int ExcelFunction = 1200;
public const int ExcelRefFunction = 1200;
public const int FileNameNumericToken = 1200;
public const int SheetToken = 1200;
public const int SheetQuotedToken = 1200;
}
#endregion
// The entries of the embedded v120 built-in function list resource, one per non-empty line
private static string[] excelFunctionList
{
    get
    {
        var lineSeparators = new [] {'\n', '\r'};
        return Properties.Resources.ExcelBuiltinFunctionList_v120.Split(lineSeparators, StringSplitOptions.RemoveEmptyEntries);
    }
}
}
#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
#region Non-Terminals
public const string Argument = "Argument";
public const string Arguments = "Arguments";
public const string ArrayColumns = "ArrayColumns";
public const string ArrayConstant = "ArrayConstant";
public const string ArrayFormula = "ArrayFormula";
public const string ArrayRows = "ArrayRows";
public const string Bool = "Bool";
public const string Cell = "Cell";
public const string Constant = "Constant";
public const string ConstantArray = "ConstantArray";
public const string DynamicDataExchange = "DynamicDataExchange";
public const string EmptyArgument = "EmptyArgument";
public const string Error = "Error";
// Although listed among the non-terminals, the grammar uses this as the name of the ExcelFunction terminal
public const string ExcelFunction = "ExcelFunction";
public const string File = "File";
public const string Formula = "Formula";
public const string FormulaWithEq = "FormulaWithEq";
public const string FunctionCall = "FunctionCall";
public const string FunctionName = "FunctionName";
// The constant name differs from its value
public const string HorizontalRange = "HRange";
public const string NamedRange = "NamedRange";
public const string Number = "Number";
public const string Prefix = "Prefix";
public const string QuotedFileSheet = "QuotedFileSheet";
public const string Range = "Range";
public const string Reference = "Reference";
public const string ReferenceFunctionCall = "ReferenceFunctionCall";
public const string RefError = "RefError";
public const string RefFunctionName = "RefFunctionName";
public const string ReservedName = "ReservedName";
public const string Sheet = "Sheet";
public const string StructureReference = "StructureReference";
public const string StructureReferenceColumnOrKeyword = "StructureReferenceColumnOrKeyword";
public const string StructureReferenceExpression = "StructureReferenceExpression";
public const string StructureReferenceTable = "StructureReferenceTable";
public const string Text = "Text";
public const string UDFName = "UDFName";
public const string UDFunctionCall = "UDFunctionCall";
public const string Union = "Union";
// The constant name differs from its value
public const string VerticalRange = "VRange";
#endregion
#region Transient Non-Terminals
// Names of the non-terminals that the grammar marks as transient
public const string TransientStart = "Start";
public const string TransientInfixOp = "InfixOp";
public const string TransientPostfixOp = "PostfixOp";
public const string TransientPrefixOp = "PrefixOp";
public const string TransientReferenceItem = "ReferenceItem";
#endregion
#region Terminals
public const string TokenBool = "BoolToken";
public const string TokenCell = "CellToken";
public const string TokenEmptyArgument = "EmptyArgumentToken";
public const string TokenError = "ErrorToken";
public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
public const string TokenFilePathWindows = "FilePathWindowsToken";
public const string TokenEnclosedInBrackets = "EnclosedInBracketsToken";
public const string TokenFileNameNumeric = "FileNameNumericToken";
public const string TokenHRange = "HRangeToken";
// Name of the implied-symbol terminal that stands for the intersection operator
public const string TokenIntersect = "INTERSECT";
public const string TokenMultipleSheets = "MultipleSheetsToken";
public const string TokenName = "NameToken";
public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
public const string TokenNumber = "NumberToken";
public const string TokenRefError = "RefErrorToken";
public const string TokenReservedName = "ReservedNameToken";
public const string TokenSingleQuotedString = "SingleQuotedString";
public const string TokenSheet = "SheetNameToken";
public const string TokenSheetQuoted = "SheetNameQuotedToken";
// TokenSRTableName and TokenSREnclosedColumn are only referenced from commented-out terminals in the grammar
public const string TokenSRTableName = "SRTableName";
public const string TokenSRKeyword = "SRKeyword";
public const string TokenSRColumn = "SRColumn";
public const string TokenSREnclosedColumn = "SREnclosedColumn";
public const string TokenText = "TextToken";
public const string TokenUDF = "UDFToken";
public const string TokenUnionOperator = ",";
public const string TokenVRange = "VRangeToken";
#endregion
}
#endregion
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v120/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v120
{
/// <summary>
/// Excel formula parser <br/>
/// Contains parser and utilities that operate directly on the parse tree, or make working with the parse tree easier.
/// </summary>
public static class ExcelFormulaParser
{
/// <summary>
/// Parser instance of the current thread; null until the parser is first requested on that thread
/// </summary>
[ThreadStatic] private static Parser _p;

/// <summary>
/// Parser for the calling thread, created from a new ExcelFormulaGrammar on first access
/// </summary>
private static Parser p
{
    get
    {
        if (_p == null)
        {
            _p = new Parser(new ExcelFormulaGrammar());
        }
        return _p;
    }
}
/// <summary>
/// Parse a formula, return the tree's root node
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed
/// </exception>
/// <returns>Parse tree root node</returns>
public static ParseTreeNode Parse(string input)
{
    var tree = ParseToTree(input);
    return tree.Root;
}
/// <summary>
/// Parse a formula, return the tree
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed
/// </exception>
/// <returns>Parse tree</returns>
public static ParseTree ParseToTree(string input)
{
    var parseResult = p.Parse(input);
    if (!parseResult.HasErrors())
    {
        return parseResult;
    }
    throw new ArgumentException("Failed parsing input <<" + input + ">>");
}
/// <summary>
/// Nodes in depth-first pre-order, with a conditional stop.
/// Every visited node is returned, including a node that matches the stop predicate.
/// </summary>
/// <param name="root">Node at which the traversal starts; it is the first node returned</param>
/// <param name="stopAt">Don't process the children of a node matching this predicate</param>
// inspiration taken from https://irony.codeplex.com/discussions/213938
public static IEnumerable AllNodesConditional(this ParseTreeNode root, Predicate stopAt = null)
{
// Explicit stack instead of recursion
var stack = new Stack();
stack.Push(root);
while (stack.Count > 0)
{
var node = stack.Pop();
yield return node;
// Check if we don't want to process the children of this node
if (stopAt != null && stopAt(node)) continue;
var children = node.ChildNodes;
// Push children on in reverse order so that they will
// be evaluated left -> right when popped.
for (int i = children.Count - 1; i >= 0; i--)
{
stack.Push(children[i]);
}
}
}
/// <summary>
/// All nodes in depth-first pre-order (AllNodesConditional without a stop predicate)
/// </summary>
public static IEnumerable AllNodes(this ParseTreeNode root)
{
return AllNodesConditional(root);
}
/// <summary>
/// All nodes of a certain type in depth-first pre-order
/// </summary>
/// <param name="root">Node at which the traversal starts</param>
/// <param name="type">Node type to keep, compared through Is</param>
public static IEnumerable AllNodes(this ParseTreeNode root, string type)
{
return AllNodes(root.AllNodes(), type);
}
// Filters an already enumerated node sequence down to the nodes of the given type
internal static IEnumerable AllNodes(IEnumerable allNodes, string type)
{
return allNodes.Where(node => node.Is(type));
}
/// <summary>
/// Get the parent node of a node
/// </summary>
/// <remarks>
/// This is an expensive operation, as the whole tree will be searched through
/// </remarks>
/// <param name="child">Node whose parent is requested</param>
/// <param name="treeRoot">Root of the tree to search</param>
/// <exception cref="ArgumentException">If no node in the tree has the child among its children</exception>
public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
{
    foreach (var candidate in treeRoot.AllNodes())
    {
        if (candidate.ChildNodes.Any(c => c == child))
        {
            return candidate;
        }
    }
    throw new ArgumentException("Child is not part of the tree", nameof(child));
}
/// <summary>
/// The node type/name, i.e. the name of the node's term
/// </summary>
public static string Type(this ParseTreeNode node) => node.Term.Name;
/// <summary>
/// Check if a node is of a particular type
/// </summary>
public static bool Is(this ParseTreeNode pt, string type) => string.Equals(pt.Type(), type);
/// <summary>
/// Checks whether this node is a function
/// </summary>
public static Boolean IsFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            return true;
        case GrammarNames.Reference:
            // This gives potential problems/duplication on external UDF's, but they are so rare that I think this is acceptable
            return input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction();
        default:
            return false;
    }
}
/// <summary>
/// Whether or not this node represents parentheses "(_)": a Formula whose
/// only child is a Formula, or a Reference whose only child is a Reference
/// </summary>
public static bool IsParentheses(this ParseTreeNode input)
{
    string kind = input.Type();
    bool canWrap = kind == GrammarNames.Formula || kind == GrammarNames.Reference;
    if (!canWrap)
    {
        return false;
    }

    // Parentheses show up as a node with a single child of the very same type
    if (input.ChildNodes.Count != 1)
    {
        return false;
    }
    return input.ChildNodes[0].Is(kind);
}
/// <summary>
/// Whether this node is a function with exactly three children of which the middle one is an operator
/// </summary>
public static bool IsBinaryOperation(this ParseTreeNode input)
{
    if (!input.IsFunction()) return false;

    var kids = input.ChildNodes;
    if (kids.Count != 3) return false;

    var middleFlags = kids[1].Term.Flags;
    return (middleFlags & TermFlags.IsOperator) == TermFlags.IsOperator;
}
/// <summary>
/// Whether this node is a binary operation that is a FunctionCall node
/// </summary>
public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
{
    if (!input.IsBinaryOperation())
    {
        return false;
    }
    return input.Is(GrammarNames.FunctionCall);
}
/// <summary>
/// Whether this node is a binary operation that is a ReferenceFunctionCall node
/// </summary>
public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
{
    if (!input.IsBinaryOperation())
    {
        return false;
    }
    return input.Is(GrammarNames.ReferenceFunctionCall);
}
/// <summary>
/// Whether this node is a unary operation, either prefix or postfix
/// </summary>
public static bool IsUnaryOperation(this ParseTreeNode input)
    => input.IsUnaryPrefixOperation() || input.IsUnaryPostfixOperation();
/// <summary>
/// Whether this node is a function with exactly two children of which the first one is an operator
/// </summary>
public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction()) return false;

    var kids = input.ChildNodes;
    if (kids.Count != 2) return false;

    var leadingFlags = kids[0].Term.Flags;
    return (leadingFlags & TermFlags.IsOperator) == TermFlags.IsOperator;
}
/// <summary>
/// Whether this node is a function with exactly two children of which the second one is an operator
/// </summary>
public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction()) return false;

    var kids = input.ChildNodes;
    if (kids.Count != 2) return false;

    var trailingFlags = kids[1].Term.Flags;
    return (trailingFlags & TermFlags.IsOperator) == TermFlags.IsOperator;
}
// Returns the input without its last character.
private static string RemoveFinalSymbol(string input)
{
    int lastIndex = input.Length - 1;
    return input.Remove(lastIndex);
}
/// <summary>
/// Get the function or operator name of this function call
/// </summary>
/// <param name="input">A node for which <see cref="IsFunction"/> holds</param>
/// <returns>
/// The intersect/union token name for intersections and unions, the printed
/// operator for unary and binary operations, the upper-cased name for named
/// functions, and the printed prefix followed by the function name for
/// external user defined functions
/// </returns>
/// <exception cref="ArgumentException">The node is none of the above</exception>
public static string GetFunction(this ParseTreeNode input)
{
    if (input.IsIntersection())
    {
        return GrammarNames.TokenIntersect;
    }
    if (input.IsUnion())
    {
        return GrammarNames.TokenUnionOperator;
    }
    if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
    {
        // The operator is the second child in both shapes
        return input.ChildNodes[1].Print();
    }
    if (input.IsUnaryPrefixOperation())
    {
        return input.ChildNodes[0].Print();
    }
    if (input.IsNamedFunction())
    {
        // The printed name carries one trailing symbol that is not part of the name.
        // Upper-case with the invariant culture: a culture-sensitive ToUpper() would turn
        // "if" into "İF" under Turkish casing rules, so comparisons against "IF" would fail.
        return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
    }
    if (input.IsExternalUDFunction())
    {
        // Prefix followed by the name of the wrapped function call
        return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
    }
    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Check if this node is a function whose name, as returned by <see cref="GetFunction"/>, equals <paramref name="functionName"/>
/// </summary>
public static bool MatchFunction(this ParseTreeNode input, string functionName)
{
    if (!input.IsFunction())
    {
        return false;
    }
    return input.GetFunction() == functionName;
}
/// <summary>
/// Get all the arguments of a function or operation
/// </summary>
/// <param name="input">Function or operation node</param>
/// <returns>The operand or argument nodes, in source order</returns>
/// <exception cref="ArgumentException">The node is not a recognised function or operation</exception>
public static IEnumerable GetFunctionArguments(this ParseTreeNode input)
{
    var kids = input.ChildNodes;

    if (input.IsNamedFunction())
    {
        // kids[1] is the "Arguments" nonterminal; unwrap each "Argument" child
        return from argument in kids[1].ChildNodes
               select argument.ChildNodes[0];
    }

    if (input.IsBinaryOperation())
    {
        // Operands sit on either side of the operator
        return new[] {kids[0], kids[2]};
    }

    if (input.IsUnaryPrefixOperation())
    {
        return new[] {kids[1]};
    }

    if (input.IsUnaryPostfixOperation())
    {
        return new[] {kids[0]};
    }

    if (input.IsUnion())
    {
        // The single child is the Union node, whose children are the arguments
        return kids[0].ChildNodes;
    }

    if (input.IsExternalUDFunction())
    {
        // Reference -> UDFunctionCall -> Arguments -> unwrap each Argument
        var call = kids[1];
        return from argument in call.ChildNodes[1].ChildNodes
               select argument.ChildNodes[0];
    }

    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Checks whether this node is a built-in excel function, i.e. a function
/// whose first child is a FunctionName or RefFunctionName node
/// </summary>
public static bool IsBuiltinFunction(this ParseTreeNode node)
{
    if (!node.IsFunction())
    {
        return false;
    }

    var head = node.ChildNodes[0];
    return head.Is(GrammarNames.FunctionName) || head.Is(GrammarNames.RefFunctionName);
}
/// <summary>
/// Whether or not this node represents an intersection: a binary operation
/// whose operator token belongs to the intersect terminal
/// </summary>
public static bool IsIntersection(this ParseTreeNode input)
{
    if (!IsBinaryOperation(input))
    {
        return false;
    }

    var operatorTerminal = input.ChildNodes[1].Token.Terminal;
    return operatorTerminal.Name == GrammarNames.TokenIntersect;
}
/// <summary>
/// Whether or not this node represents a union: a ReferenceFunctionCall whose only child is a Union node
/// </summary>
public static bool IsUnion(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.ReferenceFunctionCall)) return false;
    if (input.ChildNodes.Count != 1) return false;
    return input.ChildNodes[0].Is(GrammarNames.Union);
}
/// <summary>
/// Checks whether this node is a function call with name, and not just a unary or binary operation
/// </summary>
public static bool IsNamedFunction(this ParseTreeNode input)
{
    // Built-in function: FunctionCall that starts with a FunctionName
    if (input.Is(GrammarNames.FunctionCall) && input.ChildNodes[0].Is(GrammarNames.FunctionName))
    {
        return true;
    }

    // Built-in reference function: ReferenceFunctionCall that starts with a RefFunctionName
    if (input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes[0].Is(GrammarNames.RefFunctionName))
    {
        return true;
    }

    // User defined function calls always count as named
    return input.Is(GrammarNames.UDFunctionCall);
}
/// <summary>
/// Whether this node is a binary or a unary operation
/// </summary>
public static bool IsOperation(this ParseTreeNode input)
    => input.IsBinaryOperation() || input.IsUnaryOperation();
/// <summary>
/// Whether this node is a two-child Reference whose second child is a named function
/// </summary>
public static bool IsExternalUDFunction(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.Reference)) return false;
    if (input.ChildNodes.Count != 2) return false;
    return input.ChildNodes[1].IsNamedFunction();
}
/// <summary>
/// True if this node presents a number constant with a sign: a unary prefix
/// operation whose operand directly wraps a Constant that wraps a Number
/// </summary>
public static bool IsNumberWithSign(this ParseTreeNode input)
{
    if (!IsUnaryPrefixOperation(input))
    {
        return false;
    }

    // First child of the operand
    var operandContent = input.ChildNodes[1].ChildNodes[0];
    if (!operandContent.Is(GrammarNames.Constant))
    {
        return false;
    }
    return operandContent.ChildNodes[0].Is(GrammarNames.Number);
}
/// <summary>
/// Extract all of the information from a Prefix nonterminal
/// </summary>
/// <param name="prefix">A node of type Prefix</param>
/// <returns>The quoting, file and sheet details found among the child nodes of the prefix</returns>
/// <exception cref="ArgumentException">The node is not a Prefix node</exception>
public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix)
{
    if (prefix.Type() != GrammarNames.Prefix) throw new ArgumentException("Not a prefix", nameof(prefix));

    string filePath = null;
    int? fileNumber = null;
    string fileName = null;
    string sheetName = null;
    string multipleSheets = null;

    var parts = prefix.ChildNodes;
    // Index of the child node we're processing
    int cur = 0;

    // Check for quotes
    bool quoted = parts[cur].Is("'");
    if (quoted) cur++;

    // Check and process file
    if (cur < parts.Count && parts[cur].Is(GrammarNames.File))
    {
        var file = parts[cur];
        if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
        {
            // Numeric filename: drop the first and last character, parse the rest.
            // TryParse leaves 0 behind on failure, and 0 is mapped to "no file number".
            int n;
            int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out n);
            fileNumber = n;
            if (fileNumber == 0) fileNumber = null;
        }
        else
        {
            // String filename
            var icur = 0;
            // Check if it includes a path
            if (file.ChildNodes[icur].Is(GrammarNames.TokenFilePathWindows))
            {
                filePath = file.ChildNodes[icur].Print();
                icur++;
            }
            // Drop the first and last character of the printed file name
            fileName = Substr(file.ChildNodes[icur].Print(), 1, 1);
        }
        cur++;
    }

    // A prefix can consist of a file only (Print special-cases a Prefix whose single
    // child is a File), in which case no sheet node is left to inspect.
    // Indexing past the end would throw ArgumentOutOfRangeException.
    if (cur < parts.Count)
    {
        var sheetPart = parts[cur];
        if (sheetPart.Is(GrammarNames.TokenSheet))
        {
            // Non-quoted sheet: drop the last character
            sheetName = Substr(sheetPart.Print(), 1);
        }
        else if (sheetPart.Is(GrammarNames.TokenSheetQuoted))
        {
            // Quoted sheet: remove quote and !
            sheetName = Substr(sheetPart.Print(), 2);
        }
        else if (sheetPart.Is(GrammarNames.TokenMultipleSheets))
        {
            // Multiple sheets: drop the last character
            multipleSheets = Substr(sheetPart.Print(), 1);
        }
    }

    // Put it all into the convenience class
    return new PrefixInfo(
        sheetName,
        fileNumber,
        fileName,
        filePath,
        multipleSheets,
        quoted
    );
}
// Returns s without its first removeFirst and its last removeLast characters.
private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
{
    int keep = s.Length - removeFirst - removeLast;
    return s.Substring(removeFirst, keep);
}
/// <summary>
/// Go to the first non-formula child node, following the first child of each Formula node
/// </summary>
public static ParseTreeNode SkipFormula(this ParseTreeNode input)
{
    var current = input;
    for (; current.Is(GrammarNames.Formula); current = current.ChildNodes.First())
    {
        // Nothing to do: the loop header performs the descent
    }
    return current;
}
/// <summary>
/// Go to the first "relevant" child node, i.e. skips wrapper nodes
/// </summary>
/// <remarks>
/// Skips:
/// * FormulaWithEq and ArrayFormula nodes
/// * Argument, Formula and Reference nodes that have exactly one child
///   (this covers parentheses and Reference nodes which are just wrappers)
/// </remarks>
public static ParseTreeNode SkipToRelevant(this ParseTreeNode input)
{
    var current = input;
    for (;;)
    {
        string kind = current.Type();

        if (kind == GrammarNames.FormulaWithEq || kind == GrammarNames.ArrayFormula)
        {
            // The wrapped formula is the second child
            current = current.ChildNodes[1];
            continue;
        }

        bool mayBeWrapper = kind == GrammarNames.Argument
                            || kind == GrammarNames.Formula
                            || kind == GrammarNames.Reference;
        // This also catches parentheses
        if (mayBeWrapper && current.ChildNodes.Count == 1)
        {
            current = current.ChildNodes[0];
            continue;
        }

        return current;
    }
}
/// <summary>
/// Pretty-print a parse tree to a string
/// </summary>
/// <param name="input">Root of the (sub)tree to print</param>
/// <returns>The token text for terminals, otherwise the children printed and combined according to the node type</returns>
/// <exception cref="ArgumentException">
/// The node is a function call of an unknown shape, or a node type that is
/// not handled here and has a child count other than one
/// </exception>
public static string Print(this ParseTreeNode input)
{
    // For terminals, just print the token text
    if (input.Term is Terminal)
    {
        return input.Token.Text;
    }

    // (Lazy) enumerable for printed children; materialised only where indexing is needed
    var printed = input.ChildNodes.Select(Print);
    string kind = input.Term.Name;

    if (kind == GrammarNames.Formula)
    {
        // Check if these are brackets, otherwise print first child
        bool wrapped = IsParentheses(input);
        string inner = printed.First();
        return wrapped ? "(" + inner + ")" : inner;
    }

    if (kind == GrammarNames.FunctionCall
        || kind == GrammarNames.ReferenceFunctionCall
        || kind == GrammarNames.UDFunctionCall)
    {
        var parts = printed.ToList();

        if (input.IsNamedFunction())
        {
            return string.Concat(parts) + ")";
        }

        if (input.IsBinaryOperation())
        {
            if (input.IsIntersection())
            {
                // The intersection operator itself is not printed, only a space
                return parts[0] + " " + parts[2];
            }
            if (input.IsBinaryReferenceOperation())
            {
                // Reference operators are printed without surrounding spaces
                return parts[0] + parts[1] + parts[2];
            }
            // "Normal" binary operation
            return parts[0] + " " + parts[1] + " " + parts[2];
        }

        if (input.IsUnion())
        {
            return "(" + string.Join(",", parts) + ")";
        }

        if (input.IsUnaryOperation())
        {
            return string.Concat(parts);
        }

        throw new ArgumentException("Unknown function type.");
    }

    if (kind == GrammarNames.Reference)
    {
        if (IsParentheses(input))
        {
            return "(" + printed.First() + ")";
        }
        return string.Concat(printed);
    }

    if (kind == GrammarNames.Prefix)
    {
        string prefixText = string.Concat(printed);
        // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
        bool fileOnly = input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File);
        return fileOnly ? prefixText + "!" : prefixText;
    }

    if (kind == GrammarNames.ArrayFormula)
    {
        return "{=" + printed.ElementAt(1) + "}";
    }

    if (kind == GrammarNames.StructureReference)
    {
        // With two children the first one is printed in front of the contents
        bool hasTable = input.ChildNodes.Count == 2;
        int contentsIndex = hasTable ? 1 : 0;
        var parts = printed.ToList();

        string text = hasTable ? "" + parts[0] : "";
        if (input.ChildNodes[contentsIndex].Is(GrammarNames.StructureReferenceColumnOrKeyword))
        {
            return text + parts[contentsIndex];
        }
        // Any other kind of contents is wrapped in square brackets
        return text + "[" + parts[contentsIndex] + "]";
    }

    // Terms for which to print all child nodes concatenated
    if (kind == GrammarNames.ArrayConstant
        || kind == GrammarNames.DynamicDataExchange
        || kind == GrammarNames.FormulaWithEq
        || kind == GrammarNames.File
        || kind == GrammarNames.StructureReferenceExpression)
    {
        return string.Concat(printed);
    }

    // Terms for which we print the children comma-separated
    if (kind == GrammarNames.Arguments
        || kind == GrammarNames.ArrayRows
        || kind == GrammarNames.Union)
    {
        return string.Join(",", printed);
    }

    if (kind == GrammarNames.ArrayColumns)
    {
        return string.Join(";", printed);
    }

    if (kind == GrammarNames.ConstantArray)
    {
        return "{" + printed.First() + "}";
    }

    // If it is not handled above and the number of children is exactly one, we want to just print the first child
    if (input.ChildNodes.Count == 1)
    {
        return printed.First();
    }

    throw new ArgumentException($"Could not print node of type '{input.Term.Name}'.\nThis probably means the excel grammar was modified without the print function being modified");
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v120/PrefixInfo.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace XLParser.Web.XLParserVersions.v120
{
/// <summary>
/// Immutable holder for the parts of a reference prefix: quoting, file path,
/// file number or name, sheet and multiple sheets
/// </summary>
public class PrefixInfo
{
    // Backing store for FileNumber; null when no file number was given
    private readonly int? _fileNumber;

    /// <summary>
    /// Store the given parts; every part is optional
    /// </summary>
    public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
    {
        IsQuoted = isQuoted;
        FilePath = filePath;
        _fileNumber = fileNumber;
        FileName = fileName;
        Sheet = sheet;
        MultipleSheets = multipleSheets;
    }

    public string FilePath { get; }
    public bool HasFilePath => FilePath != null;

    /// <summary>
    /// The file number; reading it throws when <see cref="HasFileNumber"/> is false
    /// </summary>
    public int FileNumber => _fileNumber.Value;
    public bool HasFileNumber => _fileNumber.HasValue;

    public string FileName { get; }
    public bool HasFileName => FileName != null;

    /// <summary>
    /// True when either a file name or a file number is present
    /// </summary>
    public bool HasFile => HasFileName || HasFileNumber;

    public string Sheet { get; }
    public bool HasSheet => Sheet != null;

    public string MultipleSheets { get; }
    public bool HasMultipleSheets => MultipleSheets != null;

    public bool IsQuoted { get; }

    /// <summary>
    /// Render the prefix: optional quote, path, bracketed file number and/or
    /// name, sheet, multiple sheets, optional closing quote, then "!"
    /// </summary>
    public override string ToString()
    {
        var text = new StringBuilder();
        if (IsQuoted) text.Append("'");
        if (HasFilePath) text.Append(FilePath);
        if (HasFileNumber) text.Append("[").Append(FileNumber).Append("]");
        if (HasFileName) text.Append("[").Append(FileName).Append("]");
        if (HasSheet) text.Append(Sheet);
        if (HasMultipleSheets) text.Append(MultipleSheets);
        if (IsQuoted) text.Append("'");
        text.Append("!");
        return text.ToString();
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v139/ExcelFormulaGrammar.cs
================================================
using System;
using System.IO;
using System.Reflection;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v139
{
///
/// Contains the XLParser grammar
///
[Language("Excel Formulas", "1.3.9", "Grammar for Excel Formulas")]
public class ExcelFormulaGrammar : Grammar
{
#region 1-Terminals
// Terminals are the tokens of the grammar. Key terms are looked up through ToTerm
// on every access; regex and literal based terminals are created once per grammar instance.
#region Symbols and operators
public Terminal at => ToTerm("@");
public Terminal comma => ToTerm(",");
public Terminal colon => ToTerm(":");
public Terminal semicolon => ToTerm(";");
public Terminal OpenParen => ToTerm("(");
public Terminal CloseParen => ToTerm(")");
public Terminal CloseSquareParen => ToTerm("]");
public Terminal OpenSquareParen => ToTerm("[");
public Terminal exclamationMark => ToTerm("!");
public Terminal CloseCurlyParen => ToTerm("}");
public Terminal OpenCurlyParen => ToTerm("{");
public Terminal QuoteS => ToTerm("'");
public Terminal mulop => ToTerm("*");
public Terminal plusop => ToTerm("+");
public Terminal divop => ToTerm("/");
public Terminal minop => ToTerm("-");
public Terminal concatop => ToTerm("&");
public Terminal expop => ToTerm("^");
// Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
// Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);
public Terminal percentop => ToTerm("%");
public Terminal gtop => ToTerm(">");
public Terminal eqop => ToTerm("=");
public Terminal ltop => ToTerm("<");
public Terminal neqop => ToTerm("<>");
public Terminal gteop => ToTerm(">=");
public Terminal lteop => ToTerm("<=");
#endregion
#region Literals
// Boolean literal: TRUE or FALSE
public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE")
{
Priority = TerminalPriority.Bool
};
// Number literal; integer values are tried as Int32, then Int64, then big integer
public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None)
{
DefaultIntTypes = new[] {TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt}
};
// Double-quoted text; a doubled quote and line breaks are allowed, escapes are not
public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes);
// Same as TextToken but delimited by single quotes
public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes)
{ Priority = TerminalPriority.SingleQuotedString };
// Error literals other than #REF!, which has its own terminal below
public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A");
public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError);
#endregion
#region Functions
// All function name terminals below include the opening parenthesis in the token
private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers
// User defined function name, optionally preceded by a quoted .xla file name and "!" or by "_xll."
public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, $@"('[^<>""/\|?*]+\.xla'!|_xll\.)?[\w{SpecialUdfChars}\\.]+\(")
{ Priority = TerminalPriority.UDF };
public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(")
{ Priority = TerminalPriority.ExcelRefFunction };
public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(")
{ Priority = TerminalPriority.ExcelRefFunction };
// Alternation of every name in excelFunctionList, followed by "("
public Terminal ExcelFunction { get; } = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + string.Join("|", excelFunctionList) + ")\\(")
{ Priority = TerminalPriority.ExcelFunction };
// Using this instead of Empty allows a more accurate tree
public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);
#endregion
#region References and names
// Column letters: one to three letters, the three-letter form limited to XFD at most
private const string ColumnPattern = @"(?:[A-Z]{1,2}|[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D])";
// Column range such as A:B, each side optionally prefixed with $
public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern);
// Row range such as 1:2, each side optionally prefixed with $
public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*");
// Cell address: column letters followed by a row number, each optionally prefixed with $
private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?[1-9][0-9]*";
public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex)
{ Priority = TerminalPriority.CellToken };
// Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark
// Note that both character classes also accept a backslash
private const string NameStartCharRegex = @"[\p{L}\\_]";
private const string NameValidCharacterRegex = @"[\w\\_\.\?]";
public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*")
{ Priority = TerminalPriority.Name };
// Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference.
// If we ever parse R1C1 references, make sure to include them here
// TODO: Add all function names here
private const string NameInvalidWordsRegex =
"((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
// \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
+ "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
;
// To prevent e.g. "A1A1" being parsed as 2 cell tokens
public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NameInvalidWordsRegex + NameValidCharacterRegex + "+")
{ Priority = TerminalPriority.NamedRangeCombination };
// Names starting with "_xlnm."
// NOTE(review): this and several terminals below are public fields, unlike the get-only properties above
public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+")
{ Priority = TerminalPriority.ReservedName };
#region Structured References
//public Terminal SRTableNameToken = new RegexBasedTerminal(GrammarNames.TokenSRTableName, @"[\w\\.]+\[")
//{Priority = 0};
// Column name inside a structured reference: word characters, backslash and dot
public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, @"[\w\\.]+")
{ Priority = TerminalPriority.SRColumn };
//public Terminal SREnclosedColumnToken = new RegexBasedTerminal(GrammarNames.TokenSREnclosedColumn, @"\[( )*[\w+\\.,:#'""{}$^&*+=->]+( )*\]")
//{Priority = 0};
#endregion
#region Prefixes
// Characters that may only appear in a sheet name when the name is quoted
private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
// Characters that are excluded from sheet names by the patterns below, quoted or not
private const string notSheetNameChars = @"'*\[\]\\:/?";
//const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
//const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
// Quoted names additionally accept a doubled single quote
private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*";
//private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";
// Sheet tokens include the trailing "!"; the quoted variant also includes the closing quote
// but not the opening one, which the Prefix rule matches separately as QuoteS
public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!")
{ Priority = TerminalPriority.SheetToken };
public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!")
{ Priority = TerminalPriority.SheetQuotedToken };
// Two sheet names separated by ":" and followed by "!"
private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!";
private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!";
public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
// Numeric file reference: digits in square brackets
private const string fileNameNumericRegex = @"\[[0-9]+\]";
public Terminal FileToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex)
{ Priority = TerminalPriority.FileNameNumericToken };
private const string fileNameForbiddenCharacter = @"<>:""/\|?*";
// Anything but square brackets, enclosed in square brackets; fileNameForbiddenCharacter is not applied here
private const string fileNameRegex = @"\[[^\[\]]+\]";
public Terminal EnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenEnclosedInBrackets, fileNameRegex)
{ Priority = TerminalPriority.FileName };
// Source: http://stackoverflow.com/a/6416209/572635
// Drive letter or \\server\share, followed by any number of directory names, each ending in a backslash
private const string filePathRegex = @"(?:[a-zA-Z]\:|\\\\[\w\.]+\\[\w.$]+)\\(([^" + fileNameForbiddenCharacter + @"\\]| )+\\)*";
public Terminal FilePathWindowsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePathWindows, filePathRegex);
#endregion
#endregion
#endregion
#region 2-NonTerminals
// Most non-terminals are first defined here, so they can be used anywhere in the rules
// Otherwise you can only use non-terminals that have been defined previously
// Each one is named after the matching GrammarNames constant; the rules themselves
// are assigned in the constructor.
public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument);
public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments);
public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns);
public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant);
public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula);
public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows);
public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool);
public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell);
public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant);
public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray);
public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument);
public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error);
public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula);
public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq);
public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall);
public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName);
public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange);
// Marked transient in the constructor
public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp);
public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange);
public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number);
// Marked transient in the constructor
public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix);
// Marked transient in the constructor
public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
// NOTE(review): no rule is assigned to QuotedFileSheet or Sheet in the constructor — confirm whether they are still needed
public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference);
//public NonTerminal ReferenceFunction{ get; } = new NonTerminal(GrammarNames.ReferenceFunction);
// Marked transient in the constructor
public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError);
public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName);
public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName);
public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet);
// Root of the grammar; marked transient in the constructor
public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart);
public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
public NonTerminal StructuredReferenceElement { get; } = new NonTerminal(GrammarNames.StructuredReferenceElement);
public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
//public NonTerminal StructuredReferenceKeyword { get; } = new NonTerminal(GrammarNames.StructuredReferenceKeyword);
public NonTerminal StructuredReferenceTable { get; } = new NonTerminal(GrammarNames.StructuredReferenceTable);
public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);
#endregion
/// <summary>
/// Builds the grammar: marks punctuation, assigns a rule to the non-terminals
/// and registers operator precedence and associativity
/// </summary>
// NOTE(review): base(false) presumably selects a case-insensitive grammar — confirm against Irony's Grammar constructor
public ExcelFormulaGrammar() : base(false)
{
#region Punctuation
// Round, square and curly brackets are marked as punctuation; the exclamation mark is not
MarkPunctuation(OpenParen, CloseParen);
MarkPunctuation(OpenSquareParen, CloseSquareParen);
MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
//exclamationMark.SetFlag(TermFlags.IsDelimiter);
#endregion
#region Rules
#region Base rules
// The input is a formula, with or without a leading "=", or an array formula
Root = Start;
Start.Rule = FormulaWithEq
| Formula
| ArrayFormula
;
MarkTransient(Start);
ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;
FormulaWithEq.Rule = eqop + Formula;
Formula.Rule =
Reference
| Constant
| FunctionCall
| ConstantArray
| OpenParen + Formula + CloseParen
| ReservedName
;
//MarkTransient(Formula);
ReservedName.Rule = ReservedNameToken;
Constant.Rule = Number
| Text
| Bool
| Error
;
// Each constant kind wraps exactly one token
Text.Rule = TextToken;
Number.Rule = NumberToken;
Bool.Rule = BoolToken;
Error.Rule = ErrorToken;
RefError.Rule = RefErrorToken;
#endregion
#region Functions
// A function call is a named function or a prefix, postfix or infix operation.
// The function name token already contains the opening parenthesis.
FunctionCall.Rule =
FunctionName + Arguments + CloseParen
| PrefixOp + Formula
| Formula + PostfixOp
| Formula + InfixOp + Formula
;
FunctionName.Rule = ExcelFunction;
// Zero or more comma-separated arguments
Arguments.Rule = MakeStarRule(Arguments, comma, Argument);
//Arguments.Rule = Argument | Argument + comma + Arguments;
EmptyArgument.Rule = EmptyArgumentToken;
Argument.Rule = Formula | EmptyArgument;
//MarkTransient(Argument);
PrefixOp.Rule =
ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop;
MarkTransient(PrefixOp);
InfixOp.Rule =
expop
| mulop
| divop
| plusop
| minop
| concatop
| gtop
| eqop
| ltop
| neqop
| gteop
| lteop;
MarkTransient(InfixOp);
//PostfixOp.Rule = ImplyPrecedenceHere(Precedence.UnaryPostFix) + percentop;
// ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
PostfixOp.Rule = PreferShiftHere() + percentop;
MarkTransient(PostfixOp);
#endregion
#region References
// A reference is a single item (optionally prefixed), a reference-returning
// function call, a parenthesised reference or a dynamic data exchange link
Reference.Rule = ReferenceItem
| ReferenceFunctionCall
| OpenParen + Reference + PreferShiftHere() + CloseParen
| Prefix + ReferenceItem
| DynamicDataExchange
;
// Range (:), intersection, union in parentheses, or a named reference function
ReferenceFunctionCall.Rule =
Reference + colon + Reference
| Reference + intersectop + Reference
| OpenParen + Union + CloseParen
| RefFunctionName + Arguments + CloseParen
;
RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;
// One or more comma-separated references
Union.Rule = MakePlusRule(Union, comma, Reference);
ReferenceItem.Rule =
Cell
| NamedRange
| VRange
| HRange
| RefError
| UDFunctionCall
| StructuredReference
;
MarkTransient(ReferenceItem);
UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
UDFName.Rule = UDFToken;
VRange.Rule = VRangeToken;
HRange.Rule = HRangeToken;
Cell.Rule = CellToken;
// A file is a bracketed number, a bracketed name, or a Windows path followed by a bracketed name
File.Rule = FileToken
| EnclosedInBracketsToken
| FilePathWindowsToken + EnclosedInBracketsToken
;
DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;
NamedRange.Rule = NameToken | NamedRangeCombinationToken;
// Every combination of optional quote, optional file and single or multiple sheets,
// plus a file on its own followed by "!"
Prefix.Rule =
SheetToken
| QuoteS + SheetQuotedToken
| File + SheetToken
| QuoteS + File + SheetQuotedToken
| File + exclamationMark
| MultipleSheetsToken
| QuoteS + MultipleSheetsQuotedToken
| File + MultipleSheetsToken
| QuoteS + File + MultipleSheetsQuotedToken
;
StructuredReferenceElement.Rule =
OpenSquareParen + SRColumnToken + CloseSquareParen
| OpenSquareParen + NameToken + CloseSquareParen
| EnclosedInBracketsToken;
//StructuredReferenceKeyword.Rule = EnclosedInBracketsToken;
StructuredReferenceTable.Rule = NameToken;
// The accepted shapes are enumerated explicitly: one to four elements combined with "@", ":" and ","
StructuredReferenceExpression.Rule =
StructuredReferenceElement
| at + StructuredReferenceElement
| StructuredReferenceElement + colon + StructuredReferenceElement
| at + StructuredReferenceElement + colon + StructuredReferenceElement
| StructuredReferenceElement + comma + StructuredReferenceElement
| StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement
| StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement
| StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement
;
// With or without a leading table name; the brackets may also be empty after a table name
StructuredReference.Rule =
StructuredReferenceElement
| OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
| StructuredReferenceTable + StructuredReferenceElement
| StructuredReferenceTable + OpenSquareParen + CloseSquareParen
| StructuredReferenceTable + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
;
#endregion
#region Arrays
// Rows of a constant array are separated by ";" and the values within a row by ","
// NOTE(review): ArrayColumns is the ";"-separated list and ArrayRows the ","-separated one — the names look swapped, confirm intent
ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;
ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);
ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
#endregion
#endregion
#region 5-Operator Precedence
// Some of these operators are neutral associative instead of left associative,
// but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
// structure of the parse tree, we like consistency.
RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop);
RegisterOperators(Precedence.Union, Associativity.Left, comma);
RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
RegisterOperators(Precedence.Range, Associativity.Left, colon);
//RegisterOperators(Precedence.ParameterSeparator, comma);
#endregion
}
#region Precedence and Priority constants
// Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
// Could also be an enum, but this way you don't need int casts
// Operator precedence levels passed to RegisterOperators and ImplyPrecedenceHere.
// NOTE(review): larger values presumably bind tighter, matching the linked table — confirm against Irony
private static class Precedence
{
// Don't use priority 0, Irony seems to view it as no priority set
public const int Comparison = 1;
public const int Concatenation = 2;
public const int Addition = 3;
public const int Multiplication = 4;
public const int Exponentiation = 5;
public const int UnaryPostFix = 6;
public const int UnaryPreFix = 7;
// 8 is left unused, it belonged to the commented-out Reference level
//public const int Reference = 8;
public const int Union = 9;
public const int Intersection = 10;
public const int Range = 11;
}
// Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
// E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
// E.g. "A1Blah" is both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
private static class TerminalPriority
{
// Values below Irony's "Normal" (0): these terminals lose against any default-priority terminal.
// Irony Low value
//public const int Low = -1000;
public const int SRColumn = -900;
public const int Name = -800;
public const int ReservedName = -700;
public const int FileName = -500;
public const int SingleQuotedString = -100;
// Irony Normal value, default value
//public const int Normal = 0;
public const int Bool = 0;
public const int MultipleSheetsToken = 100;
// Values at or above Irony's "High" (1000): these terminals win against default-priority terminals.
// Irony High value
//public const int High = 1000;
public const int CellToken = 1000;
public const int NamedRangeCombination = 1100;
public const int UDF = 1150;
// The remaining terminals deliberately share the same (highest) value.
public const int ExcelFunction = 1200;
public const int ExcelRefFunction = 1200;
public const int FileNameNumericToken = 1200;
public const int SheetToken = 1200;
public const int SheetQuotedToken = 1200;
}
#endregion
/// <summary>
/// Names from the embedded built-in function list. The resource is read and split again on every access.
/// </summary>
private static string[] excelFunctionList
{
    get { return GetExcelFunctionList(); }
}

/// <summary>
/// Reads the <c>ExcelBuiltinFunctionList_v139</c> resource text and splits it into its non-empty lines.
/// </summary>
/// <returns>One array entry per non-empty line of the resource.</returns>
private static string[] GetExcelFunctionList()
{
    char[] lineBreaks = { '\n', '\r' };
    string text;
    using (var reader = new StringReader(Properties.Resources.ExcelBuiltinFunctionList_v139))
    {
        text = reader.ReadToEnd();
    }
    return text.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
}
}
#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
// Names of non-terminals; parse tree nodes are matched against these via their term name.
#region Non-Terminals
public const string Argument = "Argument";
public const string Arguments = "Arguments";
public const string ArrayColumns = "ArrayColumns";
public const string ArrayConstant = "ArrayConstant";
public const string ArrayFormula = "ArrayFormula";
public const string ArrayRows = "ArrayRows";
public const string Bool = "Bool";
public const string Cell = "Cell";
public const string Constant = "Constant";
public const string ConstantArray = "ConstantArray";
public const string DynamicDataExchange = "DynamicDataExchange";
public const string EmptyArgument = "EmptyArgument";
public const string Error = "Error";
public const string ExcelFunction = "ExcelFunction";
public const string File = "File";
public const string Formula = "Formula";
public const string FormulaWithEq = "FormulaWithEq";
public const string FunctionCall = "FunctionCall";
public const string FunctionName = "FunctionName";
public const string HorizontalRange = "HRange";
public const string NamedRange = "NamedRange";
public const string Number = "Number";
public const string Prefix = "Prefix";
public const string QuotedFileSheet = "QuotedFileSheet";
public const string Range = "Range";
public const string Reference = "Reference";
public const string ReferenceFunctionCall = "ReferenceFunctionCall";
public const string RefError = "RefError";
public const string RefFunctionName = "RefFunctionName";
public const string ReservedName = "ReservedName";
public const string Sheet = "Sheet";
public const string StructuredReference = "StructuredReference";
public const string StructuredReferenceElement = "StructuredReferenceElement";
public const string StructuredReferenceExpression = "StructuredReferenceExpression";
public const string StructuredReferenceTable = "StructuredReferenceTable";
public const string Text = "Text";
public const string UDFName = "UDFName";
public const string UDFunctionCall = "UDFunctionCall";
public const string Union = "Union";
public const string VerticalRange = "VRange";
#endregion
// Names of the non-terminals grouped by the grammar as "transient".
#region Transient Non-Terminals
public const string TransientStart = "Start";
public const string TransientInfixOp = "InfixOp";
public const string TransientPostfixOp = "PostfixOp";
public const string TransientPrefixOp = "PrefixOp";
public const string TransientReferenceItem = "ReferenceItem";
#endregion
// Names of terminals (tokens).
#region Terminals
public const string TokenBool = "BoolToken";
public const string TokenCell = "CellToken";
public const string TokenEmptyArgument = "EmptyArgumentToken";
public const string TokenError = "ErrorToken";
public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
public const string TokenFilePathWindows = "FilePathWindowsToken";
public const string TokenEnclosedInBrackets = "EnclosedInBracketsToken";
public const string TokenFileNameNumeric = "FileNameNumericToken";
public const string TokenHRange = "HRangeToken";
public const string TokenIntersect = "INTERSECT";
public const string TokenMultipleSheets = "MultipleSheetsToken";
public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
public const string TokenName = "NameToken";
public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
public const string TokenNumber = "NumberToken";
public const string TokenRefError = "RefErrorToken";
public const string TokenReservedName = "ReservedNameToken";
public const string TokenSingleQuotedString = "SingleQuotedString";
public const string TokenSheet = "SheetNameToken";
public const string TokenSheetQuoted = "SheetNameQuotedToken";
public const string TokenSRTableName = "SRTableName";
public const string TokenSRKeyword = "SRKeyword";
public const string TokenSRColumn = "SRColumn";
public const string TokenSREnclosedColumn = "SREnclosedColumn";
public const string TokenText = "TextToken";
public const string TokenUDF = "UDFToken";
public const string TokenUnionOperator = ",";
public const string TokenVRange = "VRangeToken";
#endregion
}
#endregion
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v139/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v139
{
///
/// Excel formula parser
/// Contains parser and utilities that operate directly on the parse tree, or makes working with the parse tree easier.
///
public static class ExcelFormulaParser
{
/// <summary>
/// Parser instance belonging to the current thread; stays <c>null</c> until <see cref="P"/> is first read on that thread.
/// </summary>
[ThreadStatic] private static Parser _p;

/// <summary>
/// Parser for the current thread, created from a new <see cref="ExcelFormulaGrammar"/> on first use.
/// </summary>
private static Parser P
{
    get
    {
        if (_p == null)
        {
            _p = new Parser(new ExcelFormulaGrammar());
        }
        return _p;
    }
}
/// <summary>
/// Parse a formula and return the root node of the resulting parse tree.
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">If the formula could not be parsed.</exception>
/// <returns>Parse tree root node.</returns>
public static ParseTreeNode Parse(string input)
{
    ParseTree tree = ParseToTree(input);
    return tree.Root;
}
/// <summary>
/// Parse a formula and return the whole parse tree.
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">If the formula could not be parsed.</exception>
/// <returns>Parse tree, with the spans of intersection tokens and quoted sheet tokens adjusted.</returns>
public static ParseTree ParseToTree(string input)
{
    ParseTree tree = P.Parse(input);
    if (tree.HasErrors())
    {
        throw new ArgumentException($"Failed parsing input <<{input}>>");
    }

    // First pass: give every intersection token a span of length 1 that starts one character earlier
    // (presumably so it covers the space that stands for the operator).
    foreach (ParseTreeNode node in tree.Root.AllNodes())
    {
        if (!node.Is(GrammarNames.TokenIntersect))
        {
            continue;
        }
        SourceLocation reported = node.Span.Location;
        var shifted = new SourceLocation(reported.Position - 1, reported.Line, reported.Column - 1);
        node.Span = new SourceSpan(shifted, 1);
    }

    // Second pass: let quoted sheet tokens reclaim whitespace that directly precedes them in the input.
    foreach (ParseTreeNode node in tree.Root.AllNodes())
    {
        if (node.Is(GrammarNames.TokenSheetQuoted))
        {
            PrefixInfo.FixQuotedSheetNodeForWhitespace(node, input);
        }
    }

    return tree;
}
/// <summary>
/// All nodes (terminals and non-terminals) in depth-first pre-order, with a conditional stop.
/// </summary>
/// <param name="root">The root node; it is always the first node returned.</param>
/// <param name="stopAt">Don't process the children of a node matching this predicate. The matching node itself is still returned.</param>
/// <returns>A lazily evaluated sequence of nodes.</returns>
// inspiration taken from https://irony.codeplex.com/discussions/213938
public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
{
    var stack = new Stack<ParseTreeNode>();
    stack.Push(root);
    while (stack.Count > 0)
    {
        var node = stack.Pop();
        yield return node;

        // Check if we don't want to process the children of this node
        if (stopAt != null && stopAt(node))
        {
            continue;
        }

        var children = node.ChildNodes;
        // Push children on in reverse order so that they will
        // be evaluated left -> right when popped.
        for (int i = children.Count - 1; i >= 0; i--)
        {
            stack.Push(children[i]);
        }
    }
}
/// <summary>
/// All nodes (terminals and non-terminals) below and including <paramref name="root"/>, in depth-first pre-order.
/// </summary>
/// <param name="root">The node to start from.</param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
{
    return AllNodesConditional(root);
}
/// <summary>
/// All nodes of a certain type below and including <paramref name="root"/>, in depth-first pre-order.
/// </summary>
/// <param name="root">The node to start from.</param>
/// <param name="type">The term name to keep, see <see cref="GrammarNames"/>.</param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
{
    return AllNodes(root.AllNodes(), type);
}
/// <summary>
/// Filters an existing node sequence down to the nodes of the given type.
/// </summary>
/// <param name="allNodes">The nodes to filter.</param>
/// <param name="type">The term name to keep.</param>
internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
{
    return allNodes.Where(node => node.Is(type));
}
/// <summary>
/// Get the parent node of a node.
/// </summary>
/// <remarks>
/// This is an expensive operation, as the whole tree will be searched through.
/// </remarks>
/// <param name="child">The node whose parent is wanted.</param>
/// <param name="treeRoot">Root of the tree that is searched.</param>
/// <exception cref="ArgumentException">If no node in the tree has <paramref name="child"/> as a direct child.</exception>
public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
{
    foreach (ParseTreeNode candidate in treeRoot.AllNodes())
    {
        if (candidate.ChildNodes.Any(c => c == child))
        {
            return candidate;
        }
    }
    throw new ArgumentException("Child is not part of the tree", nameof(child));
}
/// <summary>
/// The node type, i.e. the name of the grammar term the node was created for.
/// </summary>
public static string Type(this ParseTreeNode node) => node.Term.Name;
/// <summary>
/// Check whether a node is of a particular type (exact, case-sensitive name match).
/// </summary>
public static bool Is(this ParseTreeNode pt, string type)
{
    string actual = pt.Type();
    return actual == type;
}
/// <summary>
/// Checks whether this node is a function: a function call of any kind, or a
/// two-child Reference whose second child is itself a function.
/// </summary>
public static Boolean IsFunction(this ParseTreeNode input)
{
    if (input.Is(GrammarNames.FunctionCall)
        || input.Is(GrammarNames.ReferenceFunctionCall)
        || input.Is(GrammarNames.UDFunctionCall))
    {
        return true;
    }

    // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
    return input.Is(GrammarNames.Reference)
           && input.ChildNodes.Count == 2
           && input.ChildNodes[1].IsFunction();
}
/// <summary>
/// Whether or not this node represents parentheses "(_)": a Formula whose only child is a Formula,
/// or a Reference whose only child is a Reference.
/// </summary>
public static bool IsParentheses(this ParseTreeNode input)
{
    string type = input.Type();
    if (type != GrammarNames.Formula && type != GrammarNames.Reference)
    {
        return false;
    }

    // A wrapper has exactly one child, and that child has the same type as the wrapper.
    return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(type);
}
/// <summary>
/// Whether this node is a function with three children whose middle child is an operator.
/// </summary>
public static bool IsBinaryOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 3)
    {
        return false;
    }
    return input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Whether this node is a binary operation represented by a FunctionCall node.
/// </summary>
public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
{
    if (!input.IsBinaryOperation())
    {
        return false;
    }
    return input.Is(GrammarNames.FunctionCall);
}
/// <summary>
/// Whether this node is a binary operation represented by a ReferenceFunctionCall node.
/// </summary>
public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
{
    if (!input.IsBinaryOperation())
    {
        return false;
    }
    return input.Is(GrammarNames.ReferenceFunctionCall);
}
/// <summary>
/// Whether this node is a unary operation, prefix or postfix.
/// </summary>
public static bool IsUnaryOperation(this ParseTreeNode input)
    => input.IsUnaryPrefixOperation() || input.IsUnaryPostfixOperation();
/// <summary>
/// Whether this node is a function with two children whose first child is an operator.
/// </summary>
public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }
    return input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Whether this node is a function with two children whose second child is an operator.
/// </summary>
public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }
    return input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Returns <paramref name="input"/> without its last character.
/// </summary>
/// <exception cref="ArgumentOutOfRangeException">If <paramref name="input"/> is empty.</exception>
private static string RemoveFinalSymbol(string input)
{
    int lastIndex = input.Length - 1;
    return input.Remove(lastIndex);
}
/// <summary>
/// Get the function or operator name of this function call.
/// </summary>
/// <param name="input">A node for which <see cref="IsFunction"/> holds.</param>
/// <returns>
/// The operator text for operations, the upper-cased name (without the trailing symbol) for named
/// functions, or prefix + function name for external UDFs.
/// </returns>
/// <exception cref="ArgumentException">If the node is not a function call.</exception>
public static string GetFunction(this ParseTreeNode input)
{
    if (input.IsIntersection())
    {
        return GrammarNames.TokenIntersect;
    }
    if (input.IsUnion())
    {
        return GrammarNames.TokenUnionOperator;
    }
    if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
    {
        return input.ChildNodes[1].Print();
    }
    if (input.IsUnaryPrefixOperation())
    {
        return input.ChildNodes[0].Print();
    }
    if (input.IsNamedFunction())
    {
        // Function names are identifiers, not user-facing text: upper-case them independently of the
        // current culture so that e.g. "if(" never turns into "İF" under a Turkish culture.
        return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
    }
    if (input.IsExternalUDFunction())
    {
        return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
    }
    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Check if this node is a specific function.
/// </summary>
/// <param name="input">The node to test.</param>
/// <param name="functionName">Name to compare with the result of <see cref="GetFunction"/> (exact match).</param>
public static bool MatchFunction(this ParseTreeNode input, string functionName)
{
    if (!IsFunction(input))
    {
        return false;
    }
    return GetFunction(input) == functionName;
}
/// <summary>
/// Get all the arguments of a function or operation.
/// </summary>
/// <param name="input">A named function, operation, union or external UDF node.</param>
/// <returns>The argument nodes, in the order they appear.</returns>
/// <exception cref="ArgumentException">If the node is not a function call.</exception>
public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
{
    if (input.IsNamedFunction())
    {
        return input
            .ChildNodes[1] // "Arguments" non-terminal
            .ChildNodes    // "Argument" non-terminals
            .Select(node => node.ChildNodes[0])
            ;
    }
    if (input.IsBinaryOperation())
    {
        return new[] {input.ChildNodes[0], input.ChildNodes[2]};
    }
    if (input.IsUnaryPrefixOperation())
    {
        return new[] {input.ChildNodes[1]};
    }
    if (input.IsUnaryPostfixOperation())
    {
        return new[] {input.ChildNodes[0]};
    }
    if (input.IsUnion())
    {
        return input.ChildNodes[0].ChildNodes;
    }
    if (input.IsExternalUDFunction())
    {
        return input           // Reference
            .ChildNodes[1]     // UDFunctionCall
            .ChildNodes[1]     // Arguments
            .ChildNodes        // Argument non-terminals
            .Select(node => node.ChildNodes[0])
            ;
    }
    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Checks whether this node is a built-in excel function, i.e. a function whose first child is a
/// FunctionName or RefFunctionName.
/// </summary>
public static bool IsBuiltinFunction(this ParseTreeNode node)
{
    if (!node.IsFunction())
    {
        return false;
    }

    ParseTreeNode nameNode = node.ChildNodes[0];
    return nameNode.Is(GrammarNames.FunctionName) || nameNode.Is(GrammarNames.RefFunctionName);
}
/// <summary>
/// Whether or not this node represents an intersection: a binary operation whose operator token
/// belongs to the intersect terminal.
/// </summary>
public static bool IsIntersection(this ParseTreeNode input)
{
    if (!IsBinaryOperation(input))
    {
        return false;
    }

    var operatorTerminal = input.ChildNodes[1].Token.Terminal;
    return operatorTerminal.Name == GrammarNames.TokenIntersect;
}
/// <summary>
/// Whether or not this node represents a union: a ReferenceFunctionCall whose only child is a Union.
/// </summary>
public static bool IsUnion(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.ReferenceFunctionCall) || input.ChildNodes.Count != 1)
    {
        return false;
    }
    return input.ChildNodes[0].Is(GrammarNames.Union);
}
/// <summary>
/// Checks whether this node is a function call with name, and not just a unary or binary operation.
/// </summary>
public static bool IsNamedFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.FunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.FunctionName);
        case GrammarNames.ReferenceFunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.RefFunctionName);
        case GrammarNames.UDFunctionCall:
            // User-defined function calls always carry a name.
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Whether this node is a binary or unary operation.
/// </summary>
public static bool IsOperation(this ParseTreeNode input)
    => input.IsBinaryOperation() || input.IsUnaryOperation();
/// <summary>
/// Whether this node is a two-child Reference whose second child is a named function.
/// </summary>
public static bool IsExternalUDFunction(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.Reference) || input.ChildNodes.Count != 2)
    {
        return false;
    }
    return input.ChildNodes[1].IsNamedFunction();
}
/// <summary>
/// True if this node represents a number constant with a sign: a unary prefix operation whose
/// operand starts with a Constant that starts with a Number.
/// </summary>
public static bool IsNumberWithSign(this ParseTreeNode input)
{
    if (!IsUnaryPrefixOperation(input))
    {
        return false;
    }

    ParseTreeNode operandContent = input.ChildNodes[1].ChildNodes[0];
    return operandContent.Is(GrammarNames.Constant)
           && operandContent.ChildNodes[0].Is(GrammarNames.Number);
}
/// <summary>
/// Extract all of the information from a Prefix non-terminal.
/// </summary>
/// <param name="prefix">The Prefix node to read.</param>
public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix)
{
    return PrefixInfo.From(prefix);
}
/// <summary>
/// Go to the first non-formula node, descending through first children while the node is a Formula.
/// </summary>
public static ParseTreeNode SkipFormula(this ParseTreeNode input)
{
    ParseTreeNode current = input;
    while (current.Is(GrammarNames.Formula))
    {
        current = current.ChildNodes.First();
    }
    return current;
}
/// <summary>
/// Get all child nodes that are references and aren't part of another reference expression.
/// </summary>
/// <returns>
/// The outermost Reference nodes found below (and including) <paramref name="input"/>, each passed
/// through <see cref="SkipToRelevant"/>.
/// </returns>
public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
{
    // Traversal stops descending at every Reference, so nested references are not visited.
    return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
        .Where(node => node.Is(GrammarNames.Reference))
        .Select(node => node.SkipToRelevant())
        ;
}
/// <summary>
/// Gets the ParserReferences from the input parse tree node and its children.
/// </summary>
/// <remarks>
/// 5 cases:
/// 1. ReferenceItem node: convert to ParserReference
/// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
///    (to include the references in the arguments of external UDFs)
/// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
/// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
/// 5. Other cases (RefFunctionCall, Union, Arguments): recursive call on the nodes returned from GetReferenceNodes(node)
/// </remarks>
/// <param name="node">The node to collect references from.</param>
/// <returns>The collected references (a fully built list, not a lazy sequence).</returns>
public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
{
    // A Reference with a single child is only a wrapper; look at what it wraps.
    if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
    {
        node = node.ChildNodes[0];
    }

    var list = new List<ParserReference>();
    switch (node.Type())
    {
        case GrammarNames.Cell:
        case GrammarNames.NamedRange:
        case GrammarNames.HorizontalRange:
        case GrammarNames.VerticalRange:
            list.Add(new ParserReference(node));
            break;
        case GrammarNames.Reference:
            list.Add(new ParserReference(node));
            list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            break;
        default:
            if (node.IsRange())
            {
                var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray();
                var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray();
                if (IsCellReference(rangeStart) && IsCellReference(rangeEnd))
                {
                    // Cell:Cell - reuse the start reference and widen it into a range.
                    ParserReference range = rangeStart.First();
                    range.MaxLocation = rangeEnd.First().MinLocation;
                    range.ReferenceType = ReferenceType.CellRange;
                    range.LocationString = node.Print();
                    list.Add(range);
                }
                else
                {
                    list.AddRange(rangeStart);
                    list.AddRange(rangeEnd);
                }
            }
            else
            {
                list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            }
            break;
    }
    return list;
}
/// <summary>
/// Whether the list holds exactly one reference and that reference is a single cell.
/// </summary>
private static bool IsCellReference(IList<ParserReference> references)
{
    return references.Count == 1 && references.First().ReferenceType == ReferenceType.Cell;
}
/// <summary>
/// Whether or not this node represents a range: a binary reference operation with ":" as operator.
/// </summary>
public static bool IsRange(this ParseTreeNode input)
{
    if (!input.IsBinaryReferenceOperation())
    {
        return false;
    }
    return input.ChildNodes[1].Is(":");
}
/// <summary>
/// Go to the first "relevant" child node, i.e. skips wrapper nodes.
/// </summary>
/// <param name="input">The input parse tree node</param>
/// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
/// <remarks>
/// Skips:
/// * FormulaWithEq and ArrayFormula nodes
/// * Formula and Argument nodes with a single child
/// * Parentheses
/// * Reference nodes which are just wrappers
/// </remarks>
public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
{
    ParseTreeNode current = input;
    for (;;)
    {
        string type = current.Type();

        if (type == GrammarNames.FormulaWithEq || type == GrammarNames.ArrayFormula)
        {
            // The formula itself is the second child.
            current = current.ChildNodes[1];
        }
        else if (type == GrammarNames.Argument || type == GrammarNames.Formula)
        {
            if (current.ChildNodes.Count != 1)
            {
                return current;
            }
            current = current.ChildNodes[0];
        }
        else if (type == GrammarNames.Reference)
        {
            // Skip references which are parentheses
            // Skip references without a prefix (=> they only have one child node) if the option is set
            bool isWrapper = (skipReferencesWithoutPrefix && current.ChildNodes.Count == 1)
                             || current.IsParentheses();
            if (!isWrapper)
            {
                return current;
            }
            current = current.ChildNodes[0];
        }
        else
        {
            return current;
        }
    }
}
/// <summary>
/// Pretty-print a parse tree to a string.
/// </summary>
/// <param name="input">The node to print, together with everything below it.</param>
/// <returns>The formula text represented by the node.</returns>
/// <exception cref="ArgumentException">
/// If a function node has an unknown shape, or a non-terminal is not handled here and does not have exactly one child.
/// </exception>
public static string Print(this ParseTreeNode input)
{
    // For terminals, just print the token text
    if (input.Term is Terminal)
    {
        return input.Token.Text;
    }

    // (Lazy) enumerable for printed children
    var children = input.ChildNodes.Select(Print);
    // Concrete list when needed
    List<string> childrenList;

    // Switch on non-terminals
    switch (input.Term.Name)
    {
        case GrammarNames.Formula:
            // Check if these are brackets, otherwise print first child
            return IsParentheses(input) ? $"({children.First()})" : children.First();

        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            childrenList = children.ToList();

            if (input.IsNamedFunction())
            {
                // The name child prints with its opening symbol; only the closing one is added here.
                return string.Join("", childrenList) + ")";
            }

            if (input.IsBinaryOperation())
            {
                // format string for "normal" binary operation
                string format = "{0} {1} {2}";
                if (input.IsIntersection())
                {
                    // The intersection operator is the space itself.
                    format = "{0} {2}";
                }
                else if (input.IsBinaryReferenceOperation())
                {
                    format = "{0}{1}{2}";
                }

                return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
            }

            if (input.IsUnion())
            {
                return $"({string.Join(",", childrenList)})";
            }

            if (input.IsUnaryOperation())
            {
                return string.Join("", childrenList);
            }

            throw new ArgumentException("Unknown function type.");

        case GrammarNames.Reference:
            return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);

        case GrammarNames.Prefix:
            var ret = string.Join("", children);
            // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
            if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
            {
                ret += "!";
            }
            return ret;

        case GrammarNames.ArrayFormula:
            return "{=" + children.ElementAt(1) + "}";

        case GrammarNames.StructuredReference:
            var sb = new StringBuilder();
            // True when the reference starts with a table name
            var hashtable = input.ChildNodes.Count >= 1 && input.ChildNodes[0].Is(GrammarNames.StructuredReferenceTable);
            // Index of the child holding the contents, which follows the table name when there is one
            var contentsNode = hashtable ? 1 : 0;
            childrenList = children.ToList();
            if (hashtable)
            {
                sb.Append(childrenList[0]);
            }

            if (hashtable && input.ChildNodes.Count == 1)
            {
                // Full table reference
                sb.Append("[]");
            }
            else if (input.ChildNodes[contentsNode].Is(GrammarNames.StructuredReferenceElement))
            {
                sb.Append(childrenList[contentsNode]);
            }
            else
            {
                sb.Append($"[{childrenList[contentsNode]}]");
            }

            return sb.ToString();

        // Terms for which to print all child nodes concatenated
        case GrammarNames.ArrayConstant:
        case GrammarNames.DynamicDataExchange:
        case GrammarNames.FormulaWithEq:
        case GrammarNames.File:
        case GrammarNames.StructuredReferenceExpression:
            return string.Join("", children);

        // Terms for which we print the children comma-separated
        case GrammarNames.Arguments:
        case GrammarNames.ArrayRows:
        case GrammarNames.Union:
            return string.Join(",", children);

        case GrammarNames.ArrayColumns:
            return string.Join(";", children);

        case GrammarNames.ConstantArray:
            return $"{{{children.First()}}}";

        default:
            // If it is not defined above and the number of children is exactly one, we want to just print the first child
            if (input.ChildNodes.Count == 1)
            {
                return children.First();
            }
            throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine +
                                        "This probably means the Excel grammar was modified without the print function being modified");
    }
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v139/ParserReference.cs
================================================
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v139
{
/// <summary>
/// The kind of location a <see cref="ParserReference"/> points at.
/// </summary>
public enum ReferenceType
{
// Single cell; set for Cell nodes.
Cell,
// Range between two single cells; set when both limits of a range are plain cells.
CellRange,
// Named range; set for NamedRange nodes.
UserDefinedName,
// Range given by rows only; set for HRange nodes.
HorizontalRange,
// Range given by columns only; set for VRange nodes.
VerticalRange,
// Reference error; set for RefError nodes.
RefError
}
public class ParserReference
{
// Row number used as the last row of a vertical (columns-only) range.
public const int MaxRangeHeight = 100;
// Number of columns assumed for a horizontal (rows-only) range; its last column is derived from this.
public const int MaxRangeWidth = 100;
// Kind of location this reference points at.
public ReferenceType ReferenceType { get; set; }
// Printed form of the node this reference was created from.
public string LocationString { get; set; }
// Sheet name from the prefix; the first sheet when the prefix names multiple sheets.
public string Worksheet { get; set; }
// Last sheet when the prefix names multiple sheets.
public string LastWorksheet { get; set; }
// File name, or file number as text, from the prefix; null when the prefix has neither.
public string FileName { get; set; }
// Name of the named range, for references of type UserDefinedName.
public string Name { get; private set; }
public string MinLocation { get; set; } //Location as appearing in the formula, eg $A$1
// End location; equal to MinLocation for single cells.
public string MaxLocation { get; set; }
/// <summary>
/// Creates a reference from explicit values.
/// </summary>
/// <param name="referenceType">Kind of location.</param>
/// <param name="locationString">Printed form of the reference.</param>
/// <param name="worksheet">Sheet name (first sheet for multi-sheet references).</param>
/// <param name="lastWorksheet">Last sheet for multi-sheet references.</param>
/// <param name="fileName">File name or number.</param>
/// <param name="name">Name of a named range.</param>
/// <param name="minLocation">Start location.</param>
/// <param name="maxLocation">End location; falls back to <paramref name="minLocation"/> when null.</param>
public ParserReference(ReferenceType referenceType, string locationString = null, string worksheet = null, string lastWorksheet = null,
    string fileName = null, string name = null, string minLocation = null, string maxLocation = null)
{
    ReferenceType = referenceType;
    LocationString = locationString;

    Worksheet = worksheet;
    LastWorksheet = lastWorksheet;
    FileName = fileName;
    Name = name;

    MinLocation = minLocation;
    MaxLocation = maxLocation ?? minLocation;
}

/// <summary>
/// Creates a reference from a parse tree node, see <see cref="InitializeReference"/>.
/// </summary>
/// <param name="node">The node to read the reference from.</param>
public ParserReference(ParseTreeNode node) => InitializeReference(node);
/// <summary>
/// Initializes the current object based on the input ParseTreeNode.
/// </summary>
/// <remarks>
/// For Reference nodes (Prefix ReferenceItem), it initializes the values derived from the Prefix node and
/// is re-invoked for the ReferenceItem node.
/// </remarks>
/// <param name="node">The node to read the reference from.</param>
public void InitializeReference(ParseTreeNode node)
{
    string nodeType = node.Type();

    if (nodeType == GrammarNames.Reference)
    {
        PrefixInfo prefix = node.ChildNodes[0].GetPrefixInfo();

        if (prefix.HasSheet)
        {
            // Two single quotes inside a sheet name stand for one single quote.
            Worksheet = prefix.Sheet.Replace("''", "'");
        }
        else
        {
            Worksheet = "(Undefined sheet)";
        }

        if (prefix.HasMultipleSheets)
        {
            string[] sheets = prefix.MultipleSheets.Split(':');
            Worksheet = sheets[0];
            LastWorksheet = sheets[1];
        }

        // A file number wins over a file name; without either the file name stays null.
        FileName = prefix.HasFileNumber ? prefix.FileNumber.ToString() : prefix.FileName;

        // The prefix must be processed first: the NamedRange branch below looks at FileName.
        InitializeReference(node.ChildNodes[1]);
    }
    else if (nodeType == GrammarNames.Cell)
    {
        ReferenceType = ReferenceType.Cell;
        MinLocation = node.ChildNodes[0].Token.ValueString;
        MaxLocation = MinLocation;
    }
    else if (nodeType == GrammarNames.NamedRange)
    {
        ReferenceType = ReferenceType.UserDefinedName;
        Name = node.ChildNodes[0].Token.ValueString;
        if (FileName != null)
        {
            MinLocation = "A1";
        }
    }
    else if (nodeType == GrammarNames.HorizontalRange)
    {
        string[] rows = node.ChildNodes[0].Token.ValueString.Split(':');
        ReferenceType = ReferenceType.HorizontalRange;
        MinLocation = "A" + rows[0];
        MaxLocation = ConvertColumnToStr(MaxRangeWidth - 1) + rows[1];
    }
    else if (nodeType == GrammarNames.VerticalRange)
    {
        string[] columns = node.ChildNodes[0].Token.ValueString.Split(':');
        ReferenceType = ReferenceType.VerticalRange;
        MinLocation = columns[0] + "1";
        MaxLocation = columns[1] + MaxRangeHeight;
    }
    else if (nodeType == GrammarNames.RefError)
    {
        ReferenceType = ReferenceType.RefError;
        MinLocation = "A1";
    }
    else
    {
        // UDFs
        MinLocation = "A1";
    }

    // Runs last, so for Reference nodes the printed text includes the prefix.
    LocationString = node.Print();
}
/// <summary>
/// Converts the column number to an Excel column string representation (0 gives "A", 25 "Z", 26 "AA").
/// </summary>
/// <param name="columnNumber">The zero-based column number.</param>
/// <returns>The column letters; an empty string for a negative number.</returns>
private string ConvertColumnToStr(int columnNumber)
{
    var letters = new System.Text.StringBuilder();
    // Produce letters from the last to the first; the "- 1" accounts for there being no zero digit.
    for (int remaining = columnNumber; remaining >= 0; remaining = remaining / 26 - 1)
    {
        letters.Insert(0, (char)(65 + remaining % 26));
    }
    return letters.ToString();
}
/// <summary>
/// The location of this reference: the single location for cells, "min:max" for everything else.
/// </summary>
/// <returns>Never null; an empty string for a cell reference without a location.</returns>
public override string ToString()
{
    if (ReferenceType == ReferenceType.Cell)
    {
        // MinLocation is already a string. Calling ToString() on it only added a
        // NullReferenceException for references created without a location.
        return MinLocation ?? string.Empty;
    }
    return string.Format("{0}:{1}", MinLocation, MaxLocation);
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v139/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v139
{
// TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23
///
/// Simple data class that holds information about a Prefix.
///
///
public class PrefixInfo : IEquatable
{
public string FilePath { get; }
public bool HasFilePath => FilePath != null;
private readonly int? _fileNumber;
public int FileNumber => _fileNumber.GetValueOrDefault();
public bool HasFileNumber => _fileNumber.HasValue;
public string FileName { get; }
public bool HasFileName => FileName != null;
public bool HasFile => HasFileName || HasFileNumber;
public string Sheet { get; }
public bool HasSheet => Sheet != null;
public string MultipleSheets { get; }
public bool HasMultipleSheets => MultipleSheets != null;
public bool IsQuoted { get; }
public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
{
Sheet = sheet;
_fileNumber = fileNumber;
FileName = fileName;
FilePath = filePath;
MultipleSheets = multipleSheets;
IsQuoted = isQuoted;
}
///
/// Create a PrefixInfo class from a parse tree node
///
internal static PrefixInfo From(ParseTreeNode prefix)
{
if (prefix.Type() != GrammarNames.Prefix)
{
throw new ArgumentException("Not a prefix", nameof(prefix));
}
string filePath = null;
int? fileNumber = null;
string fileName = null;
string sheetName = null;
string multipleSheets = null;
// Token number we're processing
var cur = 0;
// Check for quotes
var isQuoted = prefix.ChildNodes[cur].Is("'");
if (isQuoted)
{
cur++;
}
// Check and process file
if (prefix.ChildNodes[cur].Is(GrammarNames.File))
{
ParseTreeNode file = prefix.ChildNodes[cur];
if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
{
// Numeric filename
fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
}
else
{
// String filename
var iCur = 0;
// Check if it includes a path
if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePathWindows))
{
filePath = file.ChildNodes[iCur].Print();
iCur++;
}
fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
}
cur++;
}
// Check for a non-quoted sheet
if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheet))
{
sheetName = Substr(prefix.ChildNodes[cur].Print(), 1);
}
// Check for a quoted sheet
else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheetQuoted))
{
// remove quote and !
sheetName = Substr(prefix.ChildNodes[cur].Print(), 2);
if (sheetName == "")
{
// The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
// We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
sheetName = " ";
}
}
// Check if multiple sheets
else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenMultipleSheets))
{
multipleSheets = Substr(prefix.ChildNodes[cur].Print(), 1);
}
return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
}
internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
{
var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
SourceLocation currentLocation = quotedSheetNode.Span.Location;
if (newPosition == currentLocation.Position)
{
return;
}
var newLocation = new SourceLocation(newPosition, currentLocation.Line, currentLocation.Column + currentLocation.Position - newPosition);
quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);
// Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
}
private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
{
var startIndex = nodeSheetQuoted.Span.Location.Position;
while (startIndex > 0)
{
if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
{
break;
}
startIndex--;
}
return startIndex;
}
private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
{
return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
}
public override bool Equals(object other) => Equals(other as PrefixInfo);
public bool Equals(PrefixInfo other)
{
if (ReferenceEquals(null, other)) return false;
if (ReferenceEquals(this, other)) return true;
return _fileNumber == other._fileNumber && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase) && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase) && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase) && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
}
public override int GetHashCode()
{
unchecked
{
var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
return hashCode;
}
}
/// <summary>
/// Equality operator; uses the static two-argument Equals, so null operands are handled.
/// </summary>
public static bool operator ==(PrefixInfo left, PrefixInfo right) => Equals(left, right);
/// <summary>
/// Inequality operator; the negation of the static two-argument Equals.
/// </summary>
public static bool operator !=(PrefixInfo left, PrefixInfo right) => !Equals(left, right);
/// <summary>
/// Renders the prefix as text: optional quotes around file path, file number/name in square brackets,
/// sheet and multiple-sheets parts, always terminated by "!".
/// </summary>
public override string ToString()
{
    // The same quote (or nothing) opens and closes the prefix.
    var quote = IsQuoted ? "'" : "";
    var text = new StringBuilder();
    text.Append(quote);
    if (HasFilePath)
    {
        text.Append(FilePath);
    }
    if (HasFileNumber)
    {
        text.Append($"[{FileNumber}]");
    }
    if (HasFileName)
    {
        text.Append($"[{FileName}]");
    }
    if (HasSheet)
    {
        text.Append(Sheet);
    }
    if (HasMultipleSheets)
    {
        text.Append(MultipleSheets);
    }
    return text.Append(quote).Append("!").ToString();
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v141/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.IO;
using System.Reflection;
namespace XLParser.Web.XLParserVersions.v141
{
/// <summary>
/// Contains the XLParser grammar
/// </summary>
[Language("Excel Formulas", "1.4.1", "Grammar for Excel Formulas")]
public class ExcelFormulaGrammar : Grammar
{
#region 1-Terminals
#region Symbols and operators
// Key terms for punctuation and operators.
// The expression-bodied properties below call ToTerm with the symbol text on every access.
public Terminal at => ToTerm("@");
public Terminal comma => ToTerm(",");
public Terminal colon => ToTerm(":");
public Terminal semicolon => ToTerm(";");
public Terminal OpenParen => ToTerm("(");
public Terminal CloseParen => ToTerm(")");
public Terminal CloseSquareParen => ToTerm("]");
public Terminal OpenSquareParen => ToTerm("[");
public Terminal exclamationMark => ToTerm("!");
public Terminal CloseCurlyParen => ToTerm("}");
public Terminal OpenCurlyParen => ToTerm("{");
// Opening single quote of a quoted prefix (see the Prefix rule)
public Terminal QuoteS => ToTerm("'");
public Terminal mulop => ToTerm("*");
public Terminal plusop => ToTerm("+");
public Terminal divop => ToTerm("/");
public Terminal minop => ToTerm("-");
public Terminal concatop => ToTerm("&");
public Terminal expop => ToTerm("^");
// Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
// Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);
public Terminal percentop => ToTerm("%");
public Terminal gtop => ToTerm(">");
public Terminal eqop => ToTerm("=");
public Terminal ltop => ToTerm("<");
public Terminal neqop => ToTerm("<>");
public Terminal gteop => ToTerm(">=");
public Terminal lteop => ToTerm("<=");
#endregion
#region Literals
// Boolean literal: the text TRUE or FALSE
public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE")
{
Priority = TerminalPriority.Bool
};
// Numeric literal without extra number options; integer types listed are Int32, Int64 and BigInt
public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None)
{
DefaultIntTypes = new[] {TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt}
};
// Text literal delimited by double quotes: doubled quotes and line breaks are allowed, escapes are switched off
public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes);
// Same string options as TextToken but delimited by single quotes; used by the DynamicDataExchange rule
public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes)
{ Priority = TerminalPriority.SingleQuotedString };
// Error literals other than #REF!, which has its own key term below
public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A");
public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError);
#endregion
#region Functions
private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers
// Content of a regex character class: the special characters above, backslash, dot and word characters
private const string AllUdfChars = SpecialUdfChars + @"\\.\w";
// Prefix of a UDF name: either a quoted file name ending in .xla followed by "!", or the literal "_xll."
private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)";
// The following regex uses the rather exotic feature Character Class Subtraction
// https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction
// Alternatives, each followed by "(":
//   1. a single UDF character other than C, c, R or r
//   2. a prefix followed by a single UDF character
//   3. an optional prefix followed by 2 to 1023 UDF characters
private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\(";
public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) {Priority = TerminalPriority.UDF};
// Function-name tokens include the opening parenthesis of the call
public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(")
{ Priority = TerminalPriority.ExcelRefFunction };
// Shares its priority with ExcelRefFunctionToken
public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(")
{ Priority = TerminalPriority.ExcelRefFunction };
// One regex alternative per entry of excelFunctionList, followed by "("
public Terminal ExcelFunction { get; } = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + string.Join("|", excelFunctionList) + ")\\(")
{ Priority = TerminalPriority.ExcelFunction };
// Using this instead of Empty allows a more accurate tree
public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);
#endregion
#region References and names
// Column letters: one or two letters, or three letters in the ranges A-W followed by any two, XA?-XE?, or XFA-XFD
private const string ColumnPattern = @"(?:[A-Z]{1,2}|[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D])";
// Column range such as A:B; each side may carry a leading $
public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern);
// Row range such as 1:2; each side may carry a leading $ and must not start with 0
public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*");
// Single cell: optional $, column letters, optional $, row number not starting with 0
private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?[1-9][0-9]*";
public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex)
{ Priority = TerminalPriority.CellToken };
// Start with a letter, backslash or underscore, continue with word character (letters, numbers and underscore), backslash, dot or question mark
private const string NameStartCharRegex = @"[\p{L}\\_]";
private const string NameValidCharacterRegex = @"[\w\\_\.\?]";
public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*")
{ Priority = TerminalPriority.Name };
// Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference.
// If we ever parse R1C1 references, make sure to include them here
// TODO: Add all function names here
private const string NameInvalidWordsRegex =
"((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
// \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
+ "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
;
// To prevent e.g. "A1A1" being parsed as 2 cell tokens
// NOTE(review): the trailing NameValidCharacterRegex + "+" is appended after the second alternative of NameInvalidWordsRegex only, because "|" binds weaker than concatenation - confirm this is intended
public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NameInvalidWordsRegex + NameValidCharacterRegex + "+")
{ Priority = TerminalPriority.NamedRangeCombination };
// Names that start with the "_xlnm." prefix followed by letters or underscores
public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+")
{ Priority = TerminalPriority.ReservedName };
#region Structured References
//public Terminal SRTableNameToken = new RegexBasedTerminal(GrammarNames.TokenSRTableName, @"[\w\\.]+\[")
//{Priority = 0};
// Column name inside a structured reference: one or more word characters, backslashes or dots
public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, @"[\w\\.]+")
{ Priority = TerminalPriority.SRColumn };
//public Terminal SREnclosedColumnToken = new RegexBasedTerminal(GrammarNames.TokenSREnclosedColumn, @"\[( )*[\w+\\.,:#'""{}$^&*+=->]+( )*\]")
//{Priority = 0};
#endregion
#region Prefixes
// Regex character-class content: characters excluded from unquoted sheet names but permitted in quoted ones
private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
// Regex character-class content: characters excluded from sheet names, quoted or not
private const string notSheetNameChars = @"'*\[\]\\:/?";
//const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
//const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
// Content between the quotes; a doubled quote ('') stands for a quote inside the name
private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*";
//private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";
// Sheet tokens include the trailing "!"
public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!")
{ Priority = TerminalPriority.SheetToken };
// Does not include the opening quote (the Prefix rule matches it separately as QuoteS), but does include the closing quote and "!"
public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!")
{ Priority = TerminalPriority.SheetQuotedToken };
// Two sheet names separated by ":" and followed by "!"
private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!";
private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!";
public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
// Shares its priority with MultipleSheetsToken
public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
// File referenced by number, e.g. [1]
private const string fileNameNumericRegex = @"\[[0-9]+\]";
public Terminal FileToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex)
{ Priority = TerminalPriority.FileNameNumericToken };
// Only used inside filePathRegex below
private const string fileNameForbiddenCharacter = @"<>:""/\|?*";
// Anything between square brackets that contains no square bracket itself; also used by the structured reference rules
private const string fileNameInBracketsRegex = @"\[[^\[\]]+\]";
public Terminal EnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenEnclosedInBrackets, fileNameInBracketsRegex)
{ Priority = TerminalPriority.FileName };
// Source: https://stackoverflow.com/a/14632579
private const string fileNameRegex = @"[^\.]+\..{1,4}";
public Terminal FileNameWindowsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameWindows, fileNameRegex)
{ Priority = TerminalPriority.FileName };
// Source: http://stackoverflow.com/a/6416209/572635
// Drive letter ("C:") or UNC root ("\\server\share"), a backslash, then zero or more directory names each followed by a backslash
private const string filePathRegex = @"(?:[a-zA-Z]\:|\\\\[\w\.]+\\[\w.$]+)\\(([^" + fileNameForbiddenCharacter + @"\\]| )+\\)*";
public Terminal FilePathWindowsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePathWindows, filePathRegex);
#endregion
#endregion
#endregion
#region 2-NonTerminals
// Most non-terminals are first defined here, so they can be used anywhere in the rules
// Otherwise you can only use non-terminals that have been defined previously
// Their production rules are assigned in the constructor.
// NOTE(review): QuotedFileSheet and Sheet are declared here but no rule is assigned to them in the constructor.
public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument);
public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments);
public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns);
public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant);
public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula);
public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows);
public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool);
public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell);
public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant);
public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray);
public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument);
public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error);
public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula);
public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq);
public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall);
public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName);
public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange);
public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp);
public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange);
public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number);
public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix);
public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference);
//public NonTerminal ReferenceFunction{ get; } = new NonTerminal(GrammarNames.ReferenceFunction);
public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError);
public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName);
public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName);
public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet);
public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart);
public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
public NonTerminal StructuredReferenceElement { get; } = new NonTerminal(GrammarNames.StructuredReferenceElement);
public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
//public NonTerminal StructuredReferenceKeyword { get; } = new NonTerminal(GrammarNames.StructuredReferenceKeyword);
public NonTerminal StructuredReferenceTable { get; } = new NonTerminal(GrammarNames.StructuredReferenceTable);
public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);
#endregion
// Builds the grammar: marks punctuation, assigns the production rule of every non-terminal,
// marks helper non-terminals as transient and registers operator precedence.
// NOTE(review): the false passed to the Grammar base constructor is presumably Irony's case-sensitivity flag - confirm against Irony.
public ExcelFormulaGrammar() : base(false)
{
#region Punctuation
MarkPunctuation(OpenParen, CloseParen);
MarkPunctuation(OpenSquareParen, CloseSquareParen);
MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
//exclamationMark.SetFlag(TermFlags.IsDelimiter);
#endregion
#region Rules
#region Base rules
Root = Start;
// A formula may be given with a leading "=", without it, or as an array formula "{=...}"
Start.Rule = FormulaWithEq
| Formula
| ArrayFormula
;
MarkTransient(Start);
ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;
FormulaWithEq.Rule = eqop + Formula;
Formula.Rule =
Reference
| Constant
| FunctionCall
| ConstantArray
| OpenParen + Formula + CloseParen
| ReservedName
;
//MarkTransient(Formula);
ReservedName.Rule = ReservedNameToken;
Constant.Rule = Number
| Text
| Bool
| Error
;
Text.Rule = TextToken;
Number.Rule = NumberToken;
Bool.Rule = BoolToken;
Error.Rule = ErrorToken;
RefError.Rule = RefErrorToken;
#endregion
#region Functions
// Operators are modelled as function calls: named, prefix, postfix and infix forms share the FunctionCall non-terminal.
// The function-name token already contains the opening parenthesis, so only CloseParen appears here.
FunctionCall.Rule =
FunctionName + Arguments + CloseParen
| PrefixOp + Formula
| Formula + PostfixOp
| Formula + InfixOp + Formula
;
FunctionName.Rule = ExcelFunction;
Arguments.Rule = MakeStarRule(Arguments, comma, Argument);
//Arguments.Rule = Argument | Argument + comma + Arguments;
EmptyArgument.Rule = EmptyArgumentToken;
Argument.Rule = Formula | EmptyArgument;
//MarkTransient(Argument);
PrefixOp.Rule =
ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop;
MarkTransient(PrefixOp);
InfixOp.Rule =
expop
| mulop
| divop
| plusop
| minop
| concatop
| gtop
| eqop
| ltop
| neqop
| gteop
| lteop;
MarkTransient(InfixOp);
//PostfixOp.Rule = ImplyPrecedenceHere(Precedence.UnaryPostFix) + percentop;
// ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
PostfixOp.Rule = PreferShiftHere() + percentop;
MarkTransient(PostfixOp);
#endregion
#region References
Reference.Rule = ReferenceItem
| ReferenceFunctionCall
| OpenParen + Reference + PreferShiftHere() + CloseParen
| Prefix + ReferenceItem
| DynamicDataExchange
;
// Reference-producing operations: range (":"), intersection (implied symbol), parenthesised union and reference functions
ReferenceFunctionCall.Rule =
Reference + colon + Reference
| Reference + intersectop + Reference
| OpenParen + Union + CloseParen
| RefFunctionName + Arguments + CloseParen
;
RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;
Union.Rule = MakePlusRule(Union, comma, Reference);
ReferenceItem.Rule =
Cell
| NamedRange
| VRange
| HRange
| RefError
| UDFunctionCall
| StructuredReference
;
MarkTransient(ReferenceItem);
UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
UDFName.Rule = UDFToken;
VRange.Rule = VRangeToken;
HRange.Rule = HRangeToken;
Cell.Rule = CellToken;
File.Rule = FileToken
| EnclosedInBracketsToken
| FilePathWindowsToken + EnclosedInBracketsToken
| FilePathWindowsToken + FileNameWindowsToken
;
DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;
NamedRange.Rule = NameToken | NamedRangeCombinationToken;
// Quoted variants start with the QuoteS key term; the closing quote and "!" are part of the ...Quoted tokens
Prefix.Rule =
SheetToken
| QuoteS + SheetQuotedToken
| File + SheetToken
| QuoteS + File + SheetQuotedToken
| File + exclamationMark
| MultipleSheetsToken
| QuoteS + MultipleSheetsQuotedToken
| File + MultipleSheetsToken
| QuoteS + File + MultipleSheetsQuotedToken
| RefErrorToken
;
StructuredReferenceElement.Rule =
OpenSquareParen + SRColumnToken + CloseSquareParen
| OpenSquareParen + NameToken + CloseSquareParen
| EnclosedInBracketsToken;
//StructuredReferenceKeyword.Rule = EnclosedInBracketsToken;
StructuredReferenceTable.Rule = NameToken;
// Up to three comma-separated elements, where the last element may be a colon-separated pair; "@" may precede a single element or pair
StructuredReferenceExpression.Rule =
StructuredReferenceElement
| at + StructuredReferenceElement
| StructuredReferenceElement + colon + StructuredReferenceElement
| at + StructuredReferenceElement + colon + StructuredReferenceElement
| StructuredReferenceElement + comma + StructuredReferenceElement
| StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement
| StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement
| StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement
;
StructuredReference.Rule =
StructuredReferenceElement
| OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
| StructuredReferenceTable + StructuredReferenceElement
| StructuredReferenceTable + OpenSquareParen + CloseSquareParen
| StructuredReferenceTable + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
;
#endregion
#region Arrays
// Array constant: rows of comma-separated values, rows separated by semicolons
ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;
ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);
// An array element is a constant, a signed number or #REF!
ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
#endregion
#endregion
#region 5-Operator Precedence
// Some of these operators are neutral associative instead of left associative,
// but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
// structure of the parse tree, we like consistency.
RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop);
RegisterOperators(Precedence.Union, Associativity.Left, comma);
RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
RegisterOperators(Precedence.Range, Associativity.Left, colon);
//RegisterOperators(Precedence.ParameterSeparator, comma);
#endregion
}
#region Precedence and Priority constants
// Operator precedence levels passed to RegisterOperators and ImplyPrecedenceHere in the constructor.
// Values increase from comparison (1) up to the range operator (11); 8 is left unused by the commented-out Reference entry.
// Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
// Could also be an enum, but this way you don't need int casts
private static class Precedence
{
// Don't use priority 0, Irony seems to view it as no priority set
public const int Comparison = 1;
public const int Concatenation = 2;
public const int Addition = 3;
public const int Multiplication = 4;
public const int Exponentiation = 5;
public const int UnaryPostFix = 6;
public const int UnaryPreFix = 7;
//public const int Reference = 8;
public const int Union = 9;
public const int Intersection = 10;
public const int Range = 11;
}
// Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
// E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
// E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
// Several terminals share one constant: FileName is used by EnclosedInBracketsToken and FileNameWindowsToken,
// MultipleSheetsToken by both multi-sheet terminals, ExcelRefFunction by both reference-function terminals.
private static class TerminalPriority
{
// Irony Low value
//public const int Low = -1000;
public const int SRColumn = -900;
public const int Name = -800;
public const int ReservedName = -700;
public const int FileName = -500;
public const int SingleQuotedString = -100;
// Irony Normal value, default value
//public const int Normal = 0;
public const int Bool = 0;
public const int MultipleSheetsToken = 100;
// Irony High value
//public const int High = 1000;
public const int CellToken = 1000;
public const int NamedRangeCombination = 1100;
public const int UDF = 1150;
public const int ExcelFunction = 1200;
public const int ExcelRefFunction = 1200;
public const int FileNameNumericToken = 1200;
public const int SheetToken = 1200;
public const int SheetQuotedToken = 1200;
}
#endregion
// Names of the built-in Excel functions; the list is re-read through GetExcelFunctionList on every access.
private static string[] excelFunctionList
{
    get { return GetExcelFunctionList(); }
}
// Reads the built-in function list resource of this grammar version and splits it into its non-empty lines.
private static string[] GetExcelFunctionList()
{
    // Splitting on both '\n' and '\r' and dropping empty entries copes with any line-ending style.
    char[] lineBreaks = { '\n', '\r' };
    using (var reader = new StringReader(Properties.Resources.ExcelBuiltinFunctionList_v141))
    {
        var contents = reader.ReadToEnd();
        return contents.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
    }
}
}
#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
// Note that a few values differ from their constant's identifier (e.g. HorizontalRange = "HRange", TokenSheet = "SheetNameToken", TokenIntersect = "INTERSECT").
public static class GrammarNames
{
#region Non-Terminals
public const string Argument = "Argument";
public const string Arguments = "Arguments";
public const string ArrayColumns = "ArrayColumns";
public const string ArrayConstant = "ArrayConstant";
public const string ArrayFormula = "ArrayFormula";
public const string ArrayRows = "ArrayRows";
public const string Bool = "Bool";
public const string Cell = "Cell";
public const string Constant = "Constant";
public const string ConstantArray = "ConstantArray";
public const string DynamicDataExchange = "DynamicDataExchange";
public const string EmptyArgument = "EmptyArgument";
public const string Error = "Error";
// Used by the grammar as the name of a terminal (the built-in function name token), although listed with the non-terminals
public const string ExcelFunction = "ExcelFunction";
public const string File = "File";
public const string Formula = "Formula";
public const string FormulaWithEq = "FormulaWithEq";
public const string FunctionCall = "FunctionCall";
public const string FunctionName = "FunctionName";
public const string HorizontalRange = "HRange";
public const string NamedRange = "NamedRange";
public const string Number = "Number";
public const string Prefix = "Prefix";
public const string QuotedFileSheet = "QuotedFileSheet";
public const string Range = "Range";
public const string Reference = "Reference";
public const string ReferenceFunctionCall = "ReferenceFunctionCall";
public const string RefError = "RefError";
public const string RefFunctionName = "RefFunctionName";
public const string ReservedName = "ReservedName";
public const string Sheet = "Sheet";
public const string StructuredReference = "StructuredReference";
public const string StructuredReferenceElement = "StructuredReferenceElement";
public const string StructuredReferenceExpression = "StructuredReferenceExpression";
public const string StructuredReferenceTable = "StructuredReferenceTable";
public const string Text = "Text";
public const string UDFName = "UDFName";
public const string UDFunctionCall = "UDFunctionCall";
public const string Union = "Union";
public const string VerticalRange = "VRange";
#endregion
#region Transient Non-Terminals
// Names of the non-terminals that the grammar passes to MarkTransient
public const string TransientStart = "Start";
public const string TransientInfixOp = "InfixOp";
public const string TransientPostfixOp = "PostfixOp";
public const string TransientPrefixOp = "PrefixOp";
public const string TransientReferenceItem = "ReferenceItem";
#endregion
#region Terminals
public const string TokenBool = "BoolToken";
public const string TokenCell = "CellToken";
public const string TokenEmptyArgument = "EmptyArgumentToken";
public const string TokenError = "ErrorToken";
public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
public const string TokenFilePathWindows = "FilePathWindowsToken";
public const string TokenFileNameWindows = "FileNameWindowsToken";
public const string TokenEnclosedInBrackets = "EnclosedInBracketsToken";
public const string TokenFileNameNumeric = "FileNameNumericToken";
public const string TokenHRange = "HRangeToken";
public const string TokenIntersect = "INTERSECT";
public const string TokenMultipleSheets = "MultipleSheetsToken";
public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
public const string TokenName = "NameToken";
public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
public const string TokenNumber = "NumberToken";
public const string TokenRefError = "RefErrorToken";
public const string TokenReservedName = "ReservedNameToken";
public const string TokenSingleQuotedString = "SingleQuotedString";
public const string TokenSheet = "SheetNameToken";
public const string TokenSheetQuoted = "SheetNameQuotedToken";
// NOTE(review): TokenSRTableName and TokenSREnclosedColumn only appear in commented-out grammar code, and TokenSRKeyword is not used by the grammar at all - confirm whether other code still needs them
public const string TokenSRTableName = "SRTableName";
public const string TokenSRKeyword = "SRKeyword";
public const string TokenSRColumn = "SRColumn";
public const string TokenSREnclosedColumn = "SREnclosedColumn";
public const string TokenText = "TextToken";
public const string TokenUDF = "UDFToken";
// Text of the comma key term; the parser's GetFunction returns this for unions
public const string TokenUnionOperator = ",";
public const string TokenVRange = "VRangeToken";
#endregion
}
#endregion
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v141/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v141
{
/// <summary>
/// Excel formula parser. <br/>
/// Contains the parser and utilities that operate directly on the parse tree, or make working with the parse tree easier.
/// </summary>
public static class ExcelFormulaParser
{
/// <summary>
/// Thread-local singleton parser instance
/// </summary>
// [ThreadStatic] gives every thread its own copy of this field; it stays null on a thread until P is first read there.
[ThreadStatic] private static Parser _p;
/// <summary>
/// Parser for the calling thread, created on first use.
/// Backed by a [ThreadStatic] field, so an instance is never shared between threads.
/// </summary>
private static Parser P
{
    get
    {
        if (_p == null)
        {
            _p = new Parser(new ExcelFormulaGrammar());
        }
        return _p;
    }
}
/// <summary>
/// Parse a formula, return the tree's root node
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed
/// </exception>
/// <returns>Parse tree root node</returns>
public static ParseTreeNode Parse(string input)
{
    var tree = ParseToTree(input);
    return tree.Root;
}
/// <summary>
/// Parse a formula, return the tree
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed
/// </exception>
/// <returns>Parse tree</returns>
public static ParseTree ParseToTree(string input)
{
    var tree = P.Parse(input);
    if (tree.HasErrors())
    {
        throw new ArgumentException("Failed parsing input <<" + input + ">>");
    }

    // Give every intersect node a span of length 1 that starts one position (and one column) before its reported location.
    foreach (ParseTreeNode intersect in tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenIntersect)))
    {
        var reported = intersect.Span.Location;
        var shifted = new SourceLocation(reported.Position - 1, reported.Line, reported.Column - 1);
        intersect.Span = new SourceSpan(shifted, 1);
    }

    // Let PrefixInfo adjust quoted sheet name nodes for whitespace, based on the original input text.
    foreach (ParseTreeNode quotedSheetNode in tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenSheetQuoted)))
    {
        PrefixInfo.FixQuotedSheetNodeForWhitespace(quotedSheetNode, input);
    }

    return tree;
}
/// <summary>
/// All nodes (terminal and non-terminal) in depth-first pre-order, with a conditional stop
/// </summary>
/// <param name="root">The root node</param>
/// <param name="stopAt">Don't process the children of a node matching this predicate</param>
/// <returns>A lazily evaluated sequence that starts with <paramref name="root"/> itself.</returns>
// inspiration taken from https://irony.codeplex.com/discussions/213938
public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
{
    // Explicit stack instead of recursion.
    var stack = new Stack<ParseTreeNode>();
    stack.Push(root);
    while (stack.Count > 0)
    {
        var node = stack.Pop();
        yield return node;
        // Check if we don't want to process the children of this node
        // (the node itself has already been returned at this point)
        if (stopAt != null && stopAt(node)) continue;
        var children = node.ChildNodes;
        // Push children on in reverse order so that they will
        // be evaluated left -> right when popped.
        for (int i = children.Count - 1; i >= 0; i--)
        {
            stack.Push(children[i]);
        }
    }
}
/// <summary>
/// All nodes (terminal and non-terminal) in depth-first pre-order
/// </summary>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
{
    return AllNodesConditional(root);
}
/// <summary>
/// All nodes of a certain type in depth-first pre-order
/// </summary>
/// <param name="root">The root node</param>
/// <param name="type">The node type/name to keep, compared with <c>Is</c></param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
{
    return AllNodes(root.AllNodes(), type);
}
// Filters an already enumerated node sequence down to the nodes of the given type.
internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
{
    return allNodes.Where(node => node.Is(type));
}
/// <summary>
/// Get the parent node of a node
/// </summary>
/// <remarks>
/// This is an expensive operation, as the whole tree will be searched through
/// </remarks>
/// <param name="child">The node whose parent is wanted</param>
/// <param name="treeRoot">Root of the tree to search in</param>
/// <exception cref="ArgumentException">If no node in the tree has <paramref name="child"/> among its children</exception>
public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
{
    // The first node, in pre-order, that lists the child among its direct children is the parent.
    foreach (ParseTreeNode candidate in treeRoot.AllNodes())
    {
        foreach (ParseTreeNode candidateChild in candidate.ChildNodes)
        {
            if (candidateChild == child)
            {
                return candidate;
            }
        }
    }
    throw new ArgumentException("Child is not part of the tree", nameof(child));
}
/// <summary>
/// The node type/name, i.e. the name of the grammar term the node was created for
/// </summary>
public static string Type(this ParseTreeNode node) => node.Term.Name;
/// <summary>
/// Check if a node is of a particular type (exact, case-sensitive match on the type name)
/// </summary>
public static bool Is(this ParseTreeNode pt, string type) => pt.Type() == type;
/// <summary>
/// Checks whether this node is a function: a function call, reference function call or UDF call,
/// or a two-child reference whose second child is itself a function
/// </summary>
public static bool IsFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            return true;
        case GrammarNames.Reference:
            // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
            return input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction();
        default:
            return false;
    }
}
/// <summary>
/// Whether or not this node represents parentheses "(_)": a Formula or Reference whose only child has the same type
/// </summary>
public static bool IsParentheses(this ParseTreeNode input)
{
    var type = input.Type();
    if (type != GrammarNames.Formula && type != GrammarNames.Reference)
    {
        return false;
    }
    // The parentheses are punctuation, so only the wrapped node of the same type remains as a child.
    return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(type);
}
/// <summary>
/// Whether this node is a function with exactly three children of which the middle one is an operator
/// </summary>
public static bool IsBinaryOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 3)
    {
        return false;
    }
    return input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Whether this node is a binary operation of type FunctionCall
/// </summary>
public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
{
    if (!input.IsBinaryOperation())
    {
        return false;
    }
    return input.Is(GrammarNames.FunctionCall);
}
/// <summary>
/// Whether this node is a binary operation of type ReferenceFunctionCall
/// </summary>
public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
{
    if (!input.IsBinaryOperation())
    {
        return false;
    }
    return input.Is(GrammarNames.ReferenceFunctionCall);
}
/// <summary>
/// Whether this node is a unary operation, prefix or postfix
/// </summary>
public static bool IsUnaryOperation(this ParseTreeNode input)
    => input.IsUnaryPrefixOperation() || input.IsUnaryPostfixOperation();
/// <summary>
/// Determines whether this node is a function node with exactly two children whose first child is an operator term.
/// </summary>
public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }

    var first = input.ChildNodes[0];
    return first.Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Determines whether this node is a function node with exactly two children whose second child is an operator term.
/// </summary>
public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }

    var second = input.ChildNodes[1];
    return second.Term.Flags.HasFlag(TermFlags.IsOperator);
}
// Returns the input without its last character.
private static string RemoveFinalSymbol(string input) => input.Remove(input.Length - 1);
/// <summary>
/// Get the function or operator name of this function call
/// </summary>
/// <param name="input">A node for which <c>IsFunction()</c> holds</param>
/// <returns>
/// The operator text for operations, the upper-cased function name (without its final character) for named
/// functions, or the printed prefix followed by the function name for external user-defined functions.
/// </returns>
/// <exception cref="ArgumentException">The node is none of the recognised function shapes</exception>
public static string GetFunction(this ParseTreeNode input)
{
    // Intersection and union are checked first: they are reported with fixed names instead of a printed token.
    if (input.IsIntersection())
    {
        return GrammarNames.TokenIntersect;
    }
    if (input.IsUnion())
    {
        return GrammarNames.TokenUnionOperator;
    }
    if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
    {
        // In both shapes the operator is the second child.
        return input.ChildNodes[1].Print();
    }
    if (input.IsUnaryPrefixOperation())
    {
        return input.ChildNodes[0].Print();
    }
    if (input.IsNamedFunction())
    {
        // Normalise the case with the invariant culture: the culture-sensitive ToUpper() would turn "if" into
        // "İF" under e.g. tr-TR, so comparisons such as MatchFunction(node, "IF") would fail.
        return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
    }
    if (input.IsExternalUDFunction())
    {
        // Printed prefix (first child) followed by the name of the wrapped function call (second child).
        return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
    }
    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Tests whether this node is a function whose name (as returned by GetFunction) equals the given name.
/// </summary>
/// <param name="input">The parse tree node to inspect</param>
/// <param name="functionName">The function or operator name to compare against (case-sensitive)</param>
public static bool MatchFunction(this ParseTreeNode input, string functionName)
{
    if (!input.IsFunction())
    {
        return false;
    }

    return input.GetFunction() == functionName;
}
/// <summary>
/// Get all the arguments of a function or operation
/// </summary>
/// <param name="input">A named function, operation, union or external user-defined function node</param>
/// <returns>The argument nodes, in the order in which they appear in the parse tree</returns>
/// <exception cref="ArgumentException">The node is none of the recognised function shapes</exception>
public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
{
    if (input.IsNamedFunction())
    {
        return input
            .ChildNodes[1] // "Arguments" non-terminal
            .ChildNodes // "Argument" non-terminals
            .Select(node => node.ChildNodes[0]);
    }
    if (input.IsBinaryOperation())
    {
        // Operands surround the operator, which is the middle child.
        return new[] {input.ChildNodes[0], input.ChildNodes[2]};
    }
    if (input.IsUnaryPrefixOperation())
    {
        return new[] {input.ChildNodes[1]};
    }
    if (input.IsUnaryPostfixOperation())
    {
        return new[] {input.ChildNodes[0]};
    }
    if (input.IsUnion())
    {
        // The single child is the Union node; its children are the united references.
        return input.ChildNodes[0].ChildNodes;
    }
    if (input.IsExternalUDFunction())
    {
        return input // Reference
            .ChildNodes[1] // UDFunctionCall
            .ChildNodes[1] // Arguments
            .ChildNodes // Argument non-terminals
            .Select(node => node.ChildNodes[0]);
    }
    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Determines whether this node is a built-in Excel function, i.e. a function node whose first child is a
/// FunctionName or RefFunctionName node.
/// </summary>
public static bool IsBuiltinFunction(this ParseTreeNode node)
{
    if (!node.IsFunction())
    {
        return false;
    }

    var head = node.ChildNodes[0];
    return head.Is(GrammarNames.FunctionName) || head.Is(GrammarNames.RefFunctionName);
}
/// <summary>
/// Determines whether this node represents an intersection: a binary operation whose operator token
/// belongs to the intersect terminal.
/// </summary>
public static bool IsIntersection(this ParseTreeNode input)
{
    if (!input.IsBinaryOperation())
    {
        return false;
    }

    var operatorNode = input.ChildNodes[1];
    return operatorNode.Token.Terminal.Name == GrammarNames.TokenIntersect;
}
/// <summary>
/// Determines whether this node represents a union: a ReferenceFunctionCall whose only child is a Union node.
/// </summary>
public static bool IsUnion(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.ReferenceFunctionCall) || input.ChildNodes.Count != 1)
    {
        return false;
    }

    return input.ChildNodes[0].Is(GrammarNames.Union);
}
/// <summary>
/// Determines whether this node is a function call with a name, and not just a unary or binary operation.
/// </summary>
public static bool IsNamedFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.UDFunctionCall:
            // User-defined function calls always count as named.
            return true;
        case GrammarNames.FunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.FunctionName);
        case GrammarNames.ReferenceFunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.RefFunctionName);
        default:
            return false;
    }
}
/// <summary>
/// Determines whether this node is a binary or unary operation.
/// </summary>
public static bool IsOperation(this ParseTreeNode input) =>
    input.IsBinaryOperation() || input.IsUnaryOperation();
/// <summary>
/// Determines whether this node is a Reference with exactly two children whose second child is a named function.
/// </summary>
public static bool IsExternalUDFunction(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.Reference) || input.ChildNodes.Count != 2)
    {
        return false;
    }

    return input.ChildNodes[1].IsNamedFunction();
}
/// <summary>
/// True if this node represents a number constant with a sign, i.e. a unary prefix operation whose operand's
/// first child is a Constant that in turn starts with a Number.
/// </summary>
public static bool IsNumberWithSign(this ParseTreeNode input)
{
    if (!input.IsUnaryPrefixOperation())
    {
        return false;
    }

    // The operand is the second child; look at what it wraps.
    var wrapped = input.ChildNodes[1].ChildNodes[0];
    return wrapped.Is(GrammarNames.Constant) && wrapped.ChildNodes[0].Is(GrammarNames.Number);
}
/// <summary>
/// Extracts all of the information from a Prefix non-terminal by delegating to <c>PrefixInfo.From</c>.
/// </summary>
/// <param name="prefix">The Prefix node to analyse</param>
public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix)
{
    return PrefixInfo.From(prefix);
}
/// <summary>
/// Descends through first children for as long as the current node is a Formula and returns the first
/// non-Formula node reached.
/// </summary>
public static ParseTreeNode SkipFormula(this ParseTreeNode input)
{
    // Each Formula layer is peeled off by following its first child.
    return input.Is(GrammarNames.Formula)
        ? input.ChildNodes.First().SkipFormula()
        : input;
}
/// <summary>
/// Get all child nodes that are references and aren't part of another reference expression
/// </summary>
/// <param name="input">The root of the (sub)tree to search</param>
/// <returns>The Reference nodes found, each passed through <c>SkipToRelevant()</c></returns>
public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
{
    // NOTE(review): AllNodesConditional is defined elsewhere; presumably the predicate stops the traversal
    // from descending below a Reference node - confirm against its implementation.
    return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
        .Where(node => node.Is(GrammarNames.Reference))
        .Select(node => node.SkipToRelevant());
}
/// <summary>
/// Gets the ParserReferences from the input parse tree node and its children
/// </summary>
/// <remarks>
/// 5 cases:
/// 1. ReferenceItem node: convert to ParserReference
/// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
///    (to include the references in the arguments of external UDFs)
/// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
/// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
/// 5. Other cases (RefFunctionCall, Union, Arguments): recursive call on the nodes returned from GetReferenceNodes(node)
/// </remarks>
/// <param name="node">The node to collect references from</param>
/// <returns>The references found in <paramref name="node"/> and its descendants</returns>
public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
{
    // A Reference with a single child is only a wrapper: continue with the wrapped node.
    if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
    {
        node = node.ChildNodes[0];
    }

    var list = new List<ParserReference>();
    switch (node.Type())
    {
        case GrammarNames.Cell:
        case GrammarNames.NamedRange:
        case GrammarNames.HorizontalRange:
        case GrammarNames.VerticalRange:
        case GrammarNames.StructuredReference:
            list.Add(new ParserReference(node));
            break;
        case GrammarNames.Reference:
            // Reference with more than one child: the reference itself plus whatever its second child contains.
            list.Add(new ParserReference(node));
            list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            break;
        default:
            if (node.IsRange())
            {
                var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray();
                var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray();
                if (IsCellReference(rangeStart) && IsCellReference(rangeEnd))
                {
                    // Both limits are single cells: merge them into one CellRange reference,
                    // reusing the start reference object.
                    ParserReference range = rangeStart.First();
                    range.MaxLocation = rangeEnd.First().MinLocation;
                    range.ReferenceType = ReferenceType.CellRange;
                    range.LocationString = node.Print();
                    list.Add(range);
                }
                else
                {
                    // Complex limits: report the references of both limits separately.
                    list.AddRange(rangeStart);
                    list.AddRange(rangeEnd);
                }
            }
            else
            {
                list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            }
            break;
    }
    return list;
}
// True if the list holds exactly one reference and that reference is of type Cell.
private static bool IsCellReference(IList<ParserReference> references)
{
    return references.Count == 1 && references[0].ReferenceType == ReferenceType.Cell;
}
/// <summary>
/// Determines whether this node represents a range: a binary reference operation whose operator is ":".
/// </summary>
public static bool IsRange(this ParseTreeNode input)
{
    if (!input.IsBinaryReferenceOperation())
    {
        return false;
    }

    return input.ChildNodes[1].Is(":");
}
/// <summary>
/// Go to the first "relevant" child node, i.e. skips wrapper nodes
/// </summary>
/// <param name="input">The input parse tree node</param>
/// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
/// <remarks>
/// Skips:
/// * FormulaWithEq and ArrayFormula nodes
/// * Formula nodes
/// * Parentheses
/// * Reference nodes which are just wrappers
/// </remarks>
public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
{
    var current = input;
    for (;;)
    {
        var type = current.Type();

        if (type == GrammarNames.FormulaWithEq || type == GrammarNames.ArrayFormula)
        {
            // These wrappers are left through their second child.
            current = current.ChildNodes[1];
            continue;
        }

        if (type == GrammarNames.Argument || type == GrammarNames.Formula)
        {
            // Only single-child nodes are wrappers; anything else is relevant.
            if (current.ChildNodes.Count != 1)
            {
                return current;
            }
            current = current.ChildNodes[0];
            continue;
        }

        if (type == GrammarNames.Reference)
        {
            // Skip references which are parentheses
            // Skip references without a prefix (=> they only have one child node) if the option is set
            var isWrapper = (skipReferencesWithoutPrefix && current.ChildNodes.Count == 1) || current.IsParentheses();
            if (!isWrapper)
            {
                return current;
            }
            current = current.ChildNodes[0];
            continue;
        }

        // Any other node type is relevant.
        return current;
    }
}
/// <summary>
/// Pretty-print a parse tree to a string
/// </summary>
/// <param name="input">The root of the (sub)tree to print</param>
/// <returns>The textual form of the node and its descendants</returns>
/// <exception cref="ArgumentException">
/// The node is a function of an unknown shape, or a non-terminal for which no print rule exists and which
/// does not have exactly one child.
/// </exception>
public static string Print(this ParseTreeNode input)
{
    // For terminals, just print the token text
    if (input.Term is Terminal)
    {
        return input.Token.Text;
    }
    // (Lazy) enumerable for printed children; children are only printed when a branch below enumerates it
    var children = input.ChildNodes.Select(Print);
    // Concrete list when needed
    List<string> childrenList;
    // Switch on non-terminals
    switch (input.Term.Name)
    {
        case GrammarNames.Formula:
            // Check if these are brackets, otherwise print first child
            return IsParentheses(input) ? $"({children.First()})" : children.First();
        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            childrenList = children.ToList();
            if (input.IsNamedFunction())
            {
                // The closing parenthesis is not among the printed children, so it is appended here
                return string.Join("", childrenList) + ")";
            }
            if (input.IsBinaryOperation())
            {
                // format string for "normal" binary operation
                string format = "{0} {1} {2}";
                if (input.IsIntersection())
                {
                    // Intersection: the operands are separated by a single space, the operator child is not printed
                    format = "{0} {2}";
                }
                else if (input.IsBinaryReferenceOperation())
                {
                    // Other reference operators are printed without surrounding spaces
                    format = "{0}{1}{2}";
                }
                return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
            }
            if (input.IsUnion())
            {
                return $"({string.Join(",", childrenList)})";
            }
            if (input.IsUnaryOperation())
            {
                return string.Join("", childrenList);
            }
            throw new ArgumentException("Unknown function type.");
        case GrammarNames.Reference:
            return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);
        case GrammarNames.Prefix:
            var ret = string.Join("", children);
            // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
            if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
            {
                ret += "!";
            }
            return ret;
        case GrammarNames.ArrayFormula:
            return "{=" + children.ElementAt(1) + "}";
        case GrammarNames.StructuredReference:
            var sb = new StringBuilder();
            // True if the reference starts with a table name
            var hashtable = input.ChildNodes.Count >= 1 && input.ChildNodes[0].Is(GrammarNames.StructuredReferenceTable);
            // Index of the child holding the contents: it follows the table name if there is one
            var contentsNode = hashtable ? 1 : 0;
            childrenList = children.ToList();
            if (hashtable)
            {
                sb.Append(childrenList[0]);
            }
            if (hashtable && input.ChildNodes.Count == 1)
            {
                // Full table reference
                sb.Append("[]");
            }
            else if (input.ChildNodes[contentsNode].Is(GrammarNames.StructuredReferenceElement))
            {
                sb.Append(childrenList[contentsNode]);
            }
            else
            {
                sb.Append($"[{childrenList[contentsNode]}]");
            }
            return sb.ToString();
        // Terms for which to print all child nodes concatenated
        case GrammarNames.ArrayConstant:
        case GrammarNames.DynamicDataExchange:
        case GrammarNames.FormulaWithEq:
        case GrammarNames.File:
        case GrammarNames.StructuredReferenceExpression:
            return string.Join("", children);
        // Terms for which we print the children comma-separated
        case GrammarNames.Arguments:
        case GrammarNames.ArrayRows:
        case GrammarNames.Union:
            return string.Join(",", children);
        case GrammarNames.ArrayColumns:
            return string.Join(";", children);
        case GrammarNames.ConstantArray:
            return $"{{{children.First()}}}";
        default:
            // If it is not defined above and the number of children is exactly one, we want to just print the first child
            if (input.ChildNodes.Count == 1)
            {
                return children.First();
            }
            throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine +
                                        "This probably means the Excel grammar was modified without the print function being modified");
    }
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v141/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v141
{
/// <summary>
/// The kinds of reference a <c>ParserReference</c> can describe.
/// </summary>
public enum ReferenceType
{
// Single cell; set for Cell nodes
Cell,
// Range of cells; not assigned by ParserReference.InitializeReference itself
CellRange,
// Named range; set for NamedRange nodes
UserDefinedName,
// Set for HorizontalRange nodes
HorizontalRange,
// Set for VerticalRange nodes
VerticalRange,
// Set for RefError nodes
RefError,
// Set for StructuredReference nodes
Table
}
/// <summary>
/// Describes a single reference (cell, range, name, table, ...) found in a formula.
/// </summary>
public class ParserReference
{
    /// <summary>Row number used as the end of vertical ranges</summary>
    public const int MaxRangeHeight = 100;
    /// <summary>Number of columns used to compute the end column of horizontal ranges</summary>
    public const int MaxRangeWidth = 100;

    public ReferenceType ReferenceType { get; set; }
    /// <summary>The printed form of the node this reference was created from</summary>
    public string LocationString { get; set; }
    public string Worksheet { get; set; }
    /// <summary>The second sheet of a multiple-sheets prefix</summary>
    public string LastWorksheet { get; set; }
    /// <summary>The file name, or the file number converted to a string</summary>
    public string FileName { get; set; }
    /// <summary>The name of a named range or table</summary>
    public string Name { get; private set; }
    public string MinLocation { get; set; } //Location as appearing in the formula, eg $A$1
    public string MaxLocation { get; set; }

    /// <summary>
    /// Creates a reference from explicit values.
    /// </summary>
    /// <remarks>If <paramref name="maxLocation"/> is null, <paramref name="minLocation"/> is used for both limits.</remarks>
    public ParserReference(ReferenceType referenceType, string locationString = null, string worksheet = null, string lastWorksheet = null,
        string fileName = null, string name = null, string minLocation = null, string maxLocation = null)
    {
        ReferenceType = referenceType;
        LocationString = locationString;
        Worksheet = worksheet;
        LastWorksheet = lastWorksheet;
        FileName = fileName;
        Name = name;
        MinLocation = minLocation;
        MaxLocation = maxLocation ?? minLocation;
    }

    /// <summary>
    /// Creates a reference from a parse tree node, see <see cref="InitializeReference"/>.
    /// </summary>
    public ParserReference(ParseTreeNode node)
    {
        InitializeReference(node);
    }

    /// <summary>
    /// Initializes the current object based on the input ParseTreeNode
    /// </summary>
    /// <remarks>
    /// For Reference nodes (Prefix ReferenceItem), it initialize the values derived from the Prefix node and
    /// is re-invoked for the ReferenceItem node.
    /// </remarks>
    public void InitializeReference(ParseTreeNode node)
    {
        switch (node.Type())
        {
            case GrammarNames.Reference:
                PrefixInfo prefix = node.ChildNodes[0].GetPrefixInfo();
                // Doubled single quotes in a sheet name stand for one quote
                Worksheet = prefix.HasSheet ? prefix.Sheet.Replace("''", "'") : "(Undefined sheet)";
                if (prefix.HasMultipleSheets)
                {
                    string[] sheets = prefix.MultipleSheets.Split(':');
                    Worksheet = sheets[0];
                    LastWorksheet = sheets[1];
                }
                // A file number takes precedence over a file name
                if (prefix.HasFileNumber)
                {
                    FileName = prefix.FileNumber.ToString();
                }
                else if (prefix.HasFileName)
                {
                    FileName = prefix.FileName;
                }
                else
                {
                    FileName = null;
                }
                InitializeReference(node.ChildNodes[1]);
                break;
            case GrammarNames.Cell:
                ReferenceType = ReferenceType.Cell;
                MinLocation = node.ChildNodes[0].Token.ValueString;
                MaxLocation = MinLocation;
                break;
            case GrammarNames.NamedRange:
                ReferenceType = ReferenceType.UserDefinedName;
                Name = node.ChildNodes[0].Token.ValueString;
                break;
            case GrammarNames.StructuredReference:
                ReferenceType = ReferenceType.Table;
                // The name stays null if the structured reference has no table child
                Name = node.ChildNodes.FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceTable)?.ChildNodes[0].Token.ValueString;
                break;
            case GrammarNames.HorizontalRange:
                string[] horizontalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                ReferenceType = ReferenceType.HorizontalRange;
                // Rows are given by the token; columns run from A to the MaxRangeWidth-th column
                MinLocation = "A" + horizontalLimits[0];
                MaxLocation = ConvertColumnToStr(MaxRangeWidth - 1) + horizontalLimits[1];
                break;
            case GrammarNames.VerticalRange:
                string[] verticalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                ReferenceType = ReferenceType.VerticalRange;
                // Columns are given by the token; rows run from 1 to MaxRangeHeight
                MinLocation = verticalLimits[0] + "1";
                MaxLocation = verticalLimits[1] + MaxRangeHeight;
                break;
            case GrammarNames.RefError:
                ReferenceType = ReferenceType.RefError;
                break;
        }

        // For Reference nodes this runs after the recursive call, so the printed full reference wins
        LocationString = node.Print();
    }

    /// <summary>
    /// Converts the column number to an Excel column string representation.
    /// </summary>
    /// <param name="columnNumber">The zero-based column number.</param>
    private static string ConvertColumnToStr(int columnNumber)
    {
        var sb = new System.Text.StringBuilder();
        while (columnNumber >= 0)
        {
            // 65 is 'A'; prepend the letter for the current least significant position
            sb.Insert(0, (char)(65 + columnNumber % 26));
            // Column letters have no zero digit (after Z comes AA), hence the -1
            columnNumber = columnNumber / 26 - 1;
        }
        return sb.ToString();
    }

    /// <summary>
    /// Returns the location for cell references and "MinLocation:MaxLocation" for all other reference types.
    /// </summary>
    public override string ToString()
    {
        if (ReferenceType == ReferenceType.Cell)
        {
            // MinLocation may be null when the reference was built without a location; never return null
            // (and never throw) from ToString().
            return MinLocation ?? string.Empty;
        }

        return $"{MinLocation}:{MaxLocation}";
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v141/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v141
{
// TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23
/// <summary>
/// Simple data class that holds information about a Prefix.
/// </summary>
public class PrefixInfo : IEquatable<PrefixInfo>
{
    public string FilePath { get; }
    public bool HasFilePath => FilePath != null;

    private readonly int? _fileNumber;
    /// <summary>The file number, or 0 if there is none (check <see cref="HasFileNumber"/>)</summary>
    public int FileNumber => _fileNumber.GetValueOrDefault();
    public bool HasFileNumber => _fileNumber.HasValue;

    public string FileName { get; }
    public bool HasFileName => FileName != null;

    public bool HasFile => HasFileName || HasFileNumber;

    public string Sheet { get; }
    public bool HasSheet => Sheet != null;

    public string MultipleSheets { get; }
    public bool HasMultipleSheets => MultipleSheets != null;

    public bool IsQuoted { get; }

    public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
    {
        Sheet = sheet;
        _fileNumber = fileNumber;
        FileName = fileName;
        FilePath = filePath;
        MultipleSheets = multipleSheets;
        IsQuoted = isQuoted;
    }

    /// <summary>
    /// Create a PrefixInfo class from a parse tree node
    /// </summary>
    /// <param name="prefix">A node of type Prefix</param>
    /// <exception cref="ArgumentException">The node is not a Prefix node</exception>
    internal static PrefixInfo From(ParseTreeNode prefix)
    {
        if (prefix.Type() != GrammarNames.Prefix)
        {
            throw new ArgumentException("Not a prefix", nameof(prefix));
        }

        string filePath = null;
        int? fileNumber = null;
        string fileName = null;
        string sheetName = null;
        string multipleSheets = null;

        // Token number we're processing
        var cur = 0;

        // Check for quotes
        var isQuoted = prefix.ChildNodes[cur].Is("'");
        if (isQuoted)
        {
            cur++;
        }

        // Check and process file
        if (prefix.ChildNodes[cur].Is(GrammarNames.File))
        {
            ParseTreeNode file = prefix.ChildNodes[cur];

            if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
            {
                // Numeric filename: strip the first and last character, the rest should be the number
                fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
            }
            else
            {
                // String filename
                var iCur = 0;
                // Check if it includes a path
                if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePathWindows))
                {
                    filePath = file.ChildNodes[iCur].Print();
                    iCur++;
                }

                if (file.ChildNodes[iCur].Is(GrammarNames.TokenEnclosedInBrackets))
                {
                    // Strip the first and last character
                    fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                }
                else
                {
                    fileName = file.ChildNodes[iCur].Print();
                }
            }

            cur++;
        }

        // A prefix may consist of a file only (its exclamation mark is not part of the parse tree),
        // in which case there is no sheet node left to inspect.
        if (cur < prefix.ChildNodes.Count)
        {
            ParseTreeNode sheetNode = prefix.ChildNodes[cur];

            // Check for a non-quoted sheet
            if (sheetNode.Is(GrammarNames.TokenSheet))
            {
                // remove !
                sheetName = Substr(sheetNode.Print(), 1);
            }
            // Check for a quoted sheet
            else if (sheetNode.Is(GrammarNames.TokenSheetQuoted))
            {
                // remove quote and !
                sheetName = Substr(sheetNode.Print(), 2);

                if (sheetName == "")
                {
                    // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                    // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                    sheetName = " ";
                }
            }
            // Check if multiple sheets
            else if (sheetNode.Is(GrammarNames.TokenMultipleSheets))
            {
                // remove !
                multipleSheets = Substr(sheetNode.Print(), 1);
            }
        }

        return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
    }

    /// <summary>
    /// Extends the span and token text of a quoted sheet node backwards over whitespace that directly precedes it
    /// in the source text.
    /// </summary>
    internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
    {
        var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
        SourceLocation currentLocation = quotedSheetNode.Span.Location;
        if (newPosition == currentLocation.Position)
        {
            return;
        }

        // The start moves back by (Position - newPosition) characters, so the column moves back by the same amount.
        // NOTE(review): assumes the skipped whitespace is on the same line - confirm.
        var shift = currentLocation.Position - newPosition;
        var newLocation = new SourceLocation(newPosition, currentLocation.Line, currentLocation.Column - shift);
        quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

        // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
        typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
            ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
    }

    // Walks back from the node's start position for as long as the preceding character is whitespace.
    private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
    {
        var startIndex = nodeSheetQuoted.Span.Location.Position;
        while (startIndex > 0)
        {
            if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
            {
                break;
            }

            startIndex--;
        }

        return startIndex;
    }

    // Returns s without its first removeFirst and last removeLast characters.
    private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
    {
        return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
    }

    public override bool Equals(object other) => Equals(other as PrefixInfo);

    /// <summary>
    /// Compares file number, file path, file name, sheet and multiple sheets; strings are compared
    /// case-insensitively. <see cref="IsQuoted"/> is not taken into account.
    /// </summary>
    public bool Equals(PrefixInfo other)
    {
        if (ReferenceEquals(null, other)) return false;
        if (ReferenceEquals(this, other)) return true;
        return _fileNumber == other._fileNumber
               && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase)
               && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase)
               && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase)
               && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
    }

    public override int GetHashCode()
    {
        unchecked
        {
            var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
            hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
            hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
            hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
            hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
            return hashCode;
        }
    }

    public static bool operator ==(PrefixInfo left, PrefixInfo right)
    {
        return Equals(left, right);
    }

    public static bool operator !=(PrefixInfo left, PrefixInfo right)
    {
        return !Equals(left, right);
    }

    /// <summary>
    /// Rebuilds the prefix text from its parts, always ending with an exclamation mark.
    /// </summary>
    public override string ToString()
    {
        var res = new StringBuilder();
        if (IsQuoted) res.Append("'");
        if (HasFilePath) res.Append(FilePath);
        if (HasFileNumber) res.Append($"[{FileNumber}]");
        if (HasFileName) res.Append($"[{FileName}]");
        if (HasSheet) res.Append(Sheet);
        if (HasMultipleSheets) res.Append(MultipleSheets);
        if (IsQuoted) res.Append("'");
        res.Append("!");
        return res.ToString();
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v142/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.IO;
using System.Reflection;
namespace XLParser.Web.XLParserVersions.v142
{
///
/// Contains the XLParser grammar
///
[Language("Excel Formulas", "1.4.2", "Grammar for Excel Formulas")]
public class ExcelFormulaGrammar : Grammar
{
#region 1-Terminals
#region Symbols and operators
public Terminal at => ToTerm("@");
public Terminal comma => ToTerm(",");
public Terminal colon => ToTerm(":");
public Terminal semicolon => ToTerm(";");
public Terminal OpenParen => ToTerm("(");
public Terminal CloseParen => ToTerm(")");
public Terminal CloseSquareParen => ToTerm("]");
public Terminal OpenSquareParen => ToTerm("[");
public Terminal exclamationMark => ToTerm("!");
public Terminal CloseCurlyParen => ToTerm("}");
public Terminal OpenCurlyParen => ToTerm("{");
public Terminal QuoteS => ToTerm("'");
public Terminal mulop => ToTerm("*");
public Terminal plusop => ToTerm("+");
public Terminal divop => ToTerm("/");
public Terminal minop => ToTerm("-");
public Terminal concatop => ToTerm("&");
public Terminal expop => ToTerm("^");
// Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
// Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);
public Terminal percentop => ToTerm("%");
public Terminal gtop => ToTerm(">");
public Terminal eqop => ToTerm("=");
public Terminal ltop => ToTerm("<");
public Terminal neqop => ToTerm("<>");
public Terminal gteop => ToTerm(">=");
public Terminal lteop => ToTerm("<=");
#endregion
#region Literals
public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE")
{
Priority = TerminalPriority.Bool
};
public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None)
{
DefaultIntTypes = new[] {TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt}
};
public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes);
public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes)
{ Priority = TerminalPriority.SingleQuotedString };
public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A");
public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError);
#endregion
#region Functions
private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers
private const string AllUdfChars = SpecialUdfChars + @"\\.\w";
private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)";
// The following regex uses the rather exotic feature Character Class Subtraction
// https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction
private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\(";
public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) {Priority = TerminalPriority.UDF};
public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(")
{ Priority = TerminalPriority.ExcelRefFunction };
public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(")
{ Priority = TerminalPriority.ExcelRefFunction };
public Terminal ExcelFunction { get; } = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + string.Join("|", excelFunctionList) + ")\\(")
{ Priority = TerminalPriority.ExcelFunction };
// Using this instead of Empty allows a more accurate tree
public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);
#endregion
#region References and names
private const string ColumnPattern = @"(?:[A-Z]{1,2}|[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D])";
public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern);
public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*");
private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?[1-9][0-9]*";
public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex)
{ Priority = TerminalPriority.CellToken };
// Start with a letter, underscore or backslash; continue with word characters (letters, digits and underscore), backslash, dot, question mark or euro sign
private const string NameStartCharRegex = @"[\p{L}\\_]";
private const string NameValidCharacterRegex = @"[\w\\_\.\?€]";
public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*")
{ Priority = TerminalPriority.Name };
// Words that match the name pattern, but are disallowed as names by Excel. E.g. "A1" matches the name pattern, but it is not a valid name because it is also a cell reference.
// If we ever parse R1C1 references, make sure to include them here
// TODO: Add all function names here
private const string NameInvalidWordsRegex =
"((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
// \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
+ "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
;
// To prevent e.g. "A1A1" being parsed as 2 cell tokens
public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NameInvalidWordsRegex + NameValidCharacterRegex + "+")
{ Priority = TerminalPriority.NamedRangeCombination };
public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+")
{ Priority = TerminalPriority.ReservedName };
#region Structured References
//public Terminal SRTableNameToken = new RegexBasedTerminal(GrammarNames.TokenSRTableName, @"[\w\\.]+\[")
//{Priority = 0};
public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, @"[\w\\.]+")
{ Priority = TerminalPriority.SRColumn };
//public Terminal SREnclosedColumnToken = new RegexBasedTerminal(GrammarNames.TokenSREnclosedColumn, @"\[( )*[\w+\\.,:#'""{}$^&*+=->]+( )*\]")
//{Priority = 0};
#endregion
#region Prefixes
private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
private const string notSheetNameChars = @"'*\[\]\\:/?";
//const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
//const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*";
//private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";
public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!")
{ Priority = TerminalPriority.SheetToken };
public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!")
{ Priority = TerminalPriority.SheetQuotedToken };
private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!";
private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!";
public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
private const string fileNameNumericRegex = @"\[[0-9]+\]";
public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex)
{ Priority = TerminalPriority.FileNameNumericToken };
private const string fileNameInBracketsRegex = @"\[[^\[\]]+\]";
public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex)
{ Priority = TerminalPriority.FileName };
// Source: https://stackoverflow.com/a/14632579
private const string fileNameRegex = @"[^\.]+\..{1,4}";
public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex)
{ Priority = TerminalPriority.FileName };
// Source: http://stackoverflow.com/a/6416209/572635
private const string fileNameForbiddenCharacter = @"<>:""/\|?*";
private const string filePathRegex = @"(?:[a-zA-Z]:|https?:\\|\\?\\?[\w\.-]+\\[\w.$]+)\\(([^" + fileNameForbiddenCharacter + @"\\]| )+\\)*";
public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex);
#endregion
#endregion
#endregion
#region 2-NonTerminals
// Most non-terminals are first defined here, so they can be used anywhere in the rules
// Otherwise you can only use non-terminals that have been defined previously
// Each non-terminal is named through the matching GrammarNames constant; the rules themselves are assigned in the constructor
public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument);
public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments);
public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns);
public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant);
public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula);
public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows);
public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool);
public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell);
public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant);
public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray);
public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument);
public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error);
public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula);
public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq);
public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall);
public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName);
public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange);
// Marked transient in the constructor
public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp);
public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange);
public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number);
// Marked transient in the constructor
public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix);
// Marked transient in the constructor
public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
// NOTE(review): the constructor assigns no rule to QuotedFileSheet
public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference);
//public NonTerminal ReferenceFunction{ get; } = new NonTerminal(GrammarNames.ReferenceFunction);
// Marked transient in the constructor
public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError);
public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName);
public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName);
// NOTE(review): the constructor assigns no rule to Sheet
public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet);
// Root of the grammar; marked transient in the constructor
public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart);
public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
public NonTerminal StructuredReferenceElement { get; } = new NonTerminal(GrammarNames.StructuredReferenceElement);
public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
//public NonTerminal StructuredReferenceKeyword { get; } = new NonTerminal(GrammarNames.StructuredReferenceKeyword);
public NonTerminal StructuredReferenceTable { get; } = new NonTerminal(GrammarNames.StructuredReferenceTable);
public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);
#endregion
/// <summary>
/// Builds the Excel formula grammar: marks punctuation, assigns a rule to the non-terminals
/// declared in this class and registers the operator precedences.
/// </summary>
// NOTE(review): "false" is passed to the base grammar constructor; presumably this is Irony's case-sensitivity flag - confirm.
public ExcelFormulaGrammar() : base(false)
{
#region Punctuation
// Round, square and curly brackets are punctuation for every rule below
MarkPunctuation(OpenParen, CloseParen);
MarkPunctuation(OpenSquareParen, CloseSquareParen);
MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
//exclamationMark.SetFlag(TermFlags.IsDelimiter);
#endregion
#region Rules
#region Base rules
Root = Start;
// A parse starts with "=Formula", a bare Formula, or an array formula "{=Formula}"
Start.Rule = FormulaWithEq
| Formula
| ArrayFormula
;
MarkTransient(Start);
ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;
FormulaWithEq.Rule = eqop + Formula;
Formula.Rule =
Reference
| Constant
| FunctionCall
| ConstantArray
| OpenParen + Formula + CloseParen
| ReservedName
;
//MarkTransient(Formula);
ReservedName.Rule = ReservedNameToken;
Constant.Rule = Number
| Text
| Bool
| Error
;
// Each of the following non-terminals wraps exactly one token
Text.Rule = TextToken;
Number.Rule = NumberToken;
Bool.Rule = BoolToken;
Error.Rule = ErrorToken;
RefError.Rule = RefErrorToken;
#endregion
#region Functions
// Named calls have no separate OpenParen term in their rule
// NOTE(review): presumably the function-name token itself ends with "(" - confirm against the ExcelFunction terminal
// Unary and binary operations are modelled as FunctionCall as well
FunctionCall.Rule =
FunctionName + Arguments + CloseParen
| PrefixOp + Formula
| Formula + PostfixOp
| Formula + InfixOp + Formula
;
FunctionName.Rule = ExcelFunction;
// Zero or more comma-separated arguments
Arguments.Rule = MakeStarRule(Arguments, comma, Argument);
//Arguments.Rule = Argument | Argument + comma + Arguments;
EmptyArgument.Rule = EmptyArgumentToken;
Argument.Rule = Formula | EmptyArgument;
//MarkTransient(Argument);
PrefixOp.Rule =
ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop;
MarkTransient(PrefixOp);
InfixOp.Rule =
expop
| mulop
| divop
| plusop
| minop
| concatop
| gtop
| eqop
| ltop
| neqop
| gteop
| lteop;
MarkTransient(InfixOp);
//PostfixOp.Rule = ImplyPrecedenceHere(Precedence.UnaryPostFix) + percentop;
// ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
PostfixOp.Rule = PreferShiftHere() + percentop;
MarkTransient(PostfixOp);
#endregion
#region References
// A reference is a plain item, a reference function/operator, a parenthesised reference,
// a prefixed item (sheet and/or file) or a dynamic data exchange link
Reference.Rule = ReferenceItem
| ReferenceFunctionCall
| OpenParen + Reference + PreferShiftHere() + CloseParen
| Prefix + ReferenceItem
| DynamicDataExchange
;
// Range (colon), intersection, union and reference-returning functions
ReferenceFunctionCall.Rule =
Reference + colon + Reference
| Reference + intersectop + Reference
| OpenParen + Union + CloseParen
| RefFunctionName + Arguments + CloseParen
;
RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;
// One or more comma-separated references
Union.Rule = MakePlusRule(Union, comma, Reference);
ReferenceItem.Rule =
Cell
| NamedRange
| VRange
| HRange
| RefError
| UDFunctionCall
| StructuredReference
;
MarkTransient(ReferenceItem);
UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
UDFName.Rule = UDFToken;
VRange.Rule = VRangeToken;
HRange.Rule = HRangeToken;
Cell.Rule = CellToken;
// A file is a bracketed number, a bracketed name, or a path followed by a (bracketed or plain) file name
File.Rule = FileNameNumericToken
| FileNameEnclosedInBracketsToken
| FilePathToken + FileNameEnclosedInBracketsToken
| FilePathToken + FileName
;
DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;
NamedRange.Rule = NameToken | NamedRangeCombinationToken;
// Every supported combination of file, sheet(s) and quoting that can precede a reference item
Prefix.Rule =
SheetToken
| QuoteS + SheetQuotedToken
| File + SheetToken
| QuoteS + File + SheetQuotedToken
| File + exclamationMark
| MultipleSheetsToken
| QuoteS + MultipleSheetsQuotedToken
| File + MultipleSheetsToken
| QuoteS + File + MultipleSheetsQuotedToken
| RefErrorToken
;
StructuredReferenceElement.Rule =
OpenSquareParen + SRColumnToken + CloseSquareParen
| OpenSquareParen + NameToken + CloseSquareParen
| FileNameEnclosedInBracketsToken;
//StructuredReferenceKeyword.Rule = EnclosedInBracketsToken;
StructuredReferenceTable.Rule = NameToken;
// Explicit enumeration of the supported element combinations (optional "@", up to three
// comma-separated elements, optionally ending in an element:element range)
StructuredReferenceExpression.Rule =
StructuredReferenceElement
| at + StructuredReferenceElement
| StructuredReferenceElement + colon + StructuredReferenceElement
| at + StructuredReferenceElement + colon + StructuredReferenceElement
| StructuredReferenceElement + comma + StructuredReferenceElement
| StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement
| StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement
| StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement
;
// With or without a leading table name; "Table[]" is allowed as well
StructuredReference.Rule =
StructuredReferenceElement
| OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
| StructuredReferenceTable + StructuredReferenceElement
| StructuredReferenceTable + OpenSquareParen + CloseSquareParen
| StructuredReferenceTable + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
;
#endregion
#region Arrays
// Constant array: rows are separated by semicolons, the values within a row by commas
ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;
ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);
ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
#endregion
#endregion
#region 5-Operator Precedence
// Some of these operators are neutral associative instead of left associative,
// but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
// structure of the parse tree, we like consistency.
RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop);
RegisterOperators(Precedence.Union, Associativity.Left, comma);
RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
RegisterOperators(Precedence.Range, Associativity.Left, colon);
//RegisterOperators(Precedence.ParameterSeparator, comma);
#endregion
}
#region Precedence and Priority constants
// Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
// Could also be an enum, but this way you don't need int casts
// Operator precedence levels passed to RegisterOperators / ImplyPrecedenceHere in the constructor.
// NOTE(review): presumably a larger value binds tighter (values grow from comparison up to the range operator) - confirm against Irony.
private static class Precedence
{
// Don't use priority 0, Irony seems to view it as no priority set
public const int Comparison = 1;
public const int Concatenation = 2;
public const int Addition = 3;
public const int Multiplication = 4;
public const int Exponentiation = 5;
public const int UnaryPostFix = 6;
public const int UnaryPreFix = 7;
// 8 is left unused (see the commented-out Reference level)
//public const int Reference = 8;
public const int Union = 9;
public const int Intersection = 10;
public const int Range = 11;
}
// Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
// E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
// E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
// The values are grouped around Irony's Low (-1000), Normal (0) and High (1000) reference values noted below.
private static class TerminalPriority
{
// Irony Low value
//public const int Low = -1000;
public const int SRColumn = -900;
public const int Name = -800;
public const int ReservedName = -700;
// Shared by the FileName and FileNameEnclosedInBrackets terminals
public const int FileName = -500;
public const int SingleQuotedString = -100;
// Irony Normal value, default value
//public const int Normal = 0;
public const int Bool = 0;
// Used by MultipleSheetsQuotedToken as well
public const int MultipleSheetsToken = 100;
// Irony High value
//public const int High = 1000;
public const int CellToken = 1000;
public const int NamedRangeCombination = 1100;
public const int UDF = 1150;
// The remaining entries all share the same (highest) value
public const int ExcelFunction = 1200;
public const int ExcelRefFunction = 1200;
public const int FileNameNumericToken = 1200;
public const int SheetToken = 1200;
public const int SheetQuotedToken = 1200;
}
#endregion
// Names of the built-in Excel functions; the embedded resource is re-read on every access
private static string[] excelFunctionList
{
    get { return GetExcelFunctionList(); }
}
// Reads the embedded v142 function list and splits it into its non-empty lines
private static string[] GetExcelFunctionList()
{
    char[] lineBreaks = { '\n', '\r' };
    using (var reader = new StringReader(Properties.Resources.ExcelBuiltinFunctionList_v142))
    {
        string contents = reader.ReadToEnd();
        return contents.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
    }
}
}
#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
#region Non-Terminals
public const string Argument = "Argument";
public const string Arguments = "Arguments";
public const string ArrayColumns = "ArrayColumns";
public const string ArrayConstant = "ArrayConstant";
public const string ArrayFormula = "ArrayFormula";
public const string ArrayRows = "ArrayRows";
public const string Bool = "Bool";
public const string Cell = "Cell";
public const string Constant = "Constant";
public const string ConstantArray = "ConstantArray";
public const string DynamicDataExchange = "DynamicDataExchange";
public const string EmptyArgument = "EmptyArgument";
public const string Error = "Error";
public const string ExcelFunction = "ExcelFunction";
public const string File = "File";
public const string Formula = "Formula";
public const string FormulaWithEq = "FormulaWithEq";
public const string FunctionCall = "FunctionCall";
public const string FunctionName = "FunctionName";
// The constant is called HorizontalRange, the node name itself is "HRange"
public const string HorizontalRange = "HRange";
public const string NamedRange = "NamedRange";
public const string Number = "Number";
public const string Prefix = "Prefix";
public const string QuotedFileSheet = "QuotedFileSheet";
public const string Range = "Range";
public const string Reference = "Reference";
public const string ReferenceFunctionCall = "ReferenceFunctionCall";
public const string RefError = "RefError";
public const string RefFunctionName = "RefFunctionName";
public const string ReservedName = "ReservedName";
public const string Sheet = "Sheet";
public const string StructuredReference = "StructuredReference";
public const string StructuredReferenceElement = "StructuredReferenceElement";
public const string StructuredReferenceExpression = "StructuredReferenceExpression";
public const string StructuredReferenceTable = "StructuredReferenceTable";
public const string Text = "Text";
public const string UDFName = "UDFName";
public const string UDFunctionCall = "UDFunctionCall";
public const string Union = "Union";
// The constant is called VerticalRange, the node name itself is "VRange"
public const string VerticalRange = "VRange";
#endregion
#region Transient Non-Terminals
// Names of the non-terminals that the grammar marks as transient
public const string TransientStart = "Start";
public const string TransientInfixOp = "InfixOp";
public const string TransientPostfixOp = "PostfixOp";
public const string TransientPrefixOp = "PrefixOp";
public const string TransientReferenceItem = "ReferenceItem";
#endregion
#region Terminals
public const string TokenBool = "BoolToken";
public const string TokenCell = "CellToken";
public const string TokenEmptyArgument = "EmptyArgumentToken";
public const string TokenError = "ErrorToken";
public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
public const string TokenFilePath = "FilePathToken";
public const string TokenFileName = "FileNameToken";
public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
public const string TokenFileNameNumeric = "FileNameNumericToken";
public const string TokenHRange = "HRangeToken";
// Name of the intersection operator terminal
public const string TokenIntersect = "INTERSECT";
public const string TokenMultipleSheets = "MultipleSheetsToken";
public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
public const string TokenName = "NameToken";
public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
public const string TokenNumber = "NumberToken";
public const string TokenRefError = "RefErrorToken";
public const string TokenReservedName = "ReservedNameToken";
public const string TokenSingleQuotedString = "SingleQuotedString";
public const string TokenSheet = "SheetNameToken";
public const string TokenSheetQuoted = "SheetNameQuotedToken";
public const string TokenSRTableName = "SRTableName";
public const string TokenSRKeyword = "SRKeyword";
public const string TokenSRColumn = "SRColumn";
public const string TokenSREnclosedColumn = "SREnclosedColumn";
public const string TokenText = "TextToken";
public const string TokenUDF = "UDFToken";
// The union operator is named after its symbol
public const string TokenUnionOperator = ",";
public const string TokenVRange = "VRangeToken";
#endregion
}
#endregion
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v142/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v142
{
///
/// Excel formula parser
/// Contains parser and utilities that operate directly on the parse tree, or makes working with the parse tree easier.
///
public static class ExcelFormulaParser
{
/// <summary>
/// Parser instance cached per thread (the backing field is marked ThreadStatic)
/// </summary>
[ThreadStatic] private static Parser _p;
/// <summary>
/// Parser of the current thread, created from a fresh ExcelFormulaGrammar on first access
/// </summary>
private static Parser P
{
    get
    {
        if (_p == null)
        {
            _p = new Parser(new ExcelFormulaGrammar());
        }
        return _p;
    }
}
/// <summary>
/// Parse a formula and return the root node of the resulting tree
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed
/// </exception>
/// <returns>Parse tree root node</returns>
public static ParseTreeNode Parse(string input) => ParseToTree(input).Root;
/// <summary>
/// Parse a formula and return the complete parse tree
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed
/// </exception>
/// <returns>Parse tree</returns>
public static ParseTree ParseToTree(string input)
{
    ParseTree parsed = P.Parse(input);
    if (parsed.HasErrors())
    {
        throw new ArgumentException($"Failed parsing input <<{input}>>");
    }

    // Give every intersect node a span of length 1 that starts one position/column earlier
    foreach (ParseTreeNode intersectNode in parsed.Root.AllNodes().Where(n => n.Is(GrammarNames.TokenIntersect)))
    {
        var reported = intersectNode.Span.Location;
        var shifted = new SourceLocation(reported.Position - 1, reported.Line, reported.Column - 1);
        intersectNode.Span = new SourceSpan(shifted, 1);
    }

    // Let PrefixInfo correct every quoted sheet node for whitespace, based on the raw input
    foreach (ParseTreeNode sheetNode in parsed.Root.AllNodes().Where(n => n.Is(GrammarNames.TokenSheetQuoted)))
    {
        PrefixInfo.FixQuotedSheetNodeForWhitespace(sheetNode, input);
    }

    return parsed;
}
/// <summary>
/// All nodes below (and including) the root in depth-first pre-order, with a conditional stop
/// </summary>
/// <param name="root">The root node</param>
/// <param name="stopAt">Don't process the children of a node matching this predicate; the node itself is still returned</param>
// inspiration taken from https://irony.codeplex.com/discussions/213938
public static IEnumerable AllNodesConditional(this ParseTreeNode root, Predicate stopAt = null)
{
    var pending = new Stack();
    pending.Push(root);
    while (pending.Count != 0)
    {
        var current = pending.Pop();
        yield return current;
        // Descend only when there is no stop predicate or it rejects this node
        if (stopAt == null || !stopAt(current))
        {
            var kids = current.ChildNodes;
            // Last child is pushed first, so the first child is popped (visited) first
            for (var remaining = kids.Count; remaining > 0; remaining--)
            {
                pending.Push(kids[remaining - 1]);
            }
        }
    }
}
/// <summary>
/// All nodes of the tree in depth-first pre-order, without any stop condition
/// </summary>
public static IEnumerable AllNodes(this ParseTreeNode root) => root.AllNodesConditional();
/// <summary>
/// All nodes of a certain type in depth-first pre-order
/// </summary>
public static IEnumerable AllNodes(this ParseTreeNode root, string type) => AllNodes(AllNodes(root), type);
// Keeps only the nodes whose type equals the requested one; evaluated lazily
internal static IEnumerable AllNodes(IEnumerable allNodes, string type)
{
    return from candidate in allNodes
           where candidate.Is(type)
           select candidate;
}
/// <summary>
/// Get the parent node of a node
/// </summary>
/// <remarks>
/// This is an expensive operation, as the whole tree will be searched through
/// </remarks>
/// <exception cref="ArgumentException">If no node below <paramref name="treeRoot"/> has the child among its child nodes</exception>
public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
{
    foreach (var candidate in treeRoot.AllNodes())
    {
        if (candidate.ChildNodes.Any(c => c == child))
        {
            return candidate;
        }
    }
    throw new ArgumentException("Child is not part of the tree", nameof(child));
}
/// <summary>
/// The node type, i.e. the name of the grammar term of the node
/// </summary>
public static string Type(this ParseTreeNode node) => node.Term.Name;
/// <summary>
/// Check whether the type of a node equals the given type name
/// </summary>
public static bool Is(this ParseTreeNode pt, string type) => pt.Type() == type;
/// <summary>
/// Checks whether this node is a function: a (reference/user defined) function call,
/// or a two-child Reference whose second child is itself a function
/// </summary>
public static Boolean IsFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            return true;
        case GrammarNames.Reference:
            // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
            return input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction();
        default:
            return false;
    }
}
/// <summary>
/// Whether or not this node represents parentheses "(_)": a Formula or Reference
/// whose only child has the same type as the node itself
/// </summary>
public static bool IsParentheses(this ParseTreeNode input)
{
    var kind = input.Type();
    if (kind != GrammarNames.Formula && kind != GrammarNames.Reference)
    {
        return false;
    }
    return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(kind);
}
/// <summary>
/// Whether this node is a function with three children of which the middle one is an operator
/// </summary>
public static bool IsBinaryOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 3)
    {
        return false;
    }
    var middle = input.ChildNodes[1];
    return middle.Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Whether this node is a binary operation of type FunctionCall
/// </summary>
public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
    => input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);
/// <summary>
/// Whether this node is a binary operation of type ReferenceFunctionCall
/// </summary>
public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
    => input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
/// <summary>
/// Whether this node is a unary prefix or a unary postfix operation
/// </summary>
public static bool IsUnaryOperation(this ParseTreeNode input)
    => input.IsUnaryPrefixOperation() || input.IsUnaryPostfixOperation();
/// <summary>
/// Whether this node is a function with two children of which the first one is an operator
/// </summary>
public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }
    var leading = input.ChildNodes[0];
    return leading.Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Whether this node is a function with two children of which the second one is an operator
/// </summary>
public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }
    var trailing = input.ChildNodes[1];
    return trailing.Term.Flags.HasFlag(TermFlags.IsOperator);
}
// Returns the input without its last character
private static string RemoveFinalSymbol(string input)
{
    var lastIndex = input.Length - 1;
    return input.Remove(lastIndex);
}
/// <summary>
/// Get the function or operator name of this function call
/// </summary>
/// <param name="input">A node for which <c>IsFunction</c> holds</param>
/// <returns>
/// The intersect or union operator name, the printed operator of a unary/binary operation,
/// the upper-cased name of a named function, or the printed prefix followed by the
/// function name for an external user defined function
/// </returns>
/// <exception cref="ArgumentException">If the node is none of the above</exception>
public static string GetFunction(this ParseTreeNode input)
{
    if (input.IsIntersection())
    {
        return GrammarNames.TokenIntersect;
    }
    if (input.IsUnion())
    {
        return GrammarNames.TokenUnionOperator;
    }
    if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
    {
        // The operator is the second child in both cases
        return input.ChildNodes[1].Print();
    }
    if (input.IsUnaryPrefixOperation())
    {
        return input.ChildNodes[0].Print();
    }
    if (input.IsNamedFunction())
    {
        // The final symbol of the printed name is stripped before upper-casing.
        // Upper-case with the invariant culture: function names are identifiers, and a
        // culture-sensitive ToUpper() would e.g. turn "if" into "İF" under a Turkish culture.
        return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
    }
    if (input.IsExternalUDFunction())
    {
        // Prefix followed by the name of the wrapped function call
        return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
    }
    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Check whether this node is a function whose name equals the given function name
/// </summary>
public static bool MatchFunction(this ParseTreeNode input, string functionName)
    => input.IsFunction() && input.GetFunction() == functionName;
/// <summary>
/// Get all the arguments of a function or operation
/// </summary>
/// <exception cref="ArgumentException">If the node is not a function call or operation</exception>
public static IEnumerable GetFunctionArguments(this ParseTreeNode input)
{
    if (input.IsNamedFunction())
    {
        // Second child is the "Arguments" non-terminal; every "Argument" is unwrapped to its first child
        var argumentNodes = input.ChildNodes[1].ChildNodes;
        return argumentNodes.Select(argument => argument.ChildNodes[0]);
    }
    if (input.IsBinaryOperation())
    {
        // Operands surround the operator
        var left = input.ChildNodes[0];
        var right = input.ChildNodes[2];
        return new[] {left, right};
    }
    if (input.IsUnaryPrefixOperation())
    {
        var operand = input.ChildNodes[1];
        return new[] {operand};
    }
    if (input.IsUnaryPostfixOperation())
    {
        var operand = input.ChildNodes[0];
        return new[] {operand};
    }
    if (input.IsUnion())
    {
        // The single child is the Union node; its children are the united references
        var unionNode = input.ChildNodes[0];
        return unionNode.ChildNodes;
    }
    if (input.IsExternalUDFunction())
    {
        // Reference -> UDFunctionCall -> Arguments -> Argument non-terminals
        var callNode = input.ChildNodes[1];
        var argumentNodes = callNode.ChildNodes[1].ChildNodes;
        return argumentNodes.Select(argument => argument.ChildNodes[0]);
    }
    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Checks whether this node is a function whose first child is a FunctionName or RefFunctionName
/// </summary>
public static bool IsBuiltinFunction(this ParseTreeNode node)
{
    if (!node.IsFunction())
    {
        return false;
    }
    var head = node.ChildNodes[0];
    return head.Is(GrammarNames.FunctionName) || head.Is(GrammarNames.RefFunctionName);
}
/// <summary>
/// Whether or not this node is a binary operation whose operator is the intersect terminal
/// </summary>
public static bool IsIntersection(this ParseTreeNode input)
{
    if (!input.IsBinaryOperation())
    {
        return false;
    }
    var operatorName = input.ChildNodes[1].Token.Terminal.Name;
    return operatorName == GrammarNames.TokenIntersect;
}
/// <summary>
/// Whether or not this node is a ReferenceFunctionCall whose only child is a Union
/// </summary>
public static bool IsUnion(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.ReferenceFunctionCall) || input.ChildNodes.Count != 1)
    {
        return false;
    }
    return input.ChildNodes[0].Is(GrammarNames.Union);
}
/// <summary>
/// Checks whether this node is a function call with name, and not just a unary or binary operation
/// </summary>
public static bool IsNamedFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.FunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.FunctionName);
        case GrammarNames.ReferenceFunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.RefFunctionName);
        case GrammarNames.UDFunctionCall:
            // User defined function calls always carry a name
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Whether this node is a binary or a unary operation
/// </summary>
public static bool IsOperation(this ParseTreeNode input)
    => input.IsBinaryOperation() || input.IsUnaryOperation();
/// <summary>
/// Whether this node is a two-child Reference whose second child is a named function
/// </summary>
public static bool IsExternalUDFunction(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.Reference) || input.ChildNodes.Count != 2)
    {
        return false;
    }
    return input.ChildNodes[1].IsNamedFunction();
}
/// <summary>
/// True if this node is a unary prefix operation whose operand is a Constant holding a Number
/// </summary>
public static bool IsNumberWithSign(this ParseTreeNode input)
{
    if (!IsUnaryPrefixOperation(input))
    {
        return false;
    }
    // First child of the operand (the node following the sign)
    var operandContent = input.ChildNodes[1].ChildNodes[0];
    return operandContent.Is(GrammarNames.Constant)
           && operandContent.ChildNodes[0].Is(GrammarNames.Number);
}
/// <summary>
/// Extract the information of a Prefix non-terminal by delegating to PrefixInfo.From
/// </summary>
public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix)
{
    return PrefixInfo.From(prefix);
}
/// <summary>
/// Go to the first non-formula node by repeatedly descending into the first child of Formula nodes
/// </summary>
public static ParseTreeNode SkipFormula(this ParseTreeNode input)
{
    var current = input;
    for (; current.Is(GrammarNames.Formula); current = current.ChildNodes.First())
    {
        // All work happens in the loop header
    }
    return current;
}
/// <summary>
/// Get all child nodes that are references and aren't part of another reference expression
/// </summary>
/// <remarks>
/// The traversal does not descend below a Reference node; every Reference found is
/// passed through SkipToRelevant before it is returned.
/// </remarks>
public static IEnumerable GetReferenceNodes(this ParseTreeNode input)
{
    return from candidate in input.AllNodesConditional(visited => visited.Is(GrammarNames.Reference))
           where candidate.Is(GrammarNames.Reference)
           select candidate.SkipToRelevant();
}
/// <summary>
/// Gets the ParserReferences from the input parse tree node and its children
/// </summary>
/// <remarks>
/// 5 cases:
/// 1. ReferenceItem node: convert to ParserReference
/// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
/// (to include the references in the arguments of external UDFs)
/// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
/// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
/// 5. Other cases (RefFunctionCall, Union, Arguments):recursive call on the nodes returned from GetReferenceNodes(node)
/// </remarks>
public static IEnumerable GetParserReferences(this ParseTreeNode node)
{
// Unwrap a Reference that merely wraps a single child (one level only)
if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
node = node.ChildNodes[0];
var list = new List();
switch (node.Type())
{
case GrammarNames.Cell:
case GrammarNames.NamedRange:
case GrammarNames.HorizontalRange:
case GrammarNames.VerticalRange:
case GrammarNames.StructuredReference:
list.Add(new ParserReference(node));
break;
case GrammarNames.Reference:
// The reference itself, plus every reference found below its second child
list.Add(new ParserReference(node));
list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
break;
default:
if (node.IsRange())
{
// Children 0 and 2 are the range limits, child 1 is the colon operator
var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray();
var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray();
if (IsCellReference(rangeStart) && IsCellReference(rangeEnd))
{
// Both limits are single cells: reuse the start reference and turn it into a CellRange
ParserReference range = rangeStart.First();
range.MaxLocation = rangeEnd.First().MinLocation;
range.ReferenceType = ReferenceType.CellRange;
range.LocationString = node.Print();
list.Add(range);
}
else
{
// Complex limits: return the references of both limits separately
list.AddRange(rangeStart);
list.AddRange(rangeEnd);
}
}
else
{
list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
}
break;
}
return list;
}
// True when the list holds exactly one reference and that reference is of type Cell
private static bool IsCellReference(IList references)
{
    if (references.Count != 1)
    {
        return false;
    }
    return references.First().ReferenceType == ReferenceType.Cell;
}
/// <summary>
/// Whether or not this node is a binary reference operation whose operator is ":"
/// </summary>
public static bool IsRange(this ParseTreeNode input)
{
    if (!input.IsBinaryReferenceOperation())
    {
        return false;
    }
    return input.ChildNodes[1].Is(":");
}
/// <summary>
/// Go to the first "relevant" child node, i.e. skips wrapper nodes
/// </summary>
/// <param name="input">The input parse tree node</param>
/// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
/// <remarks>
/// Skips:
/// * FormulaWithEq and ArrayFormula nodes
/// * Formula and Argument nodes with exactly one child
/// * Parentheses
/// * Reference nodes which are just wrappers
/// </remarks>
public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
{
    var current = input;
    for (;;)
    {
        var kind = current.Type();
        if (kind == GrammarNames.FormulaWithEq || kind == GrammarNames.ArrayFormula)
        {
            // The formula is the second child
            current = current.ChildNodes[1];
            continue;
        }
        if (kind == GrammarNames.Argument || kind == GrammarNames.Formula)
        {
            if (current.ChildNodes.Count != 1)
            {
                return current;
            }
            current = current.ChildNodes[0];
            continue;
        }
        if (kind == GrammarNames.Reference)
        {
            // References without a prefix have exactly one child; they are skipped only if the option is set
            var wrapperWithoutPrefix = skipReferencesWithoutPrefix && current.ChildNodes.Count == 1;
            if (!wrapperWithoutPrefix && !current.IsParentheses())
            {
                return current;
            }
            current = current.ChildNodes[0];
            continue;
        }
        return current;
    }
}
/// <summary>
/// Pretty-print a parse tree to a string
/// </summary>
/// <param name="input">Root of the (sub)tree to print</param>
/// <returns>The token text for terminals, otherwise the recursively printed children combined according to the node type</returns>
/// <exception cref="ArgumentException">
/// If a function node matches none of the known function shapes, or if a node type
/// that is not handled explicitly has a number of children other than one
/// </exception>
public static string Print(this ParseTreeNode input)
{
// For terminals, just print the token text
if (input.Term is Terminal)
{
return input.Token.Text;
}
// (Lazy) enumerable for printed children
var children = input.ChildNodes.Select(Print);
// Concrete list when needed
List childrenList;
// Switch on non-terminals
switch (input.Term.Name)
{
case GrammarNames.Formula:
// Check if these are brackets, otherwise print first child
return IsParentheses(input) ? $"({children.First()})" : children.First();
case GrammarNames.FunctionCall:
case GrammarNames.ReferenceFunctionCall:
case GrammarNames.UDFunctionCall:
childrenList = children.ToList();
if (input.IsNamedFunction())
{
// Only the closing parenthesis is appended here
return string.Join("", childrenList) + ")";
}
if (input.IsBinaryOperation())
{
// format string for "normal" binary operation
string format = "{0} {1} {2}";
if (input.IsIntersection())
{
// The intersect operator is printed as a single space between the operands
format = "{0} {2}";
}else if (input.IsBinaryReferenceOperation())
{
// Other reference operators are printed without surrounding spaces
format = "{0}{1}{2}";
}
return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
}
if (input.IsUnion())
{
return $"({string.Join(",", childrenList)})";
}
if (input.IsUnaryOperation())
{
return string.Join("", childrenList);
}
throw new ArgumentException("Unknown function type.");
case GrammarNames.Reference:
return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);
case GrammarNames.Prefix:
var ret = string.Join("", children);
// The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
{
ret += "!";
}
return ret;
case GrammarNames.ArrayFormula:
// The second child is the formula
return "{=" + children.ElementAt(1) + "}";
case GrammarNames.StructuredReference:
var sb = new StringBuilder();
// True when the reference starts with a table name
var hashtable = input.ChildNodes.Count >= 1 && input.ChildNodes[0].Is(GrammarNames.StructuredReferenceTable);
// Index of the child holding the contents: it follows the table name if there is one
var contentsNode = hashtable ? 1 : 0;
childrenList = children.ToList();
if (hashtable)
{
sb.Append(childrenList[0]);
}
if (hashtable && input.ChildNodes.Count == 1)
{
// Full table reference
sb.Append("[]");
}
else if (input.ChildNodes[contentsNode].Is(GrammarNames.StructuredReferenceElement))
{
// A single element is printed as is
sb.Append(childrenList[contentsNode]);
}
else
{
// Anything else is wrapped in square brackets
sb.Append($"[{childrenList[contentsNode]}]");
}
return sb.ToString();
// Terms for which to print all child nodes concatenated
case GrammarNames.ArrayConstant:
case GrammarNames.DynamicDataExchange:
case GrammarNames.FormulaWithEq:
case GrammarNames.File:
case GrammarNames.StructuredReferenceExpression:
return string.Join("", children);
// Terms for which we print the children comma-separated
case GrammarNames.Arguments:
case GrammarNames.ArrayRows:
case GrammarNames.Union:
return string.Join(",", children);
case GrammarNames.ArrayColumns:
return string.Join(";", children);
case GrammarNames.ConstantArray:
return $"{{{children.First()}}}";
default:
// If it is not defined above and the number of children is exactly one, we want to just print the first child
if (input.ChildNodes.Count == 1)
{
return children.First();
}
throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine +
"This probably means the Excel grammar was modified without the print function being modified");
}
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v142/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v142
{
/// <summary>
/// The kinds of reference a <see cref="ParserReference"/> can describe.
/// </summary>
/// <remarks>
/// <see cref="Cell"/> is the first member and therefore the default value of the enum.
/// </remarks>
public enum ReferenceType
{
// A single cell; assigned for Cell nodes.
Cell,
// A range of cells. Not assigned by ParserReference.InitializeReference in this file.
CellRange,
// A named range; assigned for NamedRange nodes.
UserDefinedName,
// A whole-row range; assigned for HorizontalRange nodes.
HorizontalRange,
// A whole-column range; assigned for VerticalRange nodes.
VerticalRange,
// A reference error; assigned for RefError nodes.
RefError,
// A structured (table) reference; assigned for StructuredReference nodes.
Table
}
/// <summary>
/// Describes one reference found in a formula parse tree: the file and sheet(s) it points to,
/// its location(s) and the kind of reference it is.
/// </summary>
public class ParserReference
{
    /// <summary>Row number used as the last row when a whole-column range is turned into locations.</summary>
    public const int MaxRangeHeight = 100;

    /// <summary>Number of columns assumed when a whole-row range is turned into locations.</summary>
    public const int MaxRangeWidth = 100;

    /// <summary>Kind of reference. Defaults to <see cref="XLParser.Web.XLParserVersions.v142.ReferenceType.Cell"/> when never assigned.</summary>
    public ReferenceType ReferenceType { get; set; }

    /// <summary>Printed form of the node this reference was initialized from.</summary>
    public string LocationString { get; set; }

    /// <summary>Sheet name, or the first sheet when the prefix spans multiple sheets.</summary>
    public string Worksheet { get; set; }

    /// <summary>Last sheet when the prefix spans multiple sheets; otherwise not set.</summary>
    public string LastWorksheet { get; set; }

    /// <summary>File path taken from the prefix, when it has one.</summary>
    public string FilePath { get; set; }

    /// <summary>File name, or the file number as text, taken from the prefix.</summary>
    public string FileName { get; set; }

    /// <summary>Name of the named range or table, when applicable.</summary>
    public string Name { get; private set; }

    /// <summary>Location as appearing in the formula, eg $A$1.</summary>
    public string MinLocation { get; set; }

    /// <summary>End location; equal to <see cref="MinLocation"/> for single cells.</summary>
    public string MaxLocation { get; set; }

    /// <summary>
    /// Creates a reference from explicit values.
    /// </summary>
    /// <param name="referenceType">Kind of reference.</param>
    /// <param name="locationString">Printed form of the reference.</param>
    /// <param name="worksheet">Sheet name (first sheet for multi-sheet references).</param>
    /// <param name="lastWorksheet">Last sheet for multi-sheet references.</param>
    /// <param name="filePath">Path of the referenced file.</param>
    /// <param name="fileName">Name of the referenced file.</param>
    /// <param name="name">Name of the named range or table.</param>
    /// <param name="minLocation">Start location.</param>
    /// <param name="maxLocation">End location; falls back to <paramref name="minLocation"/> when null.</param>
    public ParserReference(ReferenceType referenceType, string locationString = null, string worksheet = null, string lastWorksheet = null,
        string filePath = null, string fileName = null, string name = null, string minLocation = null, string maxLocation = null)
    {
        ReferenceType = referenceType;
        LocationString = locationString;
        Worksheet = worksheet;
        LastWorksheet = lastWorksheet;
        FilePath = filePath;
        FileName = fileName;
        Name = name;
        MinLocation = minLocation;
        MaxLocation = maxLocation ?? minLocation;
    }

    /// <summary>
    /// Creates a reference from a parse tree node; see <see cref="InitializeReference"/>.
    /// </summary>
    public ParserReference(ParseTreeNode node)
    {
        InitializeReference(node);
    }

    /// <summary>
    /// Initializes the current object based on the input ParseTreeNode
    /// </summary>
    /// <remarks>
    /// For Reference nodes (Prefix ReferenceItem), it initializes the values derived from the Prefix node and
    /// is re-invoked for the ReferenceItem node. Node types not listed in the switch only set
    /// <see cref="LocationString"/>.
    /// NOTE(review): the Reference case reads child 0 as the prefix and child 1 as the item, so it
    /// presumably expects only prefixed references - confirm against callers.
    /// </remarks>
    public void InitializeReference(ParseTreeNode node)
    {
        switch (node.Type())
        {
            case GrammarNames.Reference:
                PrefixInfo prefix = node.ChildNodes[0].GetPrefixInfo();
                // Doubled single quotes inside a sheet name are collapsed to one quote
                Worksheet = prefix.HasSheet ? prefix.Sheet.Replace("''", "'") : "(Undefined sheet)";
                if (prefix.HasMultipleSheets)
                {
                    string[] sheets = prefix.MultipleSheets.Split(':');
                    Worksheet = sheets[0];
                    LastWorksheet = sheets[1];
                }
                if (prefix.HasFilePath)
                {
                    FilePath = prefix.FilePath;
                }
                if (prefix.HasFileNumber)
                {
                    FileName = prefix.FileNumber.ToString();
                }
                else if (prefix.HasFileName)
                {
                    FileName = prefix.FileName;
                }
                // The recursive call sets LocationString to the item's text; it is overwritten
                // below with the text of the whole reference.
                InitializeReference(node.ChildNodes[1]);
                break;
            case GrammarNames.Cell:
                ReferenceType = ReferenceType.Cell;
                MinLocation = node.ChildNodes[0].Token.ValueString;
                MaxLocation = MinLocation;
                break;
            case GrammarNames.NamedRange:
                ReferenceType = ReferenceType.UserDefinedName;
                Name = node.ChildNodes[0].Token.ValueString;
                break;
            case GrammarNames.StructuredReference:
                ReferenceType = ReferenceType.Table;
                // Name stays null when the structured reference has no table part
                Name = node.ChildNodes.FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceTable)?.ChildNodes[0].Token.ValueString;
                break;
            case GrammarNames.HorizontalRange:
                string[] horizontalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                ReferenceType = ReferenceType.HorizontalRange;
                // Whole rows are bounded to columns A .. MaxRangeWidth
                MinLocation = "A" + horizontalLimits[0];
                MaxLocation = ConvertColumnToStr(MaxRangeWidth - 1) + horizontalLimits[1];
                break;
            case GrammarNames.VerticalRange:
                string[] verticalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                ReferenceType = ReferenceType.VerticalRange;
                // Whole columns are bounded to rows 1 .. MaxRangeHeight
                MinLocation = verticalLimits[0] + "1";
                MaxLocation = verticalLimits[1] + MaxRangeHeight;
                break;
            case GrammarNames.RefError:
                ReferenceType = ReferenceType.RefError;
                break;
        }
        LocationString = node.Print();
    }

    /// <summary>
    /// Converts the column number to an Excel column string representation.
    /// </summary>
    /// <param name="columnNumber">The zero-based column number.</param>
    /// <returns>The column letters, e.g. "A" for 0 and "AA" for 26; empty for negative input.</returns>
    private static string ConvertColumnToStr(int columnNumber)
    {
        var sb = new System.Text.StringBuilder();
        while (columnNumber >= 0)
        {
            // 65 is 'A'; letters are produced from least to most significant
            sb.Insert(0, (char)(65 + columnNumber % 26));
            columnNumber = columnNumber / 26 - 1;
        }
        return sb.ToString();
    }

    /// <summary>
    /// Returns the location for cell references, or "min:max" for every other reference type.
    /// </summary>
    /// <remarks>
    /// Never throws: a cell reference without a location yields an empty string instead of a
    /// <see cref="System.NullReferenceException"/>.
    /// </remarks>
    public override string ToString()
    {
        if (ReferenceType == ReferenceType.Cell)
        {
            return MinLocation ?? string.Empty;
        }
        return $"{MinLocation}:{MaxLocation}";
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v142/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v142
{
// TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23
/// <summary>
/// Simple data class that holds information about a Prefix.
/// </summary>
/// <remarks>
/// Equality and hashing compare file number, file path, file name, sheet and multiple sheets
/// (strings case-insensitively); <see cref="IsQuoted"/> does not take part.
/// </remarks>
public class PrefixInfo : IEquatable<PrefixInfo>
{
    /// <summary>Path of the referenced file, or null.</summary>
    public string FilePath { get; }
    public bool HasFilePath => FilePath != null;

    private readonly int? _fileNumber;

    /// <summary>Numeric file identifier; 0 when <see cref="HasFileNumber"/> is false.</summary>
    public int FileNumber => _fileNumber.GetValueOrDefault();
    public bool HasFileNumber => _fileNumber.HasValue;

    /// <summary>Name of the referenced file, or null.</summary>
    public string FileName { get; }
    public bool HasFileName => FileName != null;

    /// <summary>True when the prefix names a file, either by name or by number.</summary>
    public bool HasFile => HasFileName || HasFileNumber;

    /// <summary>Sheet name without the trailing exclamation mark (and quote), or null.</summary>
    public string Sheet { get; }
    public bool HasSheet => Sheet != null;

    /// <summary>Multiple-sheet text without the trailing exclamation mark, or null.</summary>
    public string MultipleSheets { get; }
    public bool HasMultipleSheets => MultipleSheets != null;

    /// <summary>True when the prefix started with a single quote.</summary>
    public bool IsQuoted { get; }

    public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
    {
        Sheet = sheet;
        _fileNumber = fileNumber;
        FileName = fileName;
        FilePath = filePath;
        MultipleSheets = multipleSheets;
        IsQuoted = isQuoted;
    }

    /// <summary>
    /// Create a PrefixInfo class from a parse tree node
    /// </summary>
    /// <param name="prefix">A node of type <see cref="GrammarNames.Prefix"/>.</param>
    /// <exception cref="ArgumentException"><paramref name="prefix"/> is not a Prefix node.</exception>
    internal static PrefixInfo From(ParseTreeNode prefix)
    {
        if (prefix.Type() != GrammarNames.Prefix)
        {
            throw new ArgumentException("Not a prefix", nameof(prefix));
        }

        string filePath = null;
        int? fileNumber = null;
        string fileName = null;
        string sheetName = null;
        string multipleSheets = null;

        // Token number we're processing
        var cur = 0;

        // Check for quotes
        var isQuoted = prefix.ChildNodes[cur].Is("'");
        if (isQuoted)
        {
            cur++;
        }

        // Check and process file
        if (prefix.ChildNodes[cur].Is(GrammarNames.File))
        {
            ParseTreeNode file = prefix.ChildNodes[cur];
            if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
            {
                // Numeric filename: strip the first and last character (the brackets) before parsing
                fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
            }
            else
            {
                // String filename
                var iCur = 0;
                // Check if it includes a path
                if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                {
                    filePath = file.ChildNodes[iCur].Print();
                    iCur++;
                }

                if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                {
                    fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                }
                else
                {
                    fileName = file.ChildNodes[iCur].Print();
                }
            }
            cur++;
        }

        // A prefix may consist of a file only, in which case no child is left to inspect.
        if (cur < prefix.ChildNodes.Count)
        {
            ParseTreeNode sheetNode = prefix.ChildNodes[cur];
            // Check for a non-quoted sheet
            if (sheetNode.Is(GrammarNames.TokenSheet))
            {
                // remove !
                sheetName = Substr(sheetNode.Print(), 1);
            }
            // Check for a quoted sheet
            else if (sheetNode.Is(GrammarNames.TokenSheetQuoted))
            {
                // remove quote and !
                sheetName = Substr(sheetNode.Print(), 2);
                if (sheetName == "")
                {
                    // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                    // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                    sheetName = " ";
                }
            }
            // Check if multiple sheets
            else if (sheetNode.Is(GrammarNames.TokenMultipleSheets))
            {
                multipleSheets = Substr(sheetNode.Print(), 1);
            }
        }

        return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
    }

    /// <summary>
    /// Extends a quoted sheet node backwards over any whitespace that directly precedes it in
    /// <paramref name="sourceText"/>, updating the node's span and token text.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the new column is the old column plus the number of characters the start moved
    /// back; confirm whether it should be decreased instead.
    /// </remarks>
    internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
    {
        var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
        SourceLocation currentLocation = quotedSheetNode.Span.Location;
        if (newPosition == currentLocation.Position)
        {
            // No preceding whitespace, nothing to fix
            return;
        }

        var newLocation = new SourceLocation(newPosition, currentLocation.Line, currentLocation.Column + currentLocation.Position - newPosition);
        quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

        // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
        // If the field cannot be found the text is silently left unchanged.
        typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
            ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
    }

    /// <summary>
    /// Walks back from the node's start position while the preceding character is whitespace.
    /// </summary>
    private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
    {
        var startIndex = nodeSheetQuoted.Span.Location.Position;
        while (startIndex > 0)
        {
            if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
            {
                break;
            }
            startIndex--;
        }
        return startIndex;
    }

    /// <summary>
    /// Returns <paramref name="s"/> without its first <paramref name="removeFirst"/> and last
    /// <paramref name="removeLast"/> characters.
    /// </summary>
    private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
    {
        return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
    }

    public override bool Equals(object other) => Equals(other as PrefixInfo);

    public bool Equals(PrefixInfo other)
    {
        if (ReferenceEquals(null, other)) return false;
        if (ReferenceEquals(this, other)) return true;
        return _fileNumber == other._fileNumber
               && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase)
               && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase)
               && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase)
               && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
    }

    public override int GetHashCode()
    {
        unchecked
        {
            var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
            hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
            hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
            hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
            hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
            return hashCode;
        }
    }

    public static bool operator ==(PrefixInfo left, PrefixInfo right)
    {
        return Equals(left, right);
    }

    public static bool operator !=(PrefixInfo left, PrefixInfo right)
    {
        return !Equals(left, right);
    }

    /// <summary>
    /// Rebuilds the prefix text: optional quote, path, bracketed file number/name, sheet(s),
    /// optional closing quote and a trailing exclamation mark.
    /// </summary>
    public override string ToString()
    {
        var res = new StringBuilder();
        if (IsQuoted) res.Append("'");
        if (HasFilePath) res.Append(FilePath);
        if (HasFileNumber) res.Append($"[{FileNumber}]");
        if (HasFileName) res.Append($"[{FileName}]");
        if (HasSheet) res.Append(Sheet);
        if (HasMultipleSheets) res.Append(MultipleSheets);
        if (IsQuoted) res.Append("'");
        res.Append("!");
        return res.ToString();
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v150/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.IO;
using System.Reflection;
namespace XLParser.Web.XLParserVersions.v150
{
///
/// Contains the XLParser grammar
///
[Language("Excel Formulas", "1.5.0", "Grammar for Excel Formulas")]
public class ExcelFormulaGrammar : Grammar
{
#region 1-Terminals
#region Symbols and operators
// Key terms for punctuation and operator symbols. Except for intersectop these are
// expression-bodied properties, so every access calls ToTerm(...) again with the same text.
public Terminal at => ToTerm("@");
public Terminal comma => ToTerm(",");
public Terminal colon => ToTerm(":");
public Terminal hash => ToTerm("#");
public Terminal semicolon => ToTerm(";");
public Terminal OpenParen => ToTerm("(");
public Terminal CloseParen => ToTerm(")");
public Terminal CloseSquareParen => ToTerm("]");
public Terminal OpenSquareParen => ToTerm("[");
public Terminal exclamationMark => ToTerm("!");
public Terminal CloseCurlyParen => ToTerm("}");
public Terminal OpenCurlyParen => ToTerm("{");
public Terminal QuoteS => ToTerm("'");
// Arithmetic and text operators
public Terminal mulop => ToTerm("*");
public Terminal plusop => ToTerm("+");
public Terminal divop => ToTerm("/");
public Terminal minop => ToTerm("-");
public Terminal concatop => ToTerm("&");
public Terminal expop => ToTerm("^");
// Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
// Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);
public Terminal percentop => ToTerm("%");
// Comparison operators
public Terminal gtop => ToTerm(">");
public Terminal eqop => ToTerm("=");
public Terminal ltop => ToTerm("<");
public Terminal neqop => ToTerm("<>");
public Terminal gteop => ToTerm(">=");
public Terminal lteop => ToTerm("<=");
#endregion
#region Literals
// Terminals for constant values: booleans, numbers, text and error values.
public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE")
{
Priority = TerminalPriority.Bool
};
// Integers are tried as Int32, then Int64, then big integer
public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None)
{
DefaultIntTypes = new[] {TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt}
};
// Double-quoted text; a doubled quote stands for a quote character and there are no backslash escapes
public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes);
// Same options as TextToken, but delimited by single quotes and with its own (lower) priority
public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes)
{ Priority = TerminalPriority.SingleQuotedString };
// Error values. #REF! is not part of this pattern; it is matched by RefErrorToken below.
public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!");
public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError);
#endregion
#region Functions
// Terminals for function names. Every function token pattern ends in "\(", so the opening
// parenthesis is part of the token.
private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers
// Character-class content: the special characters above plus backslash, dot and word characters
private const string AllUdfChars = SpecialUdfChars + @"\\.\w";
// Optional qualifier in front of a UDF name: a quoted name ending in ".xla" followed by "!", or "_xll."
private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)";
// The following regex uses the rather exotic feature Character Class Subtraction
// https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction
// Three alternatives, each followed by "(":
//  - a single allowed character other than C, c, R or r
//  - a prefix followed by a single allowed character
//  - an optional prefix followed by 2 to 1023 allowed characters ("{{2,1023}}" is an escaped "{2,1023}")
private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\(";
public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) {Priority = TerminalPriority.UDF};
// Built-in functions that appear in the RefFunctionName rule
public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(")
{ Priority = TerminalPriority.ExcelRefFunction };
public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(")
{ Priority = TerminalPriority.ExcelRefFunction };
// Alternation of every name returned by excelFunctionList
public Terminal ExcelFunction { get; } = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + string.Join("|", excelFunctionList) + ")\\(")
{ Priority = TerminalPriority.ExcelFunction };
// Using this instead of Empty allows a more accurate tree
public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);
#endregion
#region References and names
// Column letters: one or two letters, or three letters no greater than XFD
private const string ColumnPattern = @"(?:[A-Z]{1,2}|[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D])";
// Whole-column range such as A:B, each side optionally preceded by "$"
public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern);
// Whole-row range such as 1:3, each side optionally preceded by "$"; row numbers do not start with 0
public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*");
// Single cell such as A1 or $A$1
private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?[1-9][0-9]*";
public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex)
{ Priority = TerminalPriority.CellToken };
// Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark
private const string NameStartCharRegex = @"[\p{L}\\_]";
private const string NameValidCharacterRegex = @"[\w\\_\.\?€]";
public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*")
{ Priority = TerminalPriority.Name };
// Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference.
// If we ever parse R1C1 references, make sure to include them here
// TODO: Add all function names here
private const string NameInvalidWordsRegex =
"((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
// \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
+ "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
;
// To prevent e.g. "A1A1" being parsed as 2 cell tokens
public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NameInvalidWordsRegex + NameValidCharacterRegex + "+")
{ Priority = TerminalPriority.NamedRangeCombination };
// Names starting with "_xlnm." followed by letters or underscores.
// Note: from here on several terminals are declared as public fields rather than get-only properties.
public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+")
{ Priority = TerminalPriority.ReservedName };
#region Structured References
//public Terminal SRTableNameToken = new RegexBasedTerminal(GrammarNames.TokenSRTableName, @"[\w\\.]+\[")
//{Priority = 0};
// Column name inside a structured reference: word characters, backslash and dot
public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, @"[\w\\.]+")
{ Priority = TerminalPriority.SRColumn };
//public Terminal SREnclosedColumnToken = new RegexBasedTerminal(GrammarNames.TokenSREnclosedColumn, @"\[( )*[\w+\\.,:#'""{}$^&*+=->]+( )*\]")
//{Priority = 0};
#endregion
#region Prefixes
// Characters that may only appear in a sheet name when the name is quoted
private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
// Characters excluded from sheet names altogether (a doubled quote is allowed in quoted names, see quotedSheetName)
private const string notSheetNameChars = @"'*\[\]\\:/?";
//const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
//const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*";
//private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";
// Sheet tokens include the trailing "!"; the quoted variants also include the closing quote
// but not the opening one (the Prefix rule places QuoteS in front of them).
public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!")
{ Priority = TerminalPriority.SheetToken };
public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!")
{ Priority = TerminalPriority.SheetQuotedToken };
private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!";
private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!";
public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
// File given as a number in square brackets, e.g. [1]
private const string fileNameNumericRegex = @"\[[0-9]+\]";
public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex)
{ Priority = TerminalPriority.FileNameNumericToken };
// Any non-empty text without square brackets, enclosed in square brackets
private const string fileNameInBracketsRegex = @"\[[^\[\]]+\]";
public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex)
{ Priority = TerminalPriority.FileName };
// Source: https://stackoverflow.com/a/14632579
// Text without dots, a dot, then one to four characters
private const string fileNameRegex = @"[^\.]+\..{1,4}";
public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex)
{ Priority = TerminalPriority.FileName };
// Source: http://stackoverflow.com/a/6416209/572635
private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\.-]+\\[\w.$]+)\\(([^<>:""/\|?*\\]| )+\\)*";
// NOTE(review): "(0-9)" below is a group matching the literal text "0-9", not a digit class;
// "[0-9]" was presumably intended for the port number - confirm before changing.
private const string urlPathRegex = @"http(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:(0-9)*)*(\/?)([a-zA-Z0-9\-\.\?\,\'\/\\\+&%\$#_]*)?";
private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")";
// No explicit Priority is set on this terminal
public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex);
#endregion
#endregion
#endregion
#region 2-NonTerminals
// Most non-terminals are first defined here, so they can be used anywhere in the rules
// Otherwise you can only use non-terminals that have been defined previously
// The node name is taken from GrammarNames. For a few properties it differs from the property
// name: HRange/VRange use HorizontalRange/VerticalRange, and InfixOp, PostfixOp, PrefixOp,
// ReferenceItem and Start use the Transient* names.
public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument);
public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments);
public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns);
public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant);
public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula);
public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows);
public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool);
public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell);
public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant);
public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray);
public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument);
public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error);
public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula);
public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq);
public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall);
public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName);
public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange);
public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp);
public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange);
public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number);
public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix);
public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
// QuotedFileSheet and Sheet are declared but get no rule in the constructor of this class
public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference);
//public NonTerminal ReferenceFunction{ get; } = new NonTerminal(GrammarNames.ReferenceFunction);
public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError);
public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName);
public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName);
public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet);
public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart);
public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
public NonTerminal StructuredReferenceElement { get; } = new NonTerminal(GrammarNames.StructuredReferenceElement);
public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
//public NonTerminal StructuredReferenceKeyword { get; } = new NonTerminal(GrammarNames.StructuredReferenceKeyword);
public NonTerminal StructuredReferenceTable { get; } = new NonTerminal(GrammarNames.StructuredReferenceTable);
public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);
#endregion
/// <summary>
/// Builds the grammar: marks punctuation, assigns the production rule of every non-terminal
/// and registers operator precedence.
/// </summary>
/// <remarks>
/// Passes <c>false</c> to the base constructor (presumably Irony's case-sensitivity flag - confirm).
/// </remarks>
public ExcelFormulaGrammar() : base(false)
{
    #region Punctuation
    // Brackets of all three kinds are punctuation; the exclamation mark is not marked
    MarkPunctuation(OpenParen, CloseParen);
    MarkPunctuation(OpenSquareParen, CloseSquareParen);
    MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
    //exclamationMark.SetFlag(TermFlags.IsDelimiter);
    #endregion

    #region Rules

    #region Base rules
    Root = Start;
    Start.Rule = FormulaWithEq
                 | Formula
                 | ArrayFormula
        ;
    MarkTransient(Start);
    ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;
    FormulaWithEq.Rule = eqop + Formula;
    Formula.Rule =
        Reference
        | Constant
        | FunctionCall
        | ConstantArray
        | OpenParen + Formula + CloseParen
        | ReservedName
        ;
    //MarkTransient(Formula);
    ReservedName.Rule = ReservedNameToken;
    Constant.Rule = Number
                    | Text
                    | Bool
                    | Error
        ;
    Text.Rule = TextToken;
    Number.Rule = NumberToken;
    Bool.Rule = BoolToken;
    Error.Rule = ErrorToken;
    RefError.Rule = RefErrorToken;
    #endregion

    #region Functions
    // Operators are modelled as function calls: prefix, postfix and infix forms
    FunctionCall.Rule =
        FunctionName + Arguments + CloseParen
        | PrefixOp + Formula
        | Formula + PostfixOp
        | Formula + InfixOp + Formula
        ;
    FunctionName.Rule = ExcelFunction;
    Arguments.Rule = MakeStarRule(Arguments, comma, Argument);
    //Arguments.Rule = Argument | Argument + comma + Arguments;
    EmptyArgument.Rule = EmptyArgumentToken;
    Argument.Rule = Formula | EmptyArgument;
    //MarkTransient(Argument);
    PrefixOp.Rule =
        ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
        | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
        | ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
    MarkTransient(PrefixOp);
    InfixOp.Rule =
        expop
        | mulop
        | divop
        | plusop
        | minop
        | concatop
        | gtop
        | eqop
        | ltop
        | neqop
        | gteop
        | lteop;
    MarkTransient(InfixOp);
    //PostfixOp.Rule = ImplyPrecedenceHere(Precedence.UnaryPostFix) + percentop;
    // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
    PostfixOp.Rule = PreferShiftHere() + percentop;
    MarkTransient(PostfixOp);
    #endregion

    #region References
    Reference.Rule = ReferenceItem
                     | ReferenceFunctionCall
                     | OpenParen + Reference + PreferShiftHere() + CloseParen
                     | Prefix + ReferenceItem
                     | DynamicDataExchange
        ;
    // Range (colon), intersection, parenthesized union, reference-returning functions
    // and the postfix hash operator
    ReferenceFunctionCall.Rule =
        Reference + colon + Reference
        | Reference + intersectop + Reference
        | OpenParen + Union + CloseParen
        | RefFunctionName + Arguments + CloseParen
        | Reference + hash;
    RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;
    Union.Rule = MakePlusRule(Union, comma, Reference);
    ReferenceItem.Rule =
        Cell
        | NamedRange
        | VRange
        | HRange
        | RefError
        | UDFunctionCall
        | StructuredReference
        ;
    MarkTransient(ReferenceItem);
    UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
    UDFName.Rule = UDFToken;
    VRange.Rule = VRangeToken;
    HRange.Rule = HRangeToken;
    Cell.Rule = CellToken;
    File.Rule = FileNameNumericToken
                | FileNameEnclosedInBracketsToken
                | FilePathToken + FileNameEnclosedInBracketsToken
                | FilePathToken + FileName
        ;
    DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;
    NamedRange.Rule = NameToken | NamedRangeCombinationToken;
    Prefix.Rule =
        SheetToken
        | QuoteS + SheetQuotedToken
        | File + SheetToken
        | QuoteS + File + SheetQuotedToken
        | File + exclamationMark
        | MultipleSheetsToken
        | QuoteS + MultipleSheetsQuotedToken
        | File + MultipleSheetsToken
        | QuoteS + File + MultipleSheetsQuotedToken
        | RefErrorToken
        ;
    StructuredReferenceElement.Rule =
        OpenSquareParen + SRColumnToken + CloseSquareParen
        | OpenSquareParen + NameToken + CloseSquareParen
        | FileNameEnclosedInBracketsToken;
    //StructuredReferenceKeyword.Rule = EnclosedInBracketsToken;
    StructuredReferenceTable.Rule = NameToken;
    // One to four elements, optionally preceded by "@", combined with commas and at most one colon
    StructuredReferenceExpression.Rule =
        StructuredReferenceElement
        | at + StructuredReferenceElement
        | StructuredReferenceElement + colon + StructuredReferenceElement
        | at + StructuredReferenceElement + colon + StructuredReferenceElement
        | StructuredReferenceElement + comma + StructuredReferenceElement
        | StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement
        | StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement
        | StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement
        ;
    StructuredReference.Rule =
        StructuredReferenceElement
        | OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
        | StructuredReferenceTable + StructuredReferenceElement
        | StructuredReferenceTable + OpenSquareParen + CloseSquareParen
        | StructuredReferenceTable + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
        ;
    #endregion

    #region Arrays
    // Rows are separated by semicolons, values within a row by commas
    ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;
    ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
    ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);
    ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
    #endregion

    #endregion

    #region 5-Operator Precedence
    // Some of these operators are neutral associative instead of left associative,
    // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
    // structure of the parse tree, we like consistency.
    RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
    RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
    RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
    RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
    RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
    RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
    // plusop and minop are registered a second time here, with the unary prefix precedence
    RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, plusop, minop, at);
    RegisterOperators(Precedence.Union, Associativity.Left, comma);
    RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
    RegisterOperators(Precedence.Range, Associativity.Left, colon);
    #endregion
}
#region Precedence and Priority constants
// Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
// Could also be an enum, but this way you don't need int casts
// Operator precedence levels passed to RegisterOperators / ImplyPrecedenceHere in the constructor.
// Value 8 is unused: the Reference level is commented out.
private static class Precedence
{
// Don't use priority 0, Irony seems to view it as no priority set
public const int Comparison = 1;
public const int Concatenation = 2;
public const int Addition = 3;
public const int Multiplication = 4;
public const int Exponentiation = 5;
public const int UnaryPostFix = 6;
public const int UnaryPreFix = 7;
//public const int Reference = 8;
public const int Union = 9;
public const int Intersection = 10;
public const int Range = 11;
}
// Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
// E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
// E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
// Several terminals share a value: ExcelFunction, ExcelRefFunction, FileNameNumericToken,
// SheetToken and SheetQuotedToken are all 1200.
private static class TerminalPriority
{
// Irony Low value
//public const int Low = -1000;
public const int SRColumn = -900;
public const int Name = -800;
public const int ReservedName = -700;
public const int FileName = -500;
public const int SingleQuotedString = -100;
// Irony Normal value, default value
//public const int Normal = 0;
public const int Bool = 0;
public const int MultipleSheetsToken = 100;
// Irony High value
//public const int High = 1000;
public const int CellToken = 1000;
public const int NamedRangeCombination = 1100;
public const int UDF = 1150;
public const int ExcelFunction = 1200;
public const int ExcelRefFunction = 1200;
public const int FileNameNumericToken = 1200;
public const int SheetToken = 1200;
public const int SheetQuotedToken = 1200;
}
#endregion
// Names of the built-in Excel functions known to this grammar version.
// Not cached: every access reads and splits the embedded resource again.
private static string[] excelFunctionList
{
    get { return GetExcelFunctionList(); }
}

// Reads the embedded v150 function list and returns its non-empty lines.
private static string[] GetExcelFunctionList()
{
    string resourceText = Properties.Resources.ExcelBuiltinFunctionList_v150;
    char[] lineBreaks = { '\n', '\r' };
    using (var reader = new StringReader(resourceText))
    {
        string contents = reader.ReadToEnd();
        return contents.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
    }
}
}
#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
#region Non-Terminals
public const string Argument = "Argument";
public const string Arguments = "Arguments";
public const string ArrayColumns = "ArrayColumns";
public const string ArrayConstant = "ArrayConstant";
public const string ArrayFormula = "ArrayFormula";
public const string ArrayRows = "ArrayRows";
public const string Bool = "Bool";
public const string Cell = "Cell";
public const string Constant = "Constant";
public const string ConstantArray = "ConstantArray";
public const string DynamicDataExchange = "DynamicDataExchange";
public const string EmptyArgument = "EmptyArgument";
public const string Error = "Error";
public const string ExcelFunction = "ExcelFunction";
public const string File = "File";
public const string Formula = "Formula";
public const string FormulaWithEq = "FormulaWithEq";
public const string FunctionCall = "FunctionCall";
public const string FunctionName = "FunctionName";
// Note: the constant name and the term name differ here ("HRange")
public const string HorizontalRange = "HRange";
public const string NamedRange = "NamedRange";
public const string Number = "Number";
public const string Prefix = "Prefix";
public const string QuotedFileSheet = "QuotedFileSheet";
public const string Range = "Range";
public const string Reference = "Reference";
public const string ReferenceFunctionCall = "ReferenceFunctionCall";
public const string RefError = "RefError";
public const string RefFunctionName = "RefFunctionName";
public const string ReservedName = "ReservedName";
public const string Sheet = "Sheet";
public const string StructuredReference = "StructuredReference";
public const string StructuredReferenceElement = "StructuredReferenceElement";
public const string StructuredReferenceExpression = "StructuredReferenceExpression";
public const string StructuredReferenceTable = "StructuredReferenceTable";
public const string Text = "Text";
public const string UDFName = "UDFName";
public const string UDFunctionCall = "UDFunctionCall";
public const string Union = "Union";
// Note: the constant name and the term name differ here ("VRange")
public const string VerticalRange = "VRange";
#endregion
#region Transient Non-Terminals
public const string TransientStart = "Start";
public const string TransientInfixOp = "InfixOp";
public const string TransientPostfixOp = "PostfixOp";
public const string TransientPrefixOp = "PrefixOp";
public const string TransientReferenceItem = "ReferenceItem";
#endregion
#region Terminals
public const string TokenBool = "BoolToken";
public const string TokenCell = "CellToken";
public const string TokenEmptyArgument = "EmptyArgumentToken";
public const string TokenError = "ErrorToken";
public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
public const string TokenFilePath = "FilePathToken";
public const string TokenFileName = "FileNameToken";
public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
public const string TokenFileNameNumeric = "FileNameNumericToken";
public const string TokenHRange = "HRangeToken";
// Name reported for the intersection operator by the parser helpers
public const string TokenIntersect = "INTERSECT";
public const string TokenMultipleSheets = "MultipleSheetsToken";
public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
public const string TokenName = "NameToken";
public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
public const string TokenNumber = "NumberToken";
public const string TokenRefError = "RefErrorToken";
public const string TokenReservedName = "ReservedNameToken";
public const string TokenSingleQuotedString = "SingleQuotedString";
public const string TokenSheet = "SheetNameToken";
public const string TokenSheetQuoted = "SheetNameQuotedToken";
public const string TokenSRTableName = "SRTableName";
public const string TokenSRKeyword = "SRKeyword";
public const string TokenSRColumn = "SRColumn";
public const string TokenSREnclosedColumn = "SREnclosedColumn";
public const string TokenText = "TextToken";
public const string TokenUDF = "UDFToken";
// The union operator is identified by its literal text
public const string TokenUnionOperator = ",";
public const string TokenVRange = "VRangeToken";
#endregion
}
#endregion
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v150/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v150
{
/// <summary>
/// Excel formula parser.
/// Contains the parser and utilities that operate directly on the parse tree, or make working with the parse tree easier.
/// </summary>
public static class ExcelFormulaParser
{
/// <summary>
/// Parser instance of the current thread; the field is marked <see cref="ThreadStaticAttribute"/>,
/// so every thread has its own slot.
/// </summary>
[ThreadStatic]
private static Parser _p;

/// <summary>
/// Parser for the calling thread, created from a new <see cref="ExcelFormulaGrammar"/> on first use.
/// </summary>
private static Parser P
{
    get
    {
        if (_p == null)
        {
            _p = new Parser(new ExcelFormulaGrammar());
        }
        return _p;
    }
}
/// <summary>
/// Parse a formula and return the root node of the resulting parse tree
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed
/// </exception>
/// <returns>Parse tree root node</returns>
public static ParseTreeNode Parse(string input) => ParseToTree(input).Root;
/// <summary>
/// Parse a formula and return the complete parse tree
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed
/// </exception>
/// <returns>Parse tree</returns>
public static ParseTree ParseToTree(string input)
{
    ParseTree parsed = P.Parse(input);
    if (parsed.HasErrors())
    {
        throw new ArgumentException("Failed parsing input <<" + input + ">>");
    }

    // Move the span of every intersection operator one character to the left and give it length 1
    foreach (ParseTreeNode intersectNode in parsed.Root.AllNodes().Where(n => n.Is(GrammarNames.TokenIntersect)))
    {
        SourceLocation reported = intersectNode.Span.Location;
        var shifted = new SourceLocation(reported.Position - 1, reported.Line, reported.Column - 1);
        intersectNode.Span = new SourceSpan(shifted, 1);
    }

    // Let quoted sheet tokens cover the whitespace directly in front of them
    foreach (ParseTreeNode sheetNode in parsed.Root.AllNodes().Where(n => n.Is(GrammarNames.TokenSheetQuoted)))
    {
        PrefixInfo.FixQuotedSheetNodeForWhitespace(sheetNode, input);
    }

    return parsed;
}
/// <summary>
/// All nodes of the tree (terminal nodes included) in depth-first pre-order, with a conditional stop
/// </summary>
/// <param name="root">The root node</param>
/// <param name="stopAt">Don't process the children of a node matching this predicate; the matching node itself is still returned</param>
/// <returns>A lazily evaluated sequence starting with <paramref name="root"/></returns>
// inspiration taken from https://irony.codeplex.com/discussions/213938
public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
{
    var stack = new Stack<ParseTreeNode>();
    stack.Push(root);
    while (stack.Count > 0)
    {
        var node = stack.Pop();
        yield return node;

        // Check if we don't want to process the children of this node
        if (stopAt != null && stopAt(node)) continue;

        var children = node.ChildNodes;
        // Push children on in reverse order so that they will
        // be evaluated left -> right when popped.
        for (int i = children.Count - 1; i >= 0; i--)
        {
            stack.Push(children[i]);
        }
    }
}
/// <summary>
/// All nodes of the tree (terminal nodes included) in depth-first pre-order
/// </summary>
/// <param name="root">The root node</param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
{
    return AllNodesConditional(root);
}

/// <summary>
/// All nodes of a certain type in depth-first pre-order
/// </summary>
/// <param name="root">The root node</param>
/// <param name="type">Term name the returned nodes must have</param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
{
    return AllNodes(root.AllNodes(), type);
}

// Filters an already enumerated node sequence down to the nodes of the given type.
internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
{
    return allNodes.Where(node => node.Is(type));
}
/// <summary>
/// Get the parent node of a node
/// </summary>
/// <remarks>
/// This is an expensive operation, as the whole tree will be searched through
/// </remarks>
/// <exception cref="ArgumentException">If no node below <paramref name="treeRoot"/> has the child among its children</exception>
public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
{
    foreach (ParseTreeNode candidate in treeRoot.AllNodes())
    {
        if (candidate.ChildNodes.Any(c => c == child))
        {
            return candidate;
        }
    }

    throw new ArgumentException("Child is not part of the tree", nameof(child));
}
/// <summary>
/// The node type/name, i.e. the name of the grammar term the node was created for
/// </summary>
public static string Type(this ParseTreeNode node) => node.Term.Name;
/// <summary>
/// Check if a node is of a particular type (exact, case-sensitive name match)
/// </summary>
public static bool Is(this ParseTreeNode pt, string type) => pt.Type() == type;
/// <summary>
/// Checks whether this node is a function: a function call, a reference function call,
/// a user defined function call, or a two-child reference whose second child is a function
/// </summary>
public static Boolean IsFunction(this ParseTreeNode input)
{
    if (input.Is(GrammarNames.FunctionCall)
        || input.Is(GrammarNames.ReferenceFunctionCall)
        || input.Is(GrammarNames.UDFunctionCall))
    {
        return true;
    }

    // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
    return input.Is(GrammarNames.Reference)
           && input.ChildNodes.Count == 2
           && input.ChildNodes[1].IsFunction();
}
/// <summary>
/// Whether or not this node represents parentheses "(_)": a Formula whose only child is a Formula,
/// or a Reference whose only child is a Reference
/// </summary>
public static bool IsParentheses(this ParseTreeNode input)
{
    var kind = input.Type();
    if (kind != GrammarNames.Formula && kind != GrammarNames.Reference)
    {
        return false;
    }

    // The wrapper and its single child must be of the same kind
    return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(kind);
}
/// <summary>
/// Whether this node is a function node with three children of which the middle one is an operator
/// </summary>
public static bool IsBinaryOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 3)
    {
        return false;
    }

    return input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
}

/// <summary>
/// Whether this node is a binary operation represented by a FunctionCall node
/// </summary>
public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
    => input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);

/// <summary>
/// Whether this node is a binary operation represented by a ReferenceFunctionCall node
/// </summary>
public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
    => input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
/// <summary>
/// Whether this node is a unary prefix or a unary postfix operation
/// </summary>
public static bool IsUnaryOperation(this ParseTreeNode input)
    => IsUnaryPrefixOperation(input) || IsUnaryPostfixOperation(input);

/// <summary>
/// Whether this node is a function node with two children of which the first one is an operator
/// </summary>
public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }

    return input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator);
}

/// <summary>
/// Whether this node is a function node with two children of which the second one is an operator
/// </summary>
public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }

    return input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
}
// Returns the input without its last character; GetFunction uses it on the printed function name token.
// Throws ArgumentOutOfRangeException for an empty string.
private static string RemoveFinalSymbol(string input)
{
    return input.Remove(input.Length - 1);
}
/// <summary>
/// Get the function or operator name of this function call
/// </summary>
/// <remarks>
/// Names of named functions are returned in upper case. The conversion is culture-invariant, so the
/// result does not depend on the current thread culture (e.g. "if" stays "IF" under a Turkish culture).
/// </remarks>
/// <exception cref="ArgumentException">If the node is not a function call</exception>
public static string GetFunction(this ParseTreeNode input)
{
    if (input.IsIntersection())
    {
        return GrammarNames.TokenIntersect;
    }
    if (input.IsUnion())
    {
        return GrammarNames.TokenUnionOperator;
    }
    if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
    {
        // The operator is the second child for both binary and postfix operations
        return input.ChildNodes[1].Print();
    }
    if (input.IsUnaryPrefixOperation())
    {
        return input.ChildNodes[0].Print();
    }
    if (input.IsNamedFunction())
    {
        // Drop the last character of the printed name token before upper-casing
        return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
    }
    if (input.IsExternalUDFunction())
    {
        // Prefix followed by the name of the wrapped function call
        return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
    }
    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Check if this node is a specific function, comparing against the name returned by GetFunction
/// </summary>
public static bool MatchFunction(this ParseTreeNode input, string functionName)
{
    if (!IsFunction(input))
    {
        return false;
    }

    return GetFunction(input) == functionName;
}
/// <summary>
/// Get all the arguments of a function or operation
/// </summary>
/// <exception cref="ArgumentException">If the node is not a function call</exception>
public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
{
    if (input.IsNamedFunction())
    {
        return input
            .ChildNodes[1] // "Arguments" non-terminal
            .ChildNodes    // "Argument" non-terminals
            .Select(node => node.ChildNodes[0])
            ;
    }
    if (input.IsBinaryOperation())
    {
        // Operands are on both sides of the operator
        return new[] {input.ChildNodes[0], input.ChildNodes[2]};
    }
    if (input.IsUnaryPrefixOperation())
    {
        return new[] {input.ChildNodes[1]};
    }
    if (input.IsUnaryPostfixOperation())
    {
        return new[] {input.ChildNodes[0]};
    }
    if (input.IsUnion())
    {
        // The children of the Union node are the united references
        return input.ChildNodes[0].ChildNodes;
    }
    if (input.IsExternalUDFunction())
    {
        return input          // Reference
            .ChildNodes[1]    // UDFunctionCall
            .ChildNodes[1]    // Arguments
            .ChildNodes       // Argument non-terminals
            .Select(node => node.ChildNodes[0])
            ;
    }
    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Checks whether this node is a built-in excel function, i.e. a function node whose first child
/// is a FunctionName or a RefFunctionName
/// </summary>
public static bool IsBuiltinFunction(this ParseTreeNode node)
{
    if (!node.IsFunction())
    {
        return false;
    }

    var head = node.ChildNodes[0];
    return head.Is(GrammarNames.FunctionName) || head.Is(GrammarNames.RefFunctionName);
}
/// <summary>
/// Whether or not this node represents an intersection: a binary operation whose operator
/// token belongs to the intersect terminal
/// </summary>
public static bool IsIntersection(this ParseTreeNode input)
{
    if (!IsBinaryOperation(input))
    {
        return false;
    }

    var operatorTerminal = input.ChildNodes[1].Token.Terminal;
    return operatorTerminal.Name == GrammarNames.TokenIntersect;
}
/// <summary>
/// Whether or not this node represents a union: a ReferenceFunctionCall whose only child is a Union node
/// </summary>
public static bool IsUnion(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.ReferenceFunctionCall))
    {
        return false;
    }

    var kids = input.ChildNodes;
    return kids.Count == 1 && kids[0].Is(GrammarNames.Union);
}
/// <summary>
/// Checks whether this node is a function call with name, and not just a unary or binary operation
/// </summary>
public static bool IsNamedFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.FunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.FunctionName);
        case GrammarNames.ReferenceFunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.RefFunctionName);
        case GrammarNames.UDFunctionCall:
            // User defined function calls always carry a name
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Whether this node is a binary or a unary (prefix or postfix) operation
/// </summary>
public static bool IsOperation(this ParseTreeNode input)
    => input.IsBinaryOperation() || input.IsUnaryOperation();
/// <summary>
/// Whether this node is a Reference with exactly two children of which the second one is a named function
/// </summary>
public static bool IsExternalUDFunction(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.Reference) || input.ChildNodes.Count != 2)
    {
        return false;
    }

    return input.ChildNodes[1].IsNamedFunction();
}
/// <summary>
/// True if this node represents a number constant with a sign
/// </summary>
public static bool IsNumberWithSign(this ParseTreeNode input)
{
    if (!IsUnaryPrefixOperation(input))
    {
        return false;
    }

    // First child of the operand: must be a Constant that wraps a Number
    var operand = input.ChildNodes[1].ChildNodes[0];
    return operand.Is(GrammarNames.Constant)
           && operand.ChildNodes[0].Is(GrammarNames.Number);
}
/// <summary>
/// Extract all of the information from a Prefix non-terminal
/// </summary>
public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix)
{
    return PrefixInfo.From(prefix);
}
/// <summary>
/// Go to the first non-formula child node by following the first child of each Formula node
/// </summary>
public static ParseTreeNode SkipFormula(this ParseTreeNode input)
{
    return input.Is(GrammarNames.Formula)
        ? input.ChildNodes.First().SkipFormula()
        : input;
}
/// <summary>
/// Get all child nodes that are references and aren't part of another reference expression
/// </summary>
/// <remarks>
/// The traversal does not descend into a Reference node, so nested references are not returned.
/// Each returned node has been passed through SkipToRelevant.
/// </remarks>
public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
{
    return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
        .Where(node => node.Is(GrammarNames.Reference))
        .Select(node => node.SkipToRelevant())
        ;
}
/// <summary>
/// Gets the ParserReferences from the input parse tree node and its children
/// </summary>
/// <remarks>
/// 5 cases:
/// 1. ReferenceItem node: convert to ParserReference
/// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
///    (to include the references in the arguments of external UDFs)
/// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
/// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
/// 5. Other cases (RefFunctionCall, Union, Arguments):recursive call on the nodes returned from GetReferenceNodes(node)
/// </remarks>
public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
{
    // Unwrap a Reference that only wraps a single item
    if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
        node = node.ChildNodes[0];

    var list = new List<ParserReference>();
    switch (node.Type())
    {
        case GrammarNames.Cell:
        case GrammarNames.NamedRange:
        case GrammarNames.HorizontalRange:
        case GrammarNames.VerticalRange:
        case GrammarNames.StructuredReference:
            list.Add(new ParserReference(node));
            break;
        case GrammarNames.Reference:
            list.Add(new ParserReference(node));
            list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            break;
        default:
            if (node.IsRange())
            {
                var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray();
                var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray();
                if (IsCellReference(rangeStart) && IsCellReference(rangeEnd))
                {
                    // Cell:Cell - reuse the start reference and turn it into a cell range
                    ParserReference range = rangeStart.First();
                    range.MaxLocation = rangeEnd.First().MinLocation;
                    range.ReferenceType = ReferenceType.CellRange;
                    range.LocationString = node.Print();
                    list.Add(range);
                }
                else
                {
                    list.AddRange(rangeStart);
                    list.AddRange(rangeEnd);
                }
            }
            else
            {
                list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            }
            break;
    }
    return list;
}
// True when the list holds exactly one reference and that reference is a single cell.
private static bool IsCellReference(IList<ParserReference> references)
{
    return references.Count == 1 && references[0].ReferenceType == ReferenceType.Cell;
}
/// <summary>
/// Whether or not this node represents a range: a binary reference operation with the ":" operator
/// </summary>
public static bool IsRange(this ParseTreeNode input)
    => input.IsBinaryReferenceOperation() && input.ChildNodes[1].Is(":");
/// <summary>
/// Go to the first "relevant" child node, i.e. skips wrapper nodes
/// </summary>
/// <param name="input">The input parse tree node</param>
/// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
/// <remarks>
/// Skips:
/// * FormulaWithEq and ArrayFormula nodes
/// * Formula and Argument nodes with a single child
/// * Parentheses
/// * Reference nodes which are just wrappers
/// </remarks>
public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
{
    var current = input;
    for (;;)
    {
        var kind = current.Type();

        if (kind == GrammarNames.FormulaWithEq || kind == GrammarNames.ArrayFormula)
        {
            // The wrapped formula is the second child
            current = current.ChildNodes[1];
            continue;
        }

        if (kind == GrammarNames.Argument || kind == GrammarNames.Formula)
        {
            if (current.ChildNodes.Count != 1)
            {
                return current;
            }
            current = current.ChildNodes[0];
            continue;
        }

        if (kind == GrammarNames.Reference)
        {
            // Skip references which are parentheses
            // Skip references without a prefix (=> they only have one child node) if the option is set
            var isWrapper = (skipReferencesWithoutPrefix && current.ChildNodes.Count == 1) || current.IsParentheses();
            if (!isWrapper)
            {
                return current;
            }
            current = current.ChildNodes[0];
            continue;
        }

        return current;
    }
}
/// <summary>
/// Pretty-print a parse tree to a string
/// </summary>
/// <param name="input">Root of the (sub)tree to print</param>
/// <returns>The formula text for the node; terminals print their token text</returns>
/// <exception cref="ArgumentException">
/// If a function node has an unknown shape, or a non-terminal without a print rule has more or less than one child
/// </exception>
public static string Print(this ParseTreeNode input)
{
    // For terminals, just print the token text
    if (input.Term is Terminal)
    {
        return input.Token.Text;
    }

    // (Lazy) enumerable for printed children
    var children = input.ChildNodes.Select(Print);
    // Concrete list when needed
    List<string> childrenList;

    // Switch on non-terminals
    switch (input.Term.Name)
    {
        case GrammarNames.Formula:
            // Check if these are brackets, otherwise print first child
            return IsParentheses(input) ? $"({children.First()})" : children.First();

        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            childrenList = children.ToList();

            if (input.IsNamedFunction())
            {
                // The closing parenthesis is not part of the printed children, so append it
                return string.Join("", childrenList) + ")";
            }

            if (input.IsBinaryOperation())
            {
                // format string for "normal" binary operation
                string format = "{0}{1}{2}";
                if (input.IsIntersection())
                {
                    // An intersection is printed as a single space between the operands
                    format = "{0} {2}";
                }

                return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
            }

            if (input.IsUnion())
            {
                return $"({string.Join(",", childrenList)})";
            }

            if (input.IsUnaryOperation())
            {
                return string.Join("", childrenList);
            }

            throw new ArgumentException("Unknown function type.");

        case GrammarNames.Reference:
            return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);

        case GrammarNames.Prefix:
            var ret = string.Join("", children);
            // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
            if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
            {
                ret += "!";
            }
            return ret;

        case GrammarNames.ArrayFormula:
            return "{=" + children.ElementAt(1) + "}";

        case GrammarNames.StructuredReference:
            var sb = new StringBuilder();
            var hashtable = input.ChildNodes.Count >= 1 && input.ChildNodes[0].Is(GrammarNames.StructuredReferenceTable);
            // Index of the child holding the reference contents: it follows the table name when there is one
            var contentsNode = hashtable ? 1 : 0;
            childrenList = children.ToList();
            if (hashtable)
            {
                sb.Append(childrenList[0]);
            }

            if (hashtable && input.ChildNodes.Count == 1)
            {
                // Full table reference
                sb.Append("[]");
            }
            else if (input.ChildNodes[contentsNode].Is(GrammarNames.StructuredReferenceElement))
            {
                sb.Append(childrenList[contentsNode]);
            }
            else
            {
                sb.Append($"[{childrenList[contentsNode]}]");
            }

            return sb.ToString();

        // Terms for which to print all child nodes concatenated
        case GrammarNames.ArrayConstant:
        case GrammarNames.DynamicDataExchange:
        case GrammarNames.FormulaWithEq:
        case GrammarNames.File:
        case GrammarNames.StructuredReferenceExpression:
            return string.Join("", children);

        // Terms for which we print the children comma-separated
        case GrammarNames.Arguments:
        case GrammarNames.ArrayRows:
        case GrammarNames.Union:
            return string.Join(",", children);

        case GrammarNames.ArrayColumns:
            return string.Join(";", children);

        case GrammarNames.ConstantArray:
            return $"{{{children.First()}}}";

        default:
            // If it is not defined above and the number of children is exactly one, we want to just print the first child
            if (input.ChildNodes.Count == 1)
            {
                return children.First();
            }
            throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine +
                                        "This probably means the Excel grammar was modified without the print function being modified");
    }
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v150/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v150
{
// Kind of reference a ParserReference describes.
public enum ReferenceType
{
// A single cell (Cell node)
Cell,
// A range between two single cells (Cell:Cell)
CellRange,
// A named range (NamedRange node)
UserDefinedName,
// A range of whole rows (HRange node)
HorizontalRange,
// A range of whole columns (VRange node)
VerticalRange,
// A reference error (RefError node)
RefError,
// A structured (table) reference (StructuredReference node)
Table
}
public class ParserReference
{
// Appended as the last row number when a vertical (whole column) range is expanded
public const int MaxRangeHeight = 1048576;
// MaxRangeWidth - 1 is converted to the last column name when a horizontal (whole row) range is expanded
public const int MaxRangeWidth = 16384;
public ReferenceType ReferenceType { get; set; }
// Printed text of the node this reference was created from
public string LocationString { get; set; }
// Sheet name taken from the prefix (first sheet for a multi-sheet prefix); "(Undefined sheet)" when the prefix has no sheet
public string Worksheet { get; set; }
// Second sheet of a multi-sheet prefix
public string LastWorksheet { get; set; }
public string FilePath { get; set; }
// File name from the prefix; for a numeric file reference this is the number as a string
public string FileName { get; set; }
// Name of the named range, or of the table for structured references
public string Name { get; private set; }
public string MinLocation { get; set; } //Location as appearing in the formula, eg $A$1
public string MaxLocation { get; set; }
/// <summary>
/// Creates a reference from explicit values.
/// </summary>
/// <remarks>
/// When <paramref name="maxLocation"/> is null, MaxLocation takes the value of <paramref name="minLocation"/>.
/// </remarks>
public ParserReference(ReferenceType referenceType, string locationString = null, string worksheet = null, string lastWorksheet = null,
    string filePath = null, string fileName = null, string name = null, string minLocation = null, string maxLocation = null)
{
    ReferenceType = referenceType;
    Name = name;
    LocationString = locationString;

    // Where the reference lives
    FilePath = filePath;
    FileName = fileName;
    Worksheet = worksheet;
    LastWorksheet = lastWorksheet;

    // Covered area
    MinLocation = minLocation;
    MaxLocation = maxLocation ?? minLocation;
}
/// <summary>
/// Creates a reference from a parse tree node; all values are derived by InitializeReference.
/// </summary>
public ParserReference(ParseTreeNode node) => InitializeReference(node);
/// <summary>
/// Initializes the current object based on the input ParseTreeNode
/// </summary>
/// <remarks>
/// For Reference nodes (Prefix ReferenceItem), it initializes the values derived from the Prefix node and
/// is re-invoked for the ReferenceItem node.
/// The "$" checks use ordinal comparison, so they do not depend on the current culture.
/// </remarks>
public void InitializeReference(ParseTreeNode node)
{
    switch (node.Type())
    {
        case GrammarNames.Reference:
            PrefixInfo prefix = node.ChildNodes[0].GetPrefixInfo();
            // Doubled single quotes in the sheet name are collapsed to one
            Worksheet = prefix.HasSheet ? prefix.Sheet.Replace("''", "'") : "(Undefined sheet)";

            if (prefix.HasMultipleSheets)
            {
                string[] sheets = prefix.MultipleSheets.Split(':');
                Worksheet = sheets[0];
                LastWorksheet = sheets[1];
            }

            if (prefix.HasFilePath)
            {
                FilePath = prefix.FilePath;
            }

            if (prefix.HasFileNumber)
            {
                FileName = prefix.FileNumber.ToString();
            }
            else if (prefix.HasFileName)
            {
                FileName = prefix.FileName;
            }

            InitializeReference(node.ChildNodes[1]);
            break;
        case GrammarNames.Cell:
            ReferenceType = ReferenceType.Cell;
            MinLocation = node.ChildNodes[0].Token.ValueString;
            MaxLocation = MinLocation;
            break;
        case GrammarNames.NamedRange:
            ReferenceType = ReferenceType.UserDefinedName;
            Name = node.ChildNodes[0].Token.ValueString;
            break;
        case GrammarNames.StructuredReference:
            ReferenceType = ReferenceType.Table;
            // Name stays null when the structured reference has no table child
            Name = node.ChildNodes.FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceTable)?.ChildNodes[0].Token.ValueString;
            break;
        case GrammarNames.HorizontalRange:
            string[] horizontalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
            ReferenceType = ReferenceType.HorizontalRange;
            // Whole rows: from column A to the last column; the column inherits the row's "$" marker
            MinLocation = (horizontalLimits[0].StartsWith("$", System.StringComparison.Ordinal) ? "$" : "") + "A" + horizontalLimits[0];
            MaxLocation = (horizontalLimits[1].StartsWith("$", System.StringComparison.Ordinal) ? "$" : "") + ConvertColumnToStr(MaxRangeWidth - 1) + horizontalLimits[1];
            break;
        case GrammarNames.VerticalRange:
            string[] verticalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
            ReferenceType = ReferenceType.VerticalRange;
            // Whole columns: from row 1 to the last row; the row inherits the column's "$" marker
            MinLocation = verticalLimits[0] + (verticalLimits[0].StartsWith("$", System.StringComparison.Ordinal) ? "$" : "") + "1";
            MaxLocation = verticalLimits[1] + (verticalLimits[1].StartsWith("$", System.StringComparison.Ordinal) ? "$" : "") + MaxRangeHeight;
            break;
        case GrammarNames.RefError:
            ReferenceType = ReferenceType.RefError;
            break;
    }

    // Set last, so for Reference nodes the text of the whole reference (prefix included) wins
    LocationString = node.Print();
}
/// <summary>
/// Converts the column number to an Excel column string representation (0 -> "A", 25 -> "Z", 26 -> "AA").
/// </summary>
/// <param name="columnNumber">The zero-based column number.</param>
/// <returns>The column letters; an empty string for a negative number.</returns>
private string ConvertColumnToStr(int columnNumber)
{
    var letters = new System.Text.StringBuilder();
    // Emit the least significant letter first and prepend, so the result reads left to right
    for (int remaining = columnNumber; remaining >= 0; remaining = remaining / 26 - 1)
    {
        letters.Insert(0, (char)('A' + remaining % 26));
    }
    return letters.ToString();
}
/// <summary>
/// The location for single cells, "MinLocation:MaxLocation" for every other reference type.
/// </summary>
public override string ToString()
{
    if (ReferenceType == ReferenceType.Cell)
    {
        return MinLocation.ToString();
    }

    return string.Format("{0}:{1}", MinLocation, MaxLocation);
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v150/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v150
{
// TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23
///
/// Simple data class that holds information about a Prefix.
///
///
public class PrefixInfo : IEquatable
{
public string FilePath { get; }
public bool HasFilePath => FilePath != null;
private readonly int? _fileNumber;
public int FileNumber => _fileNumber.GetValueOrDefault();
public bool HasFileNumber => _fileNumber.HasValue;
public string FileName { get; }
public bool HasFileName => FileName != null;
public bool HasFile => HasFileName || HasFileNumber;
public string Sheet { get; }
public bool HasSheet => Sheet != null;
public string MultipleSheets { get; }
public bool HasMultipleSheets => MultipleSheets != null;
public bool IsQuoted { get; }
public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
{
Sheet = sheet;
_fileNumber = fileNumber;
FileName = fileName;
FilePath = filePath;
MultipleSheets = multipleSheets;
IsQuoted = isQuoted;
}
///
/// Create a PrefixInfo class from a parse tree node
///
internal static PrefixInfo From(ParseTreeNode prefix)
{
if (prefix.Type() != GrammarNames.Prefix)
{
throw new ArgumentException("Not a prefix", nameof(prefix));
}
string filePath = null;
int? fileNumber = null;
string fileName = null;
string sheetName = null;
string multipleSheets = null;
// Token number we're processing
var cur = 0;
// Check for quotes
var isQuoted = prefix.ChildNodes[cur].Is("'");
if (isQuoted)
{
cur++;
}
// Check and process file
if (prefix.ChildNodes[cur].Is(GrammarNames.File))
{
ParseTreeNode file = prefix.ChildNodes[cur];
if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
{
// Numeric filename
fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
}
else
{
// String filename
var iCur = 0;
// Check if it includes a path
if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
{
filePath = file.ChildNodes[iCur].Print();
iCur++;
}
if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
{
fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
}
else
{
fileName = file.ChildNodes[iCur].Print();
}
}
cur++;
}
// Check for a non-quoted sheet
if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheet))
{
sheetName = Substr(prefix.ChildNodes[cur].Print(), 1);
}
// Check for a quoted sheet
else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheetQuoted))
{
// remove quote and !
sheetName = Substr(prefix.ChildNodes[cur].Print(), 2);
if (sheetName == "")
{
// The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
// We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
sheetName = " ";
}
}
// Check if multiple sheets
else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenMultipleSheets))
{
multipleSheets = Substr(prefix.ChildNodes[cur].Print(), 1);
}
return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
}
internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
{
var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
SourceLocation currentLocation = quotedSheetNode.Span.Location;
if (newPosition == currentLocation.Position)
{
return;
}
var newLocation = new SourceLocation(newPosition, currentLocation.Line, currentLocation.Column + currentLocation.Position - newPosition);
quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);
// Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
}
private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
{
var startIndex = nodeSheetQuoted.Span.Location.Position;
while (startIndex > 0)
{
if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
{
break;
}
startIndex--;
}
return startIndex;
}
private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
{
return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
}
public override bool Equals(object other) => Equals(other as PrefixInfo);
public bool Equals(PrefixInfo other)
{
if (ReferenceEquals(null, other)) return false;
if (ReferenceEquals(this, other)) return true;
return _fileNumber == other._fileNumber && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase) && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase) && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase) && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
}
public override int GetHashCode()
{
unchecked
{
var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
return hashCode;
}
}
/// <summary>Value equality; delegates to the static <c>Equals(object, object)</c>, so null operands are handled.</summary>
public static bool operator ==(PrefixInfo left, PrefixInfo right) => Equals(left, right);
/// <summary>Value inequality; the negation of the static <c>Equals(object, object)</c>.</summary>
public static bool operator !=(PrefixInfo left, PrefixInfo right) => !Equals(left, right);
/// <summary>
/// Renders the prefix as text: an optional opening quote, the file path, the file number or file name in
/// square brackets, the sheet and/or multiple-sheets text, an optional closing quote and a final "!".
/// Each part is only emitted when the matching <c>Has…</c>/<c>IsQuoted</c> member is true.
/// </summary>
public override string ToString()
{
    var text = new StringBuilder();

    if (IsQuoted)
    {
        text.Append("'");
    }

    if (HasFilePath)
    {
        text.Append(FilePath);
    }

    if (HasFileNumber)
    {
        text.Append($"[{FileNumber}]");
    }

    if (HasFileName)
    {
        text.Append($"[{FileName}]");
    }

    if (HasSheet)
    {
        text.Append(Sheet);
    }

    if (HasMultipleSheets)
    {
        text.Append(MultipleSheets);
    }

    if (IsQuoted)
    {
        text.Append("'");
    }

    text.Append("!");
    return text.ToString();
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v151/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.IO;
using System.Reflection;
namespace XLParser.Web.XLParserVersions.v151
{
/// <summary>
/// Contains the XLParser grammar
/// </summary>
[Language("Excel Formulas", "1.5.1", "Grammar for Excel Formulas")]
public class ExcelFormulaGrammar : Grammar
{
#region 1-Terminals
#region Symbols and operators
// Punctuation and operator key terms. These are expression-bodied properties, so every access calls ToTerm again.
public Terminal at => ToTerm("@");
public Terminal comma => ToTerm(",");
public Terminal colon => ToTerm(":");
public Terminal hash => ToTerm("#");
public Terminal semicolon => ToTerm(";");
public Terminal OpenParen => ToTerm("(");
public Terminal CloseParen => ToTerm(")");
public Terminal CloseSquareParen => ToTerm("]");
public Terminal OpenSquareParen => ToTerm("[");
public Terminal exclamationMark => ToTerm("!");
public Terminal CloseCurlyParen => ToTerm("}");
public Terminal OpenCurlyParen => ToTerm("{");
public Terminal QuoteS => ToTerm("'");
public Terminal mulop => ToTerm("*");
public Terminal plusop => ToTerm("+");
public Terminal divop => ToTerm("/");
public Terminal minop => ToTerm("-");
public Terminal concatop => ToTerm("&");
public Terminal expop => ToTerm("^");
// Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
// Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);
public Terminal percentop => ToTerm("%");
public Terminal gtop => ToTerm(">");
public Terminal eqop => ToTerm("=");
public Terminal ltop => ToTerm("<");
public Terminal neqop => ToTerm("<>");
public Terminal gteop => ToTerm(">=");
public Terminal lteop => ToTerm("<=");
#endregion
#region Literals
// Boolean literal: the text TRUE or FALSE.
public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE")
{
Priority = TerminalPriority.Bool
};
// Numeric literal; integers are tried as Int32, then Int64, then big integer.
public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None)
{
DefaultIntTypes = new[] {TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt}
};
// Double-quoted text: a doubled quote is allowed inside, line breaks are allowed, no escape sequences.
public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes);
// Single-quoted string with the same options as TextToken; only used by the DynamicDataExchange rule in this file.
public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes)
{ Priority = TerminalPriority.SingleQuotedString };
// Error literals other than #REF!, which has its own terminal because it can also act as a reference or prefix.
public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!");
public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError);
#endregion
#region Functions
private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers
private const string AllUdfChars = SpecialUdfChars + @"\\.\w";
// Optional add-in prefix of a UDF: a quoted "….xla" workbook followed by "!", or the literal "_xll."
private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)";
// The following regex uses the rather exotic feature Character Class Subtraction
// https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction
// Alternatives, each followed by "(": a single character that is not C/c/R/r, a prefixed single character,
// or an optionally prefixed name of 2 to 1023 characters.
private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\(";
public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) {Priority = TerminalPriority.UDF};
// Built-in functions that are matched together with their opening parenthesis.
public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(")
{ Priority = TerminalPriority.ExcelRefFunction };
public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(")
{ Priority = TerminalPriority.ExcelRefFunction };
// Alternation of every name returned by excelFunctionList, followed by "(".
public Terminal ExcelFunction { get; } = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + string.Join("|", excelFunctionList) + ")\\(")
{ Priority = TerminalPriority.ExcelFunction };
// Using this instead of Empty allows a more accurate tree
public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);
#endregion
#region References and names
// Column letters: one or two letters, or three letters up to and including XFD.
private const string ColumnPattern = @"(?:[A-Z]{1,2}|[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D])";
// Whole-column range such as A:B, each side optionally prefixed with "$".
public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern);
// Whole-row range such as 1:10, each side optionally prefixed with "$"; row numbers cannot start with 0.
public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*");
private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?[1-9][0-9]*";
public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex)
{ Priority = TerminalPriority.CellToken };
// Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark
// (the patterns below additionally accept a backslash, and the continuation class accepts the euro sign)
private const string NameStartCharRegex = @"[\p{L}\\_]";
private const string NameValidCharacterRegex = @"[\w\\_\.\?€]";
public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*")
{ Priority = TerminalPriority.Name };
// Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference.
// If we ever parse R1C1 references, make sure to include them here
// TODO: Add all function names here
private const string NameInvalidWordsRegex =
"((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
// \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
+ "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
;
// To prevent e.g. "A1A1" being parsed as 2 cell tokens
public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NameInvalidWordsRegex + NameValidCharacterRegex + "+")
{ Priority = TerminalPriority.NamedRangeCombination };
// Names starting with "_xlnm." followed by letters and underscores.
// Note: this and several terminals below are public fields rather than get-only properties.
public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+")
{ Priority = TerminalPriority.ReservedName };
#region Structured References
//public Terminal SRTableNameToken = new RegexBasedTerminal(GrammarNames.TokenSRTableName, @"[\w\\.]+\[")
//{Priority = 0};
public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, @"[\w\\.]+")
{ Priority = TerminalPriority.SRColumn };
//public Terminal SREnclosedColumnToken = new RegexBasedTerminal(GrammarNames.TokenSREnclosedColumn, @"\[( )*[\w+\\.,:#'""{}$^&*+=->]+( )*\]")
//{Priority = 0};
#endregion
#region Prefixes
// Character sets (written for use inside a regex character class) that drive the sheet-name patterns below.
private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
private const string notSheetNameChars = @"'*\[\]\\:/?";
//const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
//const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
// The static readonly strings below are initialised in textual order; later ones depend on earlier ones.
private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*";
//private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";
// Unquoted sheet name including the trailing "!".
public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!")
{ Priority = TerminalPriority.SheetToken };
// Quoted sheet name without its opening quote (the Prefix rule matches that separately via QuoteS), ending in "'!".
public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!")
{ Priority = TerminalPriority.SheetQuotedToken };
private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!";
private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!";
public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
// File reference by number, e.g. "[1]".
private const string fileNameNumericRegex = @"\[[0-9]+\]";
public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex)
{ Priority = TerminalPriority.FileNameNumericToken };
// Any non-empty text without square brackets, enclosed in square brackets.
private const string fileNameInBracketsRegex = @"\[[^\[\]]+\]";
public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex)
{ Priority = TerminalPriority.FileName };
// Source: https://stackoverflow.com/a/14632579
// Text without dots, a dot, then 1 to 4 arbitrary characters.
private const string fileNameRegex = @"[^\.]+\..{1,4}";
public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex)
{ Priority = TerminalPriority.FileName };
// Source: http://stackoverflow.com/a/6416209/572635
private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\.-]+\\[\w.$]+)\\(([^<>:""/\|?*\\]| )+\\)*";
// NOTE(review): "(0-9)" is a group matching the literal text "0-9", not the digit class "[0-9]";
// port digits are only picked up by the trailing character class - confirm whether that is intended.
private const string urlPathRegex = @"http(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:(0-9)*)*(\/?)([a-zA-Z0-9\-\.\?\,\'\/\\\+&%\$#_]*)?";
private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")";
public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex);
#endregion
#endregion
#endregion
#region 2-NonTerminals
// Most non-terminals are first defined here, so they can be used anywhere in the rules
// Otherwise you can only use non-terminals that have been defined previously
// Each non-terminal is created with its name from GrammarNames; the rules are assigned in the constructor.
public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument);
public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments);
public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns);
public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant);
public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula);
public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows);
public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool);
public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell);
public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant);
public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray);
public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument);
public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error);
public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula);
public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq);
public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall);
public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName);
public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange);
public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp);
public NonTerminal MultiRangeFormula{ get; } = new NonTerminal(GrammarNames.MultiRangeFormula);
public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange);
public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number);
public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix);
public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
// No rule is assigned to QuotedFileSheet in this class's constructor.
public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference);
public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError);
public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName);
public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName);
// No rule is assigned to Sheet in this class's constructor.
public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet);
public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart);
public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
public NonTerminal StructuredReferenceElement { get; } = new NonTerminal(GrammarNames.StructuredReferenceElement);
public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
public NonTerminal StructuredReferenceTable { get; } = new NonTerminal(GrammarNames.StructuredReferenceTable);
public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);
#endregion
/// <summary>
/// Builds the grammar: marks punctuation, sets the root, assigns the production rule of every
/// non-terminal that has one, and registers operator precedence and associativity.
/// </summary>
// NOTE(review): the "false" passed to the base constructor is presumably Irony's case-sensitivity flag - confirm.
public ExcelFormulaGrammar() : base(false)
{
#region Punctuation
MarkPunctuation(OpenParen, CloseParen);
MarkPunctuation(OpenSquareParen, CloseSquareParen);
MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
#endregion
#region Rules
#region Base rules
Root = Start;
// A formula may come with or without a leading "=", as an array formula in curly braces,
// or as "=" followed by a union of references.
Start.Rule =
FormulaWithEq
| Formula
| ArrayFormula
| MultiRangeFormula
;
MarkTransient(Start);
ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;
MultiRangeFormula.Rule = eqop + Union;
FormulaWithEq.Rule = eqop + Formula;
Formula.Rule =
Reference + ReduceHere()
| Constant
| FunctionCall
| ConstantArray
| OpenParen + Formula + CloseParen
| ReservedName
;
ReservedName.Rule = ReservedNameToken;
Constant.Rule =
Number
| Text
| Bool
| Error
;
// Thin wrappers that give each literal token its own non-terminal node.
Text.Rule = TextToken;
Number.Rule = NumberToken;
Bool.Rule = BoolToken;
Error.Rule = ErrorToken;
RefError.Rule = RefErrorToken;
#endregion
#region Functions
// Named function calls and operator applications share the FunctionCall non-terminal.
FunctionCall.Rule =
FunctionName + Arguments + CloseParen
| PrefixOp + Formula
| Formula + PostfixOp
| Formula + InfixOp + Formula
;
FunctionName.Rule = ExcelFunction;
// Zero or more comma-separated arguments; an argument may be empty.
Arguments.Rule = MakeStarRule(Arguments, comma, Argument);
EmptyArgument.Rule = EmptyArgumentToken;
Argument.Rule = Formula | EmptyArgument;
PrefixOp.Rule =
ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
MarkTransient(PrefixOp);
InfixOp.Rule =
expop
| mulop
| divop
| plusop
| minop
| concatop
| gtop
| eqop
| ltop
| neqop
| gteop
| lteop;
MarkTransient(InfixOp);
// ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
PostfixOp.Rule = PreferShiftHere() + percentop;
MarkTransient(PostfixOp);
#endregion
#region References
Reference.Rule =
ReferenceItem
| ReferenceFunctionCall
| OpenParen + Reference + PreferShiftHere() + CloseParen
| Prefix + ReferenceItem
| DynamicDataExchange
;
// Operations that produce a reference: range (":"), intersection, parenthesised union,
// reference-returning functions and the postfix "#".
ReferenceFunctionCall.Rule =
Reference + colon + Reference
| Reference + intersectop + Reference
| OpenParen + Union + CloseParen
| RefFunctionName + Arguments + CloseParen
| Reference + hash
;
RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;
// One or more comma-separated references.
Union.Rule = MakePlusRule(Union, comma, Reference);
ReferenceItem.Rule =
Cell
| NamedRange
| VRange
| HRange
| RefError
| UDFunctionCall
| StructuredReference
;
MarkTransient(ReferenceItem);
UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
UDFName.Rule = UDFToken;
VRange.Rule = VRangeToken;
HRange.Rule = HRangeToken;
Cell.Rule = CellToken;
// A file is a bracketed number, a bracketed name, or a path followed by a bracketed or plain file name.
File.Rule =
FileNameNumericToken
| FileNameEnclosedInBracketsToken
| FilePathToken + FileNameEnclosedInBracketsToken
| FilePathToken + FileName
;
DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;
NamedRange.Rule = NameToken | NamedRangeCombinationToken;
// Everything that may precede a reference item: sheet, file + sheet, multiple sheets,
// each in quoted and unquoted form, a bare file followed by "!", or #REF!.
Prefix.Rule =
SheetToken
| QuoteS + SheetQuotedToken
| File + SheetToken
| QuoteS + File + SheetQuotedToken
| File + exclamationMark
| MultipleSheetsToken
| QuoteS + MultipleSheetsQuotedToken
| File + MultipleSheetsToken
| QuoteS + File + MultipleSheetsQuotedToken
| RefErrorToken
;
StructuredReferenceElement.Rule =
OpenSquareParen + SRColumnToken + CloseSquareParen
| OpenSquareParen + NameToken + CloseSquareParen
| FileNameEnclosedInBracketsToken;
StructuredReferenceTable.Rule = NameToken;
// The supported combinations of one to four elements joined by "," and ":", optionally starting with "@".
StructuredReferenceExpression.Rule =
StructuredReferenceElement
| at + StructuredReferenceElement
| StructuredReferenceElement + colon + StructuredReferenceElement
| at + StructuredReferenceElement + colon + StructuredReferenceElement
| StructuredReferenceElement + comma + StructuredReferenceElement
| StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement
| StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement
| StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement
;
StructuredReference.Rule =
StructuredReferenceElement
| OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
| StructuredReferenceTable + StructuredReferenceElement
| StructuredReferenceTable + OpenSquareParen + CloseSquareParen
| StructuredReferenceTable + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
;
#endregion
#region Arrays
// Array constant: rows of comma-separated constants, the rows separated by semicolons.
// Note the naming: ArrayColumns is the semicolon-separated list, ArrayRows the comma-separated one.
ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;
ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);
ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
#endregion
#endregion
#region 5-Operator Precedence
// Some of these operators are neutral associative instead of left associative,
// but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
// structure of the parse tree, we like consistency.
RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at);
RegisterOperators(Precedence.Union, Associativity.Left, comma);
RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
RegisterOperators(Precedence.Range, Associativity.Left, colon);
#endregion
}
#region Precedence and Priority constants
// Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
// Could also be an enum, but this way you don't need int casts
// Operator precedence levels passed to RegisterOperators and ImplyPrecedenceHere; a higher number binds tighter.
private static class Precedence
{
// Don't use precedence 0, Irony seems to view it as no precedence set
public const int Comparison = 1;
public const int Concatenation = 2;
public const int Addition = 3;
public const int Multiplication = 4;
public const int Exponentiation = 5;
public const int UnaryPostFix = 6;
public const int UnaryPreFix = 7;
// Level 8 is left unused; it was reserved for the commented-out Reference entry.
//public const int Reference = 8;
public const int Union = 9;
public const int Intersection = 10;
public const int Range = 11;
}
// Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
// E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
// E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
// Values are grouped around Irony's Low (-1000), Normal (0) and High (1000) levels noted below.
private static class TerminalPriority
{
// Irony Low value
//public const int Low = -1000;
public const int SRColumn = -900;
public const int Name = -800;
public const int ReservedName = -700;
public const int FileName = -500;
public const int SingleQuotedString = -100;
// Irony Normal value, default value
//public const int Normal = 0;
public const int Bool = 0;
public const int MultipleSheetsToken = 100;
// Irony High value
//public const int High = 1000;
public const int CellToken = 1000;
public const int NamedRangeCombination = 1100;
public const int UDF = 1150;
// The remaining terminals all share the highest priority used in this grammar.
public const int ExcelFunction = 1200;
public const int ExcelRefFunction = 1200;
public const int FileNameNumericToken = 1200;
public const int SheetToken = 1200;
public const int SheetQuotedToken = 1200;
}
#endregion
/// <summary>
/// Names of the built-in Excel functions known to this grammar version.
/// The list is rebuilt from the embedded resource on every access.
/// </summary>
private static string[] excelFunctionList
{
    get { return GetExcelFunctionList(); }
}

/// <summary>
/// Reads the v151 built-in function list resource and splits it into one entry per non-empty line.
/// </summary>
/// <returns>The function names, in the order they appear in the resource.</returns>
private static string[] GetExcelFunctionList()
{
    string[] names;
    using (var reader = new StringReader(Properties.Resources.ExcelBuiltinFunctionList_v151))
    {
        // Splitting on both characters and dropping empty entries copes with "\r\n" as well as "\n" line ends.
        var lineSeparators = new[] { '\n', '\r' };
        names = reader.ReadToEnd().Split(lineSeparators, StringSplitOptions.RemoveEmptyEntries);
    }
    return names;
}
}
#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
#region Non-Terminals
// Note that a few constant identifiers differ from their string value (e.g. HorizontalRange is "HRange").
public const string Argument = "Argument";
public const string Arguments = "Arguments";
public const string ArrayColumns = "ArrayColumns";
public const string ArrayConstant = "ArrayConstant";
public const string ArrayFormula = "ArrayFormula";
public const string ArrayRows = "ArrayRows";
public const string Bool = "Bool";
public const string Cell = "Cell";
public const string Constant = "Constant";
public const string ConstantArray = "ConstantArray";
public const string DynamicDataExchange = "DynamicDataExchange";
public const string EmptyArgument = "EmptyArgument";
public const string Error = "Error";
public const string ExcelFunction = "ExcelFunction";
public const string File = "File";
public const string Formula = "Formula";
public const string FormulaWithEq = "FormulaWithEq";
public const string FunctionCall = "FunctionCall";
public const string FunctionName = "FunctionName";
public const string HorizontalRange = "HRange";
public const string MultiRangeFormula = "MultiRangeFormula";
public const string NamedRange = "NamedRange";
public const string Number = "Number";
public const string Prefix = "Prefix";
public const string QuotedFileSheet = "QuotedFileSheet";
public const string Range = "Range";
public const string Reference = "Reference";
public const string ReferenceFunctionCall = "ReferenceFunctionCall";
public const string RefError = "RefError";
public const string RefFunctionName = "RefFunctionName";
public const string ReservedName = "ReservedName";
public const string Sheet = "Sheet";
public const string StructuredReference = "StructuredReference";
public const string StructuredReferenceElement = "StructuredReferenceElement";
public const string StructuredReferenceExpression = "StructuredReferenceExpression";
public const string StructuredReferenceTable = "StructuredReferenceTable";
public const string Text = "Text";
public const string UDFName = "UDFName";
public const string UDFunctionCall = "UDFunctionCall";
public const string Union = "Union";
public const string VerticalRange = "VRange";
#endregion
#region Transient Non-Terminals
// Names of the non-terminals that the grammar marks as transient.
public const string TransientStart = "Start";
public const string TransientInfixOp = "InfixOp";
public const string TransientPostfixOp = "PostfixOp";
public const string TransientPrefixOp = "PrefixOp";
public const string TransientReferenceItem = "ReferenceItem";
#endregion
#region Terminals
public const string TokenBool = "BoolToken";
public const string TokenCell = "CellToken";
public const string TokenEmptyArgument = "EmptyArgumentToken";
public const string TokenError = "ErrorToken";
public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
public const string TokenFilePath = "FilePathToken";
public const string TokenFileName = "FileNameToken";
public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
public const string TokenFileNameNumeric = "FileNameNumericToken";
public const string TokenHRange = "HRangeToken";
// Name of the implied terminal that stands for the intersection operator (a space in the formula).
public const string TokenIntersect = "INTERSECT";
public const string TokenMultipleSheets = "MultipleSheetsToken";
public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
public const string TokenName = "NameToken";
public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
public const string TokenNumber = "NumberToken";
public const string TokenRefError = "RefErrorToken";
public const string TokenReservedName = "ReservedNameToken";
public const string TokenSingleQuotedString = "SingleQuotedString";
public const string TokenSheet = "SheetNameToken";
public const string TokenSheetQuoted = "SheetNameQuotedToken";
public const string TokenSRTableName = "SRTableName";
public const string TokenSRKeyword = "SRKeyword";
public const string TokenSRColumn = "SRColumn";
public const string TokenSREnclosedColumn = "SREnclosedColumn";
public const string TokenText = "TextToken";
public const string TokenUDF = "UDFToken";
// The union operator is identified by its literal text.
public const string TokenUnionOperator = ",";
public const string TokenVRange = "VRangeToken";
#endregion
}
#endregion
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v151/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v151
{
/// <summary>
/// Excel formula parser.
/// Contains the parser and utilities that operate directly on the parse tree, or make working with the parse tree easier.
/// </summary>
public static class ExcelFormulaParser
{
/// <summary>
/// Backing field for <see cref="P"/>; being thread-static, each thread has its own value.
/// </summary>
[ThreadStatic] private static Parser _p;

/// <summary>
/// Parser for the calling thread, created on first use from a new <see cref="ExcelFormulaGrammar"/>.
/// </summary>
private static Parser P
{
    get
    {
        if (_p == null)
        {
            _p = new Parser(new ExcelFormulaGrammar());
        }
        return _p;
    }
}
/// <summary>
/// Parse a formula and return the root node of the resulting tree.
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If the formula could not be parsed
/// </exception>
/// <returns>Parse tree root node</returns>
public static ParseTreeNode Parse(string input) => ParseToTree(input).Root;
/// <summary>
/// Parse a formula and return the whole parse tree.
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If the formula could not be parsed
/// </exception>
/// <returns>Parse tree, with the spans of intersect and quoted-sheet tokens adjusted</returns>
public static ParseTree ParseToTree(string input)
{
    ParseTree parsed = P.Parse(input);
    if (parsed.HasErrors())
    {
        throw new ArgumentException("Failed parsing input <<" + input + ">>");
    }

    // Re-point every intersect token at the single character just before its reported location.
    foreach (ParseTreeNode intersectNode in parsed.Root.AllNodes(GrammarNames.TokenIntersect))
    {
        SourceLocation reported = intersectNode.Span.Location;
        var shifted = new SourceLocation(reported.Position - 1, reported.Line, reported.Column - 1);
        intersectNode.Span = new SourceSpan(shifted, 1);
    }

    // Let PrefixInfo correct the span of every quoted sheet token against the original text.
    foreach (ParseTreeNode sheetNode in parsed.Root.AllNodes(GrammarNames.TokenSheetQuoted))
    {
        PrefixInfo.FixQuotedSheetNodeForWhitespace(sheetNode, input);
    }

    return parsed;
}
/// <summary>
/// All nodes of the tree in depth-first pre-order, with a conditional stop.
/// </summary>
/// <param name="root">The root node</param>
/// <param name="stopAt">
/// Don't process the children of a node matching this predicate; the matching node itself is still returned.
/// When null, the whole tree is traversed.
/// </param>
/// <returns>A lazily evaluated sequence starting with <paramref name="root"/>.</returns>
// inspiration taken from https://irony.codeplex.com/discussions/213938
public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
{
    var stack = new Stack<ParseTreeNode>();
    stack.Push(root);
    while (stack.Count > 0)
    {
        var node = stack.Pop();
        yield return node;

        // Check if we don't want to process the children of this node
        if (stopAt != null && stopAt(node)) continue;

        var children = node.ChildNodes;
        // Push children on in reverse order so that they will
        // be evaluated left -> right when popped.
        for (int i = children.Count - 1; i >= 0; i--)
        {
            stack.Push(children[i]);
        }
    }
}

/// <summary>
/// All nodes of the tree in depth-first pre-order
/// </summary>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
{
    return AllNodesConditional(root);
}

/// <summary>
/// All nodes of a certain type in depth-first pre-order
/// </summary>
/// <param name="root">The root node</param>
/// <param name="type">Node type/name to keep, see <see cref="GrammarNames"/></param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
{
    return AllNodes(root.AllNodes(), type);
}

/// <summary>
/// Filters an already enumerated node sequence down to the nodes of the given type.
/// </summary>
internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
{
    return allNodes.Where(node => node.Is(type));
}
/// <summary>
/// Get the parent node of a node
/// </summary>
/// <remarks>
/// This is an expensive operation, as the tree is searched from <paramref name="treeRoot"/> until the parent is found
/// </remarks>
/// <param name="child">Node whose parent is wanted</param>
/// <param name="treeRoot">Root of the tree that is searched</param>
/// <exception cref="ArgumentException">If no node in the tree has <paramref name="child"/> as a direct child</exception>
public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
{
    foreach (ParseTreeNode candidate in treeRoot.AllNodes())
    {
        if (candidate.ChildNodes.Any(c => c == child))
        {
            return candidate;
        }
    }

    throw new ArgumentException("Child is not part of the tree", nameof(child));
}
/// <summary>
/// The node type/name, i.e. the name of the grammar term the node was created for
/// </summary>
public static string Type(this ParseTreeNode node) => node.Term.Name;
/// <summary>
/// Check if a node is of a particular type (compare with the constants in <see cref="GrammarNames"/>)
/// </summary>
public static bool Is(this ParseTreeNode pt, string type) => pt.Type() == type;
/// <summary>
/// Checks whether this node is a function: a function call, a reference function call, a UDF call,
/// or a two-child reference whose second child is itself a function.
/// </summary>
public static bool IsFunction(this ParseTreeNode input)
{
    if (input.Is(GrammarNames.FunctionCall)
        || input.Is(GrammarNames.ReferenceFunctionCall)
        || input.Is(GrammarNames.UDFunctionCall))
    {
        return true;
    }

    // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
    return input.Is(GrammarNames.Reference)
           && input.ChildNodes.Count == 2
           && input.ChildNodes[1].IsFunction();
}
/// <summary>
/// Whether or not this node represents parentheses "(_)": a Formula whose only child is a Formula,
/// or a Reference whose only child is a Reference.
/// </summary>
public static bool IsParentheses(this ParseTreeNode input)
{
    var type = input.Type();
    if (type != GrammarNames.Formula && type != GrammarNames.Reference)
    {
        return false;
    }

    // The single child has to be of the same type as the node itself.
    return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(type);
}
/// <summary>
/// Whether this node is a function with exactly three children of which the middle one is an operator.
/// </summary>
public static bool IsBinaryOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 3)
    {
        return false;
    }

    return input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Whether this node is a binary operation on a <see cref="GrammarNames.FunctionCall"/> node.
/// </summary>
public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
    => input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);
/// <summary>
/// Whether this node is a binary operation on a <see cref="GrammarNames.ReferenceFunctionCall"/> node.
/// </summary>
public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
    => input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
/// <summary>
/// Whether this node is a unary operation, prefix or postfix.
/// </summary>
public static bool IsUnaryOperation(this ParseTreeNode input)
    => input.IsUnaryPrefixOperation() || input.IsUnaryPostfixOperation();
/// <summary>
/// Whether this node is a function with two children whose first child is an operator term
/// </summary>
public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }

    var leading = input.ChildNodes[0];
    return leading.Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Whether this node is a function with two children whose second child is an operator term
/// </summary>
public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }

    var trailing = input.ChildNodes[1];
    return trailing.Term.Flags.HasFlag(TermFlags.IsOperator);
}
// Returns the input without its last character
private static string RemoveFinalSymbol(string input) => input.Substring(0, input.Length - 1);
/// <summary>
/// Get the function or operator name of this function call
/// </summary>
/// <remarks>
/// Intersections and unions return their grammar token names, other operators return the printed operator,
/// named functions return the upper-cased name without the trailing symbol, and external UDFs return the
/// printed prefix followed by the function name.
/// </remarks>
/// <exception cref="ArgumentException">The node is not a function call</exception>
public static string GetFunction(this ParseTreeNode input)
{
    if (input.IsIntersection())
    {
        return GrammarNames.TokenIntersect;
    }
    if (input.IsUnion())
    {
        return GrammarNames.TokenUnionOperator;
    }
    if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
    {
        // In both shapes the operator is the second child
        return input.ChildNodes[1].Print();
    }
    if (input.IsUnaryPrefixOperation())
    {
        return input.ChildNodes[0].Print();
    }
    if (input.IsNamedFunction())
    {
        // Upper-case with the invariant culture: a culture-sensitive ToUpper() maps "i" to "İ" under
        // Turkish cultures, so "if(" would become "İF" and no longer match "IF".
        return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
    }
    if (input.IsExternalUDFunction())
    {
        // Prefix (first child) followed by the name of the wrapped function call (second child)
        return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
    }
    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Check if this node is a specific function
/// </summary>
/// <param name="input">The node to inspect</param>
/// <param name="functionName">The name to compare against the result of GetFunction</param>
public static bool MatchFunction(this ParseTreeNode input, string functionName)
    => input.IsFunction() && input.GetFunction() == functionName;
/// <summary>
/// Get all the arguments of a function or operation
/// </summary>
/// <returns>
/// The argument nodes: the first child of every Argument for named functions and external UDFs,
/// the operand(s) for operators, and the children of the Union node for unions.
/// </returns>
/// <exception cref="ArgumentException">The node is not a function call</exception>
public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
{
    if (input.IsNamedFunction())
    {
        return input
            .ChildNodes[1] // "Arguments" non-terminal
            .ChildNodes    // "Argument" non-terminals
            .Select(node => node.ChildNodes[0])
            ;
    }
    if (input.IsBinaryOperation())
    {
        // Operands sit on either side of the operator child
        return new[] {input.ChildNodes[0], input.ChildNodes[2]};
    }
    if (input.IsUnaryPrefixOperation())
    {
        return new[] {input.ChildNodes[1]};
    }
    if (input.IsUnaryPostfixOperation())
    {
        return new[] {input.ChildNodes[0]};
    }
    if (input.IsUnion())
    {
        return input.ChildNodes[0].ChildNodes;
    }
    if (input.IsExternalUDFunction())
    {
        return input       // Reference
            .ChildNodes[1] // UDFunctionCall
            .ChildNodes[1] // Arguments
            .ChildNodes    // Argument non-terminals
            .Select(node => node.ChildNodes[0])
            ;
    }
    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Checks whether this node is a built-in excel function
/// </summary>
/// <remarks>
/// True for function nodes whose first child is a FunctionName or RefFunctionName.
/// </remarks>
public static bool IsBuiltinFunction(this ParseTreeNode node)
{
    if (!node.IsFunction())
    {
        return false;
    }

    var head = node.ChildNodes[0];
    return head.Is(GrammarNames.FunctionName) || head.Is(GrammarNames.RefFunctionName);
}
/// <summary>
/// Whether or not this node represents an intersection
/// </summary>
public static bool IsIntersection(this ParseTreeNode input)
{
    if (!input.IsBinaryOperation())
    {
        return false;
    }

    // The operator is the middle child; an intersection uses the dedicated intersect terminal
    var operatorToken = input.ChildNodes[1].Token;
    return operatorToken.Terminal.Name == GrammarNames.TokenIntersect;
}
/// <summary>
/// Whether or not this node represents a union
/// </summary>
public static bool IsUnion(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.ReferenceFunctionCall))
    {
        return false;
    }

    // A union is a ReferenceFunctionCall wrapping a single Union node
    return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Union);
}
/// <summary>
/// Checks whether this node is a function call with name, and not just a unary or binary operation
/// </summary>
public static bool IsNamedFunction(this ParseTreeNode input)
{
    if (input.Is(GrammarNames.FunctionCall) && input.ChildNodes[0].Is(GrammarNames.FunctionName))
    {
        return true;
    }
    if (input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes[0].Is(GrammarNames.RefFunctionName))
    {
        return true;
    }

    // User-defined function calls are always treated as named
    return input.Is(GrammarNames.UDFunctionCall);
}
/// <summary>
/// Whether this node is a binary or unary operation
/// </summary>
public static bool IsOperation(this ParseTreeNode input)
    => input.IsBinaryOperation() || input.IsUnaryOperation();
/// <summary>
/// Whether this node is a two-child Reference whose second child is a named function
/// </summary>
public static bool IsExternalUDFunction(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.Reference) || input.ChildNodes.Count != 2)
    {
        return false;
    }

    return input.ChildNodes[1].IsNamedFunction();
}
/// <summary>
/// True if this node presents a number constant with a sign
/// </summary>
public static bool IsNumberWithSign(this ParseTreeNode input)
{
    if (!input.IsUnaryPrefixOperation())
    {
        return false;
    }

    // First child of the operand: must be a Constant that wraps a Number
    var operand = input.ChildNodes[1].ChildNodes[0];
    return operand.Is(GrammarNames.Constant) && operand.ChildNodes[0].Is(GrammarNames.Number);
}
/// <summary>
/// Extract all of the information from a Prefix non-terminal
/// </summary>
public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix)
{
    return PrefixInfo.From(prefix);
}
/// <summary>
/// Go to the first non-formula child node
/// </summary>
public static ParseTreeNode SkipFormula(this ParseTreeNode input)
{
    var current = input;

    // Keep descending into the first child for as long as we are looking at a Formula node
    while (current.Is(GrammarNames.Formula))
    {
        current = current.ChildNodes.First();
    }

    return current;
}
/// <summary>
/// Get all child nodes that are references and aren't part of another reference expression
/// </summary>
/// <returns>The matching Reference nodes, each passed through SkipToRelevant</returns>
public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
{
    return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
        // Keep only the Reference nodes out of everything the traversal yielded
        .Where(node => node.Is(GrammarNames.Reference))
        // Strip wrapper nodes so callers get the node that carries the actual reference
        .Select(node => node.SkipToRelevant())
        ;
}
/// <summary>
/// Gets the ParserReferences from the input parse tree node and its children
/// </summary>
/// <remarks>
/// 5 cases:
/// 1. ReferenceItem node: convert to ParserReference
/// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
///    (to include the references in the arguments of external UDFs)
/// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
/// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
/// 5. Other cases (RefFunctionCall, Union, Arguments):recursive call on the nodes returned from GetReferenceNodes(node)
/// </remarks>
public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
{
    // A Reference with a single child is only a wrapper; look at what it wraps
    if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
    {
        node = node.ChildNodes[0];
    }

    var list = new List<ParserReference>();
    switch (node.Type())
    {
        case GrammarNames.Cell:
        case GrammarNames.NamedRange:
        case GrammarNames.HorizontalRange:
        case GrammarNames.VerticalRange:
        case GrammarNames.StructuredReference:
            list.Add(new ParserReference(node));
            break;
        case GrammarNames.Reference:
            // The reference itself, plus any references nested inside its second child
            list.Add(new ParserReference(node));
            list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            break;
        default:
            if (node.IsRange())
            {
                var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray();
                var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray();
                if (IsCellReference(rangeStart) && IsCellReference(rangeEnd))
                {
                    // Cell:Cell - turn the start reference into a single CellRange reference
                    ParserReference range = rangeStart.First();
                    range.MaxLocation = rangeEnd.First().MinLocation;
                    range.ReferenceType = ReferenceType.CellRange;
                    range.LocationString = node.Print();
                    list.Add(range);
                }
                else
                {
                    // Complex limits: report the references of both limits separately
                    list.AddRange(rangeStart);
                    list.AddRange(rangeEnd);
                }
            }
            else
            {
                list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            }
            break;
    }
    return list;
}
// True when the list holds exactly one reference and that reference is of type Cell
private static bool IsCellReference(IList<ParserReference> references)
{
    return references.Count == 1 && references[0].ReferenceType == ReferenceType.Cell;
}
/// <summary>
/// Whether or not this node represents a range
/// </summary>
public static bool IsRange(this ParseTreeNode input)
{
    if (!input.IsBinaryReferenceOperation())
    {
        return false;
    }

    // A range is a binary reference operation whose operator child is the colon
    return input.ChildNodes[1].Is(":");
}
/// <summary>
/// Go to the first "relevant" child node, i.e. skips wrapper nodes
/// </summary>
/// <param name="input">The input parse tree node</param>
/// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
/// <remarks>
/// Skips:
/// * FormulaWithEq and ArrayFormula nodes
/// * Formula nodes
/// * Parentheses
/// * Reference nodes which are just wrappers
/// </remarks>
public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
{
    var current = input;
    while (true)
    {
        var type = current.Type();

        if (type == GrammarNames.FormulaWithEq || type == GrammarNames.ArrayFormula)
        {
            // For these wrappers the content is taken from the second child
            current = current.ChildNodes[1];
            continue;
        }

        bool descend;
        if (type == GrammarNames.Argument || type == GrammarNames.Formula)
        {
            // Only pure wrappers (a single child) are skipped
            descend = current.ChildNodes.Count == 1;
        }
        else if (type == GrammarNames.Reference)
        {
            // Skip references which are parentheses
            // Skip references without a prefix (=> they only have one child node) if the option is set
            descend = (skipReferencesWithoutPrefix && current.ChildNodes.Count == 1) || current.IsParentheses();
        }
        else
        {
            descend = false;
        }

        if (!descend)
        {
            return current;
        }
        current = current.ChildNodes[0];
    }
}
/// <summary>
/// Pretty-print a parse tree to a string
/// </summary>
/// <exception cref="ArgumentException">
/// The node is a function call of an unknown shape, or a non-terminal this method has no rule for
/// and which does not have exactly one child
/// </exception>
public static string Print(this ParseTreeNode input)
{
    // Terminals print as their token text
    if (input.Term is Terminal)
    {
        return input.Token.Text;
    }

    // Printed children, evaluated lazily; each branch only enumerates what it needs
    var printed = input.ChildNodes.Select(Print);

    switch (input.Term.Name)
    {
        case GrammarNames.Formula:
        {
            // Parenthesised formulas get their brackets back, otherwise print the first child
            if (IsParentheses(input))
            {
                return $"({printed.First()})";
            }
            return printed.First();
        }

        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
        {
            var parts = printed.ToList();

            if (input.IsNamedFunction())
            {
                // The closing parenthesis is not among the printed children, so add it back
                return string.Concat(parts) + ")";
            }
            if (input.IsBinaryOperation())
            {
                // An intersection is printed as a single space between the operands
                return input.IsIntersection()
                    ? $"{parts[0]} {parts[2]}"
                    : $"{parts[0]}{parts[1]}{parts[2]}";
            }
            if (input.IsUnion())
            {
                return "(" + string.Join(",", parts) + ")";
            }
            if (input.IsUnaryOperation())
            {
                return string.Concat(parts);
            }
            throw new ArgumentException("Unknown function type.");
        }

        case GrammarNames.Reference:
        {
            if (IsParentheses(input))
            {
                return $"({printed.First()})";
            }
            return string.Concat(printed);
        }

        case GrammarNames.Prefix:
        {
            var prefixText = string.Concat(printed);
            // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
            var isSingleFile = input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File);
            return isSingleFile ? prefixText + "!" : prefixText;
        }

        case GrammarNames.ArrayFormula:
            return "{=" + printed.ElementAt(1) + "}";

        case GrammarNames.StructuredReference:
        {
            var parts = printed.ToList();
            var hasTable = input.ChildNodes.Count >= 1 && input.ChildNodes[0].Is(GrammarNames.StructuredReferenceTable);
            var tableText = hasTable ? parts[0] : "";

            if (hasTable && input.ChildNodes.Count == 1)
            {
                // Full table reference
                return tableText + "[]";
            }

            // The contents follow the table name when there is one, otherwise they come first
            var contentsIndex = hasTable ? 1 : 0;
            if (input.ChildNodes[contentsIndex].Is(GrammarNames.StructuredReferenceElement))
            {
                return tableText + parts[contentsIndex];
            }
            return tableText + "[" + parts[contentsIndex] + "]";
        }

        // Terms for which to print all child nodes concatenated
        case GrammarNames.ArrayConstant:
        case GrammarNames.DynamicDataExchange:
        case GrammarNames.FormulaWithEq:
        case GrammarNames.File:
        case GrammarNames.MultiRangeFormula:
        case GrammarNames.StructuredReferenceExpression:
            return string.Concat(printed);

        // Terms for which we print the children comma-separated
        case GrammarNames.Arguments:
        case GrammarNames.ArrayRows:
        case GrammarNames.Union:
            return string.Join(",", printed);

        case GrammarNames.ArrayColumns:
            return string.Join(";", printed);

        case GrammarNames.ConstantArray:
            return "{" + printed.First() + "}";

        default:
        {
            // If it is not defined above and the number of children is exactly one, we want to just print the first child
            if (input.ChildNodes.Count == 1)
            {
                return printed.First();
            }
            throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine +
                                        "This probably means the Excel grammar was modified without the print function being modified");
        }
    }
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v151/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v151
{
/// <summary>
/// The kinds of reference a ParserReference can describe
/// </summary>
public enum ReferenceType
{
    /// <summary>A single cell</summary>
    Cell = 0,
    /// <summary>A range between two cells</summary>
    CellRange = 1,
    /// <summary>A named range</summary>
    UserDefinedName = 2,
    /// <summary>A horizontal range</summary>
    HorizontalRange = 3,
    /// <summary>A vertical range</summary>
    VerticalRange = 4,
    /// <summary>A reference error</summary>
    RefError = 5,
    /// <summary>A structured (table) reference</summary>
    Table = 6
}
/// <summary>
/// Describes one reference found in a formula: its kind, its printed text, and the
/// file/sheet/name/location parts that could be extracted from the parse tree.
/// </summary>
public class ParserReference
{
    // NOTE(review): presumably the worksheet row and column limits - confirm
    public const int MaxRangeHeight = 1048576;
    public const int MaxRangeWidth = 16384;

    public ReferenceType ReferenceType { get; set; }
    public string LocationString { get; set; }
    public string Worksheet { get; set; }
    public string LastWorksheet { get; set; }
    public string FilePath { get; set; }
    public string FileName { get; set; }
    public string Name { get; private set; }
    public string MinLocation { get; set; } //Location as appearing in the formula, eg $A$1
    public string MaxLocation { get; set; }

    /// <summary>
    /// Creates a reference from explicit values; <paramref name="maxLocation"/> falls back to
    /// <paramref name="minLocation"/> when it is null
    /// </summary>
    public ParserReference(ReferenceType referenceType, string locationString = null, string worksheet = null, string lastWorksheet = null,
        string filePath = null, string fileName = null, string name = null, string minLocation = null, string maxLocation = null)
    {
        ReferenceType = referenceType;
        LocationString = locationString;
        Worksheet = worksheet;
        LastWorksheet = lastWorksheet;
        FilePath = filePath;
        FileName = fileName;
        Name = name;
        MinLocation = minLocation;
        MaxLocation = maxLocation ?? minLocation;
    }

    /// <summary>
    /// Creates a reference from a parse tree node, see <see cref="InitializeReference"/>
    /// </summary>
    public ParserReference(ParseTreeNode node)
    {
        InitializeReference(node);
    }

    /// <summary>
    /// Initializes the current object based on the input ParseTreeNode
    /// </summary>
    /// <remarks>
    /// For Reference nodes (Prefix ReferenceItem), it initialize the values derived from the Prefix node and
    /// is re-invoked for the ReferenceItem node.
    /// </remarks>
    public void InitializeReference(ParseTreeNode node)
    {
        switch (node.Type())
        {
            case GrammarNames.Reference:
                ApplyPrefix(node.ChildNodes[0].GetPrefixInfo());
                InitializeReference(node.ChildNodes[1]);
                break;
            case GrammarNames.Cell:
                ReferenceType = ReferenceType.Cell;
                var cellLocation = node.ChildNodes[0].Token.ValueString;
                MinLocation = cellLocation;
                MaxLocation = cellLocation;
                break;
            case GrammarNames.NamedRange:
                ReferenceType = ReferenceType.UserDefinedName;
                Name = node.ChildNodes[0].Token.ValueString;
                break;
            case GrammarNames.StructuredReference:
                ReferenceType = ReferenceType.Table;
                // The table name is optional; Name stays null when there is no table child
                var tableNode = node.ChildNodes.FirstOrDefault(x => x.Is(GrammarNames.StructuredReferenceTable));
                Name = tableNode?.ChildNodes[0].Token.ValueString;
                break;
            case GrammarNames.HorizontalRange:
                ApplyRangeLimits(node, ReferenceType.HorizontalRange);
                break;
            case GrammarNames.VerticalRange:
                ApplyRangeLimits(node, ReferenceType.VerticalRange);
                break;
            case GrammarNames.RefError:
                ReferenceType = ReferenceType.RefError;
                break;
        }

        // Set last so that, for Reference nodes, the full text replaces the one set by the nested call
        LocationString = node.Print();
    }

    // Copies worksheet and file information from a prefix onto this reference
    private void ApplyPrefix(PrefixInfo prefix)
    {
        if (prefix.HasSheet)
        {
            // Doubled single quotes are collapsed to one
            Worksheet = prefix.Sheet.Replace("''", "'");
        }
        else
        {
            Worksheet = "(Undefined sheet)";
        }

        if (prefix.HasMultipleSheets)
        {
            string[] sheets = prefix.MultipleSheets.Split(':');
            Worksheet = sheets[0];
            LastWorksheet = sheets[1];
        }

        if (prefix.HasFilePath)
        {
            FilePath = prefix.FilePath;
        }

        // A numeric file reference takes precedence over a file name
        if (prefix.HasFileNumber)
        {
            FileName = prefix.FileNumber.ToString();
        }
        else if (prefix.HasFileName)
        {
            FileName = prefix.FileName;
        }
    }

    // Splits a "min:max" range token on the colon and stores both limits under the given type
    private void ApplyRangeLimits(ParseTreeNode rangeNode, ReferenceType rangeType)
    {
        string[] limits = rangeNode.ChildNodes[0].Token.ValueString.Split(':');
        ReferenceType = rangeType;
        MinLocation = limits[0];
        MaxLocation = limits[1];
    }

    /// <summary>
    /// Converts the column number to an Excel column string representation.
    /// </summary>
    /// <param name="columnNumber">The zero-based column number.</param>
    private string ConvertColumnToStr(int columnNumber)
    {
        var letters = new System.Text.StringBuilder();
        // Builds the letters right to left, one base-26 digit per iteration
        for (var remaining = columnNumber; remaining >= 0; remaining = remaining / 26 - 1)
        {
            letters.Insert(0, (char)('A' + remaining % 26));
        }
        return letters.ToString();
    }

    /// <summary>
    /// The location for cells, "min:max" for every other reference type
    /// </summary>
    public override string ToString()
    {
        if (ReferenceType == ReferenceType.Cell)
        {
            return MinLocation.ToString();
        }
        return $"{MinLocation}:{MaxLocation}";
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v151/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v151
{
// TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23
/// <summary>
/// Simple data class that holds information about a Prefix.
/// </summary>
/// <remarks>
/// Equality and hashing ignore case and do not take <see cref="IsQuoted"/> into account.
/// </remarks>
public class PrefixInfo : IEquatable<PrefixInfo>
{
    public string FilePath { get; }
    public bool HasFilePath => FilePath != null;

    private readonly int? _fileNumber;
    // 0 when no file number is present; check HasFileNumber first
    public int FileNumber => _fileNumber.GetValueOrDefault();
    public bool HasFileNumber => _fileNumber.HasValue;

    public string FileName { get; }
    public bool HasFileName => FileName != null;

    public bool HasFile => HasFileName || HasFileNumber;

    public string Sheet { get; }
    public bool HasSheet => Sheet != null;

    public string MultipleSheets { get; }
    public bool HasMultipleSheets => MultipleSheets != null;

    public bool IsQuoted { get; }

    public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
    {
        Sheet = sheet;
        _fileNumber = fileNumber;
        FileName = fileName;
        FilePath = filePath;
        MultipleSheets = multipleSheets;
        IsQuoted = isQuoted;
    }

    /// <summary>
    /// Create a PrefixInfo class from a parse tree node
    /// </summary>
    /// <exception cref="ArgumentException">The node is not a Prefix node</exception>
    internal static PrefixInfo From(ParseTreeNode prefix)
    {
        if (prefix.Type() != GrammarNames.Prefix)
        {
            throw new ArgumentException("Not a prefix", nameof(prefix));
        }

        string filePath = null;
        int? fileNumber = null;
        string fileName = null;
        string sheetName = null;
        string multipleSheets = null;

        // Token number we're processing
        var cur = 0;

        // Check for quotes
        var isQuoted = prefix.ChildNodes[cur].Is("'");
        if (isQuoted)
        {
            cur++;
        }

        // Check and process file
        if (prefix.ChildNodes[cur].Is(GrammarNames.File))
        {
            ParseTreeNode file = prefix.ChildNodes[cur];

            if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
            {
                // Numeric filename: strip the first and last character, leave the number unset if it does not parse
                fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
            }
            else
            {
                // String filename
                var iCur = 0;
                // Check if it includes a path
                if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                {
                    filePath = file.ChildNodes[iCur].Print();
                    iCur++;
                }

                if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                {
                    // Strip the first and last character (the enclosing brackets)
                    fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                }
                else
                {
                    fileName = file.ChildNodes[iCur].Print();
                }
            }

            cur++;
        }

        // A prefix may consist of a File node only; in that case there is no further child to
        // inspect and indexing past the end would throw, so the sheet fields simply stay unset.
        if (cur < prefix.ChildNodes.Count)
        {
            ParseTreeNode sheetNode = prefix.ChildNodes[cur];

            // Check for a non-quoted sheet
            if (sheetNode.Is(GrammarNames.TokenSheet))
            {
                // remove !
                sheetName = Substr(sheetNode.Print(), 1);
            }
            // Check for a quoted sheet
            else if (sheetNode.Is(GrammarNames.TokenSheetQuoted))
            {
                // remove quote and !
                sheetName = Substr(sheetNode.Print(), 2);

                if (sheetName == "")
                {
                    // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                    // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                    sheetName = " ";
                }
            }
            // Check if multiple sheets
            else if (sheetNode.Is(GrammarNames.TokenMultipleSheets))
            {
                // remove !
                multipleSheets = Substr(sheetNode.Print(), 1);
            }
        }

        return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
    }

    /// <summary>
    /// Extends a quoted sheet node backwards over any whitespace that directly precedes it in the
    /// source text, updating both the node's span and its token text.
    /// </summary>
    internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
    {
        var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
        SourceLocation currentLocation = quotedSheetNode.Span.Location;
        if (newPosition == currentLocation.Position)
        {
            // No preceding whitespace, nothing to fix
            return;
        }

        // NOTE(review): the column grows by the distance the position moved back; confirm this is intended
        var newLocation = new SourceLocation(newPosition, currentLocation.Line, currentLocation.Column + currentLocation.Position - newPosition);
        quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

        // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
        typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
            ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
    }

    // Walks backwards from the node's start position for as long as the preceding character is whitespace
    private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
    {
        var startIndex = nodeSheetQuoted.Span.Location.Position;
        while (startIndex > 0)
        {
            if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
            {
                break;
            }

            startIndex--;
        }

        return startIndex;
    }

    // Returns s without its first removeFirst and last removeLast characters
    private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
    {
        return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
    }

    public override bool Equals(object other) => Equals(other as PrefixInfo);

    public bool Equals(PrefixInfo other)
    {
        if (ReferenceEquals(null, other)) return false;
        if (ReferenceEquals(this, other)) return true;
        return _fileNumber == other._fileNumber
               && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase)
               && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase)
               && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase)
               && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
    }

    public override int GetHashCode()
    {
        unchecked
        {
            // Case-insensitive hashes, matching the case-insensitive comparison in Equals
            var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
            hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
            hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
            hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
            hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
            return hashCode;
        }
    }

    public static bool operator ==(PrefixInfo left, PrefixInfo right)
    {
        return Equals(left, right);
    }

    public static bool operator !=(PrefixInfo left, PrefixInfo right)
    {
        return !Equals(left, right);
    }

    /// <summary>
    /// Rebuilds the prefix text from the stored parts, always ending in an exclamation mark
    /// </summary>
    public override string ToString()
    {
        var res = new StringBuilder();
        if (IsQuoted) res.Append("'");
        if (HasFilePath) res.Append(FilePath);
        if (HasFileNumber) res.Append($"[{FileNumber}]");
        if (HasFileName) res.Append($"[{FileName}]");
        if (HasSheet) res.Append(Sheet);
        if (HasMultipleSheets) res.Append(MultipleSheets);
        if (IsQuoted) res.Append("'");
        res.Append("!");
        return res.ToString();
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v152/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.IO;
using System.Reflection;
namespace XLParser.Web.XLParserVersions.v152
{
///
/// Contains the XLParser grammar
///
[Language("Excel Formulas", "1.5.2", "Grammar for Excel Formulas")]
public class ExcelFormulaGrammar : Grammar
{
#region 1-Terminals
#region Symbols and operators
public Terminal at => ToTerm("@");
public Terminal comma => ToTerm(",");
public Terminal colon => ToTerm(":");
public Terminal hash => ToTerm("#");
public Terminal semicolon => ToTerm(";");
public Terminal OpenParen => ToTerm("(");
public Terminal CloseParen => ToTerm(")");
public Terminal CloseSquareParen => ToTerm("]");
public Terminal OpenSquareParen => ToTerm("[");
public Terminal exclamationMark => ToTerm("!");
public Terminal CloseCurlyParen => ToTerm("}");
public Terminal OpenCurlyParen => ToTerm("{");
public Terminal QuoteS => ToTerm("'");
public Terminal mulop => ToTerm("*");
public Terminal plusop => ToTerm("+");
public Terminal divop => ToTerm("/");
public Terminal minop => ToTerm("-");
public Terminal concatop => ToTerm("&");
public Terminal expop => ToTerm("^");
// Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
// Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);
public Terminal percentop => ToTerm("%");
public Terminal gtop => ToTerm(">");
public Terminal eqop => ToTerm("=");
public Terminal ltop => ToTerm("<");
public Terminal neqop => ToTerm("<>");
public Terminal gteop => ToTerm(">=");
public Terminal lteop => ToTerm("<=");
#endregion
#region Literals
public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE")
{
Priority = TerminalPriority.Bool
};
public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None)
{
DefaultIntTypes = new[] {TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt}
};
public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes);
public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes)
{ Priority = TerminalPriority.SingleQuotedString };
public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!");
public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError);
#endregion
#region Functions
private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers
private const string AllUdfChars = SpecialUdfChars + @"\\.\w";
private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)";
// The following regex uses the rather exotic feature Character Class Subtraction
// https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction
private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\(";
public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) {Priority = TerminalPriority.UDF};
public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(")
{ Priority = TerminalPriority.ExcelRefFunction };
public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(")
{ Priority = TerminalPriority.ExcelRefFunction };
public Terminal ExcelFunction { get; } = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + string.Join("|", excelFunctionList) + ")\\(")
{ Priority = TerminalPriority.ExcelFunction };
// Using this instead of Empty allows a more accurate tree
public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);
#endregion
#region References and names
private const string ColumnPattern = @"(?:[A-Z]{1,2}|[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D])";
public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern);
public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*");
private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?[1-9][0-9]*";
public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex)
{ Priority = TerminalPriority.CellToken };
// Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark
private const string NameStartCharRegex = @"[\p{L}\\_]";
private const string NameValidCharacterRegex = @"[\w\\_\.\?€]";
public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*")
{ Priority = TerminalPriority.Name };
// Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference.
// If we ever parse R1C1 references, make sure to include them here
// TODO: Add all function names here
private const string NameInvalidWordsRegex =
"((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
// \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
+ "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
;
// To prevent e.g. "A1A1" being parsed as 2 cell tokens
public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NameInvalidWordsRegex + NameValidCharacterRegex + "+")
{ Priority = TerminalPriority.NamedRangeCombination };
public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+")
{ Priority = TerminalPriority.ReservedName };
#region Structured References
//public Terminal SRTableNameToken = new RegexBasedTerminal(GrammarNames.TokenSRTableName, @"[\w\\.]+\[")
//{Priority = 0};
public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, @"[\w\\.]+")
{ Priority = TerminalPriority.SRColumn };
//public Terminal SREnclosedColumnToken = new RegexBasedTerminal(GrammarNames.TokenSREnclosedColumn, @"\[( )*[\w+\\.,:#'""{}$^&*+=->]+( )*\]")
//{Priority = 0};
#endregion
#region Prefixes
private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
private const string notSheetNameChars = @"'*\[\]\\:/?";
//const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
//const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*";
//private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";
public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!")
{ Priority = TerminalPriority.SheetToken };
public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!")
{ Priority = TerminalPriority.SheetQuotedToken };
private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!";
private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!";
public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
private const string fileNameNumericRegex = @"\[[0-9]+\]";
public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex)
{ Priority = TerminalPriority.FileNameNumericToken };
private const string fileNameInBracketsRegex = @"\[[^\[\]]+\]";
public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex)
{ Priority = TerminalPriority.FileName };
// Source: https://stackoverflow.com/a/14632579
private const string fileNameRegex = @"[^\.\\]+\..{1,4}";
public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex)
{ Priority = TerminalPriority.FileName };
// Source: http://stackoverflow.com/a/6416209/572635
private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\-.$ ]+)\\(([^<>:\""/\|?*\\]| )+\\)*";
private const string urlPathRegex = @"http(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:(0-9)*)*[/]([a-zA-Z0-9\-\.\?\,\'+&%\$#_ ()]*[/])*";
private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")";
public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex)
{ Priority = TerminalPriority.FileNamePath };
#endregion
#endregion
#endregion
#region 2-NonTerminals
// Every non-terminal is declared up front so that the rules in the constructor can refer to
// any of them, regardless of the order in which the rules themselves are written.
// (Without this, a rule could only use non-terminals that were defined before it.)
public NonTerminal Argument { get; } = new NonTerminal(GrammarNames.Argument);
public NonTerminal Arguments { get; } = new NonTerminal(GrammarNames.Arguments);
public NonTerminal ArrayColumns { get; } = new NonTerminal(GrammarNames.ArrayColumns);
public NonTerminal ArrayConstant { get; } = new NonTerminal(GrammarNames.ArrayConstant);
public NonTerminal ArrayFormula { get; } = new NonTerminal(GrammarNames.ArrayFormula);
public NonTerminal ArrayRows { get; } = new NonTerminal(GrammarNames.ArrayRows);
public NonTerminal Bool { get; } = new NonTerminal(GrammarNames.Bool);
public NonTerminal Cell { get; } = new NonTerminal(GrammarNames.Cell);
public NonTerminal Constant { get; } = new NonTerminal(GrammarNames.Constant);
public NonTerminal ConstantArray { get; } = new NonTerminal(GrammarNames.ConstantArray);
public NonTerminal DynamicDataExchange { get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
public NonTerminal EmptyArgument { get; } = new NonTerminal(GrammarNames.EmptyArgument);
public NonTerminal Error { get; } = new NonTerminal(GrammarNames.Error);
public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
public NonTerminal Formula { get; } = new NonTerminal(GrammarNames.Formula);
public NonTerminal FormulaWithEq { get; } = new NonTerminal(GrammarNames.FormulaWithEq);
public NonTerminal FunctionCall { get; } = new NonTerminal(GrammarNames.FunctionCall);
public NonTerminal FunctionName { get; } = new NonTerminal(GrammarNames.FunctionName);
public NonTerminal HRange { get; } = new NonTerminal(GrammarNames.HorizontalRange);
public NonTerminal InfixOp { get; } = new NonTerminal(GrammarNames.TransientInfixOp);
public NonTerminal MultiRangeFormula { get; } = new NonTerminal(GrammarNames.MultiRangeFormula);
public NonTerminal NamedRange { get; } = new NonTerminal(GrammarNames.NamedRange);
public NonTerminal Number { get; } = new NonTerminal(GrammarNames.Number);
public NonTerminal PostfixOp { get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
public NonTerminal Prefix { get; } = new NonTerminal(GrammarNames.Prefix);
public NonTerminal PrefixOp { get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
public NonTerminal QuotedFileSheet { get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
public NonTerminal Reference { get; } = new NonTerminal(GrammarNames.Reference);
public NonTerminal ReferenceItem { get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
public NonTerminal ReferenceFunctionCall { get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
public NonTerminal RefError { get; } = new NonTerminal(GrammarNames.RefError);
public NonTerminal RefFunctionName { get; } = new NonTerminal(GrammarNames.RefFunctionName);
public NonTerminal ReservedName { get; } = new NonTerminal(GrammarNames.ReservedName);
public NonTerminal Sheet { get; } = new NonTerminal(GrammarNames.Sheet);
public NonTerminal Start { get; } = new NonTerminal(GrammarNames.TransientStart);
public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
public NonTerminal StructuredReferenceElement { get; } = new NonTerminal(GrammarNames.StructuredReferenceElement);
public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
public NonTerminal StructuredReferenceTable { get; } = new NonTerminal(GrammarNames.StructuredReferenceTable);
public NonTerminal Text { get; } = new NonTerminal(GrammarNames.Text);
public NonTerminal UDFName { get; } = new NonTerminal(GrammarNames.UDFName);
public NonTerminal UDFunctionCall { get; } = new NonTerminal(GrammarNames.UDFunctionCall);
public NonTerminal Union { get; } = new NonTerminal(GrammarNames.Union);
public NonTerminal VRange { get; } = new NonTerminal(GrammarNames.VerticalRange);
#endregion
// Builds the grammar: marks the bracket terminals as punctuation, assigns a rule to the
// non-terminals declared above, marks the transient non-terminals and registers operator precedence.
// NOTE(review): the "false" passed to the base constructor is presumably Irony's case-sensitivity flag - confirm.
public ExcelFormulaGrammar() : base(false)
{
#region Punctuation
// All three bracket pairs are registered as punctuation.
MarkPunctuation(OpenParen, CloseParen);
MarkPunctuation(OpenSquareParen, CloseSquareParen);
MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
#endregion
#region Rules
#region Base rules
// The root accepts four input shapes: eqop + Formula, a bare Formula,
// an array formula in curly brackets, or eqop + Union.
Root = Start;
Start.Rule =
FormulaWithEq
| Formula
| ArrayFormula
| MultiRangeFormula
;
MarkTransient(Start);
ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;
MultiRangeFormula.Rule = eqop + Union;
FormulaWithEq.Rule = eqop + Formula;
Formula.Rule =
Reference + ReduceHere()
| Constant
| FunctionCall
| ConstantArray
| OpenParen + Formula + CloseParen
| ReservedName
;
ReservedName.Rule = ReservedNameToken;
Constant.Rule =
Number
| Text
| Bool
| Error
;
// Each constant kind wraps exactly one terminal.
Text.Rule = TextToken;
Number.Rule = NumberToken;
Bool.Rule = BoolToken;
Error.Rule = ErrorToken;
RefError.Rule = RefErrorToken;
#endregion
#region Functions
// Named calls as well as prefix, postfix and infix operator applications are all FunctionCall nodes.
// The named-call alternative has no OpenParen of its own; only CloseParen appears in the rule.
FunctionCall.Rule =
FunctionName + Arguments + CloseParen
| PrefixOp + Formula
| Formula + PostfixOp
| Formula + InfixOp + Formula
;
FunctionName.Rule = ExcelFunction;
// Comma-separated argument list built with MakeStarRule.
Arguments.Rule = MakeStarRule(Arguments, comma, Argument);
EmptyArgument.Rule = EmptyArgumentToken;
Argument.Rule = Formula | EmptyArgument;
PrefixOp.Rule =
ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
MarkTransient(PrefixOp);
InfixOp.Rule =
expop
| mulop
| divop
| plusop
| minop
| concatop
| gtop
| eqop
| ltop
| neqop
| gteop
| lteop;
MarkTransient(InfixOp);
// ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
PostfixOp.Rule = PreferShiftHere() + percentop;
MarkTransient(PostfixOp);
#endregion
#region References
// A reference is a single item, a reference-level function call, a parenthesised reference,
// a prefixed item, or a dynamic data exchange link.
Reference.Rule =
ReferenceItem
| ReferenceFunctionCall
| OpenParen + Reference + PreferShiftHere() + CloseParen
| Prefix + ReferenceItem
| DynamicDataExchange
;
// Range (colon), intersection, parenthesised union, reference-returning function and the hash postfix.
ReferenceFunctionCall.Rule =
Reference + colon + Reference
| Reference + intersectop + Reference
| OpenParen + Union + CloseParen
| RefFunctionName + Arguments + CloseParen
| Reference + hash
;
RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;
// Comma-separated list of references built with MakePlusRule.
Union.Rule = MakePlusRule(Union, comma, Reference);
ReferenceItem.Rule =
Cell
| NamedRange
| VRange
| HRange
| RefError
| UDFunctionCall
| StructuredReference
;
MarkTransient(ReferenceItem);
UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
UDFName.Rule = UDFToken;
VRange.Rule = VRangeToken;
HRange.Rule = HRangeToken;
Cell.Rule = CellToken;
// A file is a bracketed number, a bracketed name, or a path followed by a bracketed or plain file name.
File.Rule =
FileNameNumericToken
| FileNameEnclosedInBracketsToken
| FilePathToken + FileNameEnclosedInBracketsToken
| FilePathToken + FileName
;
DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;
NamedRange.Rule = NameToken | NamedRangeCombinationToken;
// Prefix alternatives: sheet, file + sheet, file only, and multiple-sheet forms,
// each in unquoted and (where QuoteS is used) quoted shape, plus a bare RefErrorToken.
Prefix.Rule =
SheetToken
| QuoteS + SheetQuotedToken
| File + SheetToken
| QuoteS + File + SheetQuotedToken
| File + exclamationMark
| MultipleSheetsToken
| QuoteS + MultipleSheetsQuotedToken
| File + MultipleSheetsToken
| QuoteS + File + MultipleSheetsQuotedToken
| RefErrorToken
;
// Note that FileNameEnclosedInBracketsToken is reused here as a structured reference element.
StructuredReferenceElement.Rule =
OpenSquareParen + SRColumnToken + CloseSquareParen
| OpenSquareParen + NameToken + CloseSquareParen
| FileNameEnclosedInBracketsToken;
StructuredReferenceTable.Rule = NameToken;
// Every supported element combination is enumerated explicitly: one to four elements,
// joined by comma and/or colon, optionally preceded by "at".
StructuredReferenceExpression.Rule =
StructuredReferenceElement
| at + StructuredReferenceElement
| StructuredReferenceElement + colon + StructuredReferenceElement
| at + StructuredReferenceElement + colon + StructuredReferenceElement
| StructuredReferenceElement + comma + StructuredReferenceElement
| StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement
| StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement
| StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement
;
// With or without a leading table name; "Table[]" (empty brackets) is accepted as well.
StructuredReference.Rule =
StructuredReferenceElement
| OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
| StructuredReferenceTable + StructuredReferenceElement
| StructuredReferenceTable + OpenSquareParen + CloseSquareParen
| StructuredReferenceTable + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
;
#endregion
#region Arrays
// ArrayColumns is a semicolon-separated list of ArrayRows; ArrayRows is a comma-separated list of ArrayConstant.
ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;
ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);
ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
#endregion
#endregion
#region 5-Operator Precedence
// Some of these operators are neutral associative instead of left associative,
// but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
// structure of the parse tree, we like consistency.
RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at);
RegisterOperators(Precedence.Union, Associativity.Left, comma);
RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
RegisterOperators(Precedence.Range, Associativity.Left, colon);
#endregion
}
#region Precedence and Priority constants
// Operator precedence levels; a larger number binds tighter.
// Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
// Plain int constants are used rather than an enum so that no casts are needed at the call sites.
private static class Precedence
{
    // Level 0 is deliberately unused: Irony seems to treat it as "no priority set".
    public const int Comparison = 1;
    public const int Concatenation = 2;
    public const int Addition = 3;
    public const int Multiplication = 4;
    public const int Exponentiation = 5;
    public const int UnaryPostFix = 6;
    public const int UnaryPreFix = 7;

    // Level 8 is currently not in use:
    //public const int Reference = 8;
    public const int Union = 9;
    public const int Intersection = 10;
    public const int Range = 11;
}
// Terminal priorities tell the lexer which token to pick when several terminals match the same text.
// E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
// E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
private static class TerminalPriority
{
    // For orientation, Irony's own "Low" value:
    //public const int Low = -1000;
    public const int SRColumn = -900;
    public const int Name = -800;
    public const int ReservedName = -700;
    public const int FileName = -500;
    public const int FileNamePath = -800;
    public const int SingleQuotedString = -100;

    // Irony's "Normal" value, which is also the default:
    //public const int Normal = 0;
    public const int Bool = 0;
    public const int MultipleSheetsToken = 100;

    // Irony's "High" value:
    //public const int High = 1000;
    public const int CellToken = 1000;
    public const int NamedRangeCombination = 1100;
    public const int UDF = 1150;
    public const int ExcelFunction = 1200;
    public const int ExcelRefFunction = 1200;
    public const int FileNameNumericToken = 1200;
    public const int SheetToken = 1200;
    public const int SheetQuotedToken = 1200;
}
#endregion
// Names of the built-in Excel functions; re-read from the embedded resource on every access.
private static string[] excelFunctionList
{
    get { return GetExcelFunctionList(); }
}

// Reads the v152 function list resource and returns its non-empty lines.
private static string[] GetExcelFunctionList()
{
    char[] lineBreaks = { '\n', '\r' };
    using (var reader = new StringReader(Properties.Resources.ExcelBuiltinFunctionList_v152))
    {
        string contents = reader.ReadToEnd();
        return contents.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
    }
}
}
#region Names
/// <summary>
/// Names of all terminals and non-terminals of the Excel formula grammar.
/// </summary>
/// <remarks>
/// Prefer these constants over literal strings: they are updated when breaking changes occur,
/// and they make it visible which grammar constructs a piece of code depends on.
/// </remarks>
// These stay constants (not methods/properties) so that they remain usable as switch case labels.
public static class GrammarNames
{
    #region Non-Terminals

    public const string Argument = "Argument";
    public const string Arguments = "Arguments";
    public const string ArrayColumns = "ArrayColumns";
    public const string ArrayConstant = "ArrayConstant";
    public const string ArrayFormula = "ArrayFormula";
    public const string ArrayRows = "ArrayRows";
    public const string Bool = "Bool";
    public const string Cell = "Cell";
    public const string Constant = "Constant";
    public const string ConstantArray = "ConstantArray";
    public const string DynamicDataExchange = "DynamicDataExchange";
    public const string EmptyArgument = "EmptyArgument";
    public const string Error = "Error";
    public const string ExcelFunction = "ExcelFunction";
    public const string File = "File";
    public const string Formula = "Formula";
    public const string FormulaWithEq = "FormulaWithEq";
    public const string FunctionCall = "FunctionCall";
    public const string FunctionName = "FunctionName";
    public const string HorizontalRange = "HRange";
    public const string MultiRangeFormula = "MultiRangeFormula";
    public const string NamedRange = "NamedRange";
    public const string Number = "Number";
    public const string Prefix = "Prefix";
    public const string QuotedFileSheet = "QuotedFileSheet";
    public const string Range = "Range";
    public const string Reference = "Reference";
    public const string ReferenceFunctionCall = "ReferenceFunctionCall";
    public const string RefError = "RefError";
    public const string RefFunctionName = "RefFunctionName";
    public const string ReservedName = "ReservedName";
    public const string Sheet = "Sheet";
    public const string StructuredReference = "StructuredReference";
    public const string StructuredReferenceElement = "StructuredReferenceElement";
    public const string StructuredReferenceExpression = "StructuredReferenceExpression";
    public const string StructuredReferenceTable = "StructuredReferenceTable";
    public const string Text = "Text";
    public const string UDFName = "UDFName";
    public const string UDFunctionCall = "UDFunctionCall";
    public const string Union = "Union";
    public const string VerticalRange = "VRange";

    #endregion

    #region Transient Non-Terminals

    public const string TransientStart = "Start";
    public const string TransientInfixOp = "InfixOp";
    public const string TransientPostfixOp = "PostfixOp";
    public const string TransientPrefixOp = "PrefixOp";
    public const string TransientReferenceItem = "ReferenceItem";

    #endregion

    #region Terminals

    public const string TokenBool = "BoolToken";
    public const string TokenCell = "CellToken";
    public const string TokenEmptyArgument = "EmptyArgumentToken";
    public const string TokenError = "ErrorToken";
    public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
    public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
    public const string TokenFilePath = "FilePathToken";
    public const string TokenFileName = "FileNameToken";
    public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
    public const string TokenFileNameNumeric = "FileNameNumericToken";
    public const string TokenHRange = "HRangeToken";
    public const string TokenIntersect = "INTERSECT";
    public const string TokenMultipleSheets = "MultipleSheetsToken";
    public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
    public const string TokenName = "NameToken";
    public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
    public const string TokenNumber = "NumberToken";
    public const string TokenRefError = "RefErrorToken";
    public const string TokenReservedName = "ReservedNameToken";
    public const string TokenSingleQuotedString = "SingleQuotedString";
    public const string TokenSheet = "SheetNameToken";
    public const string TokenSheetQuoted = "SheetNameQuotedToken";
    public const string TokenSRTableName = "SRTableName";
    public const string TokenSRKeyword = "SRKeyword";
    public const string TokenSRColumn = "SRColumn";
    public const string TokenSREnclosedColumn = "SREnclosedColumn";
    public const string TokenText = "TextToken";
    public const string TokenUDF = "UDFToken";
    public const string TokenUnionOperator = ",";
    public const string TokenVRange = "VRangeToken";

    #endregion
}
#endregion
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v152/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v152
{
///
/// Excel formula parser
/// Contains parser and utilities that operate directly on the parse tree, or makes working with the parse tree easier.
///
public static class ExcelFormulaParser
{
/// <summary>
/// Parser instance belonging to the current thread (one per thread because of <see cref="ThreadStaticAttribute"/>).
/// </summary>
[ThreadStatic]
private static Parser _p;

/// <summary>
/// The current thread's parser; it is created on first use.
/// </summary>
private static Parser P
{
    get
    {
        if (_p == null)
        {
            _p = new Parser(new ExcelFormulaGrammar());
        }

        return _p;
    }
}
/// <summary>
/// Parses a formula and returns the root node of the resulting tree.
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">If the formula could not be parsed.</exception>
/// <returns>Parse tree root node</returns>
public static ParseTreeNode Parse(string input)
{
    ParseTree tree = ParseToTree(input);
    return tree.Root;
}
/// <summary>
/// Parses a formula and returns the complete parse tree.
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">If the formula could not be parsed.</exception>
/// <returns>Parse tree</returns>
public static ParseTree ParseToTree(string input)
{
    ParseTree parsed = P.Parse(input);
    if (parsed.HasErrors())
    {
        throw new ArgumentException("Failed parsing input <<" + input + ">>");
    }

    // Intersect tokens get a span that starts one position/column earlier and is one character long.
    foreach (ParseTreeNode node in parsed.Root.AllNodes())
    {
        if (!node.Is(GrammarNames.TokenIntersect))
        {
            continue;
        }

        var shiftedStart = new SourceLocation(
            node.Span.Location.Position - 1,
            node.Span.Location.Line,
            node.Span.Location.Column - 1);
        node.Span = new SourceSpan(shiftedStart, 1);
    }

    // Quoted sheet tokens are post-processed against the original input text.
    foreach (ParseTreeNode node in parsed.Root.AllNodes())
    {
        if (node.Is(GrammarNames.TokenSheetQuoted))
        {
            PrefixInfo.FixQuotedSheetNodeForWhitespace(node, input);
        }
    }

    return parsed;
}
/// <summary>
/// All nodes of the tree (terminal and non-terminal) in depth-first pre-order, with a conditional stop.
/// </summary>
/// <param name="root">The root node</param>
/// <param name="stopAt">Don't process the children of a node matching this predicate; the node itself is still returned</param>
/// <returns>A lazily evaluated sequence starting with <paramref name="root"/></returns>
// inspiration taken from https://irony.codeplex.com/discussions/213938
public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
{
    var stack = new Stack<ParseTreeNode>();
    stack.Push(root);
    while (stack.Count > 0)
    {
        var node = stack.Pop();
        yield return node;

        // Check if we don't want to process the children of this node
        if (stopAt != null && stopAt(node))
        {
            continue;
        }

        var children = node.ChildNodes;
        // Push children on in reverse order so that they will
        // be evaluated left -> right when popped.
        for (int i = children.Count - 1; i >= 0; i--)
        {
            stack.Push(children[i]);
        }
    }
}
/// <summary>
/// All nodes of the tree (terminal and non-terminal) in depth-first pre-order
/// </summary>
/// <param name="root">The node to start from; it is the first node returned</param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
{
    return AllNodesConditional(root);
}
/// <summary>
/// All nodes of a certain type in depth-first pre-order
/// </summary>
/// <param name="root">The node to start from</param>
/// <param name="type">The node type (term name) to keep, e.g. one of the <c>GrammarNames</c> constants</param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
{
    return AllNodes(root.AllNodes(), type);
}
// Filters an already enumerated node sequence down to the nodes of the given type.
internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
{
    return allNodes.Where(node => node.Is(type));
}
/// <summary>
/// Finds the parent of a node.
/// </summary>
/// <remarks>
/// Expensive: the tree is walked from <paramref name="treeRoot"/> until a node having
/// <paramref name="child"/> among its children is found.
/// </remarks>
/// <exception cref="ArgumentException">If no node below <paramref name="treeRoot"/> has <paramref name="child"/> as a child.</exception>
public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
{
    foreach (ParseTreeNode candidate in treeRoot.AllNodes())
    {
        if (candidate.ChildNodes.Any(c => c == child))
        {
            return candidate;
        }
    }

    throw new ArgumentException("Child is not part of the tree", nameof(child));
}
/// <summary>
/// The type of a node, i.e. the name of its grammar term.
/// </summary>
public static string Type(this ParseTreeNode node) => node.Term.Name;
/// <summary>
/// Whether the node's type (term name) equals <paramref name="type"/>.
/// </summary>
public static bool Is(this ParseTreeNode pt, string type)
{
    string actual = pt.Term.Name;
    return actual == type;
}
/// <summary>
/// Whether this node is a function: a function call, a reference function call, a user defined
/// function call, or a two-child reference whose second child is itself a function.
/// </summary>
public static Boolean IsFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            return true;
        case GrammarNames.Reference:
            // This gives potential problems/duplication on external UDFs, but they are so rare that this is considered acceptable
            return input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction();
        default:
            return false;
    }
}
/// <summary>
/// Whether this node represents parentheses "(_)": a Formula whose only child is a Formula,
/// or a Reference whose only child is a Reference.
/// </summary>
public static bool IsParentheses(this ParseTreeNode input)
{
    string kind = input.Type();
    if (kind != GrammarNames.Formula && kind != GrammarNames.Reference)
    {
        return false;
    }

    // The single child must be of the same kind as the wrapper.
    return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(kind);
}
// A function node with exactly three children whose middle child is an operator term.
public static bool IsBinaryOperation(this ParseTreeNode input)
{
    if (!input.IsFunction())
    {
        return false;
    }

    if (input.ChildNodes.Count != 3)
    {
        return false;
    }

    return input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
}
// A binary operation that is a FunctionCall node (as opposed to a ReferenceFunctionCall).
public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
    => input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);
// A binary operation that is a ReferenceFunctionCall node.
public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
    => input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
// Either a unary prefix or a unary postfix operation.
public static bool IsUnaryOperation(this ParseTreeNode input)
    => input.IsUnaryPrefixOperation() || input.IsUnaryPostfixOperation();
// A function node with exactly two children whose first child is an operator term.
public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }

    return input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator);
}
// A function node with exactly two children whose second child is an operator term.
public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }

    return input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
}
// Returns the input without its last character.
private static string RemoveFinalSymbol(string input)
{
    int keep = input.Length - 1;
    return input.Substring(0, keep);
}
/// <summary>
/// Get the function or operator name of this function call
/// </summary>
/// <param name="input">A node for which <c>IsFunction</c> holds</param>
/// <returns>
/// The intersect/union token name, the operator text, the upper-cased function name without its
/// final character, or (for a prefixed function) the prefix followed by the function name.
/// </returns>
/// <exception cref="ArgumentException">If the node is none of the recognised function shapes.</exception>
public static string GetFunction(this ParseTreeNode input)
{
    if (input.IsIntersection())
    {
        return GrammarNames.TokenIntersect;
    }

    if (input.IsUnion())
    {
        return GrammarNames.TokenUnionOperator;
    }

    // For binary and postfix operations the operator is the second child.
    if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
    {
        return input.ChildNodes[1].Print();
    }

    if (input.IsUnaryPrefixOperation())
    {
        return input.ChildNodes[0].Print();
    }

    if (input.IsNamedFunction())
    {
        // Function names are identifiers, not linguistic text: upper-case them culture-independently
        // so the result does not depend on the current culture (e.g. "if" must not become "İF" under tr-TR).
        return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
    }

    if (input.IsExternalUDFunction())
    {
        return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
    }

    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Whether this node is a function whose name (as returned by <c>GetFunction</c>) equals <paramref name="functionName"/>.
/// </summary>
public static bool MatchFunction(this ParseTreeNode input, string functionName)
{
    if (!input.IsFunction())
    {
        return false;
    }

    return input.GetFunction() == functionName;
}
/// <summary>
/// Get all the arguments of a function or operation
/// </summary>
/// <param name="input">A function or operation node</param>
/// <returns>The argument nodes (for operations: the operand nodes), left to right</returns>
/// <exception cref="ArgumentException">If the node is none of the recognised function shapes.</exception>
public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
{
    if (input.IsNamedFunction())
    {
        return input
            .ChildNodes[1] // "Arguments" non-terminal
            .ChildNodes // "Argument" non-terminals
            .Select(node => node.ChildNodes[0])
            ;
    }

    if (input.IsBinaryOperation())
    {
        return new[] {input.ChildNodes[0], input.ChildNodes[2]};
    }

    if (input.IsUnaryPrefixOperation())
    {
        return new[] {input.ChildNodes[1]};
    }

    if (input.IsUnaryPostfixOperation())
    {
        return new[] {input.ChildNodes[0]};
    }

    if (input.IsUnion())
    {
        // The single child is the Union node; its children are the united references.
        return input.ChildNodes[0].ChildNodes;
    }

    if (input.IsExternalUDFunction())
    {
        return input // Reference
            .ChildNodes[1] // UDFunctionCall
            .ChildNodes[1] // Arguments
            .ChildNodes // Argument non-terminals
            .Select(node => node.ChildNodes[0])
            ;
    }

    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Whether this node is a built-in Excel function, i.e. a function whose first child is a
/// FunctionName or RefFunctionName node.
/// </summary>
public static bool IsBuiltinFunction(this ParseTreeNode node)
{
    if (!node.IsFunction())
    {
        return false;
    }

    ParseTreeNode head = node.ChildNodes[0];
    return head.Is(GrammarNames.FunctionName) || head.Is(GrammarNames.RefFunctionName);
}
/// <summary>
/// Whether this node is an intersection: a binary operation whose operator token is the intersect terminal.
/// </summary>
public static bool IsIntersection(this ParseTreeNode input)
{
    if (!input.IsBinaryOperation())
    {
        return false;
    }

    string operatorName = input.ChildNodes[1].Token.Terminal.Name;
    return operatorName == GrammarNames.TokenIntersect;
}
/// <summary>
/// Whether this node is a union: a ReferenceFunctionCall whose only child is a Union node.
/// </summary>
public static bool IsUnion(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.ReferenceFunctionCall))
    {
        return false;
    }

    if (input.ChildNodes.Count != 1)
    {
        return false;
    }

    return input.ChildNodes[0].Is(GrammarNames.Union);
}
/// <summary>
/// Whether this node is a function call that has a name, as opposed to a unary or binary operation.
/// </summary>
public static bool IsNamedFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.FunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.FunctionName);
        case GrammarNames.ReferenceFunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.RefFunctionName);
        case GrammarNames.UDFunctionCall:
            return true;
        default:
            return false;
    }
}
// Either a binary or a unary (prefix/postfix) operation.
public static bool IsOperation(this ParseTreeNode input)
    => input.IsBinaryOperation() || input.IsUnaryOperation();
// A Reference node with two children whose second child is a named function.
public static bool IsExternalUDFunction(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.Reference) || input.ChildNodes.Count != 2)
    {
        return false;
    }

    return input.ChildNodes[1].IsNamedFunction();
}
/// <summary>
/// True if this node represents a number constant with a sign, i.e. a unary prefix operation
/// whose operand directly contains a Constant that in turn contains a Number.
/// </summary>
public static bool IsNumberWithSign(this ParseTreeNode input)
{
    if (!input.IsUnaryPrefixOperation())
    {
        return false;
    }

    // First child of the operand node.
    ParseTreeNode operandContent = input.ChildNodes[1].ChildNodes[0];
    return operandContent.Is(GrammarNames.Constant)
           && operandContent.ChildNodes[0].Is(GrammarNames.Number);
}
/// <summary>
/// Extracts the information of a Prefix non-terminal into a <see cref="PrefixInfo"/>.
/// </summary>
public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix)
{
    return PrefixInfo.From(prefix);
}
/// <summary>
/// Descends through first children until a node that is not a Formula is reached.
/// </summary>
public static ParseTreeNode SkipFormula(this ParseTreeNode input)
{
    ParseTreeNode current = input;
    while (true)
    {
        if (!current.Is(GrammarNames.Formula))
        {
            return current;
        }

        current = current.ChildNodes.First();
    }
}
/// <summary>
/// Get all child nodes that are references and aren't part of another reference expression
/// </summary>
/// <remarks>
/// The walk does not descend below a Reference node; each Reference found is passed through
/// <c>SkipToRelevant</c> before it is returned.
/// </remarks>
public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
{
    return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
        .Where(node => node.Is(GrammarNames.Reference))
        .Select(node => node.SkipToRelevant())
        ;
}
/// <summary>
/// Gets the ParserReferences from the input parse tree node and its children
/// </summary>
/// <remarks>
/// 5 cases:
/// 1. ReferenceItem node: convert to ParserReference
/// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
///    (to include the references in the arguments of external UDFs)
/// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
/// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
/// 5. Other cases (RefFunctionCall, Union, Arguments):recursive call on the nodes returned from GetReferenceNodes(node)
/// </remarks>
public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
{
    // A Reference with a single child is only a wrapper; continue with the child.
    if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
    {
        node = node.ChildNodes[0];
    }

    var list = new List<ParserReference>();
    switch (node.Type())
    {
        case GrammarNames.Cell:
        case GrammarNames.NamedRange:
        case GrammarNames.HorizontalRange:
        case GrammarNames.VerticalRange:
        case GrammarNames.StructuredReference:
            list.Add(new ParserReference(node));
            break;
        case GrammarNames.Reference:
            list.Add(new ParserReference(node));
            list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            break;
        default:
            if (node.IsRange())
            {
                var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray();
                var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray();
                if (IsCellReference(rangeStart) && IsCellReference(rangeEnd))
                {
                    // Both limits are single cells: merge them into one CellRange reference.
                    ParserReference range = rangeStart.First();
                    range.MaxLocation = rangeEnd.First().MinLocation;
                    range.ReferenceType = ReferenceType.CellRange;
                    range.LocationString = node.Print();
                    list.Add(range);
                }
                else
                {
                    list.AddRange(rangeStart);
                    list.AddRange(rangeEnd);
                }
            }
            else
            {
                list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            }

            break;
    }

    return list;
}
// True when the list holds exactly one reference and that reference is of type Cell.
private static bool IsCellReference(IList<ParserReference> references)
{
    return references.Count == 1 && references.First().ReferenceType == ReferenceType.Cell;
}
/// <summary>
/// Whether this node is a range: a binary reference operation whose operator node is of type ":".
/// </summary>
public static bool IsRange(this ParseTreeNode input)
{
    if (!input.IsBinaryReferenceOperation())
    {
        return false;
    }

    return input.ChildNodes[1].Is(":");
}
/// <summary>
/// Descends to the first "relevant" node by skipping wrapper nodes.
/// </summary>
/// <param name="input">The input parse tree node</param>
/// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
/// <remarks>
/// Skipped are:
/// * FormulaWithEq and ArrayFormula nodes
/// * Formula and Argument nodes with a single child
/// * Parentheses
/// * Reference nodes which are just wrappers
/// </remarks>
public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
{
    ParseTreeNode current = input;
    while (true)
    {
        string kind = current.Type();

        if (kind == GrammarNames.FormulaWithEq || kind == GrammarNames.ArrayFormula)
        {
            // The wrapped formula is the second child.
            current = current.ChildNodes[1];
            continue;
        }

        if (kind == GrammarNames.Argument || kind == GrammarNames.Formula)
        {
            if (current.ChildNodes.Count != 1)
            {
                return current;
            }

            current = current.ChildNodes[0];
            continue;
        }

        if (kind == GrammarNames.Reference)
        {
            // Parentheses are always skipped; a reference without a prefix
            // (=> it only has one child node) is skipped only when the option is set.
            bool isWrapper = (skipReferencesWithoutPrefix && current.ChildNodes.Count == 1)
                             || current.IsParentheses();
            if (!isWrapper)
            {
                return current;
            }

            current = current.ChildNodes[0];
            continue;
        }

        return current;
    }
}
/// <summary>
/// Pretty-print a parse tree to a string
/// </summary>
/// <param name="input">The node to print, including everything below it</param>
/// <returns>The formula text represented by the node</returns>
/// <exception cref="ArgumentException">
/// If a function node has an unknown shape, or a non-terminal without a printing rule has
/// a number of children other than one.
/// </exception>
public static string Print(this ParseTreeNode input)
{
    // For terminals, just print the token text
    if (input.Term is Terminal)
    {
        return input.Token.Text;
    }

    // (Lazy) enumerable for printed children
    var children = input.ChildNodes.Select(Print);
    // Concrete list when needed
    List<string> childrenList;

    // Switch on non-terminals
    switch (input.Term.Name)
    {
        case GrammarNames.Formula:
            // Check if these are brackets, otherwise print first child
            return IsParentheses(input) ? $"({children.First()})" : children.First();

        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            childrenList = children.ToList();

            if (input.IsNamedFunction())
            {
                // The closing parenthesis is not a child node, so it is appended here.
                return string.Join("", childrenList) + ")";
            }

            if (input.IsBinaryOperation())
            {
                // format string for "normal" binary operation
                string format = "{0}{1}{2}";
                if (input.IsIntersection())
                {
                    // An intersection is printed as a single space between its operands.
                    format = "{0} {2}";
                }

                return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
            }

            if (input.IsUnion())
            {
                return $"({string.Join(",", childrenList)})";
            }

            if (input.IsUnaryOperation())
            {
                return string.Join("", childrenList);
            }

            throw new ArgumentException("Unknown function type.");

        case GrammarNames.Reference:
            return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);

        case GrammarNames.Prefix:
            var ret = string.Join("", children);
            // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
            if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
            {
                ret += "!";
            }

            return ret;

        case GrammarNames.ArrayFormula:
            return "{=" + children.ElementAt(1) + "}";

        case GrammarNames.StructuredReference:
            var sb = new StringBuilder();
            // Whether the reference starts with a table name; if so, the contents follow as the second child.
            var hasTable = input.ChildNodes.Count >= 1 && input.ChildNodes[0].Is(GrammarNames.StructuredReferenceTable);
            var contentsNode = hasTable ? 1 : 0;
            childrenList = children.ToList();

            if (hasTable)
            {
                sb.Append(childrenList[0]);
            }

            if (hasTable && input.ChildNodes.Count == 1)
            {
                // Full table reference
                sb.Append("[]");
            }
            else if (input.ChildNodes[contentsNode].Is(GrammarNames.StructuredReferenceElement))
            {
                sb.Append(childrenList[contentsNode]);
            }
            else
            {
                sb.Append($"[{childrenList[contentsNode]}]");
            }

            return sb.ToString();

        // Terms for which to print all child nodes concatenated
        case GrammarNames.ArrayConstant:
        case GrammarNames.DynamicDataExchange:
        case GrammarNames.FormulaWithEq:
        case GrammarNames.File:
        case GrammarNames.MultiRangeFormula:
        case GrammarNames.StructuredReferenceExpression:
            return string.Join("", children);

        // Terms for which we print the children comma-separated
        case GrammarNames.Arguments:
        case GrammarNames.ArrayRows:
        case GrammarNames.Union:
            return string.Join(",", children);

        case GrammarNames.ArrayColumns:
            return string.Join(";", children);

        case GrammarNames.ConstantArray:
            return $"{{{children.First()}}}";

        default:
            // If it is not defined above and the number of children is exactly one, we want to just print the first child
            if (input.ChildNodes.Count == 1)
            {
                return children.First();
            }

            throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine +
                                        "This probably means the Excel grammar was modified without the print function being modified");
    }
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v152/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v152
{
/// <summary>
/// Kind of location a <see cref="ParserReference"/> describes.
/// </summary>
public enum ReferenceType
{
// Assigned by ParserReference.InitializeReference for Cell nodes.
Cell,
// Not assigned by ParserReference.InitializeReference; only reachable through the explicit constructor.
CellRange,
// Assigned for NamedRange nodes.
UserDefinedName,
// Assigned for HorizontalRange nodes (token text of the form "min:max").
HorizontalRange,
// Assigned for VerticalRange nodes (token text of the form "min:max").
VerticalRange,
// Assigned for RefError nodes.
RefError,
// Assigned for StructuredReference nodes.
Table
}
/// <summary>
/// Describes one reference found in a formula: the kind of reference, the location it points at
/// and the worksheet/file prefix it was qualified with.
/// </summary>
public class ParserReference
{
    public const int MaxRangeHeight = 1048576;
    public const int MaxRangeWidth = 16384;

    /// <summary>Kind of reference; see <see cref="ReferenceType"/>.</summary>
    public ReferenceType ReferenceType { get; set; }

    /// <summary>Printed form of the node this reference was initialized from.</summary>
    public string LocationString { get; set; }

    /// <summary>Sheet name taken from the prefix, or the first sheet of a multi-sheet prefix.</summary>
    public string Worksheet { get; set; }

    /// <summary>Second sheet of a multi-sheet prefix; <c>null</c> otherwise.</summary>
    public string LastWorksheet { get; set; }

    public string FilePath { get; set; }

    /// <summary>File name from the prefix, or the file number rendered as text for numeric file prefixes.</summary>
    public string FileName { get; set; }

    /// <summary>Name of a user-defined name or of a structured-reference table.</summary>
    public string Name { get; private set; }

    public string MinLocation { get; set; } //Location as appearing in the formula, eg $A$1
    public string MaxLocation { get; set; }

    /// <summary>
    /// Creates a reference from explicit values.
    /// </summary>
    /// <remarks>
    /// When <paramref name="maxLocation"/> is <c>null</c>, <see cref="MaxLocation"/> falls back to
    /// <paramref name="minLocation"/>.
    /// </remarks>
    public ParserReference(ReferenceType referenceType, string locationString = null, string worksheet = null, string lastWorksheet = null,
        string filePath = null, string fileName = null, string name = null, string minLocation = null, string maxLocation = null)
    {
        ReferenceType = referenceType;
        LocationString = locationString;
        Worksheet = worksheet;
        LastWorksheet = lastWorksheet;
        FilePath = filePath;
        FileName = fileName;
        Name = name;
        MinLocation = minLocation;
        MaxLocation = maxLocation ?? minLocation;
    }

    /// <summary>
    /// Creates a reference from a parse tree node; see <see cref="InitializeReference"/>.
    /// </summary>
    public ParserReference(ParseTreeNode node)
    {
        InitializeReference(node);
    }

    /// <summary>
    /// Initializes the current object based on the input ParseTreeNode
    /// </summary>
    /// <remarks>
    /// For Reference nodes (Prefix ReferenceItem), it initializes the values derived from the Prefix node and
    /// is re-invoked for the ReferenceItem node.
    /// </remarks>
    public void InitializeReference(ParseTreeNode node)
    {
        switch (node.Type())
        {
            case GrammarNames.Reference:
                // Expects the (Prefix, ReferenceItem) shape: child 0 is the prefix, child 1 the item.
                PrefixInfo prefix = node.ChildNodes[0].GetPrefixInfo();
                // Doubled quotes inside a sheet name are an escaped single quote.
                Worksheet = prefix.HasSheet ? prefix.Sheet.Replace("''", "'") : "(Undefined sheet)";
                if (prefix.HasMultipleSheets)
                {
                    string[] sheets = prefix.MultipleSheets.Split(':');
                    Worksheet = sheets[0];
                    LastWorksheet = sheets[1];
                }

                if (prefix.HasFilePath)
                {
                    FilePath = prefix.FilePath;
                }

                // A numeric file reference takes precedence over a file name.
                if (prefix.HasFileNumber)
                {
                    FileName = prefix.FileNumber.ToString();
                }
                else if (prefix.HasFileName)
                {
                    FileName = prefix.FileName;
                }

                InitializeReference(node.ChildNodes[1]);
                break;
            case GrammarNames.Cell:
                ReferenceType = ReferenceType.Cell;
                MinLocation = node.ChildNodes[0].Token.ValueString;
                MaxLocation = MinLocation;
                break;
            case GrammarNames.NamedRange:
                ReferenceType = ReferenceType.UserDefinedName;
                Name = node.ChildNodes[0].Token.ValueString;
                break;
            case GrammarNames.StructuredReference:
                ReferenceType = ReferenceType.Table;
                // The table qualifier is optional, so Name stays null when it is absent.
                Name = node.ChildNodes.FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceTable)?.ChildNodes[0].Token.ValueString;
                break;
            case GrammarNames.HorizontalRange:
                string[] horizontalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                ReferenceType = ReferenceType.HorizontalRange;
                MinLocation = horizontalLimits[0];
                MaxLocation = horizontalLimits[1];
                break;
            case GrammarNames.VerticalRange:
                string[] verticalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                ReferenceType = ReferenceType.VerticalRange;
                MinLocation = verticalLimits[0];
                MaxLocation = verticalLimits[1];
                break;
            case GrammarNames.RefError:
                ReferenceType = ReferenceType.RefError;
                break;
        }

        // Runs last so that, for Reference nodes, the value set by the recursive call is replaced
        // by the printed form of the whole (prefixed) reference.
        LocationString = node.Print();
    }

    /// <summary>
    /// Converts the column number to an Excel column string representation.
    /// </summary>
    /// <param name="columnNumber">The zero-based column number.</param>
    /// <returns>The column letters, e.g. 0 gives "A", 25 gives "Z" and 26 gives "AA".</returns>
    private string ConvertColumnToStr(int columnNumber)
    {
        var sb = new System.Text.StringBuilder();
        while (columnNumber >= 0)
        {
            sb.Insert(0, (char)(65 + columnNumber % 26));
            // Column letters have no zero digit, hence the extra -1 per position.
            columnNumber = columnNumber / 26 - 1;
        }

        return sb.ToString();
    }

    /// <summary>
    /// Returns <see cref="MinLocation"/> for cell references and "min:max" for every other kind.
    /// </summary>
    /// <remarks>
    /// Missing locations are rendered as empty text rather than throwing, so a cell reference
    /// created without a location yields an empty string.
    /// </remarks>
    public override string ToString()
    {
        return ReferenceType == ReferenceType.Cell
            ? (MinLocation ?? string.Empty)
            : $"{MinLocation}:{MaxLocation}";
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v152/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v152
{
// TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23
/// <summary>
/// Simple data class that holds information about a Prefix.
/// </summary>
/// <remarks>
/// Equality and hashing compare file number, file path, file name, sheet and multiple-sheets text
/// case-insensitively; <see cref="IsQuoted"/> does not take part in either.
/// </remarks>
public class PrefixInfo : IEquatable<PrefixInfo>
{
    public string FilePath { get; }
    public bool HasFilePath => FilePath != null;

    private readonly int? _fileNumber;

    /// <summary>Numeric file reference, or 0 when <see cref="HasFileNumber"/> is false.</summary>
    public int FileNumber => _fileNumber.GetValueOrDefault();
    public bool HasFileNumber => _fileNumber.HasValue;

    public string FileName { get; }
    public bool HasFileName => FileName != null;

    public bool HasFile => HasFileName || HasFileNumber;

    public string Sheet { get; }
    public bool HasSheet => Sheet != null;

    /// <summary>Multi-sheet text as written in the formula, without the trailing exclamation mark.</summary>
    public string MultipleSheets { get; }
    public bool HasMultipleSheets => MultipleSheets != null;

    public bool IsQuoted { get; }

    public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
    {
        Sheet = sheet;
        _fileNumber = fileNumber;
        FileName = fileName;
        FilePath = filePath;
        MultipleSheets = multipleSheets;
        IsQuoted = isQuoted;
    }

    /// <summary>
    /// Create a PrefixInfo class from a parse tree node
    /// </summary>
    /// <param name="prefix">A node of type <c>Prefix</c>.</param>
    /// <exception cref="ArgumentException">The node is not a Prefix node.</exception>
    internal static PrefixInfo From(ParseTreeNode prefix)
    {
        if (prefix.Type() != GrammarNames.Prefix)
        {
            throw new ArgumentException("Not a prefix", nameof(prefix));
        }

        string filePath = null;
        int? fileNumber = null;
        string fileName = null;
        string sheetName = null;
        string multipleSheets = null;

        // Token number we're processing
        var cur = 0;

        // Check for quotes
        var isQuoted = prefix.ChildNodes[cur].Is("'");
        if (isQuoted)
        {
            cur++;
        }

        // Check and process file
        if (prefix.ChildNodes[cur].Is(GrammarNames.File))
        {
            ParseTreeNode file = prefix.ChildNodes[cur];

            if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
            {
                // Numeric filename: strip the surrounding brackets before parsing
                fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
            }
            else
            {
                // String filename
                var iCur = 0;

                // Check if it includes a path
                if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                {
                    filePath = file.ChildNodes[iCur].Print();
                    iCur++;
                }

                if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                {
                    // Strip the surrounding brackets
                    fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                }
                else
                {
                    fileName = file.ChildNodes[iCur].Print();
                }
            }

            cur++;
        }

        // A prefix can consist of nothing but a File node (the parse tree printer special-cases exactly
        // that shape), so only look for a sheet token when a node is actually left.
        if (cur < prefix.ChildNodes.Count)
        {
            ParseTreeNode sheetNode = prefix.ChildNodes[cur];

            // Check for a non-quoted sheet
            if (sheetNode.Is(GrammarNames.TokenSheet))
            {
                // remove !
                sheetName = Substr(sheetNode.Print(), 1);
            }
            // Check for a quoted sheet
            else if (sheetNode.Is(GrammarNames.TokenSheetQuoted))
            {
                // remove quote and !
                sheetName = Substr(sheetNode.Print(), 2);

                if (sheetName == "")
                {
                    // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                    // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                    sheetName = " ";
                }
            }
            // Check if multiple sheets
            else if (sheetNode.Is(GrammarNames.TokenMultipleSheets))
            {
                // remove !
                multipleSheets = Substr(sheetNode.Print(), 1);
            }
        }

        return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
    }

    /// <summary>
    /// Extends a quoted sheet node backwards over the whitespace that directly precedes it in
    /// <paramref name="sourceText"/>, updating both the node span and the token text.
    /// </summary>
    internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
    {
        var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
        SourceLocation currentLocation = quotedSheetNode.Span.Location;
        if (newPosition == currentLocation.Position)
        {
            return;
        }

        // NOTE(review): the start moves backwards by (Position - newPosition) characters, yet the column
        // is increased by that amount instead of decreased - confirm whether this is intended.
        var newLocation = new SourceLocation(newPosition, currentLocation.Line, currentLocation.Column + currentLocation.Position - newPosition);
        quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

        // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
        typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
            ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
    }

    /// <summary>
    /// Walks backwards from the node start while the preceding character is whitespace and returns
    /// the resulting start index.
    /// </summary>
    private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
    {
        var startIndex = nodeSheetQuoted.Span.Location.Position;
        while (startIndex > 0)
        {
            if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
            {
                break;
            }

            startIndex--;
        }

        return startIndex;
    }

    /// <summary>
    /// Returns <paramref name="s"/> without its first <paramref name="removeFirst"/> and last
    /// <paramref name="removeLast"/> characters.
    /// </summary>
    private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
    {
        return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
    }

    public override bool Equals(object other) => Equals(other as PrefixInfo);

    public bool Equals(PrefixInfo other)
    {
        if (ReferenceEquals(null, other)) return false;
        if (ReferenceEquals(this, other)) return true;
        return _fileNumber == other._fileNumber
               && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase)
               && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase)
               && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase)
               && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
    }

    public override int GetHashCode()
    {
        unchecked
        {
            // Same case-insensitive comparer as Equals, so equal prefixes hash alike.
            var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
            hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
            hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
            hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
            hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
            return hashCode;
        }
    }

    public static bool operator ==(PrefixInfo left, PrefixInfo right)
    {
        return Equals(left, right);
    }

    public static bool operator !=(PrefixInfo left, PrefixInfo right)
    {
        return !Equals(left, right);
    }

    /// <summary>
    /// Renders the prefix as text: optional quote, file path, bracketed file number/name, sheet(s),
    /// optional closing quote and a trailing exclamation mark.
    /// </summary>
    public override string ToString()
    {
        var res = new StringBuilder();
        if (IsQuoted) res.Append("'");
        if (HasFilePath) res.Append(FilePath);
        if (HasFileNumber) res.Append($"[{FileNumber}]");
        if (HasFileName) res.Append($"[{FileName}]");
        if (HasSheet) res.Append(Sheet);
        if (HasMultipleSheets) res.Append(MultipleSheets);
        if (IsQuoted) res.Append("'");
        res.Append("!");
        return res.ToString();
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v160/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.IO;
using System.Reflection;
namespace XLParser.Web.XLParserVersions.v160
{
/// <summary>
/// Contains the XLParser grammar
/// </summary>
[Language("Excel Formulas", "1.6.0", "Grammar for Excel Formulas")]
public class ExcelFormulaGrammar : Grammar
{
#region 1-Terminals
// Terminal (token) definitions of the grammar. The symbol terminals below are expression-bodied
// properties that call ToTerm on access; the literal and regex terminals are created once per
// grammar instance by their initializers.
#region Symbols and operators
public Terminal at => ToTerm("@");
public Terminal comma => ToTerm(",");
public Terminal colon => ToTerm(":");
public Terminal hash => ToTerm("#");
public Terminal semicolon => ToTerm(";");
public Terminal OpenParen => ToTerm("(");
public Terminal CloseParen => ToTerm(")");
public Terminal CloseSquareParen => ToTerm("]");
public Terminal OpenSquareParen => ToTerm("[");
public Terminal exclamationMark => ToTerm("!");
public Terminal CloseCurlyParen => ToTerm("}");
public Terminal OpenCurlyParen => ToTerm("{");
public Terminal QuoteS => ToTerm("'");
public Terminal mulop => ToTerm("*");
public Terminal plusop => ToTerm("+");
public Terminal divop => ToTerm("/");
public Terminal minop => ToTerm("-");
public Terminal concatop => ToTerm("&");
public Terminal expop => ToTerm("^");
// Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
// Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);
public Terminal percentop => ToTerm("%");
public Terminal gtop => ToTerm(">");
public Terminal eqop => ToTerm("=");
public Terminal ltop => ToTerm("<");
public Terminal neqop => ToTerm("<>");
public Terminal gteop => ToTerm(">=");
public Terminal lteop => ToTerm("<=");
#endregion
#region Literals
public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE")
{
Priority = TerminalPriority.Bool
};
public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None)
{
DefaultIntTypes = new[] {TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt}
};
public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes);
public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes)
{ Priority = TerminalPriority.SingleQuotedString };
// #REF! is deliberately absent from this list; it has its own terminal (RefErrorToken) below.
public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!");
public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError);
#endregion
#region Functions
private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers
private const string AllUdfChars = SpecialUdfChars + @"\\.\w";
private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)";
// The following regex uses the rather exotic feature Character Class Subtraction
// https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction
// Three alternatives, each followed by "(": a single UDF character other than C/c/R/r,
// an add-in prefix plus a single character, or an optional prefix plus 2 to 1023 characters.
private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\(";
public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) {Priority = TerminalPriority.UDF};
// The function tokens include the opening parenthesis of the call.
public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(")
{ Priority = TerminalPriority.ExcelRefFunction };
public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(")
{ Priority = TerminalPriority.ExcelRefFunction };
// Alternation over every name in the built-in function list resource, followed by "(".
public Terminal ExcelFunction { get; } = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + string.Join("|", excelFunctionList) + ")\\(")
{ Priority = TerminalPriority.ExcelFunction };
// Using this instead of Empty allows a more accurate tree
public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);
#endregion
#region References and names
// Column letters A through XFD (three-letter columns are capped at XFD).
private const string ColumnPattern = @"(?:[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D]|[A-Z]{1,2})";
public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern);
// Row numbers are any positive integer without leading zero; no upper bound is enforced here.
public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*");
private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?[1-9][0-9]*";
public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex)
{ Priority = TerminalPriority.CellToken };
// Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark
private const string NameStartCharRegex = @"[\p{L}\\_]";
private const string NameValidCharacterRegex = @"[\w\\_\.\?€]";
public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*")
{ Priority = TerminalPriority.Name };
// Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference.
// If we ever parse R1C1 references, make sure to include them here
// TODO: Add all function names here
private const string NameInvalidWordsRegex =
"((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
// \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
+ "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
;
// To prevent e.g. "A1A1" being parsed as 2 cell tokens
public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NameInvalidWordsRegex + NameValidCharacterRegex + "+")
{ Priority = TerminalPriority.NamedRangeCombination };
// NOTE(review): this and several terminals below are public mutable fields, whereas the other
// terminals in this class are get-only properties.
public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+")
{ Priority = TerminalPriority.ReservedName };
#region Structured References
private const string SRSpecifierRegex = @"#(All|Data|Headers|Totals|This Row)";
public Terminal SRSpecifierToken = new RegexBasedTerminal(GrammarNames.TokenSRSpecifier, SRSpecifierRegex)
{ Priority = TerminalPriority.StructuredReference };
// One or more characters that are either not one of [ ] ' # @, or one of ' [ ] # @ escaped by a preceding single quote.
private const string SRColumnRegex = @"(?:[^\[\]'#@]|(?:'['\[\]#@]))+";
public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, SRColumnRegex)
{ Priority = TerminalPriority.StructuredReference };
#endregion
#region Prefixes
private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
private const string notSheetNameChars = @"'*\[\]\\:/?";
//const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
//const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*";
//private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";
// Sheet tokens include the trailing "!". The quoted variants also include the closing quote;
// the opening quote is matched separately (QuoteS) by the Prefix rule.
public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!")
{ Priority = TerminalPriority.SheetToken };
public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!")
{ Priority = TerminalPriority.SheetQuotedToken };
private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!";
private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!";
public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
// The lookahead requires a "!" later in the input with no square brackets in between.
private const string fileNameNumericRegex = @"\[[0-9]+\](?=[^\[\]]*!)";
public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex)
{ Priority = TerminalPriority.FileNameNumericToken };
private const string fileNameInBracketsRegex = @"\[[^\[\]]+\](?=[^\[\]]*!)";
public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex)
{ Priority = TerminalPriority.FileName };
// Source: https://stackoverflow.com/a/14632579
private const string fileNameRegex = @"[^\.\\\[\]]+\..{1,4}";
public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex)
{ Priority = TerminalPriority.FileName };
// Source: http://stackoverflow.com/a/6416209/572635
private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\-.$ @]+)\\(([^<>\"" /\|?*\\']|( |''))*\\)*";
// NOTE(review): "(:(0-9)*)*" matches the literal text "0-9" rather than digits, so a URL with a
// port number is not matched; presumably "[0-9]" was intended - confirm before changing.
private const string urlPathRegex = @"http(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:(0-9)*)*[/]([a-zA-Z0-9\-\.\?\,\'+&%\$#_ ()]*[/])*";
private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")";
public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex)
{ Priority = TerminalPriority.FileNamePath };
#endregion
#endregion
#endregion
#region 2-NonTerminals
// Most non-terminals are first defined here, so they can be used anywhere in the rules
// Otherwise you can only use non-terminals that have been defined previously
// InfixOp, PostfixOp, PrefixOp, ReferenceItem and Start carry Transient* names; the constructor
// passes each of them to MarkTransient.
// NOTE(review): QuotedFileSheet and Sheet are declared here but are not given a rule in the constructor.
public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument);
public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments);
public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns);
public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant);
public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula);
public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows);
public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool);
public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell);
public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant);
public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray);
public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument);
public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error);
public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula);
public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq);
public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall);
public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName);
public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange);
public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp);
public NonTerminal MultiRangeFormula{ get; } = new NonTerminal(GrammarNames.MultiRangeFormula);
public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange);
public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number);
public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix);
public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference);
public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError);
public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName);
public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName);
public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet);
public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart);
public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
public NonTerminal StructuredReferenceColumn { get; } = new NonTerminal(GrammarNames.StructuredReferenceColumn);
public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
public NonTerminal StructuredReferenceSpecifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceSpecifier);
public NonTerminal StructuredReferenceQualifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceQualifier);
public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);
#endregion
// Builds the grammar: marks punctuation, assigns the production rule of every non-terminal and
// registers operator precedence.
// NOTE(review): base(false) presumably selects a case-insensitive grammar - confirm against Irony's Grammar constructor.
public ExcelFormulaGrammar() : base(false)
{
#region Punctuation
// Only round and curly brackets are marked as punctuation; square brackets and "!" are not.
MarkPunctuation(OpenParen, CloseParen);
MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
#endregion
#region Rules
#region Base rules
Root = Start;
Start.Rule =
FormulaWithEq
| Formula
| ArrayFormula
| MultiRangeFormula
;
MarkTransient(Start);
ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;
MultiRangeFormula.Rule = eqop + Union;
FormulaWithEq.Rule = eqop + Formula;
Formula.Rule =
Reference + ReduceHere()
| Constant
| FunctionCall
| ConstantArray
| OpenParen + Formula + CloseParen
| ReservedName
;
ReservedName.Rule = ReservedNameToken;
Constant.Rule =
Number
| Text
| Bool
| Error
;
Text.Rule = TextToken;
Number.Rule = NumberToken;
Bool.Rule = BoolToken;
Error.Rule = ErrorToken;
RefError.Rule = RefErrorToken;
#endregion
#region Functions
// Besides named function calls, unary and binary operator applications are FunctionCall nodes too.
// The function name token already contains the opening parenthesis, so only CloseParen appears here.
FunctionCall.Rule =
FunctionName + Arguments + CloseParen
| PrefixOp + Formula
| Formula + PostfixOp
| Formula + InfixOp + Formula
;
FunctionName.Rule = ExcelFunction;
Arguments.Rule = MakeStarRule(Arguments, comma, Argument);
EmptyArgument.Rule = EmptyArgumentToken;
Argument.Rule = Formula | EmptyArgument;
PrefixOp.Rule =
ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
MarkTransient(PrefixOp);
InfixOp.Rule =
expop
| mulop
| divop
| plusop
| minop
| concatop
| gtop
| eqop
| ltop
| neqop
| gteop
| lteop;
MarkTransient(InfixOp);
// ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
PostfixOp.Rule = PreferShiftHere() + percentop;
MarkTransient(PostfixOp);
#endregion
#region References
Reference.Rule =
ReferenceItem
| ReferenceFunctionCall
| OpenParen + Reference + PreferShiftHere() + CloseParen
| Prefix + ReferenceItem
| DynamicDataExchange
;
// Range (":"), intersection, parenthesized union, reference-returning functions and the "#" postfix
// are all represented as ReferenceFunctionCall nodes.
ReferenceFunctionCall.Rule =
Reference + colon + Reference
| Reference + intersectop + Reference
| OpenParen + Union + CloseParen
| RefFunctionName + Arguments + CloseParen
| Reference + hash
;
RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;
Union.Rule = MakePlusRule(Union, comma, Reference);
ReferenceItem.Rule =
Cell
| NamedRange
| VRange
| HRange
| RefError
| UDFunctionCall
| StructuredReference
;
MarkTransient(ReferenceItem);
UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
UDFName.Rule = UDFToken;
VRange.Rule = VRangeToken;
HRange.Rule = HRangeToken;
Cell.Rule = CellToken;
File.Rule =
FileNameNumericToken
| FileNameEnclosedInBracketsToken
| FilePathToken + FileNameEnclosedInBracketsToken
| FilePathToken + FileName
;
DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;
NamedRange.Rule = NameToken | NamedRangeCombinationToken;
// Quoted alternatives start with a separate QuoteS token; the closing quote and "!" are part of
// the quoted sheet tokens themselves.
Prefix.Rule =
SheetToken
| QuoteS + SheetQuotedToken
| File + SheetToken
| QuoteS + File + SheetQuotedToken
| File + exclamationMark
| MultipleSheetsToken
| QuoteS + MultipleSheetsQuotedToken
| File + MultipleSheetsToken
| QuoteS + File + MultipleSheetsQuotedToken
| RefErrorToken
;
StructuredReferenceQualifier.Rule = NameToken;
StructuredReferenceSpecifier.Rule =
SRSpecifierToken
| OpenSquareParen + SRSpecifierToken + CloseSquareParen;
StructuredReferenceColumn.Rule =
SRColumnToken
| OpenSquareParen + SRColumnToken + CloseSquareParen;
StructuredReferenceExpression.Rule =
StructuredReferenceColumn
| StructuredReferenceColumn + colon + StructuredReferenceColumn
| at + StructuredReferenceColumn
| at + StructuredReferenceColumn + colon + StructuredReferenceColumn
| StructuredReferenceSpecifier
| StructuredReferenceSpecifier + comma + StructuredReferenceColumn
| StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
| StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn
| StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
;
// Either a bare bracketed expression, a whole-table reference (Table[]), or a table-qualified expression.
StructuredReference.Rule =
OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
| StructuredReferenceQualifier + OpenSquareParen + CloseSquareParen
| StructuredReferenceQualifier + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
;
#endregion
#region Arrays
// Array constants: ArrayRows entries are separated by ";" and the constants inside each entry by ",".
ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;
ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);
ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
#endregion
#endregion
#region 5-Operator Precedence
// Some of these operators are neutral associative instead of left associative,
// but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
// structure of the parse tree, we like consistency.
RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
// Unary plus and minus get this precedence through ImplyPrecedenceHere in PrefixOp.Rule instead.
RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at);
RegisterOperators(Precedence.Union, Associativity.Left, comma);
RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
RegisterOperators(Precedence.Range, Associativity.Left, colon);
#endregion
}
#region Precedence and Priority constants
// Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
// Could also be an enum, but this way you don't need int casts
// Operator precedence levels passed to RegisterOperators and ImplyPrecedenceHere in the constructor.
// Values ascend from Comparison (1) to Range (11); 8 is left unused by the commented-out Reference entry.
private static class Precedence
{
// Don't use priority 0, Irony seems to view it as no priority set
public const int Comparison = 1;
public const int Concatenation = 2;
public const int Addition = 3;
public const int Multiplication = 4;
public const int Exponentiation = 5;
public const int UnaryPostFix = 6;
public const int UnaryPreFix = 7;
//public const int Reference = 8;
public const int Union = 9;
public const int Intersection = 10;
public const int Range = 11;
}
// Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
// E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
// E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
// Several entries deliberately or accidentally share a value: Name and FileNamePath (-800),
// StructuredReference and FileName (-500), and the five entries at 1200.
private static class TerminalPriority
{
// Irony Low value
//public const int Low = -1000;
public const int Name = -800;
public const int ReservedName = -700;
public const int StructuredReference = -500;
public const int FileName = -500;
public const int FileNamePath = -800;
public const int SingleQuotedString = -100;
// Irony Normal value, default value
//public const int Normal = 0;
public const int Bool = 0;
public const int MultipleSheetsToken = 100;
// Irony High value
//public const int High = 1000;
public const int CellToken = 1000;
public const int NamedRangeCombination = 1100;
public const int UDF = 1150;
public const int ExcelFunction = 1200;
public const int ExcelRefFunction = 1200;
public const int FileNameNumericToken = 1200;
public const int SheetToken = 1200;
public const int SheetQuotedToken = 1200;
}
#endregion
// Parsed function list, filled on first use. A race between threads is benign:
// each would compute an equivalent array and the reference assignment is atomic.
private static string[] s_excelFunctionListCache;

/// <summary>
/// Names of the built-in Excel functions, one entry per non-empty line of the embedded
/// v1.6.0 resource. The resource is parsed once and the result is reused afterwards.
/// </summary>
private static string[] excelFunctionList =>
    s_excelFunctionListCache ?? (s_excelFunctionListCache = GetExcelFunctionList());

/// <summary>
/// Reads the embedded function list resource and splits it into its non-empty lines.
/// </summary>
/// <returns>The function names in resource order.</returns>
private static string[] GetExcelFunctionList()
{
    var resource = Properties.Resources.ExcelBuiltinFunctionList_v160;
    using (var sr = new StringReader(resource))
    {
        // Splitting on both '\n' and '\r' with RemoveEmptyEntries handles any line-ending style
        return sr.ReadToEnd().Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
    }
}
}
#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
#region Non-Terminals
public const string Argument = "Argument";
public const string Arguments = "Arguments";
public const string ArrayColumns = "ArrayColumns";
public const string ArrayConstant = "ArrayConstant";
public const string ArrayFormula = "ArrayFormula";
public const string ArrayRows = "ArrayRows";
public const string Bool = "Bool";
public const string Cell = "Cell";
public const string Constant = "Constant";
public const string ConstantArray = "ConstantArray";
public const string DynamicDataExchange = "DynamicDataExchange";
public const string EmptyArgument = "EmptyArgument";
public const string Error = "Error";
public const string ExcelFunction = "ExcelFunction";
public const string File = "File";
public const string Formula = "Formula";
public const string FormulaWithEq = "FormulaWithEq";
public const string FunctionCall = "FunctionCall";
public const string FunctionName = "FunctionName";
// Note: the horizontal/vertical range names are abbreviated in the grammar ("HRange"/"VRange")
public const string HorizontalRange = "HRange";
public const string MultiRangeFormula = "MultiRangeFormula";
public const string NamedRange = "NamedRange";
public const string Number = "Number";
public const string Prefix = "Prefix";
public const string QuotedFileSheet = "QuotedFileSheet";
public const string Range = "Range";
public const string Reference = "Reference";
public const string ReferenceFunctionCall = "ReferenceFunctionCall";
public const string RefError = "RefError";
public const string RefFunctionName = "RefFunctionName";
public const string ReservedName = "ReservedName";
public const string Sheet = "Sheet";
public const string StructuredReference = "StructuredReference";
public const string StructuredReferenceColumn = "StructuredReferenceColumn";
public const string StructuredReferenceExpression = "StructuredReferenceExpression";
public const string StructuredReferenceSpecifier = "StructuredReferenceSpecifier";
public const string StructuredReferenceQualifier = "StructuredReferenceQualifier";
public const string Text = "Text";
public const string UDFName = "UDFName";
public const string UDFunctionCall = "UDFunctionCall";
public const string Union = "Union";
public const string VerticalRange = "VRange";
#endregion
#region Transient Non-Terminals
public const string TransientStart = "Start";
public const string TransientInfixOp = "InfixOp";
public const string TransientPostfixOp = "PostfixOp";
public const string TransientPrefixOp = "PrefixOp";
public const string TransientReferenceItem = "ReferenceItem";
#endregion
#region Terminals
public const string TokenBool = "BoolToken";
public const string TokenCell = "CellToken";
public const string TokenEmptyArgument = "EmptyArgumentToken";
public const string TokenError = "ErrorToken";
public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
public const string TokenFilePath = "FilePathToken";
public const string TokenFileName = "FileNameToken";
public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
public const string TokenFileNameNumeric = "FileNameNumericToken";
public const string TokenHRange = "HRangeToken";
public const string TokenIntersect = "INTERSECT";
public const string TokenMultipleSheets = "MultipleSheetsToken";
public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
public const string TokenName = "NameToken";
public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
public const string TokenNumber = "NumberToken";
public const string TokenRefError = "RefErrorToken";
public const string TokenReservedName = "ReservedNameToken";
public const string TokenSingleQuotedString = "SingleQuotedString";
public const string TokenSheet = "SheetNameToken";
public const string TokenSheetQuoted = "SheetNameQuotedToken";
public const string TokenSRColumn = "SRColumnToken";
public const string TokenSRSpecifier = "SRSpecifierToken";
public const string TokenText = "TextToken";
public const string TokenUDF = "UDFToken";
// The union operator's name is the operator symbol itself
public const string TokenUnionOperator = ",";
public const string TokenVRange = "VRangeToken";
#endregion
}
#endregion
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v160/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v160
{
///
/// Excel formula parser
/// Contains parser and utilities that operate directly on the parse tree, or makes working with the parse tree easier.
///
public static class ExcelFormulaParser
{
/// <summary>
/// Parser instance owned by the current thread; null until <see cref="P"/> is first read on that thread.
/// </summary>
[ThreadStatic] private static Parser _p;

/// <summary>
/// The calling thread's parser, created from a fresh <c>ExcelFormulaGrammar</c> on first access.
/// </summary>
private static Parser P
{
    get
    {
        if (_p == null)
        {
            _p = new Parser(new ExcelFormulaGrammar());
        }

        return _p;
    }
}
/// <summary>
/// Parses a formula and returns the root node of the resulting parse tree.
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">If the formula could not be parsed.</exception>
/// <returns>Parse tree root node.</returns>
public static ParseTreeNode Parse(string input) => ParseToTree(input).Root;
/// <summary>
/// Parses a formula and returns the complete parse tree, after correcting the source spans
/// of intersection tokens and quoted sheet tokens.
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">If the formula could not be parsed.</exception>
/// <returns>Parse tree.</returns>
public static ParseTree ParseToTree(string input)
{
    ParseTree parsed = P.Parse(input);
    if (parsed.HasErrors())
    {
        throw new ArgumentException("Failed parsing input <<" + input + ">>");
    }

    // Intersection tokens: move the span one character back and give it length 1
    foreach (ParseTreeNode node in parsed.Root.AllNodes())
    {
        if (!node.Is(GrammarNames.TokenIntersect))
        {
            continue;
        }

        SourceLocation reported = node.Span.Location;
        var corrected = new SourceLocation(reported.Position - 1, reported.Line, reported.Column - 1);
        node.Span = new SourceSpan(corrected, 1);
    }

    // Quoted sheet tokens: re-attach whitespace that directly precedes the token in the input
    foreach (ParseTreeNode node in parsed.Root.AllNodes())
    {
        if (node.Is(GrammarNames.TokenSheetQuoted))
        {
            PrefixInfo.FixQuotedSheetNodeForWhitespace(node, input);
        }
    }

    return parsed;
}
/// <summary>
/// All nodes (terminals and non-terminals) in depth-first pre-order, with a conditional stop.
/// </summary>
/// <param name="root">The root node</param>
/// <param name="stopAt">Don't process the children of a node matching this predicate; the node itself is still returned</param>
/// <returns>A lazily evaluated sequence starting with <paramref name="root"/>.</returns>
// inspiration taken from https://irony.codeplex.com/discussions/213938
public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
{
    var stack = new Stack<ParseTreeNode>();
    stack.Push(root);
    while (stack.Count > 0)
    {
        var node = stack.Pop();
        yield return node;

        // Check if we don't want to process the children of this node
        if (stopAt != null && stopAt(node))
        {
            continue;
        }

        var children = node.ChildNodes;
        // Push children on in reverse order so that they will
        // be evaluated left -> right when popped.
        for (int i = children.Count - 1; i >= 0; i--)
        {
            stack.Push(children[i]);
        }
    }
}
/// <summary>
/// All nodes (terminals and non-terminals) in depth-first pre-order
/// </summary>
/// <param name="root">The node to start from; it is the first element returned.</param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
{
    return AllNodesConditional(root);
}

/// <summary>
/// All nodes of a certain type in depth-first pre-order
/// </summary>
/// <param name="root">The node to start from.</param>
/// <param name="type">Grammar name the returned nodes must have.</param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
{
    return AllNodes(root.AllNodes(), type);
}

/// <summary>
/// Filters an existing node sequence down to the nodes of a certain type, keeping their order.
/// </summary>
internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
{
    return allNodes.Where(node => node.Is(type));
}
/// <summary>
/// Finds the node that has <paramref name="child"/> among its direct children.
/// </summary>
/// <remarks>
/// This is an expensive operation, as the whole tree will be searched through
/// </remarks>
/// <param name="child">The node whose parent is wanted.</param>
/// <param name="treeRoot">Root of the tree to search.</param>
/// <exception cref="ArgumentException">If no node in the tree has <paramref name="child"/> as a child.</exception>
public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
{
    foreach (ParseTreeNode candidate in treeRoot.AllNodes())
    {
        foreach (ParseTreeNode c in candidate.ChildNodes)
        {
            if (c == child)
            {
                return candidate;
            }
        }
    }

    throw new ArgumentException("Child is not part of the tree", nameof(child));
}
/// <summary>
/// The node type, i.e. the name of the grammar term the node was created for
/// </summary>
public static string Type(this ParseTreeNode node) => node.Term.Name;
/// <summary>
/// Check if a node's type equals the given grammar name
/// </summary>
public static bool Is(this ParseTreeNode pt, string type) => string.Equals(pt.Type(), type);
/// <summary>
/// Checks whether this node is a function: a (reference/user-defined) function call,
/// or a two-child Reference whose second child is itself a function.
/// </summary>
public static Boolean IsFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            return true;
        case GrammarNames.Reference:
            // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
            return input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction();
        default:
            return false;
    }
}
/// <summary>
/// Whether or not this node represents parentheses "(_)": a Formula or Reference
/// whose only child is a node of the same type.
/// </summary>
public static bool IsParentheses(this ParseTreeNode input)
{
    string kind = input.Type();
    bool canWrap = kind == GrammarNames.Formula || kind == GrammarNames.Reference;
    return canWrap
           && input.ChildNodes.Count == 1
           && input.ChildNodes[0].Is(kind);
}
/// <summary>
/// Whether this node is a function with exactly three children of which the middle one is an operator
/// </summary>
public static bool IsBinaryOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 3)
    {
        return false;
    }

    return input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
}

/// <summary>
/// Whether this node is a binary operation in the form of a FunctionCall
/// </summary>
public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input) =>
    input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);

/// <summary>
/// Whether this node is a binary operation in the form of a ReferenceFunctionCall
/// </summary>
public static bool IsBinaryReferenceOperation(this ParseTreeNode input) =>
    input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
/// <summary>
/// Whether this node is a unary prefix or unary postfix operation
/// </summary>
public static bool IsUnaryOperation(this ParseTreeNode input) =>
    IsUnaryPrefixOperation(input) || IsUnaryPostfixOperation(input);

/// <summary>
/// Whether this node is a function with exactly two children of which the first one is an operator
/// </summary>
public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }

    return input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator);
}

/// <summary>
/// Whether this node is a function with exactly two children of which the second one is an operator
/// </summary>
public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }

    return input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Returns <paramref name="input"/> without its last character
/// </summary>
private static string RemoveFinalSymbol(string input) => input.Remove(input.Length - 1);
/// <summary>
/// Get the function or operator name of this function call
/// </summary>
/// <param name="input">A node for which <c>IsFunction</c> holds.</param>
/// <returns>
/// The operator text for operations, the upper-cased name (without the trailing "(") for named
/// functions, and prefix + function name for external user-defined functions.
/// </returns>
/// <exception cref="ArgumentException">If the node is not a function call.</exception>
public static string GetFunction(this ParseTreeNode input)
{
    if (input.IsIntersection())
    {
        return GrammarNames.TokenIntersect;
    }

    if (input.IsUnion())
    {
        return GrammarNames.TokenUnionOperator;
    }

    if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
    {
        return input.ChildNodes[1].Print();
    }

    if (input.IsUnaryPrefixOperation())
    {
        return input.ChildNodes[0].Print();
    }

    if (input.IsNamedFunction())
    {
        // Upper-case culture-independently: with ToUpper() the result depends on the thread
        // culture (e.g. "if(" becomes "İF" under Turkish), which breaks name comparisons.
        return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
    }

    if (input.IsExternalUDFunction())
    {
        return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
    }

    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Check if this node is a function whose name (as returned by GetFunction) equals <paramref name="functionName"/>
/// </summary>
public static bool MatchFunction(this ParseTreeNode input, string functionName) =>
    IsFunction(input) && GetFunction(input) == functionName;
/// <summary>
/// Get all the arguments of a function or operation
/// </summary>
/// <param name="input">A named function, operation, union or external user-defined function node.</param>
/// <returns>The argument nodes in source order.</returns>
/// <exception cref="ArgumentException">If the node is none of the supported function shapes.</exception>
public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
{
    if (input.IsNamedFunction())
    {
        return input
            .ChildNodes[1] // "Arguments" non-terminal
            .ChildNodes    // "Argument" non-terminals
            .Select(node => node.ChildNodes[0])
            ;
    }

    if (input.IsBinaryOperation())
    {
        return new[] {input.ChildNodes[0], input.ChildNodes[2]};
    }

    if (input.IsUnaryPrefixOperation())
    {
        return new[] {input.ChildNodes[1]};
    }

    if (input.IsUnaryPostfixOperation())
    {
        return new[] {input.ChildNodes[0]};
    }

    if (input.IsUnion())
    {
        return input.ChildNodes[0].ChildNodes;
    }

    if (input.IsExternalUDFunction())
    {
        return input           // Reference
            .ChildNodes[1]     // UDFunctionCall
            .ChildNodes[1]     // Arguments
            .ChildNodes        // Argument non-terminals
            .Select(node => node.ChildNodes[0])
            ;
    }

    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Checks whether this node is a function whose first child is a FunctionName or RefFunctionName
/// </summary>
public static bool IsBuiltinFunction(this ParseTreeNode node)
{
    if (!node.IsFunction())
    {
        return false;
    }

    ParseTreeNode head = node.ChildNodes[0];
    return head.Is(GrammarNames.FunctionName) || head.Is(GrammarNames.RefFunctionName);
}
/// <summary>
/// Whether or not this node is a binary operation whose operator is the intersect terminal
/// </summary>
public static bool IsIntersection(this ParseTreeNode input)
{
    if (!IsBinaryOperation(input))
    {
        return false;
    }

    string operatorName = input.ChildNodes[1].Token.Terminal.Name;
    return operatorName == GrammarNames.TokenIntersect;
}
/// <summary>
/// Whether or not this node represents a union: a ReferenceFunctionCall with a single Union child
/// </summary>
public static bool IsUnion(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.ReferenceFunctionCall))
    {
        return false;
    }

    if (input.ChildNodes.Count != 1)
    {
        return false;
    }

    return input.ChildNodes[0].Is(GrammarNames.Union);
}
/// <summary>
/// Checks whether this node is a function call with name, and not just a unary or binary operation
/// </summary>
public static bool IsNamedFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.FunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.FunctionName);
        case GrammarNames.ReferenceFunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.RefFunctionName);
        case GrammarNames.UDFunctionCall:
            // User-defined function calls always carry a name
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Whether this node is a binary or unary operation
/// </summary>
public static bool IsOperation(this ParseTreeNode input) =>
    input.IsBinaryOperation() || input.IsUnaryOperation();

/// <summary>
/// Whether this node is a two-child Reference whose second child is a named function
/// </summary>
public static bool IsExternalUDFunction(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.Reference) || input.ChildNodes.Count != 2)
    {
        return false;
    }

    return input.ChildNodes[1].IsNamedFunction();
}
/// <summary>
/// True if this node is a unary prefix operation applied directly to a number constant
/// </summary>
public static bool IsNumberWithSign(this ParseTreeNode input)
{
    if (!IsUnaryPrefixOperation(input))
    {
        return false;
    }

    // The operand is the second child; its first child has to be Constant -> Number
    ParseTreeNode operand = input.ChildNodes[1].ChildNodes[0];
    return operand.Is(GrammarNames.Constant)
           && operand.ChildNodes[0].Is(GrammarNames.Number);
}
/// <summary>
/// Extract all of the information from a Prefix non-terminal
/// </summary>
/// <param name="prefix">The Prefix node; handed to <c>PrefixInfo.From</c> unchanged.</param>
public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix)
{
    return PrefixInfo.From(prefix);
}
/// <summary>
/// Descends through first children until a node that is not a Formula is reached
/// </summary>
public static ParseTreeNode SkipFormula(this ParseTreeNode input)
{
    ParseTreeNode current;
    for (current = input; current.Is(GrammarNames.Formula); current = current.ChildNodes.First())
    {
        // Nothing to do: the loop header performs the descent
    }

    return current;
}
/// <summary>
/// Get all child nodes that are references and aren't part of another reference expression
/// </summary>
/// <param name="input">The node to search; it is itself returned if it is a Reference.</param>
/// <returns>The outermost Reference nodes, each passed through <c>SkipToRelevant</c>.</returns>
public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
{
    // Stop descending at every Reference so nested references are not reported separately
    return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
        .Where(node => node.Is(GrammarNames.Reference))
        .Select(node => node.SkipToRelevant())
        ;
}
/// <summary>
/// Gets the ParserReferences from the input parse tree node and its children
/// </summary>
/// <remarks>
/// 5 cases:
/// 1. ReferenceItem node: convert to ParserReference
/// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
///    (to include the references in the arguments of external UDFs)
/// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
/// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
/// 5. Other cases (RefFunctionCall, Union, Arguments):recursive call on the nodes returned from GetReferenceNodes(node)
/// </remarks>
/// <param name="node">The node to collect references from.</param>
/// <returns>The references found, in the order they were encountered.</returns>
public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
{
    // Unwrap every single-child Reference (plain wrappers and parentheses). Unwrapping only once
    // would send a parenthesised reference into the prefixed-Reference case below, which requires
    // a Prefix as first child and a second child.
    while (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
    {
        node = node.ChildNodes[0];
    }

    var list = new List<ParserReference>();
    switch (node.Type())
    {
        case GrammarNames.Cell:
        case GrammarNames.NamedRange:
        case GrammarNames.HorizontalRange:
        case GrammarNames.VerticalRange:
        case GrammarNames.StructuredReference:
            list.Add(new ParserReference(node));
            break;
        case GrammarNames.Reference:
            list.Add(new ParserReference(node));
            list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            break;
        default:
            if (node.IsRange())
            {
                var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray();
                var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray();
                if (IsCellReference(rangeStart) && IsCellReference(rangeEnd))
                {
                    // Cell:Cell is reported as one CellRange, reusing the start reference
                    ParserReference range = rangeStart.First();
                    range.MaxLocation = rangeEnd.First().MinLocation;
                    range.ReferenceType = ReferenceType.CellRange;
                    range.LocationString = node.Print();
                    list.Add(range);
                }
                else
                {
                    list.AddRange(rangeStart);
                    list.AddRange(rangeEnd);
                }
            }
            else
            {
                list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            }

            break;
    }

    return list;
}
/// <summary>
/// True if the list holds exactly one reference and that reference is of type Cell
/// </summary>
private static bool IsCellReference(IList<ParserReference> references)
{
    return references.Count == 1 && references[0].ReferenceType == ReferenceType.Cell;
}
/// <summary>
/// Whether or not this node is a binary reference operation with ":" as operator
/// </summary>
public static bool IsRange(this ParseTreeNode input)
{
    if (!input.IsBinaryReferenceOperation())
    {
        return false;
    }

    return input.ChildNodes[1].Is(":");
}
/// <summary>
/// Go to the first "relevant" child node, i.e. skips wrapper nodes
/// </summary>
/// <param name="input">The input parse tree node</param>
/// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
/// <remarks>
/// Skips:
/// * FormulaWithEq and ArrayFormula nodes
/// * Argument and Formula nodes with a single child
/// * Parentheses
/// * Reference nodes which are just wrappers
/// </remarks>
public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
{
    ParseTreeNode current = input;
    for (;;)
    {
        string kind = current.Type();

        if (kind == GrammarNames.FormulaWithEq || kind == GrammarNames.ArrayFormula)
        {
            // The formula itself is the second child
            current = current.ChildNodes[1];
            continue;
        }

        if (kind == GrammarNames.Argument || kind == GrammarNames.Formula)
        {
            if (current.ChildNodes.Count != 1)
            {
                return current;
            }

            current = current.ChildNodes[0];
            continue;
        }

        if (kind == GrammarNames.Reference)
        {
            // Skip references without a prefix (=> they only have one child node) if the option is set,
            // and always skip references which are parentheses
            bool isBareWrapper = skipReferencesWithoutPrefix && current.ChildNodes.Count == 1;
            if (!isBareWrapper && !current.IsParentheses())
            {
                return current;
            }

            current = current.ChildNodes[0];
            continue;
        }

        return current;
    }
}
/// <summary>
/// Pretty-print a parse tree to a string
/// </summary>
/// <param name="input">The node to print; terminals print their token text.</param>
/// <returns>The formula text reconstructed from the node and its children.</returns>
/// <exception cref="ArgumentException">
/// If a function node has an unknown shape, or the node type is not handled and has more or fewer than one child.
/// </exception>
public static string Print(this ParseTreeNode input)
{
    // For terminals, just print the token text
    if (input.Term is Terminal)
    {
        return input.Token.Text;
    }

    // (Lazy) enumerable for printed children
    var children = input.ChildNodes.Select(Print);
    // Concrete list when needed
    List<string> childrenList;

    // Switch on non-terminals
    switch (input.Term.Name)
    {
        case GrammarNames.Formula:
            // Check if these are brackets, otherwise print first child
            return IsParentheses(input) ? $"({children.First()})" : children.First();

        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            childrenList = children.ToList();

            if (input.IsNamedFunction())
            {
                // The name token already ends with "(", so only the closing parenthesis is added
                return string.Join("", childrenList) + ")";
            }

            if (input.IsBinaryOperation())
            {
                // format string for "normal" binary operation
                string format = "{0}{1}{2}";
                if (input.IsIntersection())
                {
                    // The intersect operator is printed as a single space
                    format = "{0} {2}";
                }

                return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
            }

            if (input.IsUnion())
            {
                return $"({string.Join(",", childrenList)})";
            }

            if (input.IsUnaryOperation())
            {
                return string.Join("", childrenList);
            }

            throw new ArgumentException("Unknown function type.");

        case GrammarNames.Reference:
            return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);

        case GrammarNames.Prefix:
            var ret = string.Join("", children);
            // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
            if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
            {
                ret += "!";
            }

            return ret;

        case GrammarNames.ArrayFormula:
            return "{=" + children.ElementAt(1) + "}";

        // Terms for which to print all child nodes concatenated
        case GrammarNames.ArrayConstant:
        case GrammarNames.DynamicDataExchange:
        case GrammarNames.FormulaWithEq:
        case GrammarNames.File:
        case GrammarNames.MultiRangeFormula:
        case GrammarNames.StructuredReference:
        case GrammarNames.StructuredReferenceColumn:
        case GrammarNames.StructuredReferenceExpression:
        case GrammarNames.StructuredReferenceSpecifier:
            return string.Join("", children);

        // Terms for which we print the children comma-separated
        case GrammarNames.Arguments:
        case GrammarNames.ArrayRows:
        case GrammarNames.Union:
            return string.Join(",", children);

        case GrammarNames.ArrayColumns:
            return string.Join(";", children);

        case GrammarNames.ConstantArray:
            return $"{{{children.First()}}}";

        default:
            // If it is not defined above and the number of children is exactly one, we want to just print the first child
            if (input.ChildNodes.Count == 1)
            {
                return children.First();
            }

            throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine +
                                        "This probably means the Excel grammar was modified without the print function being modified");
    }
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v160/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v160
{
/// <summary>
/// The kind of reference a <c>ParserReference</c> describes.
/// </summary>
public enum ReferenceType
{
// A single cell
Cell,
// Two cell references joined by the range operator
CellRange,
// A named range
UserDefinedName,
HorizontalRange,
VerticalRange,
RefError,
// A structured (table) reference
Table
}
public class ParserReference
{
// NOTE(review): presumably the row/column limits of an Excel worksheet - not used in the visible code, confirm
public const int MaxRangeHeight = 1048576;
public const int MaxRangeWidth = 16384;
// Kind of reference; set from the node type in InitializeReference
public ReferenceType ReferenceType { get; set; }
// Printed form of the node this reference was created from
public string LocationString { get; set; }
// Sheet name from the prefix; for a multi-sheet prefix this is the first sheet
public string Worksheet { get; set; }
// Second sheet of a multi-sheet prefix; not set otherwise
public string LastWorksheet { get; set; }
public string FilePath { get; set; }
// File name from the prefix, or the file number converted to a string
public string FileName { get; set; }
// Name of a named range or of the table of a structured reference
public string Name { get; private set; }
public string MinLocation { get; set; } //Location as appearing in the formula, eg $A$1
public string MaxLocation { get; set; }
public string[] TableSpecifiers { get; set; }
public string[] TableColumns { get; set; }
/// <summary>
/// Creates a reference from explicit values.
/// </summary>
/// <param name="maxLocation">End location; falls back to <paramref name="minLocation"/> when null.</param>
public ParserReference(ReferenceType referenceType, string locationString = null, string worksheet = null, string lastWorksheet = null,
    string filePath = null, string fileName = null, string name = null, string minLocation = null, string maxLocation = null,
    string[] tableSpecifiers = null, string[] tableColumns = null)
{
    ReferenceType = referenceType;
    Name = name;
    LocationString = locationString;

    // Where the reference lives
    FilePath = filePath;
    FileName = fileName;
    Worksheet = worksheet;
    LastWorksheet = lastWorksheet;

    // What it covers
    MinLocation = minLocation;
    MaxLocation = maxLocation ?? minLocation;
    TableSpecifiers = tableSpecifiers;
    TableColumns = tableColumns;
}

/// <summary>
/// Creates a reference from a parse tree node, see <see cref="InitializeReference"/>.
/// </summary>
public ParserReference(ParseTreeNode node) => InitializeReference(node);
/// <summary>
/// Initializes the current object based on the input ParseTreeNode
/// </summary>
/// <remarks>
/// For Reference nodes (Prefix ReferenceItem), it initialize the values derived from the Prefix node and
/// is re-invoked for the ReferenceItem node.
/// </remarks>
/// <param name="node">A Reference, Cell, NamedRange, StructuredReference, HRange, VRange or RefError node; other types only set LocationString.</param>
public void InitializeReference(ParseTreeNode node)
{
    string kind = node.Type();

    if (kind == GrammarNames.Reference)
    {
        PrefixInfo prefix = node.ChildNodes[0].GetPrefixInfo();

        if (prefix.HasSheet)
        {
            // Doubled quotes are the escaped form of a single quote
            Worksheet = prefix.Sheet.Replace("''", "'");
        }
        else
        {
            Worksheet = "(Undefined sheet)";
        }

        if (prefix.HasMultipleSheets)
        {
            string[] sheets = prefix.MultipleSheets.Split(':');
            Worksheet = sheets[0];
            LastWorksheet = sheets[1];
        }

        if (prefix.HasFilePath)
        {
            FilePath = prefix.FilePath;
        }

        // A numeric file reference takes precedence over a file name
        if (prefix.HasFileNumber)
        {
            FileName = prefix.FileNumber.ToString();
        }
        else if (prefix.HasFileName)
        {
            FileName = prefix.FileName;
        }

        InitializeReference(node.ChildNodes[1]);
    }
    else if (kind == GrammarNames.Cell)
    {
        ReferenceType = ReferenceType.Cell;
        MinLocation = node.ChildNodes[0].Token.ValueString;
        MaxLocation = MinLocation;
    }
    else if (kind == GrammarNames.NamedRange)
    {
        ReferenceType = ReferenceType.UserDefinedName;
        Name = node.ChildNodes[0].Token.ValueString;
    }
    else if (kind == GrammarNames.StructuredReference)
    {
        ReferenceType = ReferenceType.Table;
        Name = node.ChildNodes.FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceQualifier)?.ChildNodes[0].Token.ValueString;
        TableSpecifiers = node.AllNodes()
            .Where(x => x.Is(GrammarNames.TokenSRSpecifier) || x.Is("@"))
            .Select(x => UnEscape(x.Token.ValueString, "'"))
            .ToArray();
        TableColumns = node.AllNodes()
            .Where(x => x.Is(GrammarNames.TokenSRColumn))
            .Select(x => UnEscape(x.Token.ValueString, "'"))
            .ToArray();
    }
    else if (kind == GrammarNames.HorizontalRange)
    {
        string[] limits = node.ChildNodes[0].Token.ValueString.Split(':');
        ReferenceType = ReferenceType.HorizontalRange;
        MinLocation = limits[0];
        MaxLocation = limits[1];
    }
    else if (kind == GrammarNames.VerticalRange)
    {
        string[] limits = node.ChildNodes[0].Token.ValueString.Split(':');
        ReferenceType = ReferenceType.VerticalRange;
        MinLocation = limits[0];
        MaxLocation = limits[1];
    }
    else if (kind == GrammarNames.RefError)
    {
        ReferenceType = ReferenceType.RefError;
    }

    // Set last, so for a Reference node the full (prefixed) text overwrites the one set by the nested call
    LocationString = node.Print();
}
/// <summary>
/// Removes every occurrence of <paramref name="escapeCharacter"/> that is not directly followed
/// by another occurrence of it.
/// </summary>
/// <param name="value">The text to un-escape.</param>
/// <param name="escapeCharacter">The escape character, taken literally.</param>
private static string UnEscape(string value, string escapeCharacter)
{
    // Escape the character so regex metacharacters (".", "[", "\", ...) are matched literally
    var literal = System.Text.RegularExpressions.Regex.Escape(escapeCharacter);
    return System.Text.RegularExpressions.Regex.Replace(value, $"{literal}(?!{literal})", "");
}
/// <summary>
/// Converts the column number to an Excel column string representation.
/// </summary>
/// <param name="columnNumber">The zero-based column number.</param>
/// <returns>The column letters (0 gives "A", 26 gives "AA"); empty for a negative number.</returns>
private string ConvertColumnToStr(int columnNumber)
{
    var letters = new System.Text.StringBuilder();
    // Produce letters from least to most significant, prepending each one
    for (int remaining = columnNumber; remaining >= 0; remaining = remaining / 26 - 1)
    {
        char letter = (char)('A' + remaining % 26);
        letters.Insert(0, letter);
    }

    return letters.ToString();
}
/// <summary>
/// The location for a cell reference, "MinLocation:MaxLocation" for every other reference type.
/// </summary>
/// <returns>Never null; missing locations are printed as empty text.</returns>
public override string ToString()
{
    // MinLocation may be null (e.g. when built through the constructor defaults); ToString must not throw
    return ReferenceType == ReferenceType.Cell
        ? (MinLocation ?? string.Empty)
        : string.Format("{0}:{1}", MinLocation, MaxLocation);
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v160/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v160
{
// TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23
///
/// Simple data class that holds information about a Prefix.
///
///
public class PrefixInfo : IEquatable
{
// Path in front of the file name; null when the prefix has none
public string FilePath { get; }
public bool HasFilePath => FilePath != null;
// Numeric file reference; null when the prefix has none
private readonly int? _fileNumber;
// The file number, or 0 when there is none - check HasFileNumber first
public int FileNumber => _fileNumber.GetValueOrDefault();
public bool HasFileNumber => _fileNumber.HasValue;
// File name; null when the prefix has none
public string FileName { get; }
public bool HasFileName => FileName != null;
// True when the prefix refers to a file, by name or by number
public bool HasFile => HasFileName || HasFileNumber;
// Sheet name; null when the prefix has none
public string Sheet { get; }
public bool HasSheet => Sheet != null;
// Multi-sheet text; null when the prefix has none
public string MultipleSheets { get; }
public bool HasMultipleSheets => MultipleSheets != null;
// Whether the prefix is wrapped in single quotes
public bool IsQuoted { get; }
/// <summary>
/// Creates a prefix description from explicit values; every part is optional.
/// </summary>
public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
{
Sheet = sheet;
_fileNumber = fileNumber;
FileName = fileName;
FilePath = filePath;
MultipleSheets = multipleSheets;
IsQuoted = isQuoted;
}
/// <summary>
/// Create a PrefixInfo class from a parse tree node
/// </summary>
/// <param name="prefix">A node of type Prefix.</param>
/// <returns>The file, sheet and quoting information found in the prefix.</returns>
/// <exception cref="ArgumentException">If the node is not a Prefix.</exception>
internal static PrefixInfo From(ParseTreeNode prefix)
{
    if (prefix.Type() != GrammarNames.Prefix)
    {
        throw new ArgumentException("Not a prefix", nameof(prefix));
    }

    string filePath = null;
    int? fileNumber = null;
    string fileName = null;
    string sheetName = null;
    string multipleSheets = null;

    // Token number we're processing
    var cur = 0;

    // Check for quotes
    var isQuoted = prefix.ChildNodes[cur].Is("'");
    if (isQuoted)
    {
        cur++;
    }

    // Check and process file
    if (prefix.ChildNodes[cur].Is(GrammarNames.File))
    {
        ParseTreeNode file = prefix.ChildNodes[cur];

        if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
        {
            // Numeric filename: strip the surrounding brackets before parsing
            fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
        }
        else
        {
            // String filename
            var iCur = 0;
            // Check if it includes a path
            if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
            {
                filePath = file.ChildNodes[iCur].Print();
                iCur++;
            }

            if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
            {
                fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
            }
            else
            {
                fileName = file.ChildNodes[iCur].Print();
            }
        }

        cur++;
    }

    // A prefix can consist of a file only (the "!" is not part of the parse tree),
    // in which case there is no sheet token left to inspect.
    if (cur < prefix.ChildNodes.Count)
    {
        ParseTreeNode sheetNode = prefix.ChildNodes[cur];

        // Check for a non-quoted sheet
        if (sheetNode.Is(GrammarNames.TokenSheet))
        {
            // remove !
            sheetName = Substr(sheetNode.Print(), 1);
        }
        // Check for a quoted sheet
        else if (sheetNode.Is(GrammarNames.TokenSheetQuoted))
        {
            // remove quote and !
            sheetName = Substr(sheetNode.Print(), 2);

            if (sheetName == "")
            {
                // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                sheetName = " ";
            }
        }
        // Check if multiple sheets
        else if (sheetNode.Is(GrammarNames.TokenMultipleSheets))
        {
            // remove !
            multipleSheets = Substr(sheetNode.Print(), 1);
        }
    }

    return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
}
/// <summary>
/// Extends a quoted sheet token backwards so that it includes the whitespace directly in front of it
/// in the source text, updating both the node's span and the token text.
/// </summary>
/// <param name="quotedSheetNode">The quoted sheet token node to correct.</param>
/// <param name="sourceText">The complete formula text the node was parsed from.</param>
internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
{
    var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
    SourceLocation currentLocation = quotedSheetNode.Span.Location;
    if (newPosition == currentLocation.Position)
    {
        return;
    }

    // The start moves backwards, so the column has to shrink by the same number of characters.
    // NOTE(review): this assumes the skipped whitespace is on the same line as the token.
    var shift = currentLocation.Position - newPosition;
    var newLocation = new SourceLocation(newPosition, currentLocation.Line, currentLocation.Column - shift);
    quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

    // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
    // The span was replaced above, so Span.Length is already the extended length.
    typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
        ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
}
/// <summary>
/// Walks backwards from the node's start position over any whitespace in the source text
/// and returns the position of the first whitespace character (or the original position if there is none).
/// </summary>
private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
{
    var position = nodeSheetQuoted.Span.Location.Position;
    while (position > 0 && char.IsWhiteSpace(sourceText[position - 1]))
    {
        position--;
    }

    return position;
}
/// <summary>
/// Returns <paramref name="s"/> without its first <paramref name="removeFirst"/> and last <paramref name="removeLast"/> characters
/// </summary>
private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
{
    int keep = s.Length - removeLast - removeFirst;
    return s.Substring(removeFirst, keep);
}
public override bool Equals(object other) => Equals(other as PrefixInfo);
public bool Equals(PrefixInfo other)
{
if (ReferenceEquals(null, other)) return false;
if (ReferenceEquals(this, other)) return true;
return _fileNumber == other._fileNumber && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase) && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase) && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase) && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
}
public override int GetHashCode()
{
unchecked
{
var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
return hashCode;
}
}
/// <summary>Equality operator; delegates to the static <c>Equals(object, object)</c>, so nulls are handled.</summary>
public static bool operator ==(PrefixInfo left, PrefixInfo right) => Equals(left, right);

/// <summary>Inequality operator; the negation of <c>==</c>.</summary>
public static bool operator !=(PrefixInfo left, PrefixInfo right) => !Equals(left, right);
/// <summary>
/// Renders the prefix as formula text: the optional parts in the order file path,
/// [file number], [file name], sheet, multiple sheets — wrapped in single quotes when
/// the prefix is quoted — followed by "!".
/// </summary>
public override string ToString()
{
    var text = new StringBuilder();

    if (IsQuoted)
    {
        text.Append("'");
    }

    if (HasFilePath)
    {
        text.Append(FilePath);
    }

    if (HasFileNumber)
    {
        text.Append($"[{FileNumber}]");
    }

    if (HasFileName)
    {
        text.Append($"[{FileName}]");
    }

    if (HasSheet)
    {
        text.Append(Sheet);
    }

    if (HasMultipleSheets)
    {
        text.Append(MultipleSheets);
    }

    if (IsQuoted)
    {
        text.Append("'");
    }

    return text.Append("!").ToString();
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v161/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.IO;
using System.Reflection;
namespace XLParser.Web.XLParserVersions.v161
{
/// <summary>
/// Contains the XLParser grammar
/// </summary>
[Language("Excel Formulas", "1.6.1", "Grammar for Excel Formulas")]
public class ExcelFormulaGrammar : Grammar
{
#region 1-Terminals
#region Symbols and operators
// Punctuation and operator terminals. Each expression-bodied property below calls ToTerm
// with the same literal text every time it is read.
public Terminal at => ToTerm("@");
public Terminal comma => ToTerm(",");
public Terminal colon => ToTerm(":");
public Terminal hash => ToTerm("#");
public Terminal semicolon => ToTerm(";");
public Terminal OpenParen => ToTerm("(");
public Terminal CloseParen => ToTerm(")");
public Terminal CloseSquareParen => ToTerm("]");
public Terminal OpenSquareParen => ToTerm("[");
public Terminal exclamationMark => ToTerm("!");
public Terminal CloseCurlyParen => ToTerm("}");
public Terminal OpenCurlyParen => ToTerm("{");
public Terminal QuoteS => ToTerm("'");
// Arithmetic, concatenation and exponentiation operators
public Terminal mulop => ToTerm("*");
public Terminal plusop => ToTerm("+");
public Terminal divop => ToTerm("/");
public Terminal minop => ToTerm("-");
public Terminal concatop => ToTerm("&");
public Terminal expop => ToTerm("^");
// Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
// Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);
// Postfix percent operator
public Terminal percentop => ToTerm("%");
// Comparison operators
public Terminal gtop => ToTerm(">");
public Terminal eqop => ToTerm("=");
public Terminal ltop => ToTerm("<");
public Terminal neqop => ToTerm("<>");
public Terminal gteop => ToTerm(">=");
public Terminal lteop => ToTerm("<=");
#endregion
#region Literals
// Boolean literal: the text TRUE or FALSE
public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE")
{
Priority = TerminalPriority.Bool
};
// Numeric literal; integer types configured are Int32, Int64 and BigInt
public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None)
{
DefaultIntTypes = new[] {TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt}
};
// Text literal delimited by double quotes; options: doubled quote allowed, line breaks allowed, no escapes
public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes);
// Same options as TextToken but delimited by single quotes; used by the DynamicDataExchange rule
public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes)
{ Priority = TerminalPriority.SingleQuotedString };
// Error values other than #REF!
public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!");
// #REF! is a separate terminal: the rules use it both for RefError and as a Prefix alternative
public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError);
#endregion
#region Functions
private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers
// Content of the character class for UDF names: the special characters above, backslash, dot and word characters
private const string AllUdfChars = SpecialUdfChars + @"\\.\w";
// Optional UDF prefix: a single-quoted name ending in ".xla" followed by "!", or the text "_xll."
private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)";
// The following regex uses the rather exotic feature Character Class Subtraction
// https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction
// Three alternatives, each followed by "(":
//   1. a single allowed character that is not C, c, R or r
//   2. a prefix followed by a single allowed character
//   3. an optional prefix followed by 2 to 1023 allowed characters
private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\(";
public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) {Priority = TerminalPriority.UDF};
// Built-in functions handled as reference functions; the token includes the opening parenthesis
public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(")
{ Priority = TerminalPriority.ExcelRefFunction };
// IF and CHOOSE are also accepted where a reference function name is expected (see RefFunctionName rule)
public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(")
{ Priority = TerminalPriority.ExcelRefFunction };
// Any name from the built-in function list followed by "(".
// NOTE(review): the names are joined into the pattern without regex escaping, so a "." in a
// function name would match any character — confirm whether that is intended.
public Terminal ExcelFunction { get; } = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + string.Join("|", excelFunctionList) + ")\\(")
{ Priority = TerminalPriority.ExcelFunction };
// Using this instead of Empty allows a more accurate tree
public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);
#endregion
#region References and names
// Column letters A through XFD: two or three letters starting with A-W, XAA-XEZ, XFA-XFD, or one or two letters
private const string ColumnPattern = @"(?:[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D]|[A-Z]{1,2})";
// Whole-column range such as A:C, each side optionally prefixed with "$"
public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern);
// Whole-row range such as 1:3, each side optionally prefixed with "$"; row numbers have no leading zero
public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*");
// Single cell such as A1 or $A$1: column pattern followed by a row number without leading zero
private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?[1-9][0-9]*";
public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex)
{ Priority = TerminalPriority.CellToken };
// Start with a letter, backslash or underscore; continue with word characters (letters, numbers and underscore),
// backslash, dot, question mark or the euro sign
private const string NameStartCharRegex = @"[\p{L}\\_]";
private const string NameValidCharacterRegex = @"[\w\\_\.\?€]";
public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*")
{ Priority = TerminalPriority.Name };
// Words that would lex as another token (TRUE/FALSE or a cell reference) but continue with further name
// characters, e.g. "A1Blah": these must be treated as one name rather than as a cell token followed by a name.
// If we ever parse R1C1 references, make sure to include them here
// TODO: Add all function names here
private const string NameInvalidWordsRegex =
"((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
// \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
+ "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
;
// To prevent e.g. "A1A1" being parsed as 2 cell tokens
public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NameInvalidWordsRegex + NameValidCharacterRegex + "+")
{ Priority = TerminalPriority.NamedRangeCombination };
// Reserved names: "_xlnm." followed by ASCII letters or underscores
public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+")
{ Priority = TerminalPriority.ReservedName };
#region Structured References
// Item specifiers of a structured reference, e.g. #All or #This Row
private const string SRSpecifierRegex = @"#(All|Data|Headers|Totals|This Row)";
public Terminal SRSpecifierToken = new RegexBasedTerminal(GrammarNames.TokenSRSpecifier, SRSpecifierRegex)
{ Priority = TerminalPriority.StructuredReference };
// Column name: one or more characters other than [ ] ' # @, where each of those five may appear when preceded by a single quote
private const string SRColumnRegex = @"(?:[^\[\]'#@]|(?:'['\[\]#@]))+";
public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, SRColumnRegex)
{ Priority = TerminalPriority.StructuredReference };
#endregion
#region Prefixes
// Characters that are excluded from an unquoted sheet name (a name containing them must be quoted)
private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
// Characters that are excluded from both quoted and unquoted sheet names
private const string notSheetNameChars = @"'*\[\]\\:/?";
//const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
//const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
// Unquoted sheet name: one or more characters outside both sets above
private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
// Content of a quoted sheet name: any allowed character, or a doubled single quote
private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*";
//private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";
// Unquoted sheet name including the trailing "!"
public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!")
{ Priority = TerminalPriority.SheetToken };
// Quoted sheet name content plus the closing quote and "!"; the opening quote is the separate QuoteS terminal in the Prefix rule
public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!")
{ Priority = TerminalPriority.SheetQuotedToken };
// Sheet ranges of the form first:last!, unquoted and quoted
private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!";
private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!";
public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
// Bracketed number such as [1]; the lookahead requires a "!" further on with no bracket in between
private const string fileNameNumericRegex = @"\[[0-9]+\](?=[^\[\]]*!)";
public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex)
{ Priority = TerminalPriority.FileNameNumericToken };
// Bracketed file name such as [Book1.xlsx], with the same lookahead as the numeric form
private const string fileNameInBracketsRegex = @"\[[^\[\]]+\](?=[^\[\]]*!)";
public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex)
{ Priority = TerminalPriority.FileName };
// Source: https://stackoverflow.com/a/14632579
// File name without brackets: characters other than dot, backslash and brackets, then a dot and 1 to 4 further characters
private const string fileNameRegex = @"[^\.\\\[\]]+\..{1,4}";
public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex)
{ Priority = TerminalPriority.FileName };
// Source: http://stackoverflow.com/a/6416209/572635
// Windows path: a drive letter with colon, or a name optionally preceded by up to two backslashes,
// then a backslash and any number of backslash-terminated folder segments.
private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\-.$ @]+)\\(([^<>\"" /\|?*\\']|( |''))*\\)*";
// URL path: http(s)://host, an optional ":port", a slash and any number of slash-terminated segments.
// The port digits must be written as the character class [0-9]; a group "(0-9)" only matches the
// literal text "0-9", which makes every URL with a numeric port fail to match.
private const string urlPathRegex = @"http(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:[0-9]*)*[/]([a-zA-Z0-9\-\.\?\,\'+&%\$#_ ()]*[/])*";
// A file path is either a Windows path or a URL path
private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")";
public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex)
{ Priority = TerminalPriority.FileNamePath };
#endregion
#endregion
#endregion
#region 2-NonTerminals
// Most non-terminals are first defined here, so they can be used anywhere in the rules
// Otherwise you can only use non-terminals that have been defined previously
// Each non-terminal takes its name from GrammarNames; the rules themselves are assigned in the constructor.
// Note: the constructor of this class assigns no rule to QuotedFileSheet or Sheet.
public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument);
public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments);
public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns);
public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant);
public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula);
public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows);
public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool);
public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell);
public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant);
public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray);
public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument);
public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error);
public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula);
public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq);
public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall);
public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName);
public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange);
public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp);
public NonTerminal MultiRangeFormula{ get; } = new NonTerminal(GrammarNames.MultiRangeFormula);
public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange);
public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number);
public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix);
public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference);
public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError);
public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName);
public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName);
public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet);
public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart);
public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
public NonTerminal StructuredReferenceColumn { get; } = new NonTerminal(GrammarNames.StructuredReferenceColumn);
public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
public NonTerminal StructuredReferenceSpecifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceSpecifier);
public NonTerminal StructuredReferenceQualifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceQualifier);
public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);
#endregion
// Builds the grammar: marks punctuation, assigns a rule to each non-terminal and registers
// operator precedence. The base constructor is called with "false".
// NOTE(review): presumably that argument is Irony's case-sensitivity flag — confirm.
public ExcelFormulaGrammar() : base(false)
{
#region Punctuation
// Round and curly brackets are marked as punctuation.
// NOTE(review): presumably Irony then leaves them out of the parse tree — confirm.
MarkPunctuation(OpenParen, CloseParen);
MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
#endregion
#region Rules
#region Base rules
Root = Start;
// A parse starts with a formula (with or without leading "="), an array formula "{=...}" or "=" followed by a union of references
Start.Rule =
FormulaWithEq
| Formula
| ArrayFormula
| MultiRangeFormula
;
MarkTransient(Start);
ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;
MultiRangeFormula.Rule = eqop + Union;
FormulaWithEq.Rule = eqop + Formula;
Formula.Rule =
Reference + ReduceHere()
| Constant
| FunctionCall
| ConstantArray
| OpenParen + Formula + CloseParen
| ReservedName
;
ReservedName.Rule = ReservedNameToken;
Constant.Rule =
Number
| Text
| Bool
| Error
;
// Each constant kind wraps exactly one literal terminal
Text.Rule = TextToken;
Number.Rule = NumberToken;
Bool.Rule = BoolToken;
Error.Rule = ErrorToken;
RefError.Rule = RefErrorToken;
#endregion
#region Functions
// Operators are represented as function calls too: prefix, postfix and infix forms share the FunctionCall non-terminal
FunctionCall.Rule =
FunctionName + Arguments + CloseParen
| PrefixOp + Formula
| Formula + PostfixOp
| Formula + InfixOp + Formula
;
// The function name token already contains the opening parenthesis (see the ExcelFunction terminal regex)
FunctionName.Rule = ExcelFunction;
// Zero or more comma-separated arguments; an argument may be empty
Arguments.Rule = MakeStarRule(Arguments, comma, Argument);
EmptyArgument.Rule = EmptyArgumentToken;
Argument.Rule = Formula | EmptyArgument;
PrefixOp.Rule =
ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
MarkTransient(PrefixOp);
InfixOp.Rule =
expop
| mulop
| divop
| plusop
| minop
| concatop
| gtop
| eqop
| ltop
| neqop
| gteop
| lteop;
MarkTransient(InfixOp);
// ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
PostfixOp.Rule = PreferShiftHere() + percentop;
MarkTransient(PostfixOp);
#endregion
#region References
Reference.Rule =
ReferenceItem
| ReferenceFunctionCall
| OpenParen + Reference + PreferShiftHere() + CloseParen
| Prefix + ReferenceItem
| DynamicDataExchange
;
// Reference-producing operations: range (:), intersection (implied space), parenthesised union,
// reference function call and the postfix "#" operator
ReferenceFunctionCall.Rule =
Reference + colon + Reference
| Reference + intersectop + Reference
| OpenParen + Union + CloseParen
| RefFunctionName + Arguments + CloseParen
| Reference + hash
;
RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;
// One or more comma-separated references
Union.Rule = MakePlusRule(Union, comma, Reference);
ReferenceItem.Rule =
Cell
| NamedRange
| VRange
| HRange
| RefError
| UDFunctionCall
| StructuredReference
;
MarkTransient(ReferenceItem);
UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
UDFName.Rule = UDFToken;
VRange.Rule = VRangeToken;
HRange.Rule = HRangeToken;
Cell.Rule = CellToken;
// External file: a bracketed number, a bracketed name, or a path followed by a bracketed or plain file name
File.Rule =
FileNameNumericToken
| FileNameEnclosedInBracketsToken
| FilePathToken + FileNameEnclosedInBracketsToken
| FilePathToken + FileName
;
DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;
NamedRange.Rule = NameToken | NamedRangeCombinationToken;
// Everything that can precede a reference item: sheet, file + sheet, file only, sheet ranges
// (each in unquoted and quoted form) or #REF!
Prefix.Rule =
SheetToken
| QuoteS + SheetQuotedToken
| File + SheetToken
| QuoteS + File + SheetQuotedToken
| File + exclamationMark
| MultipleSheetsToken
| QuoteS + MultipleSheetsQuotedToken
| File + MultipleSheetsToken
| QuoteS + File + MultipleSheetsQuotedToken
| RefErrorToken
;
// Name written in front of the square brackets of a structured reference
StructuredReferenceQualifier.Rule = NameToken;
// Specifiers and columns may each be written with or without their own square brackets
StructuredReferenceSpecifier.Rule =
SRSpecifierToken
| OpenSquareParen + SRSpecifierToken + CloseSquareParen;
StructuredReferenceColumn.Rule =
SRColumnToken
| OpenSquareParen + SRColumnToken + CloseSquareParen;
// Allowed combinations inside the outer brackets: a column or column range (optionally preceded by "@"),
// or one or two specifiers optionally followed by a column or column range
StructuredReferenceExpression.Rule =
StructuredReferenceColumn
| StructuredReferenceColumn + colon + StructuredReferenceColumn
| at + StructuredReferenceColumn
| at + StructuredReferenceColumn + colon + StructuredReferenceColumn
| StructuredReferenceSpecifier
| StructuredReferenceSpecifier + comma + StructuredReferenceColumn
| StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
| StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier
| StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn
| StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
;
StructuredReference.Rule =
OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
| StructuredReferenceQualifier + OpenSquareParen + CloseSquareParen
| StructuredReferenceQualifier + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
;
#endregion
#region Arrays
// Array constant "{...}": rows of comma-separated constants; the row lists are separated by semicolons
ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;
ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);
// An array element is a constant, a signed number or #REF!
ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
#endregion
#endregion
#region 5-Operator Precedence
// Some of these operators are neutral associative instead of left associative,
// but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
// structure of the parse tree, we like consistency.
RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at);
RegisterOperators(Precedence.Union, Associativity.Left, comma);
RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
RegisterOperators(Precedence.Range, Associativity.Left, colon);
#endregion
}
#region Precedence and Priority constants
// Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
// Could also be an enum, but this way you don't need int casts
// Operator precedence levels passed to RegisterOperators and ImplyPrecedenceHere in the constructor.
// NOTE(review): presumably a larger value binds tighter, as in Irony's operator handling — confirm.
private static class Precedence
{
// Don't use priority 0, Irony seems to view it as no priority set
public const int Comparison = 1;
public const int Concatenation = 2;
public const int Addition = 3;
public const int Multiplication = 4;
public const int Exponentiation = 5;
public const int UnaryPostFix = 6;
public const int UnaryPreFix = 7;
// Value 8 is intentionally left unused by the commented-out Reference level
//public const int Reference = 8;
public const int Union = 9;
public const int Intersection = 10;
public const int Range = 11;
}
// Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
// E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
// E.g. "A1Blah" is a CellToken + NamedRange, a NamedRange and a NamedRangeCombination; pick NamedRangeCombination
private static class TerminalPriority
{
// Irony Low value
//public const int Low = -1000;
public const int Name = -800;
public const int ReservedName = -700;
public const int StructuredReference = -500;
public const int FileName = -500;
public const int FileNamePath = -800;
public const int SingleQuotedString = -100;
// Irony Normal value, default value
//public const int Normal = 0;
public const int Bool = 0;
public const int MultipleSheetsToken = 100;
// Irony High value
//public const int High = 1000;
public const int CellToken = 1000;
public const int NamedRangeCombination = 1100;
public const int UDF = 1150;
// The remaining terminals all share the highest priority used in this grammar
public const int ExcelFunction = 1200;
public const int ExcelRefFunction = 1200;
public const int FileNameNumericToken = 1200;
public const int SheetToken = 1200;
public const int SheetQuotedToken = 1200;
}
#endregion
/// <summary>
/// Names of the built-in Excel functions; the list is rebuilt from the resource on every read.
/// </summary>
private static string[] excelFunctionList => GetExcelFunctionList();

/// <summary>
/// Reads the v161 built-in function list resource and splits it on line breaks,
/// dropping empty entries.
/// </summary>
private static string[] GetExcelFunctionList()
{
    char[] lineBreaks = { '\n', '\r' };
    using (var reader = new StringReader(Properties.Resources.ExcelBuiltinFunctionList_v161))
    {
        var content = reader.ReadToEnd();
        return content.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
    }
}
}
#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
#region Non-Terminals
public const string Argument = "Argument";
public const string Arguments = "Arguments";
public const string ArrayColumns = "ArrayColumns";
public const string ArrayConstant = "ArrayConstant";
public const string ArrayFormula = "ArrayFormula";
public const string ArrayRows = "ArrayRows";
public const string Bool = "Bool";
public const string Cell = "Cell";
public const string Constant = "Constant";
public const string ConstantArray = "ConstantArray";
public const string DynamicDataExchange = "DynamicDataExchange";
public const string EmptyArgument = "EmptyArgument";
public const string Error = "Error";
// Listed with the non-terminals, but the grammar uses this name for the ExcelFunction terminal
public const string ExcelFunction = "ExcelFunction";
public const string File = "File";
public const string Formula = "Formula";
public const string FormulaWithEq = "FormulaWithEq";
public const string FunctionCall = "FunctionCall";
public const string FunctionName = "FunctionName";
public const string HorizontalRange = "HRange";
public const string MultiRangeFormula = "MultiRangeFormula";
public const string NamedRange = "NamedRange";
public const string Number = "Number";
public const string Prefix = "Prefix";
public const string QuotedFileSheet = "QuotedFileSheet";
public const string Range = "Range";
public const string Reference = "Reference";
public const string ReferenceFunctionCall = "ReferenceFunctionCall";
public const string RefError = "RefError";
public const string RefFunctionName = "RefFunctionName";
public const string ReservedName = "ReservedName";
public const string Sheet = "Sheet";
public const string StructuredReference = "StructuredReference";
public const string StructuredReferenceColumn = "StructuredReferenceColumn";
public const string StructuredReferenceExpression = "StructuredReferenceExpression";
public const string StructuredReferenceSpecifier = "StructuredReferenceSpecifier";
public const string StructuredReferenceQualifier = "StructuredReferenceQualifier";
public const string Text = "Text";
public const string UDFName = "UDFName";
public const string UDFunctionCall = "UDFunctionCall";
public const string Union = "Union";
public const string VerticalRange = "VRange";
#endregion
#region Transient Non-Terminals
// Names of the non-terminals the grammar constructor passes to MarkTransient
public const string TransientStart = "Start";
public const string TransientInfixOp = "InfixOp";
public const string TransientPostfixOp = "PostfixOp";
public const string TransientPrefixOp = "PrefixOp";
public const string TransientReferenceItem = "ReferenceItem";
#endregion
#region Terminals
public const string TokenBool = "BoolToken";
public const string TokenCell = "CellToken";
public const string TokenEmptyArgument = "EmptyArgumentToken";
public const string TokenError = "ErrorToken";
public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
public const string TokenFilePath = "FilePathToken";
public const string TokenFileName = "FileNameToken";
public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
public const string TokenFileNameNumeric = "FileNameNumericToken";
public const string TokenHRange = "HRangeToken";
public const string TokenIntersect = "INTERSECT";
public const string TokenMultipleSheets = "MultipleSheetsToken";
public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
public const string TokenName = "NameToken";
public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
public const string TokenNumber = "NumberToken";
public const string TokenRefError = "RefErrorToken";
public const string TokenReservedName = "ReservedNameToken";
public const string TokenSingleQuotedString = "SingleQuotedString";
public const string TokenSheet = "SheetNameToken";
public const string TokenSheetQuoted = "SheetNameQuotedToken";
public const string TokenSRColumn = "SRColumnToken";
public const string TokenSRSpecifier = "SRSpecifierToken";
public const string TokenText = "TextToken";
public const string TokenUDF = "UDFToken";
// The union operator is the comma key term, so its "name" is the literal text
public const string TokenUnionOperator = ",";
public const string TokenVRange = "VRangeToken";
#endregion
}
#endregion
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v161/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v161
{
/// <summary>
/// Excel formula parser.
/// Contains the parser and utilities that operate directly on the parse tree or make working with the parse tree easier.
/// </summary>
public static class ExcelFormulaParser
{
/// <summary>
/// Parser instance of the current thread; the field is marked <see cref="ThreadStaticAttribute"/>.
/// </summary>
[ThreadStatic] private static Parser _p;

/// <summary>
/// The current thread's parser, created from a new <see cref="ExcelFormulaGrammar"/> on first use.
/// </summary>
private static Parser P
{
    get
    {
        if (_p == null)
        {
            _p = new Parser(new ExcelFormulaGrammar());
        }

        return _p;
    }
}
/// <summary>
/// Parses a formula and returns the root node of the resulting tree.
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If the formula could not be parsed.
/// </exception>
/// <returns>Parse tree root node</returns>
public static ParseTreeNode Parse(string input) => ParseToTree(input).Root;
/// <summary>
/// Parses a formula and returns the complete parse tree.
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If the formula could not be parsed.
/// </exception>
/// <returns>Parse tree</returns>
public static ParseTree ParseToTree(string input)
{
    var parseTree = P.Parse(input);
    if (parseTree.HasErrors())
    {
        throw new ArgumentException("Failed parsing input <<" + input + ">>");
    }

    // Move every intersect token one position/column back and give it a length of 1
    // (presumably so that its span covers the single space that is the operator).
    foreach (ParseTreeNode intersectNode in parseTree.Root.AllNodes(GrammarNames.TokenIntersect))
    {
        var reported = intersectNode.Span.Location;
        var shifted = new SourceLocation(reported.Position - 1, reported.Line, reported.Column - 1);
        intersectNode.Span = new SourceSpan(shifted, 1);
    }

    // Let PrefixInfo correct the span/text of quoted sheet tokens with respect to whitespace.
    foreach (ParseTreeNode sheetNode in parseTree.Root.AllNodes(GrammarNames.TokenSheetQuoted))
    {
        PrefixInfo.FixQuotedSheetNodeForWhitespace(sheetNode, input);
    }

    return parseTree;
}
/// <summary>
/// All nodes of the tree in depth-first pre-order, with a conditional stop.
/// Every visited node is returned, including the node at which the traversal stops descending.
/// </summary>
/// <param name="root">The root node</param>
/// <param name="stopAt">Don't process the children of a node matching this predicate</param>
/// <returns>A lazily evaluated sequence starting with <paramref name="root"/> itself.</returns>
// inspiration taken from https://irony.codeplex.com/discussions/213938
public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
{
    var stack = new Stack<ParseTreeNode>();
    stack.Push(root);
    while (stack.Count > 0)
    {
        var node = stack.Pop();
        yield return node;

        // Check if we don't want to process the children of this node
        if (stopAt != null && stopAt(node))
        {
            continue;
        }

        var children = node.ChildNodes;
        // Push children on in reverse order so that they will
        // be evaluated left -> right when popped.
        for (int i = children.Count - 1; i >= 0; i--)
        {
            stack.Push(children[i]);
        }
    }
}
/// <summary>
/// All nodes of the tree in depth-first pre-order, starting with <paramref name="root"/>.
/// </summary>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
{
    return AllNodesConditional(root);
}

/// <summary>
/// All nodes of a certain type in depth-first pre-order.
/// </summary>
/// <param name="root">The root node</param>
/// <param name="type">Term name to filter on, e.g. one of the <see cref="GrammarNames"/> constants</param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
{
    return AllNodes(root.AllNodes(), type);
}

/// <summary>
/// Filters an already enumerated node sequence down to the nodes of the given type.
/// </summary>
internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
{
    return allNodes.Where(node => node.Is(type));
}
/// <summary>
/// Get the parent node of a node
/// </summary>
/// <remarks>
/// This is an expensive operation, as the whole tree will be searched through
/// </remarks>
/// <param name="child">The node whose parent is wanted</param>
/// <param name="treeRoot">Root of the tree that is searched</param>
/// <exception cref="ArgumentException">If no node in the tree has <paramref name="child"/> as a direct child</exception>
public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
{
    foreach (ParseTreeNode candidate in treeRoot.AllNodes())
    {
        if (candidate.ChildNodes.Any(c => c == child))
        {
            return candidate;
        }
    }

    throw new ArgumentException("Child is not part of the tree", nameof(child));
}
/// <summary>
/// The node type/name, i.e. the name of the node's term
/// </summary>
public static string Type(this ParseTreeNode node) => node.Term.Name;
/// <summary>
/// Check if a node is of a particular type (exact match on the term name)
/// </summary>
public static bool Is(this ParseTreeNode pt, string type) => pt.Type() == type;
/// <summary>
/// Checks whether this node is a function: a function call, reference function call or UDF call,
/// or a two-child reference whose second child is itself a function.
/// </summary>
public static Boolean IsFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            return true;
        case GrammarNames.Reference:
            // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
            return input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction();
        default:
            return false;
    }
}
/// <summary>
/// Whether or not this node represents parentheses "(_)": a Formula whose only child is a Formula,
/// or a Reference whose only child is a Reference.
/// </summary>
public static bool IsParentheses(this ParseTreeNode input)
{
    var type = input.Type();
    if (type != GrammarNames.Formula && type != GrammarNames.Reference)
    {
        return false;
    }

    // The single child must be of the same kind as the node itself
    return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(type);
}
/// <summary>
/// Whether this node is a function with three children of which the middle one is an operator
/// </summary>
public static bool IsBinaryOperation(this ParseTreeNode input) =>
    input.IsFunction()
    && input.ChildNodes.Count == 3
    && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);

/// <summary>
/// Whether this node is a binary operation represented as a FunctionCall node
/// </summary>
public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input) =>
    input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);

/// <summary>
/// Whether this node is a binary operation represented as a ReferenceFunctionCall node
/// </summary>
public static bool IsBinaryReferenceOperation(this ParseTreeNode input) =>
    input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
/// <summary>
/// Tells whether this node is a unary operation, either prefix or postfix.
/// </summary>
/// <param name="input">The parse tree node to inspect.</param>
public static bool IsUnaryOperation(this ParseTreeNode input)
    => input.IsUnaryPrefixOperation() || input.IsUnaryPostfixOperation();
/// <summary>
/// Tells whether this node is a function with two children whose first child is an operator.
/// </summary>
/// <param name="input">The parse tree node to inspect.</param>
public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }

    var leading = input.ChildNodes[0];
    return leading.Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Tells whether this node is a function with two children whose second child is an operator.
/// </summary>
/// <param name="input">The parse tree node to inspect.</param>
public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }

    var trailing = input.ChildNodes[1];
    return trailing.Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Returns the input without its last character.
/// </summary>
/// <param name="input">A non-empty string.</param>
private static string RemoveFinalSymbol(string input)
{
    // Drop everything from the last index onwards, i.e. exactly one character.
    return input.Remove(input.Length - 1);
}
/// <summary>
/// Get the function or operator name of this function call.
/// </summary>
/// <param name="input">A node for which <c>IsFunction</c> holds.</param>
/// <returns>
/// The intersect or union token name for intersections and unions, the printed operator for binary and
/// unary operations, the upper-cased name (without its final character) for named functions, and the printed
/// prefix followed by the function name for external user defined functions.
/// </returns>
/// <exception cref="ArgumentException">Thrown when the node matches none of the cases above.</exception>
public static string GetFunction(this ParseTreeNode input)
{
    if (input.IsIntersection())
    {
        return GrammarNames.TokenIntersect;
    }
    if (input.IsUnion())
    {
        return GrammarNames.TokenUnionOperator;
    }
    if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
    {
        // The operator is the second child for both binary and postfix operations
        return input.ChildNodes[1].Print();
    }
    if (input.IsUnaryPrefixOperation())
    {
        return input.ChildNodes[0].Print();
    }
    if (input.IsNamedFunction())
    {
        // Function names are identifiers, not linguistic text: upper-case them independently of the
        // current culture (under e.g. a Turkish culture "if" would otherwise become "İF").
        return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
    }
    if (input.IsExternalUDFunction())
    {
        // Prefix followed by the name of the wrapped function call
        return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
    }
    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Tells whether this node is a function whose name, as returned by <c>GetFunction</c>, equals the given name.
/// </summary>
/// <param name="input">The parse tree node to inspect.</param>
/// <param name="functionName">The function or operator name to compare against.</param>
public static bool MatchFunction(this ParseTreeNode input, string functionName)
{
    if (!input.IsFunction())
    {
        return false;
    }

    return input.GetFunction() == functionName;
}
/// <summary>
/// Get all the arguments of a function or operation.
/// </summary>
/// <param name="input">The function, operation, union or external UDF node.</param>
/// <returns>
/// The argument nodes: the first child of every Argument for named functions and external UDFs,
/// both operands for binary operations, the single operand for unary operations,
/// and the children of the Union node for unions.
/// </returns>
/// <exception cref="ArgumentException">Thrown when the node matches none of these cases.</exception>
public static IEnumerable GetFunctionArguments(this ParseTreeNode input)
{
if (input.IsNamedFunction())
{
return input
.ChildNodes[1] // "Arguments" non-terminal
.ChildNodes // "Argument" non-terminals
.Select(node => node.ChildNodes[0])
;
}
if (input.IsBinaryOperation())
{
// Operands sit on either side of the operator (child 1)
return new[] {input.ChildNodes[0], input.ChildNodes[2]};
}
if (input.IsUnaryPrefixOperation())
{
// Operator first, operand second
return new[] {input.ChildNodes[1]};
}
if (input.IsUnaryPostfixOperation())
{
// Operand first, operator second
return new[] {input.ChildNodes[0]};
}
if (input.IsUnion())
{
// The only child is the Union node; its children are the united references
return input.ChildNodes[0].ChildNodes;
}
if (input.IsExternalUDFunction())
{
return input // Reference
.ChildNodes[1] // UDFunctionCall
.ChildNodes[1] // Arguments
.ChildNodes // Argument non-terminals
.Select(node => node.ChildNodes[0])
;
}
throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Tells whether this node is a function whose first child is a FunctionName or RefFunctionName.
/// </summary>
/// <param name="node">The parse tree node to inspect.</param>
public static bool IsBuiltinFunction(this ParseTreeNode node)
{
    if (!node.IsFunction())
    {
        return false;
    }

    var head = node.ChildNodes[0];
    return head.Is(GrammarNames.FunctionName) || head.Is(GrammarNames.RefFunctionName);
}
/// <summary>
/// Tells whether this node represents an intersection.
/// </summary>
/// <param name="input">The parse tree node to inspect.</param>
/// <returns><c>true</c> when the node is a binary operation whose operator is the intersect terminal.</returns>
public static bool IsIntersection(this ParseTreeNode input)
{
    if (!input.IsBinaryOperation())
    {
        return false;
    }

    var operatorName = input.ChildNodes[1].Token.Terminal.Name;
    return operatorName == GrammarNames.TokenIntersect;
}
/// <summary>
/// Tells whether this node represents a union.
/// </summary>
/// <param name="input">The parse tree node to inspect.</param>
/// <returns><c>true</c> when the node is a ReferenceFunctionCall whose only child is a Union node.</returns>
public static bool IsUnion(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.ReferenceFunctionCall))
    {
        return false;
    }

    if (input.ChildNodes.Count != 1)
    {
        return false;
    }

    return input.ChildNodes[0].Is(GrammarNames.Union);
}
/// <summary>
/// Tells whether this node is a function call with a name, and not just a unary or binary operation.
/// </summary>
/// <param name="input">The parse tree node to inspect.</param>
public static bool IsNamedFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.UDFunctionCall:
            // User defined function calls always carry a name
            return true;
        case GrammarNames.FunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.FunctionName);
        case GrammarNames.ReferenceFunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.RefFunctionName);
        default:
            return false;
    }
}
/// <summary>
/// Tells whether this node is a binary or a unary operation.
/// </summary>
/// <param name="input">The parse tree node to inspect.</param>
public static bool IsOperation(this ParseTreeNode input)
    => input.IsBinaryOperation() || input.IsUnaryOperation();
/// <summary>
/// Tells whether this node is a Reference with exactly two children whose second child is a named function.
/// </summary>
/// <param name="input">The parse tree node to inspect.</param>
public static bool IsExternalUDFunction(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.Reference) || input.ChildNodes.Count != 2)
    {
        return false;
    }

    return input.ChildNodes[1].IsNamedFunction();
}
/// <summary>
/// Tells whether this node represents a number constant with a sign.
/// </summary>
/// <param name="input">The parse tree node to inspect.</param>
/// <returns>
/// <c>true</c> when the node is a unary prefix operation whose operand wraps a Constant that in turn wraps a Number.
/// </returns>
public static bool IsNumberWithSign(this ParseTreeNode input)
{
    if (!input.IsUnaryPrefixOperation())
    {
        return false;
    }

    // First child of the operand (the operand is the second child of the operation)
    var operandContent = input.ChildNodes[1].ChildNodes[0];
    return operandContent.Is(GrammarNames.Constant)
           && operandContent.ChildNodes[0].Is(GrammarNames.Number);
}
/// <summary>
/// Extracts the information held by a Prefix non-terminal.
/// </summary>
/// <param name="prefix">The Prefix node.</param>
/// <returns>The result of <c>PrefixInfo.From</c> for this node.</returns>
public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix)
{
    return PrefixInfo.From(prefix);
}
/// <summary>
/// Descends through first children until a node that is not a Formula is reached.
/// </summary>
/// <param name="input">The node to start from.</param>
/// <returns>The first non-Formula node found; the input itself when it is not a Formula.</returns>
public static ParseTreeNode SkipFormula(this ParseTreeNode input)
{
    var current = input;
    while (true)
    {
        if (!current.Is(GrammarNames.Formula))
        {
            return current;
        }

        current = current.ChildNodes.First();
    }
}
/// <summary>
/// Get all child nodes that are references and aren't part of another reference expression.
/// </summary>
/// <param name="input">The node whose tree is searched.</param>
/// <returns>The matching Reference nodes, each passed through <c>SkipToRelevant</c>.</returns>
public static IEnumerable GetReferenceNodes(this ParseTreeNode input)
{
// NOTE(review): AllNodesConditional is defined elsewhere; presumably the predicate stops the descent
// below Reference nodes - confirm against its definition.
return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
// Keep only the Reference nodes among the visited nodes
.Where(node => node.Is(GrammarNames.Reference))
// Unwrap parentheses
.Select(node => node.SkipToRelevant())
;
}
/// <summary>
/// Gets the ParserReferences from the input parse tree node and its children.
/// </summary>
/// <remarks>
/// 5 cases:
/// 1. ReferenceItem node: convert to ParserReference
/// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
/// (to include the references in the arguments of external UDFs)
/// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
/// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
/// 5. Other cases (RefFunctionCall, Union, Arguments): recursive call on the nodes returned from GetReferenceNodes(node)
/// </remarks>
/// <param name="node">The node to collect references from.</param>
/// <returns>The references found, as a list.</returns>
public static IEnumerable GetParserReferences(this ParseTreeNode node)
{
// A Reference with a single child is only a wrapper: continue with the child
if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
node = node.ChildNodes[0];
var list = new List();
switch (node.Type())
{
// Case 1: reference items
case GrammarNames.Cell:
case GrammarNames.NamedRange:
case GrammarNames.HorizontalRange:
case GrammarNames.VerticalRange:
case GrammarNames.StructuredReference:
list.Add(new ParserReference(node));
break;
// Case 2: a Reference that was not unwrapped above, so it has more than one child
case GrammarNames.Reference:
list.Add(new ParserReference(node));
list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
break;
default:
if (node.IsRange())
{
// Children 0 and 2 are the range limits, child 1 is the ':' operator
var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray();
var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray();
if (IsCellReference(rangeStart) && IsCellReference(rangeEnd))
{
// Case 3: reuse the start reference and extend it to the end cell
ParserReference range = rangeStart.First();
range.MaxLocation = rangeEnd.First().MinLocation;
range.ReferenceType = ReferenceType.CellRange;
range.LocationString = node.Print();
list.Add(range);
}
// Two single-column references into the same table are merged into one table reference
else if (IsTableReference(rangeStart) && IsTableReference(rangeEnd) && rangeStart.First().Name == rangeEnd.First().Name && rangeStart.First().TableColumns.Length == 1 && rangeEnd.First().TableColumns.Length == 1)
{
ParserReference range = rangeStart.First();
range.TableColumns = rangeStart.First().TableColumns.Concat(rangeEnd.First().TableColumns).ToArray();
// Keep the specifiers only when both limits carry the same ones
range.TableSpecifiers = rangeStart.First().TableSpecifiers.SequenceEqual(rangeEnd.First().TableSpecifiers) ? range.TableSpecifiers : new string[0];
list.Add(range);
}
else
{
// Case 4: complex limits, report the references of both limits separately
list.AddRange(rangeStart);
list.AddRange(rangeEnd);
}
}
else
{
// Case 5: collect from the reference nodes below this node
list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
}
break;
}
return list;
}
// True when the list holds exactly one reference and that reference is of type Cell
private static bool IsCellReference(IList references)
{
return references.Count == 1 && references.First().ReferenceType == ReferenceType.Cell;
}
// True when the list holds exactly one reference and that reference is of type Table
private static bool IsTableReference(IList references)
{
return references.Count == 1 && references.First().ReferenceType == ReferenceType.Table;
}
/// <summary>
/// Tells whether this node represents a range.
/// </summary>
/// <param name="input">The parse tree node to inspect.</param>
/// <returns><c>true</c> when the node is a binary reference operation whose operator is ":".</returns>
public static bool IsRange(this ParseTreeNode input)
{
    if (!input.IsBinaryReferenceOperation())
    {
        return false;
    }

    var operatorNode = input.ChildNodes[1];
    return operatorNode.Is(":");
}
/// <summary>
/// Go to the first "relevant" child node, i.e. skip wrapper nodes.
/// </summary>
/// <param name="input">The input parse tree node</param>
/// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
/// <remarks>
/// Skips:
/// * FormulaWithEq and ArrayFormula nodes
/// * Argument and Formula nodes with a single child
/// * Parentheses
/// * Reference nodes which are just wrappers
/// </remarks>
/// <returns>The first node that is not skipped.</returns>
public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
{
    var current = input;
    for (;;)
    {
        var kind = current.Type();

        if (kind == GrammarNames.FormulaWithEq || kind == GrammarNames.ArrayFormula)
        {
            // The content is the second child
            current = current.ChildNodes[1];
        }
        else if (kind == GrammarNames.Argument || kind == GrammarNames.Formula)
        {
            // Only single-child nodes are wrappers
            if (current.ChildNodes.Count != 1)
            {
                return current;
            }
            current = current.ChildNodes[0];
        }
        else if (kind == GrammarNames.Reference)
        {
            // Skip references which are parentheses
            // Skip references without a prefix (=> they only have one child node) if the option is set
            var isWrapper = (skipReferencesWithoutPrefix && current.ChildNodes.Count == 1) || current.IsParentheses();
            if (!isWrapper)
            {
                return current;
            }
            current = current.ChildNodes[0];
        }
        else
        {
            return current;
        }
    }
}
/// <summary>
/// Pretty-print a parse tree to a string.
/// </summary>
/// <param name="input">The root of the (sub)tree to print.</param>
/// <returns>The token text for terminals; for non-terminals the printed children combined according to the node type.</returns>
/// <exception cref="ArgumentException">
/// Thrown for a function node of unknown shape, or for a node type that has no print rule and does not have exactly one child.
/// </exception>
public static string Print(this ParseTreeNode input)
{
// For terminals, just print the token text
if (input.Term is Terminal)
{
return input.Token.Text;
}
// (Lazy) enumerable for printed children
// Note: being lazy, children are printed again each time this sequence is enumerated
var children = input.ChildNodes.Select(Print);
// Concrete list when needed
List childrenList;
// Switch on non-terminals
switch (input.Term.Name)
{
case GrammarNames.Formula:
// Check if these are brackets, otherwise print first child
return IsParentheses(input) ? $"({children.First()})" : children.First();
case GrammarNames.FunctionCall:
case GrammarNames.ReferenceFunctionCall:
case GrammarNames.UDFunctionCall:
childrenList = children.ToList();
if (input.IsNamedFunction())
{
// Only the closing parenthesis is appended here
return string.Join("", childrenList) + ")";
}
if (input.IsBinaryOperation())
{
// format string for "normal" binary operation
string format = "{0}{1}{2}";
if (input.IsIntersection())
{
// Intersections print a single space between the operands instead of the operator child
format = "{0} {2}";
}
return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
}
if (input.IsUnion())
{
return $"({string.Join(",", childrenList)})";
}
if (input.IsUnaryOperation())
{
return string.Join("", childrenList);
}
throw new ArgumentException("Unknown function type.");
case GrammarNames.Reference:
return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);
case GrammarNames.Prefix:
var ret = string.Join("", children);
// The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
{
ret += "!";
}
return ret;
case GrammarNames.ArrayFormula:
// The formula is the second child; it is wrapped in "{=" and "}"
return "{=" + children.ElementAt(1) + "}";
// Terms for which to print all child nodes concatenated
case GrammarNames.ArrayConstant:
case GrammarNames.DynamicDataExchange:
case GrammarNames.FormulaWithEq:
case GrammarNames.File:
case GrammarNames.MultiRangeFormula:
case GrammarNames.StructuredReference:
case GrammarNames.StructuredReferenceColumn:
case GrammarNames.StructuredReferenceExpression:
case GrammarNames.StructuredReferenceSpecifier:
return string.Join("", children);
// Terms for which we print the children comma-separated
case GrammarNames.Arguments:
case GrammarNames.ArrayRows:
case GrammarNames.Union:
return string.Join(",", children);
// Children separated by semicolons
case GrammarNames.ArrayColumns:
return string.Join(";", children);
// First child wrapped in curly braces
case GrammarNames.ConstantArray:
return $"{{{children.First()}}}";
default:
// If it is not defined above and the number of children is exactly one, we want to just print the first child
if (input.ChildNodes.Count == 1)
{
return children.First();
}
throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine +
"This probably means the Excel grammar was modified without the print function being modified");
}
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v161/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v161
{
/// <summary>
/// The kinds of reference a ParserReference can describe.
/// </summary>
public enum ReferenceType
{
// Single cell; assigned for Cell nodes
Cell,
// Range between two locations
CellRange,
// Assigned for NamedRange nodes
UserDefinedName,
// Assigned for HorizontalRange nodes
HorizontalRange,
// Assigned for VerticalRange nodes
VerticalRange,
// Assigned for RefError nodes
RefError,
// Assigned for StructuredReference nodes
Table
}
/// <summary>
/// Describes a single reference found in a formula: its kind, the file and worksheet(s) it points to,
/// and its location, name or table columns.
/// </summary>
public class ParserReference
{
    // NOTE(review): presumably the number of rows and columns of a worksheet; not used inside this class.
    public const int MaxRangeHeight = 1048576;
    public const int MaxRangeWidth = 16384;

    /// <summary>The kind of reference.</summary>
    public ReferenceType ReferenceType { get; set; }

    /// <summary>The printed text of the node this reference was created from.</summary>
    public string LocationString { get; set; }

    /// <summary>The worksheet of the prefix; the first worksheet when the prefix spans multiple sheets.</summary>
    public string Worksheet { get; set; }

    /// <summary>The last worksheet when the prefix spans multiple sheets.</summary>
    public string LastWorksheet { get; set; }

    /// <summary>The file path of the prefix, if any.</summary>
    public string FilePath { get; set; }

    /// <summary>The file name of the prefix, or the file number as text, if any.</summary>
    public string FileName { get; set; }

    /// <summary>The user defined name, or the table name for table references.</summary>
    public string Name { get; private set; }

    public string MinLocation { get; set; } //Location as appearing in the formula, eg $A$1
    public string MaxLocation { get; set; }

    /// <summary>The specifiers of a table reference.</summary>
    public string[] TableSpecifiers { get; set; }

    /// <summary>The columns of a table reference.</summary>
    public string[] TableColumns { get; set; }

    /// <summary>
    /// Creates a reference from explicit values.
    /// </summary>
    /// <remarks>When <paramref name="maxLocation"/> is null, <see cref="MaxLocation"/> is set to <paramref name="minLocation"/>.</remarks>
    public ParserReference(ReferenceType referenceType, string locationString = null, string worksheet = null, string lastWorksheet = null,
        string filePath = null, string fileName = null, string name = null, string minLocation = null, string maxLocation = null,
        string[] tableSpecifiers = null, string[] tableColumns = null)
    {
        ReferenceType = referenceType;
        LocationString = locationString;
        Worksheet = worksheet;
        LastWorksheet = lastWorksheet;
        FilePath = filePath;
        FileName = fileName;
        Name = name;
        MinLocation = minLocation;
        MaxLocation = maxLocation ?? minLocation;
        TableColumns = tableColumns;
        TableSpecifiers = tableSpecifiers;
    }

    /// <summary>
    /// Creates a reference from a parse tree node, see <see cref="InitializeReference"/>.
    /// </summary>
    public ParserReference(ParseTreeNode node)
    {
        InitializeReference(node);
    }

    /// <summary>
    /// Initializes the current object based on the input ParseTreeNode
    /// </summary>
    /// <remarks>
    /// For Reference nodes (Prefix ReferenceItem), it initialize the values derived from the Prefix node and
    /// is re-invoked for the ReferenceItem node.
    /// Node types that are not handled leave everything but <see cref="LocationString"/> untouched.
    /// </remarks>
    /// <param name="node">The node to read the reference from.</param>
    public void InitializeReference(ParseTreeNode node)
    {
        switch (node.Type())
        {
            case GrammarNames.Reference:
                PrefixInfo prefix = node.ChildNodes[0].GetPrefixInfo();
                // Doubled single quotes in the sheet name are an escaped single quote
                Worksheet = prefix.HasSheet ? prefix.Sheet.Replace("''", "'") : "(Undefined sheet)";
                if (prefix.HasMultipleSheets)
                {
                    string[] sheets = prefix.MultipleSheets.Split(':');
                    Worksheet = sheets[0];
                    LastWorksheet = sheets[1];
                }
                if (prefix.HasFilePath)
                {
                    FilePath = prefix.FilePath;
                }
                // A file number takes precedence over a file name
                if (prefix.HasFileNumber)
                {
                    FileName = prefix.FileNumber.ToString();
                }
                else if (prefix.HasFileName)
                {
                    FileName = prefix.FileName;
                }
                // Continue with the reference item that follows the prefix
                InitializeReference(node.ChildNodes[1]);
                break;
            case GrammarNames.Cell:
                ReferenceType = ReferenceType.Cell;
                MinLocation = node.ChildNodes[0].Token.ValueString;
                MaxLocation = MinLocation;
                break;
            case GrammarNames.NamedRange:
                ReferenceType = ReferenceType.UserDefinedName;
                Name = node.ChildNodes[0].Token.ValueString;
                break;
            case GrammarNames.StructuredReference:
                ReferenceType = ReferenceType.Table;
                // The table name is optional: it stays null when there is no qualifier child
                Name = node.ChildNodes.FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceQualifier)?.ChildNodes[0].Token.ValueString;
                TableSpecifiers = node.AllNodes().Where(x => x.Is(GrammarNames.TokenSRSpecifier) || x.Is("@")).Select(x => UnEscape(x.Token.ValueString, "'")).ToArray();
                TableColumns = node.AllNodes().Where(x => x.Is(GrammarNames.TokenSRColumn)).Select(x => UnEscape(x.Token.ValueString, "'")).ToArray();
                break;
            case GrammarNames.HorizontalRange:
                string[] horizontalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                ReferenceType = ReferenceType.HorizontalRange;
                MinLocation = horizontalLimits[0];
                MaxLocation = horizontalLimits[1];
                break;
            case GrammarNames.VerticalRange:
                string[] verticalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                ReferenceType = ReferenceType.VerticalRange;
                MinLocation = verticalLimits[0];
                MaxLocation = verticalLimits[1];
                break;
            case GrammarNames.RefError:
                ReferenceType = ReferenceType.RefError;
                break;
        }
        // For Reference nodes this overwrites the value set by the recursive call with the full printed reference
        LocationString = node.Print();
    }

    // Removes every escape character that is not directly followed by another escape character
    private string UnEscape(string value, string escapeCharacter)
    {
        return System.Text.RegularExpressions.Regex.Replace(value, $"{escapeCharacter}(?!{escapeCharacter})", "");
    }

    /// <summary>
    /// Converts the column number to an Excel column string representation.
    /// </summary>
    /// <param name="columnNumber">The zero-based column number.</param>
    private string ConvertColumnToStr(int columnNumber)
    {
        var sb = new System.Text.StringBuilder();
        while (columnNumber >= 0)
        {
            // 65 is 'A'; letters are prepended from least to most significant
            sb.Insert(0, (char)(65 + columnNumber % 26));
            columnNumber = columnNumber / 26 - 1;
        }
        return sb.ToString();
    }

    /// <summary>
    /// Returns the location for cell references and "min:max" for every other reference type.
    /// </summary>
    public override string ToString()
    {
        // MinLocation can be null (e.g. a Cell reference built without a location); never throw from ToString
        return ReferenceType == ReferenceType.Cell
            ? MinLocation ?? string.Empty
            : string.Format("{0}:{1}", MinLocation, MaxLocation);
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v161/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v161
{
// TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23
/// <summary>
/// Simple data class that holds information about a Prefix.
/// </summary>
/// <remarks>
/// Equality and hash code compare file number, file path, file name, sheet and multiple sheets
/// (strings case-insensitively); <see cref="IsQuoted"/> does not take part in them.
/// </remarks>
public class PrefixInfo : IEquatable<PrefixInfo>
{
    public string FilePath { get; }
    public bool HasFilePath => FilePath != null;

    private readonly int? _fileNumber;
    // 0 when there is no file number, see HasFileNumber
    public int FileNumber => _fileNumber.GetValueOrDefault();
    public bool HasFileNumber => _fileNumber.HasValue;

    public string FileName { get; }
    public bool HasFileName => FileName != null;

    public bool HasFile => HasFileName || HasFileNumber;

    public string Sheet { get; }
    public bool HasSheet => Sheet != null;

    public string MultipleSheets { get; }
    public bool HasMultipleSheets => MultipleSheets != null;

    public bool IsQuoted { get; }

    public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
    {
        Sheet = sheet;
        _fileNumber = fileNumber;
        FileName = fileName;
        FilePath = filePath;
        MultipleSheets = multipleSheets;
        IsQuoted = isQuoted;
    }

    /// <summary>
    /// Create a PrefixInfo class from a parse tree node
    /// </summary>
    /// <param name="prefix">A node of type Prefix.</param>
    /// <exception cref="ArgumentException">Thrown when the node is not a Prefix.</exception>
    internal static PrefixInfo From(ParseTreeNode prefix)
    {
        if (prefix.Type() != GrammarNames.Prefix)
        {
            throw new ArgumentException("Not a prefix", nameof(prefix));
        }

        string filePath = null;
        int? fileNumber = null;
        string fileName = null;
        string sheetName = null;
        string multipleSheets = null;

        // Token number we're processing
        var cur = 0;

        // Check for quotes
        var isQuoted = prefix.ChildNodes[cur].Is("'");
        if (isQuoted)
        {
            cur++;
        }

        // Check and process file
        if (prefix.ChildNodes[cur].Is(GrammarNames.File))
        {
            ParseTreeNode file = prefix.ChildNodes[cur];

            if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
            {
                // Numeric filename: strip the surrounding brackets before parsing
                fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
            }
            else
            {
                // String filename
                var iCur = 0;
                // Check if it includes a path
                if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                {
                    filePath = file.ChildNodes[iCur].Print();
                    iCur++;
                }

                if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                {
                    // Strip the surrounding brackets
                    fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                }
                else
                {
                    fileName = file.ChildNodes[iCur].Print();
                }
            }

            cur++;
        }

        // A prefix can consist of a file only (the exclamation mark is not part of the parse tree),
        // in which case no child is left to describe a sheet.
        if (cur < prefix.ChildNodes.Count)
        {
            ParseTreeNode sheetNode = prefix.ChildNodes[cur];

            // Check for a non-quoted sheet
            if (sheetNode.Is(GrammarNames.TokenSheet))
            {
                // remove !
                sheetName = Substr(sheetNode.Print(), 1);
            }
            // Check for a quoted sheet
            else if (sheetNode.Is(GrammarNames.TokenSheetQuoted))
            {
                // remove quote and !
                sheetName = Substr(sheetNode.Print(), 2);

                if (sheetName == "")
                {
                    // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                    // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                    sheetName = " ";
                }
            }
            // Check if multiple sheets
            else if (sheetNode.Is(GrammarNames.TokenMultipleSheets))
            {
                // remove !
                multipleSheets = Substr(sheetNode.Print(), 1);
            }
        }

        return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
    }

    /// <summary>
    /// Extends the span and token text of a quoted sheet node backwards over the whitespace that directly precedes it in the source text.
    /// </summary>
    /// <param name="quotedSheetNode">The quoted sheet node to adjust.</param>
    /// <param name="sourceText">The text the node was parsed from.</param>
    internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
    {
        var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
        SourceLocation currentLocation = quotedSheetNode.Span.Location;
        if (newPosition == currentLocation.Position)
        {
            // No preceding whitespace, nothing to fix
            return;
        }

        // NOTE(review): the column is increased by the number of characters the start moved back - confirm the intended direction.
        var newLocation = new SourceLocation(newPosition, currentLocation.Line, currentLocation.Column + currentLocation.Position - newPosition);
        quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

        // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
        typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
            ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
    }

    // Walks back from the node's start position while the preceding character is whitespace
    private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
    {
        var startIndex = nodeSheetQuoted.Span.Location.Position;
        while (startIndex > 0)
        {
            if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
            {
                break;
            }

            startIndex--;
        }

        return startIndex;
    }

    // Returns s without its first removeFirst and its last removeLast characters
    private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
    {
        return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
    }

    public override bool Equals(object other) => Equals(other as PrefixInfo);

    public bool Equals(PrefixInfo other)
    {
        if (ReferenceEquals(null, other)) return false;
        if (ReferenceEquals(this, other)) return true;
        return _fileNumber == other._fileNumber && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase) && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase) && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase) && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
    }

    public override int GetHashCode()
    {
        // Uses the same fields and the same case-insensitive comparison as Equals
        unchecked
        {
            var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
            hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
            hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
            hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
            hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
            return hashCode;
        }
    }

    public static bool operator ==(PrefixInfo left, PrefixInfo right)
    {
        return Equals(left, right);
    }

    public static bool operator !=(PrefixInfo left, PrefixInfo right)
    {
        return !Equals(left, right);
    }

    /// <summary>
    /// Builds the prefix text from its parts, followed by an exclamation mark.
    /// </summary>
    public override string ToString()
    {
        var res = new StringBuilder();
        if (IsQuoted) res.Append("'");
        if (HasFilePath) res.Append(FilePath);
        if (HasFileNumber) res.Append($"[{FileNumber}]");
        if (HasFileName) res.Append($"[{FileName}]");
        if (HasSheet) res.Append(Sheet);
        if (HasMultipleSheets) res.Append(MultipleSheets);
        if (IsQuoted) res.Append("'");
        res.Append("!");
        return res.ToString();
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v162/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
namespace XLParser.Web.XLParserVersions.v162
{
///
/// Contains the XLParser grammar
///
[Language("Excel Formulas", "1.6.2", "Grammar for Excel Formulas")]
public class ExcelFormulaGrammar : Grammar
{
#region 1-Terminals
#region Symbols and operators
public Terminal at => ToTerm("@");
public Terminal comma => ToTerm(",");
public Terminal colon => ToTerm(":");
public Terminal hash => ToTerm("#");
public Terminal semicolon => ToTerm(";");
public Terminal OpenParen => ToTerm("(");
public Terminal CloseParen => ToTerm(")");
public Terminal CloseSquareParen => ToTerm("]");
public Terminal OpenSquareParen => ToTerm("[");
public Terminal exclamationMark => ToTerm("!");
public Terminal CloseCurlyParen => ToTerm("}");
public Terminal OpenCurlyParen => ToTerm("{");
public Terminal QuoteS => ToTerm("'");
public Terminal mulop => ToTerm("*");
public Terminal plusop => ToTerm("+");
public Terminal divop => ToTerm("/");
public Terminal minop => ToTerm("-");
public Terminal concatop => ToTerm("&");
public Terminal expop => ToTerm("^");
// Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
// Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);
public Terminal percentop => ToTerm("%");
public Terminal gtop => ToTerm(">");
public Terminal eqop => ToTerm("=");
public Terminal ltop => ToTerm("<");
public Terminal neqop => ToTerm("<>");
public Terminal gteop => ToTerm(">=");
public Terminal lteop => ToTerm("<=");
#endregion
#region Literals
public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE", "T", "F")
{
Priority = TerminalPriority.Bool
};
public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None)
{
DefaultIntTypes = new[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt }
};
public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes);
public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes)
{ Priority = TerminalPriority.SingleQuotedString };
public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!", "#");
public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError);
#endregion
#region Functions
private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers
private const string AllUdfChars = SpecialUdfChars + @"\\.\w";
private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)";
// The following regex uses the rather exotic feature Character Class Subtraction
// https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction
private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\(";
public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) { Priority = TerminalPriority.UDF };
public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(", "I", "O")
{ Priority = TerminalPriority.ExcelRefFunction };
public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(", "I", "C")
{ Priority = TerminalPriority.ExcelRefFunction };
public Terminal ExcelFunction { get; } = new WordsTerminal(GrammarNames.ExcelFunction, excelFunctionList.Select(f => f + '('))
{ Priority = TerminalPriority.ExcelFunction };
// Using this instead of Empty allows a more accurate tree
public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);
#endregion
#region References and names
private const string ColumnPattern = @"(?:[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D]|[A-Z]{1,2})";
private static readonly string[] ColumnPrefix = Enumerable.Range('A', 'Z' - 'A' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray();
private static readonly string[] RowPrefix = Enumerable.Range('1', '9' - '1' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray();
public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern, ColumnPrefix);
public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*", RowPrefix);
private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?[1-9][0-9]*";
public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex, ColumnPrefix)
{ Priority = TerminalPriority.CellToken };
private static readonly HashSet UnicodeLetterCategories = new HashSet
{
UnicodeCategory.UppercaseLetter,
UnicodeCategory.LowercaseLetter,
UnicodeCategory.TitlecaseLetter,
UnicodeCategory.ModifierLetter,
UnicodeCategory.OtherLetter
};
// 48718 letters, but it allows parser to from tokens starting with digits, parentheses, operators...
private static readonly string[] UnicodeLetters = Enumerable.Range(0, ushort.MaxValue).Where(codePoints => UnicodeLetterCategories.Contains(CharUnicodeInfo.GetUnicodeCategory((char)codePoints))).Select(codePoint => char.ToString((char)codePoint)).ToArray();
private static readonly string[] NameStartCharPrefix = UnicodeLetters.Concat(new[] { @"\", "_" }).ToArray();
// Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark
private const string NameStartCharRegex = @"[\p{L}\\_]";
private const string NameValidCharacterRegex = @"[\w\\_\.\?€]";
public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*", NameStartCharPrefix)
{ Priority = TerminalPriority.Name };
// Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference.
// If we ever parse R1C1 references, make sure to include them here
// TODO: Add all function names here
private const string NameInvalidWordsRegex =
"((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
// \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
+ "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
;
// To prevent e.g. "A1A1" being parsed as 2 cell tokens
public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NameInvalidWordsRegex,
ColumnPrefix.Concat(new[] { "T", "F" }).ToArray())
{ Priority = TerminalPriority.NamedRangeCombination };
public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+", "_")
{ Priority = TerminalPriority.ReservedName };
#region Structured References
private const string SRSpecifierRegex = @"#(All|Data|Headers|Totals|This Row)";
public Terminal SRSpecifierToken = new RegexBasedTerminal(GrammarNames.TokenSRSpecifier, SRSpecifierRegex, "#")
{ Priority = TerminalPriority.StructuredReference };
private const string SRColumnRegex = @"(?:[^\[\]'#@]|(?:'['\[\]#@]))+";
public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, SRColumnRegex)
{ Priority = TerminalPriority.StructuredReference };
#endregion
#region Prefixes
// Characters that are excluded from an unquoted sheet name (normalSheetName) but are still
// accepted inside a quoted one (quotedSheetName).
private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
// Characters that are excluded from both the unquoted and the quoted sheet name patterns.
private const string notSheetNameChars = @"'*\[\]\\:/?";
//const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
//const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
// Unquoted sheet name: one or more characters outside both exclusion sets above.
private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
// Body of a quoted sheet name: any run (possibly empty) of allowed characters or doubled apostrophes ('').
private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*";
//private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";
// Unquoted sheet prefix: the sheet name followed by '!'.
public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!")
{ Priority = TerminalPriority.SheetToken };
// Quoted sheet prefix WITHOUT its opening apostrophe: body, closing apostrophe, then '!'.
// The Prefix rule in the constructor supplies the opening quote separately (QuoteS + SheetQuotedToken).
public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!")
{ Priority = TerminalPriority.SheetQuotedToken };
// Multi-sheet prefixes: two sheet names separated by ':' and followed by '!'
// (the quoted variant again lacks the opening apostrophe).
private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!";
private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!";
public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
// Numeric file reference such as "[1]". The lookahead requires a '!' further on
// with no '[' or ']' in between; the lookahead text itself is not consumed.
private const string fileNameNumericRegex = @"\[[0-9]+\](?=[^\[\]]*!)";
public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex, "[")
{ Priority = TerminalPriority.FileNameNumericToken };
// File name in square brackets (any non-empty content without brackets), same lookahead as above.
private const string fileNameInBracketsRegex = @"\[[^\[\]]+\](?=[^\[\]]*!)";
public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex, "[")
{ Priority = TerminalPriority.FileName };
// Source: https://stackoverflow.com/a/14632579
// Bare file name: one or more characters other than '.', '\', '[' and ']', then a dot and 1 to 4 arbitrary characters.
private const string fileNameRegex = @"[^\.\\\[\]]+\..{1,4}";
public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex)
{ Priority = TerminalPriority.FileName };
// Source: http://stackoverflow.com/a/6416209/572635
// Windows path: a drive ("C:") or a name optionally preceded by up to two backslashes,
// followed by a backslash and any number of backslash-terminated directory segments.
private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\-.$ @]+)\\(([^<>\"" /\|?*\\']|( |''))*\\)*";
// http/https URL up to and including its last '/'.
// NOTE(review): "(:(0-9)*)*" matches the literal text "0-9", not a digit class, so a port such as
// ":8080" is not matched by this group; "[0-9]" looks intended. Confirm before changing, since it
// alters which inputs this archived grammar version accepts.
private const string urlPathRegex = @"http(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:(0-9)*)*[/]([a-zA-Z0-9\-\.\?\,\'+&%\$#_ ()]*[/])*";
// A file path is either a Windows path or a URL path.
private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")";
public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex)
{ Priority = TerminalPriority.FileNamePath };
#endregion
#endregion
#endregion
#region 2-NonTerminals
// Most non-terminals are first defined here, so they can be used anywhere in the rules
// Otherwise you can only use non-terminals that have been defined previously
// Each non-terminal is created with its name from GrammarNames; its production rule is assigned
// in the constructor. QuotedFileSheet and Sheet are declared here but the constructor of this
// class assigns no rule to them.
public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument);
public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments);
public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns);
public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant);
public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula);
public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows);
public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool);
public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell);
public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant);
public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray);
public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument);
public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error);
public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula);
public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq);
public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall);
public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName);
public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange);
public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp);
public NonTerminal MultiRangeFormula{ get; } = new NonTerminal(GrammarNames.MultiRangeFormula);
public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange);
public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number);
public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix);
public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference);
public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError);
public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName);
public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName);
public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet);
public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart);
public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
public NonTerminal StructuredReferenceColumn { get; } = new NonTerminal(GrammarNames.StructuredReferenceColumn);
public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
public NonTerminal StructuredReferenceSpecifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceSpecifier);
public NonTerminal StructuredReferenceQualifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceQualifier);
public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);
#endregion
// Builds the grammar: marks punctuation, assigns a production rule to the non-terminals,
// marks the transient non-terminals and registers operator precedence/associativity.
public ExcelFormulaGrammar()
{
#region Punctuation
// Round and curly brackets are punctuation. Code working on the parse tree (e.g. the
// parentheses checks in ExcelFormulaParser) looks at child counts rather than at bracket tokens.
MarkPunctuation(OpenParen, CloseParen);
MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
#endregion
#region Rules
#region Base rules
Root = Start;
// Accepted inputs: a formula with or without leading '=', an array formula "{=...}",
// or '=' followed by a union of references.
Start.Rule =
FormulaWithEq
| Formula
| ArrayFormula
| MultiRangeFormula
;
MarkTransient(Start);
ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;
MultiRangeFormula.Rule = eqop + Union;
FormulaWithEq.Rule = eqop + Formula;
Formula.Rule =
Reference + ReduceHere()
| Constant
| FunctionCall
| ConstantArray
| OpenParen + Formula + CloseParen
| ReservedName
;
ReservedName.Rule = ReservedNameToken;
Constant.Rule =
Number
| Text
| Bool
| Error
;
Text.Rule = TextToken;
Number.Rule = NumberToken;
Bool.Rule = BoolToken;
Error.Rule = ErrorToken;
RefError.Rule = RefErrorToken;
#endregion
#region Functions
// A FunctionCall is either a named built-in function or an operator application
// (prefix, postfix or infix). There is no OpenParen in the named-function alternative:
// only the closing bracket is a separate token here.
FunctionCall.Rule =
FunctionName + Arguments + CloseParen
| PrefixOp + Formula
| Formula + PostfixOp
| Formula + InfixOp + Formula
;
FunctionName.Rule = ExcelFunction;
// Zero or more comma-separated arguments
Arguments.Rule = MakeStarRule(Arguments, comma, Argument);
EmptyArgument.Rule = EmptyArgumentToken;
Argument.Rule = Formula | EmptyArgument;
PrefixOp.Rule =
ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
MarkTransient(PrefixOp);
InfixOp.Rule =
expop
| mulop
| divop
| plusop
| minop
| concatop
| gtop
| eqop
| ltop
| neqop
| gteop
| lteop;
MarkTransient(InfixOp);
// ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
PostfixOp.Rule = PreferShiftHere() + percentop;
MarkTransient(PostfixOp);
#endregion
#region References
// A Reference is a plain item, a reference-returning function/operator, a parenthesized
// reference, a prefixed item (sheet/file prefix) or a DDE link.
Reference.Rule =
ReferenceItem
| ReferenceFunctionCall
| OpenParen + Reference + PreferShiftHere() + CloseParen
| Prefix + ReferenceItem
| DynamicDataExchange
;
// Reference operators: range (:), intersection, parenthesized union, reference-returning
// functions and the postfix '#' operator.
ReferenceFunctionCall.Rule =
Reference + colon + Reference
| Reference + intersectop + Reference
| OpenParen + Union + CloseParen
| RefFunctionName + Arguments + CloseParen
| Reference + hash
;
RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;
// One or more comma-separated references
Union.Rule = MakePlusRule(Union, comma, Reference);
ReferenceItem.Rule =
Cell
| NamedRange
| VRange
| HRange
| RefError
| UDFunctionCall
| StructuredReference
;
MarkTransient(ReferenceItem);
UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
UDFName.Rule = UDFToken;
VRange.Rule = VRangeToken;
HRange.Rule = HRangeToken;
Cell.Rule = CellToken;
// External workbook: numeric index, bracketed name, or a path followed by a (bracketed) file name
File.Rule =
FileNameNumericToken
| FileNameEnclosedInBracketsToken
| FilePathToken + FileNameEnclosedInBracketsToken
| FilePathToken + FileName
;
DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;
NamedRange.Rule = NameToken | NamedRangeCombinationToken;
// Sheet/file prefixes. The quoted variants start with a separate QuoteS token because the
// quoted sheet terminals only match from the sheet name body onwards.
Prefix.Rule =
SheetToken
| QuoteS + SheetQuotedToken
| File + SheetToken
| QuoteS + File + SheetQuotedToken
| File + exclamationMark
| MultipleSheetsToken
| QuoteS + MultipleSheetsQuotedToken
| File + MultipleSheetsToken
| QuoteS + File + MultipleSheetsQuotedToken
| RefErrorToken
;
StructuredReferenceQualifier.Rule = NameToken;
// Specifiers and columns may each appear bare or wrapped in square brackets
StructuredReferenceSpecifier.Rule =
SRSpecifierToken
| OpenSquareParen + SRSpecifierToken + CloseSquareParen;
StructuredReferenceColumn.Rule =
SRColumnToken
| OpenSquareParen + SRColumnToken + CloseSquareParen;
// Content between the outer square brackets: a column or column range (optionally preceded
// by '@'), or one or two specifiers optionally followed by a column or column range.
StructuredReferenceExpression.Rule =
StructuredReferenceColumn
| StructuredReferenceColumn + colon + StructuredReferenceColumn
| at + StructuredReferenceColumn
| at + StructuredReferenceColumn + colon + StructuredReferenceColumn
| StructuredReferenceSpecifier
| StructuredReferenceSpecifier + comma + StructuredReferenceColumn
| StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
| StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier
| StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn
| StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
;
// The qualifier (a name token) is optional; with a qualifier the brackets may be empty
StructuredReference.Rule =
OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
| StructuredReferenceQualifier + OpenSquareParen + CloseSquareParen
| StructuredReferenceQualifier + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
;
#endregion
#region Arrays
// Array constants: rows of comma-separated constants, rows separated by semicolons
ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;
ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);
ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
#endregion
#endregion
#region 5-Operator Precedence
// Some of these operators are neutral associative instead of left associative,
// but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
// structure of the parse tree, we like consistency.
// Registered from the lowest precedence value (comparison) to the highest (range); see the Precedence class.
RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at);
RegisterOperators(Precedence.Union, Associativity.Left, comma);
RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
RegisterOperators(Precedence.Range, Associativity.Left, colon);
#endregion
}
#region Precedence and Priority constants
// Operator precedence levels used by the constructor (RegisterOperators / ImplyPrecedenceHere).
// A larger value is a higher precedence level.
// Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
// Could also be an enum, but this way you don't need int casts
private static class Precedence
{
// Don't use precedence 0, Irony seems to view it as no precedence set
public const int Comparison = 1;
public const int Concatenation = 2;
public const int Addition = 3;
public const int Multiplication = 4;
public const int Exponentiation = 5;
public const int UnaryPostFix = 6;
public const int UnaryPreFix = 7;
// Level 8 is intentionally unused (the Reference level is disabled)
//public const int Reference = 8;
public const int Union = 9;
public const int Intersection = 10;
public const int Range = 11;
}
// Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
// E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
// E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
// Several terminals deliberately or incidentally share a value (e.g. StructuredReference and
// FileName are both -500, Name and FileNamePath are both -800, and six entries are 1200).
private static class TerminalPriority
{
// Irony Low value
//public const int Low = -1000;
public const int Name = -800;
public const int ReservedName = -700;
public const int StructuredReference = -500;
public const int FileName = -500;
public const int FileNamePath = -800;
public const int SingleQuotedString = -100;
// Irony Normal value, default value
//public const int Normal = 0;
public const int Bool = 0;
public const int MultipleSheetsToken = 100;
// Irony High value
//public const int High = 1000;
public const int CellToken = 1000;
public const int NamedRangeCombination = 1100;
public const int UDF = 1150;
public const int ExcelFunction = 1200;
public const int ExcelRefFunction = 1200;
public const int FileNameNumericToken = 1200;
public const int SheetToken = 1200;
public const int SheetQuotedToken = 1200;
}
#endregion
// Names of the built-in Excel functions. Every access re-reads and re-splits the embedded list.
private static string[] excelFunctionList
{
    get { return GetExcelFunctionList(); }
}

// Reads the embedded v162 function list and returns its non-empty lines
// (split on both '\n' and '\r', so either line-ending style works).
private static string[] GetExcelFunctionList()
{
    var lineBreaks = new[] { '\n', '\r' };
    string contents;
    using (var reader = new StringReader(Properties.Resources.ExcelBuiltinFunctionList_v162))
    {
        contents = reader.ReadToEnd();
    }
    return contents.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
}
}
#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
#region Non-Terminals
public const string Argument = "Argument";
public const string Arguments = "Arguments";
public const string ArrayColumns = "ArrayColumns";
public const string ArrayConstant = "ArrayConstant";
public const string ArrayFormula = "ArrayFormula";
public const string ArrayRows = "ArrayRows";
public const string Bool = "Bool";
public const string Cell = "Cell";
public const string Constant = "Constant";
public const string ConstantArray = "ConstantArray";
public const string DynamicDataExchange = "DynamicDataExchange";
public const string EmptyArgument = "EmptyArgument";
public const string Error = "Error";
public const string ExcelFunction = "ExcelFunction";
public const string File = "File";
public const string Formula = "Formula";
public const string FormulaWithEq = "FormulaWithEq";
public const string FunctionCall = "FunctionCall";
public const string FunctionName = "FunctionName";
public const string HorizontalRange = "HRange";
public const string MultiRangeFormula = "MultiRangeFormula";
public const string NamedRange = "NamedRange";
public const string Number = "Number";
public const string Prefix = "Prefix";
public const string QuotedFileSheet = "QuotedFileSheet";
public const string Range = "Range";
public const string Reference = "Reference";
public const string ReferenceFunctionCall = "ReferenceFunctionCall";
public const string RefError = "RefError";
public const string RefFunctionName = "RefFunctionName";
public const string ReservedName = "ReservedName";
public const string Sheet = "Sheet";
public const string StructuredReference = "StructuredReference";
public const string StructuredReferenceColumn = "StructuredReferenceColumn";
public const string StructuredReferenceExpression = "StructuredReferenceExpression";
public const string StructuredReferenceSpecifier = "StructuredReferenceSpecifier";
public const string StructuredReferenceQualifier = "StructuredReferenceQualifier";
public const string Text = "Text";
public const string UDFName = "UDFName";
public const string UDFunctionCall = "UDFunctionCall";
public const string Union = "Union";
public const string VerticalRange = "VRange";
#endregion
#region Transient Non-Terminals
// Names of the non-terminals that the grammar constructor marks as transient
public const string TransientStart = "Start";
public const string TransientInfixOp = "InfixOp";
public const string TransientPostfixOp = "PostfixOp";
public const string TransientPrefixOp = "PrefixOp";
public const string TransientReferenceItem = "ReferenceItem";
#endregion
#region Terminals
// Note that a few values differ from their constant's name
// (e.g. TokenSheet is "SheetNameToken", TokenIntersect is "INTERSECT", TokenUnionOperator is ",").
public const string TokenBool = "BoolToken";
public const string TokenCell = "CellToken";
public const string TokenEmptyArgument = "EmptyArgumentToken";
public const string TokenError = "ErrorToken";
public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
public const string TokenFilePath = "FilePathToken";
public const string TokenFileName = "FileNameToken";
public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
public const string TokenFileNameNumeric = "FileNameNumericToken";
public const string TokenHRange = "HRangeToken";
public const string TokenIntersect = "INTERSECT";
public const string TokenMultipleSheets = "MultipleSheetsToken";
public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
public const string TokenName = "NameToken";
public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
public const string TokenNumber = "NumberToken";
public const string TokenRefError = "RefErrorToken";
public const string TokenReservedName = "ReservedNameToken";
public const string TokenSingleQuotedString = "SingleQuotedString";
public const string TokenSheet = "SheetNameToken";
public const string TokenSheetQuoted = "SheetNameQuotedToken";
public const string TokenSRColumn = "SRColumnToken";
public const string TokenSRSpecifier = "SRSpecifierToken";
public const string TokenText = "TextToken";
public const string TokenUDF = "UDFToken";
public const string TokenUnionOperator = ",";
public const string TokenVRange = "VRangeToken";
#endregion
}
#endregion
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v162/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v162
{
/// <summary>
/// Excel formula parser <br/>
/// Contains the parser and utilities that operate directly on the parse tree, or make working with the parse tree easier.
/// </summary>
public static class ExcelFormulaParser
{
/// <summary>
/// Parser instance of the current thread (one per thread because of <see cref="ThreadStaticAttribute"/>)
/// </summary>
[ThreadStatic] private static Parser _p;

/// <summary>
/// Parser for the calling thread; created with a new <see cref="ExcelFormulaGrammar"/> on first use
/// </summary>
private static Parser P
{
    get
    {
        if (_p == null)
        {
            _p = new Parser(new ExcelFormulaGrammar());
        }
        return _p;
    }
}
/// <summary>
/// Parse a formula and return the root node of the resulting tree
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed
/// </exception>
/// <returns>Parse tree root node</returns>
public static ParseTreeNode Parse(string input)
{
    var tree = ParseToTree(input);
    return tree.Root;
}
/// <summary>
/// Parse a formula and return the complete parse tree
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed
/// </exception>
/// <returns>Parse tree</returns>
public static ParseTree ParseToTree(string input)
{
    var parseTree = P.Parse(input);
    if (parseTree.HasErrors())
    {
        throw new ArgumentException($"Failed parsing input <<{input}>>");
    }

    // Move every intersect token one position/column to the left and give it a length of 1.
    // NOTE(review): presumably because the intersection operator is the whitespace character
    // right before the reported token location - confirm.
    foreach (var intersectNode in parseTree.Root.AllNodes().Where(n => n.Is(GrammarNames.TokenIntersect)))
    {
        var reported = intersectNode.Span.Location;
        var shifted = new SourceLocation(reported.Position - 1, reported.Line, reported.Column - 1);
        intersectNode.Span = new SourceSpan(shifted, 1);
    }

    // Let PrefixInfo adjust every quoted sheet name token, using the original input text
    foreach (var sheetNode in parseTree.Root.AllNodes().Where(n => n.Is(GrammarNames.TokenSheetQuoted)))
    {
        PrefixInfo.FixQuotedSheetNodeForWhitespace(sheetNode, input);
    }

    return parseTree;
}
/// <summary>
/// All nodes (terminals and non-terminals) in depth-first pre-order, with a conditional stop
/// </summary>
/// <param name="root">The root node</param>
/// <param name="stopAt">Don't process the children of a node matching this predicate (the node itself is still returned)</param>
/// <returns>Lazily evaluated sequence of nodes, starting with <paramref name="root"/></returns>
// inspiration taken from https://irony.codeplex.com/discussions/213938
public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
{
    var stack = new Stack<ParseTreeNode>();
    stack.Push(root);
    while (stack.Count > 0)
    {
        var node = stack.Pop();
        yield return node;

        // Check if we don't want to process the children of this node
        if (stopAt != null && stopAt(node))
        {
            continue;
        }

        var children = node.ChildNodes;
        // Push children on in reverse order so that they will
        // be evaluated left -> right when popped.
        for (int i = children.Count - 1; i >= 0; i--)
        {
            stack.Push(children[i]);
        }
    }
}
/// <summary>
/// All nodes (terminals and non-terminals) in depth-first pre-order
/// </summary>
/// <param name="root">The node to start from; it is the first node returned</param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
{
    return AllNodesConditional(root);
}

/// <summary>
/// All nodes of a certain type in depth-first pre-order
/// </summary>
/// <param name="root">The node to start from</param>
/// <param name="type">Grammar name the returned nodes must have, see <see cref="GrammarNames"/></param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
{
    return AllNodes(root.AllNodes(), type);
}

/// <summary>
/// Filter an already enumerated node sequence down to the nodes of a certain type
/// </summary>
internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
{
    return allNodes.Where(node => node.Is(type));
}
/// <summary>
/// Find the node in the tree that has the given node as a direct child
/// </summary>
/// <remarks>
/// This is an expensive operation, as the whole tree will be searched through
/// </remarks>
/// <param name="child">The node whose parent is wanted</param>
/// <param name="treeRoot">Root of the tree to search</param>
/// <exception cref="ArgumentException">If no node below (or equal to) the root has the child among its children</exception>
public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
{
    foreach (var candidate in treeRoot.AllNodes())
    {
        if (candidate.ChildNodes.Any(c => c == child))
        {
            return candidate;
        }
    }

    throw new ArgumentException("Child is not part of the tree", nameof(child));
}
/// <summary>
/// Name of the grammar term this node belongs to
/// </summary>
public static string Type(this ParseTreeNode node) => node.Term.Name;
/// <summary>
/// Whether the node's type (its term name) equals the given type
/// </summary>
public static bool Is(this ParseTreeNode pt, string type) => string.Equals(pt.Type(), type);
/// <summary>
/// Whether this node is a function call, a reference function call, a user defined function call,
/// or a two-child reference whose second child is itself a function
/// </summary>
public static Boolean IsFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            return true;
        case GrammarNames.Reference:
            // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
            return input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction();
        default:
            return false;
    }
}
/// <summary>
/// Whether or not this node represents parentheses "(_)": a Formula or Reference node
/// whose only child has the same type as the node itself
/// </summary>
public static bool IsParentheses(this ParseTreeNode input)
{
    var ownType = input.Type();
    if (ownType != GrammarNames.Formula && ownType != GrammarNames.Reference)
    {
        return false;
    }

    return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(ownType);
}
/// <summary>
/// Whether this node is a function with exactly three children of which the middle one is an operator
/// </summary>
public static bool IsBinaryOperation(this ParseTreeNode input)
{
    if (!input.IsFunction())
    {
        return false;
    }

    var parts = input.ChildNodes;
    return parts.Count == 3 && parts[1].Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Whether this node is a binary operation of type FunctionCall
/// </summary>
public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
    => input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);
/// <summary>
/// Whether this node is a binary operation of type ReferenceFunctionCall
/// </summary>
public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
    => input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
/// <summary>
/// Whether this node is a unary prefix or unary postfix operation
/// </summary>
public static bool IsUnaryOperation(this ParseTreeNode input)
    => input.IsUnaryPrefixOperation() || input.IsUnaryPostfixOperation();
/// <summary>
/// Whether this node is a function with exactly two children of which the first one is an operator
/// </summary>
public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction())
    {
        return false;
    }

    var parts = input.ChildNodes;
    return parts.Count == 2 && parts[0].Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Whether this node is a function with exactly two children of which the second one is an operator
/// </summary>
public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction())
    {
        return false;
    }

    var parts = input.ChildNodes;
    return parts.Count == 2 && parts[1].Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Returns the input without its last character
/// </summary>
/// <exception cref="ArgumentOutOfRangeException">If the input is empty</exception>
private static string RemoveFinalSymbol(string input)
{
    return input.Remove(input.Length - 1);
}
/// <summary>
/// Get the function or operator name of this function call
/// </summary>
/// <param name="input">A node for which <see cref="IsFunction"/> holds</param>
/// <returns>
/// The intersect or union operator name, the printed operator for unary/binary operations,
/// the upper-cased function name for named functions, or prefix + function name for external UDFs
/// </returns>
/// <exception cref="ArgumentException">If the node is none of the supported function shapes</exception>
public static string GetFunction(this ParseTreeNode input)
{
    if (input.IsIntersection())
    {
        return GrammarNames.TokenIntersect;
    }
    if (input.IsUnion())
    {
        return GrammarNames.TokenUnionOperator;
    }
    if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
    {
        return input.ChildNodes[1].Print();
    }
    if (input.IsUnaryPrefixOperation())
    {
        return input.ChildNodes[0].Print();
    }
    if (input.IsNamedFunction())
    {
        // The printed name still carries one trailing symbol that has to go (the grammar rules
        // for named functions have no separate opening bracket token).
        // Function names are identifiers, not display text: upper-case them culture-invariantly
        // so the result does not depend on the current culture (e.g. "if" under tr-TR).
        return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
    }
    if (input.IsExternalUDFunction())
    {
        return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
    }
    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Whether this node is a function whose name (as returned by <see cref="GetFunction"/>) equals the given name
/// </summary>
public static bool MatchFunction(this ParseTreeNode input, string functionName)
{
    if (!IsFunction(input))
    {
        return false;
    }

    return GetFunction(input) == functionName;
}
/// <summary>
/// Get all the arguments of a function or operation
/// </summary>
/// <param name="input">A named function, operation, union or external UDF node</param>
/// <returns>The argument nodes, in source order</returns>
/// <exception cref="ArgumentException">If the node is none of the supported function shapes</exception>
public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
{
    if (input.IsNamedFunction())
    {
        return input
            .ChildNodes[1] // "Arguments" non-terminal
            .ChildNodes    // "Argument" non-terminals
            .Select(node => node.ChildNodes[0])
            ;
    }
    if (input.IsBinaryOperation())
    {
        // Operands sit on either side of the operator
        return new[] {input.ChildNodes[0], input.ChildNodes[2]};
    }
    if (input.IsUnaryPrefixOperation())
    {
        return new[] {input.ChildNodes[1]};
    }
    if (input.IsUnaryPostfixOperation())
    {
        return new[] {input.ChildNodes[0]};
    }
    if (input.IsUnion())
    {
        // The single child is the Union node; its children are the united references
        return input.ChildNodes[0].ChildNodes;
    }
    if (input.IsExternalUDFunction())
    {
        return input           // Reference
            .ChildNodes[1]     // UDFunctionCall
            .ChildNodes[1]     // Arguments
            .ChildNodes        // Argument non-terminals
            .Select(node => node.ChildNodes[0])
            ;
    }
    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Whether this node is a function whose first child is a FunctionName or RefFunctionName,
/// i.e. a built-in Excel function
/// </summary>
public static bool IsBuiltinFunction(this ParseTreeNode node)
{
    if (!node.IsFunction())
    {
        return false;
    }

    var head = node.ChildNodes[0];
    return head.Is(GrammarNames.FunctionName) || head.Is(GrammarNames.RefFunctionName);
}
/// <summary>
/// Whether or not this node is a binary operation whose operator is the intersect token
/// </summary>
public static bool IsIntersection(this ParseTreeNode input)
{
    if (!IsBinaryOperation(input))
    {
        return false;
    }

    var operatorToken = input.ChildNodes[1].Token;
    return operatorToken.Terminal.Name == GrammarNames.TokenIntersect;
}
/// <summary>
/// Whether or not this node represents a union: a ReferenceFunctionCall whose only child is a Union node
/// </summary>
public static bool IsUnion(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.ReferenceFunctionCall))
    {
        return false;
    }

    var parts = input.ChildNodes;
    return parts.Count == 1 && parts[0].Is(GrammarNames.Union);
}
/// <summary>
/// Whether this node is a function call with a name, and not just a unary or binary operation
/// </summary>
public static bool IsNamedFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.FunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.FunctionName);
        case GrammarNames.ReferenceFunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.RefFunctionName);
        case GrammarNames.UDFunctionCall:
            // User defined function calls always carry a name
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Whether this node is a binary or a unary operation
/// </summary>
public static bool IsOperation(this ParseTreeNode input)
    => input.IsBinaryOperation() || input.IsUnaryOperation();
/// <summary>
/// Whether this node is a Reference with exactly two children of which the second is a named function
/// </summary>
public static bool IsExternalUDFunction(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.Reference))
    {
        return false;
    }

    var parts = input.ChildNodes;
    return parts.Count == 2 && parts[1].IsNamedFunction();
}
/// <summary>
/// True if this node is a unary prefix operation whose operand is a number constant
/// </summary>
public static bool IsNumberWithSign(this ParseTreeNode input)
{
    if (!IsUnaryPrefixOperation(input))
    {
        return false;
    }

    // First child of the operand node; a signed number has Constant -> Number here
    var operandContent = input.ChildNodes[1].ChildNodes[0];
    return operandContent.Is(GrammarNames.Constant)
           && operandContent.ChildNodes[0].Is(GrammarNames.Number);
}
/// <summary>
/// Build the <see cref="PrefixInfo"/> for a Prefix non-terminal (delegates to PrefixInfo.From)
/// </summary>
public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix)
{
    return PrefixInfo.From(prefix);
}
/// <summary>
/// Descend through first children until a node that is not a Formula is reached
/// </summary>
public static ParseTreeNode SkipFormula(this ParseTreeNode input)
{
    var current = input;
    while (current.Is(GrammarNames.Formula))
    {
        current = current.ChildNodes.First();
    }

    return current;
}
/// <summary>
/// Get all child nodes that are references and aren't part of another reference expression
/// </summary>
/// <remarks>
/// The traversal does not descend into a Reference node once one is found, and each found
/// Reference is passed through <see cref="SkipToRelevant"/> before being returned.
/// </remarks>
public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
{
    return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
        .Where(node => node.Is(GrammarNames.Reference))
        .Select(node => node.SkipToRelevant())
        ;
}
/// <summary>
/// Gets the ParserReferences from the input parse tree node and its children
/// </summary>
/// <remarks>
/// 5 cases:
/// 1. ReferenceItem node: convert to ParserReference
/// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
///    (to include the references in the arguments of external UDFs)
/// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
/// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
/// 5. Other cases (RefFunctionCall, Union, Arguments):recursive call on the nodes returned from GetReferenceNodes(node)
/// </remarks>
/// <param name="node">The node to collect references from</param>
/// <returns>The collected references (a fully built list, not a lazy sequence)</returns>
public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
{
    // A Reference with a single child is only a wrapper around that child
    if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
    {
        node = node.ChildNodes[0];
    }

    var list = new List<ParserReference>();
    switch (node.Type())
    {
        case GrammarNames.Cell:
        case GrammarNames.NamedRange:
        case GrammarNames.HorizontalRange:
        case GrammarNames.VerticalRange:
        case GrammarNames.StructuredReference:
            list.Add(new ParserReference(node));
            break;
        case GrammarNames.Reference:
            // Prefixed reference: the reference itself, plus whatever its second child contains
            list.Add(new ParserReference(node));
            list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            break;
        default:
            if (node.IsRange())
            {
                var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray();
                var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray();
                if (IsCellReference(rangeStart) && IsCellReference(rangeEnd))
                {
                    // Cell:Cell - merge both limits into one CellRange reference
                    ParserReference range = rangeStart.First();
                    range.MaxLocation = rangeEnd.First().MinLocation;
                    range.ReferenceType = ReferenceType.CellRange;
                    range.LocationString = node.Print();
                    list.Add(range);
                }
                else if (IsTableReference(rangeStart) && IsTableReference(rangeEnd) && rangeStart.First().Name == rangeEnd.First().Name && rangeStart.First().TableColumns.Length == 1 && rangeEnd.First().TableColumns.Length == 1)
                {
                    // Two single-column references into the same table - merge into one column range.
                    // Specifiers are kept only when both limits carry the same ones.
                    ParserReference range = rangeStart.First();
                    range.TableColumns = rangeStart.First().TableColumns.Concat(rangeEnd.First().TableColumns).ToArray();
                    range.TableSpecifiers = rangeStart.First().TableSpecifiers.SequenceEqual(rangeEnd.First().TableSpecifiers) ? range.TableSpecifiers : new string[0];
                    list.Add(range);
                }
                else
                {
                    // Complex limits: report the references of both sides separately
                    list.AddRange(rangeStart);
                    list.AddRange(rangeEnd);
                }
            }
            else
            {
                list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            }
            break;
    }
    return list;
}
/// <summary>
/// Whether the list holds exactly one reference and that reference is of type Cell
/// </summary>
private static bool IsCellReference(IList<ParserReference> references)
{
    return references.Count == 1 && references.First().ReferenceType == ReferenceType.Cell;
}
/// <summary>
/// Whether the list holds exactly one reference and that reference is of type Table
/// </summary>
private static bool IsTableReference(IList<ParserReference> references)
{
    return references.Count == 1 && references.First().ReferenceType == ReferenceType.Table;
}
/// <summary>
/// Whether or not this node represents a range: a binary reference operation with ":" as operator
/// </summary>
public static bool IsRange(this ParseTreeNode input)
{
    if (!input.IsBinaryReferenceOperation())
    {
        return false;
    }

    return input.ChildNodes[1].Is(":");
}
/// <summary>
/// Go to the first "relevant" child node, i.e. skips wrapper nodes
/// </summary>
/// <param name="input">The input parse tree node</param>
/// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
/// <remarks>
/// Skips:
/// * FormulaWithEq and ArrayFormula nodes
/// * Formula and Argument nodes with a single child
/// * Parentheses
/// * Reference nodes which are just wrappers
/// </remarks>
/// <returns>The first node reached that is not skipped</returns>
public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
{
    for (;;)
    {
        var type = input.Type();

        if (type == GrammarNames.FormulaWithEq || type == GrammarNames.ArrayFormula)
        {
            // The wrapped formula is the second child
            input = input.ChildNodes[1];
            continue;
        }

        if (type == GrammarNames.Argument || type == GrammarNames.Formula)
        {
            if (input.ChildNodes.Count != 1)
            {
                return input;
            }
            input = input.ChildNodes[0];
            continue;
        }

        if (type == GrammarNames.Reference)
        {
            // Skip references which are parentheses
            // Skip references without a prefix (=> they only have one child node) if the option is set
            var isWrapper = (skipReferencesWithoutPrefix && input.ChildNodes.Count == 1) || input.IsParentheses();
            if (!isWrapper)
            {
                return input;
            }
            input = input.ChildNodes[0];
            continue;
        }

        return input;
    }
}
/// <summary>
/// Pretty-print a parse tree to a string
/// </summary>
/// <param name="input">Root of the (sub)tree to print</param>
/// <returns>The text of the node and all of its descendants</returns>
/// <exception cref="ArgumentException">
/// The node is a function call of an unknown shape, or a non-terminal that is not handled here
/// and does not have exactly one child
/// </exception>
public static string Print(this ParseTreeNode input)
{
    // Terminals simply carry their token text
    if (input.Term is Terminal)
    {
        return input.Token.Text;
    }

    // Deferred sequence: a child is only printed when a branch below enumerates it
    var printed = input.ChildNodes.Select(Print);

    switch (input.Term.Name)
    {
        case GrammarNames.Formula:
        {
            // Either a parenthesised formula or a plain wrapper around the first child
            if (IsParentheses(input))
            {
                return "(" + printed.First() + ")";
            }
            return printed.First();
        }

        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
        {
            var parts = printed.ToList();
            if (input.IsNamedFunction())
            {
                // The closing parenthesis is not part of the children, so it is appended here
                return string.Concat(parts) + ")";
            }
            if (input.IsBinaryOperation())
            {
                // An intersection is printed with a single space instead of its operator child
                return input.IsIntersection()
                    ? $"{parts[0]} {parts[2]}"
                    : $"{parts[0]}{parts[1]}{parts[2]}";
            }
            if (input.IsUnion())
            {
                return "(" + string.Join(",", parts) + ")";
            }
            if (input.IsUnaryOperation())
            {
                return string.Concat(parts);
            }
            throw new ArgumentException("Unknown function type.");
        }

        case GrammarNames.Reference:
        {
            if (IsParentheses(input))
            {
                return "(" + printed.First() + ")";
            }
            return string.Concat(printed);
        }

        case GrammarNames.Prefix:
        {
            var prefixText = string.Concat(printed);
            // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
            var isSingleFile = input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File);
            return isSingleFile ? prefixText + "!" : prefixText;
        }

        case GrammarNames.ArrayFormula:
            return string.Concat("{=", printed.ElementAt(1), "}");

        // Terms for which to print all child nodes concatenated
        case GrammarNames.ArrayConstant:
        case GrammarNames.DynamicDataExchange:
        case GrammarNames.FormulaWithEq:
        case GrammarNames.File:
        case GrammarNames.MultiRangeFormula:
        case GrammarNames.StructuredReference:
        case GrammarNames.StructuredReferenceColumn:
        case GrammarNames.StructuredReferenceExpression:
        case GrammarNames.StructuredReferenceSpecifier:
            return string.Concat(printed);

        // Terms for which we print the children comma-separated
        case GrammarNames.Arguments:
        case GrammarNames.ArrayRows:
        case GrammarNames.Union:
            return string.Join(",", printed);

        case GrammarNames.ArrayColumns:
            return string.Join(";", printed);

        case GrammarNames.ConstantArray:
            return "{" + printed.First() + "}";

        default:
            // Anything not listed above is only printable when it merely wraps a single child
            if (input.ChildNodes.Count != 1)
            {
                throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine +
                                            "This probably means the Excel grammar was modified without the print function being modified");
            }
            return printed.First();
    }
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v162/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v162
{
/// <summary>
/// The kinds of reference a ParserReference can describe.
/// </summary>
/// <remarks>
/// Member order determines the underlying numeric values, so new members should be appended.
/// </remarks>
public enum ReferenceType
{
// A single cell; assigned when a ParserReference is initialised from a Cell node
Cell,
// NOTE(review): not assigned anywhere in ParserReference itself; presumably set by the code that merges "A1:B2" style ranges - confirm
CellRange,
// A named range; assigned for NamedRange nodes
UserDefinedName,
// A row range; assigned for HorizontalRange nodes
HorizontalRange,
// A column range; assigned for VerticalRange nodes
VerticalRange,
// A reference error; assigned for RefError nodes
RefError,
// A structured (table) reference; assigned for StructuredReference nodes
Table
}
/// <summary>
/// Describes one reference found in a formula: what kind of reference it is, the location or name
/// it points to, and the file / worksheet information taken from its prefix.
/// </summary>
public class ParserReference
{
    public const int MaxRangeHeight = 1048576;
    public const int MaxRangeWidth = 16384;

    /// <summary>The kind of reference</summary>
    public ReferenceType ReferenceType { get; set; }

    /// <summary>The printed text of the node this reference was initialised from</summary>
    public string LocationString { get; set; }

    /// <summary>Worksheet from the prefix; the first worksheet when the prefix spans multiple sheets</summary>
    public string Worksheet { get; set; }

    /// <summary>Last worksheet when the prefix spans multiple sheets</summary>
    public string LastWorksheet { get; set; }

    public string FilePath { get; set; }

    /// <summary>File name from the prefix, or the file number rendered as text</summary>
    public string FileName { get; set; }

    /// <summary>Name of the named range, or the qualifier of a structured reference</summary>
    public string Name { get; private set; }

    /// <summary>Location as appearing in the formula, eg $A$1</summary>
    public string MinLocation { get; set; }

    public string MaxLocation { get; set; }

    public string[] TableSpecifiers { get; set; }

    public string[] TableColumns { get; set; }

    /// <summary>
    /// Creates a reference from explicit values.
    /// </summary>
    /// <remarks>
    /// When <paramref name="maxLocation"/> is null, <see cref="MaxLocation"/> takes the value of <paramref name="minLocation"/>.
    /// </remarks>
    public ParserReference(ReferenceType referenceType, string locationString = null, string worksheet = null, string lastWorksheet = null,
        string filePath = null, string fileName = null, string name = null, string minLocation = null, string maxLocation = null,
        string[] tableSpecifiers = null, string[] tableColumns = null)
    {
        ReferenceType = referenceType;
        LocationString = locationString;
        Name = name;

        Worksheet = worksheet;
        LastWorksheet = lastWorksheet;
        FilePath = filePath;
        FileName = fileName;

        MinLocation = minLocation;
        MaxLocation = maxLocation ?? minLocation;

        TableSpecifiers = tableSpecifiers;
        TableColumns = tableColumns;
    }

    /// <summary>
    /// Creates a reference from a parse tree node, see <see cref="InitializeReference"/>.
    /// </summary>
    public ParserReference(ParseTreeNode node)
    {
        InitializeReference(node);
    }

    /// <summary>
    /// Initializes the current object based on the input ParseTreeNode
    /// </summary>
    /// <remarks>
    /// For Reference nodes (Prefix ReferenceItem), it initialize the values derived from the Prefix node and
    /// is re-invoked for the ReferenceItem node.
    /// </remarks>
    public void InitializeReference(ParseTreeNode node)
    {
        switch (node.Type())
        {
            case GrammarNames.Reference:
                ApplyPrefix(node.ChildNodes[0].GetPrefixInfo());
                InitializeReference(node.ChildNodes[1]);
                break;

            case GrammarNames.Cell:
                ReferenceType = ReferenceType.Cell;
                MinLocation = node.ChildNodes[0].Token.ValueString;
                MaxLocation = MinLocation;
                break;

            case GrammarNames.NamedRange:
                ReferenceType = ReferenceType.UserDefinedName;
                Name = node.ChildNodes[0].Token.ValueString;
                break;

            case GrammarNames.StructuredReference:
                ReferenceType = ReferenceType.Table;
                var qualifier = node.ChildNodes.FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceQualifier);
                Name = qualifier?.ChildNodes[0].Token.ValueString;
                TableSpecifiers = node.AllNodes()
                    .Where(x => x.Is(GrammarNames.TokenSRSpecifier) || x.Is("@"))
                    .Select(x => UnEscape(x.Token.ValueString, "'"))
                    .ToArray();
                TableColumns = node.AllNodes()
                    .Where(x => x.Is(GrammarNames.TokenSRColumn))
                    .Select(x => UnEscape(x.Token.ValueString, "'"))
                    .ToArray();
                break;

            case GrammarNames.HorizontalRange:
                ApplyRangeLimits(ReferenceType.HorizontalRange, node);
                break;

            case GrammarNames.VerticalRange:
                ApplyRangeLimits(ReferenceType.VerticalRange, node);
                break;

            case GrammarNames.RefError:
                ReferenceType = ReferenceType.RefError;
                break;
        }

        // Runs for every node type; for Reference nodes this replaces the value set by the nested call
        LocationString = node.Print();
    }

    // Copies worksheet and file information from the prefix of a Reference node
    private void ApplyPrefix(PrefixInfo prefix)
    {
        if (prefix.HasSheet)
        {
            Worksheet = prefix.Sheet.Replace("''", "'");
        }
        else
        {
            Worksheet = "(Undefined sheet)";
        }

        if (prefix.HasMultipleSheets)
        {
            string[] sheets = prefix.MultipleSheets.Split(':');
            Worksheet = sheets[0];
            LastWorksheet = sheets[1];
        }

        if (prefix.HasFilePath)
        {
            FilePath = prefix.FilePath;
        }

        // A file number takes precedence over a file name
        if (prefix.HasFileNumber)
        {
            FileName = prefix.FileNumber.ToString();
        }
        else if (prefix.HasFileName)
        {
            FileName = prefix.FileName;
        }
    }

    // Splits a "min:max" row or column range token into MinLocation and MaxLocation
    private void ApplyRangeLimits(ReferenceType rangeType, ParseTreeNode node)
    {
        string[] limits = node.ChildNodes[0].Token.ValueString.Split(':');
        ReferenceType = rangeType;
        MinLocation = limits[0];
        MaxLocation = limits[1];
    }

    // Removes every escape character that is not directly followed by another escape character
    private string UnEscape(string value, string escapeCharacter)
    {
        var pattern = escapeCharacter + "(?!" + escapeCharacter + ")";
        return System.Text.RegularExpressions.Regex.Replace(value, pattern, "");
    }

    /// <summary>
    /// Converts the column number to an Excel column string representation.
    /// </summary>
    /// <param name="columnNumber">The zero-based column number.</param>
    private string ConvertColumnToStr(int columnNumber)
    {
        var letters = new System.Text.StringBuilder();
        // Each step prepends the next more significant letter
        for (var remaining = columnNumber; remaining >= 0; remaining = remaining / 26 - 1)
        {
            letters.Insert(0, (char)('A' + remaining % 26));
        }
        return letters.ToString();
    }

    /// <summary>
    /// Returns the location for a cell and "min:max" for every other reference type.
    /// </summary>
    public override string ToString()
    {
        if (ReferenceType == ReferenceType.Cell)
        {
            return MinLocation.ToString();
        }
        return $"{MinLocation}:{MaxLocation}";
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v162/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v162
{
// TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23
/// <summary>
/// Simple data class that holds information about a Prefix.
/// </summary>
/// <remarks>
/// Equality and hashing use the file number, file path, file name, sheet and multiple-sheets values
/// (strings compared case-insensitively). <see cref="IsQuoted"/> does not take part in either.
/// </remarks>
public class PrefixInfo : IEquatable<PrefixInfo>
{
    public string FilePath { get; }
    public bool HasFilePath => FilePath != null;

    private readonly int? _fileNumber;
    /// <summary>The file number, or 0 when there is none (check <see cref="HasFileNumber"/>)</summary>
    public int FileNumber => _fileNumber.GetValueOrDefault();
    public bool HasFileNumber => _fileNumber.HasValue;

    public string FileName { get; }
    public bool HasFileName => FileName != null;

    public bool HasFile => HasFileName || HasFileNumber;

    public string Sheet { get; }
    public bool HasSheet => Sheet != null;

    public string MultipleSheets { get; }
    public bool HasMultipleSheets => MultipleSheets != null;

    public bool IsQuoted { get; }

    public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
    {
        Sheet = sheet;
        _fileNumber = fileNumber;
        FileName = fileName;
        FilePath = filePath;
        MultipleSheets = multipleSheets;
        IsQuoted = isQuoted;
    }

    /// <summary>
    /// Create a PrefixInfo class from a parse tree node
    /// </summary>
    /// <param name="prefix">A node of type Prefix</param>
    /// <exception cref="ArgumentException"><paramref name="prefix"/> is not a Prefix node</exception>
    internal static PrefixInfo From(ParseTreeNode prefix)
    {
        if (prefix.Type() != GrammarNames.Prefix)
        {
            throw new ArgumentException("Not a prefix", nameof(prefix));
        }

        string filePath = null;
        int? fileNumber = null;
        string fileName = null;
        string sheetName = null;
        string multipleSheets = null;

        // Token number we're processing
        var cur = 0;

        // Check for quotes
        var isQuoted = prefix.ChildNodes[cur].Is("'");
        if (isQuoted)
        {
            cur++;
        }

        // Check and process file
        if (prefix.ChildNodes[cur].Is(GrammarNames.File))
        {
            ParseTreeNode file = prefix.ChildNodes[cur];

            if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
            {
                // Numeric filename: strip the surrounding brackets before parsing
                fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
            }
            else
            {
                // String filename
                var iCur = 0;
                // Check if it includes a path
                if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                {
                    filePath = file.ChildNodes[iCur].Print();
                    iCur++;
                }

                if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                {
                    // Strip the surrounding brackets
                    fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                }
                else
                {
                    fileName = file.ChildNodes[iCur].Print();
                }
            }

            cur++;
        }

        // A prefix can consist of a file only (the "!" is not part of the parse tree),
        // in which case there is no further child to inspect
        if (cur < prefix.ChildNodes.Count)
        {
            ParseTreeNode sheetNode = prefix.ChildNodes[cur];

            // Check for a non-quoted sheet
            if (sheetNode.Is(GrammarNames.TokenSheet))
            {
                // remove !
                sheetName = Substr(sheetNode.Print(), 1);
            }
            // Check for a quoted sheet
            else if (sheetNode.Is(GrammarNames.TokenSheetQuoted))
            {
                // remove quote and !
                sheetName = Substr(sheetNode.Print(), 2);

                if (sheetName == "")
                {
                    // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                    // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                    sheetName = " ";
                }
            }
            // Check if multiple sheets
            else if (sheetNode.Is(GrammarNames.TokenMultipleSheets))
            {
                // remove !
                multipleSheets = Substr(sheetNode.Print(), 1);
            }
        }

        return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
    }

    /// <summary>
    /// Extends the span and token text of a quoted sheet node backwards over any whitespace
    /// that directly precedes it in the source text.
    /// </summary>
    /// <param name="quotedSheetNode">The quoted sheet node to adjust</param>
    /// <param name="sourceText">The text the node was parsed from</param>
    internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
    {
        var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
        SourceLocation currentLocation = quotedSheetNode.Span.Location;
        if (newPosition == currentLocation.Position)
        {
            return;
        }

        // NOTE(review): the column grows by the number of characters the start moved back; a decrease looks more natural - confirm
        var newLocation = new SourceLocation(newPosition, currentLocation.Line, currentLocation.Column + currentLocation.Position - newPosition);
        quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

        // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
        typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
            ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
    }

    // Walks back from the node's start over preceding whitespace and returns the resulting position
    private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
    {
        var startIndex = nodeSheetQuoted.Span.Location.Position;
        while (startIndex > 0)
        {
            if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
            {
                break;
            }

            startIndex--;
        }

        return startIndex;
    }

    // Returns s without its first removeFirst and last removeLast characters
    private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
    {
        return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
    }

    public override bool Equals(object other) => Equals(other as PrefixInfo);

    public bool Equals(PrefixInfo other)
    {
        if (ReferenceEquals(null, other)) return false;
        if (ReferenceEquals(this, other)) return true;
        return _fileNumber == other._fileNumber
               && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase)
               && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase)
               && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase)
               && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
    }

    public override int GetHashCode()
    {
        // Uses the same fields and the same case-insensitive comparison as Equals
        unchecked
        {
            var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
            hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
            hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
            hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
            hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
            return hashCode;
        }
    }

    public static bool operator ==(PrefixInfo left, PrefixInfo right)
    {
        return Equals(left, right);
    }

    public static bool operator !=(PrefixInfo left, PrefixInfo right)
    {
        return !Equals(left, right);
    }

    /// <summary>
    /// Renders the prefix: optional quote, file path, bracketed file number and/or name, sheet(s),
    /// optional closing quote, and a trailing "!".
    /// </summary>
    public override string ToString()
    {
        var res = new StringBuilder();
        if (IsQuoted) res.Append("'");
        if (HasFilePath) res.Append(FilePath);
        if (HasFileNumber) res.Append($"[{FileNumber}]");
        if (HasFileName) res.Append($"[{FileName}]");
        if (HasSheet) res.Append(Sheet);
        if (HasMultipleSheets) res.Append(MultipleSheets);
        if (IsQuoted) res.Append("'");
        res.Append("!");
        return res.ToString();
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v162/WordsTerminal.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;
namespace XLParser.Web.XLParserVersions.v162
{
/// <summary>
/// Terminal that determines whether the input contains one of the expected words.
/// </summary>
/// <remarks>
/// The words are stored in a character tree. Children of each node are represented as an array
/// to allow direct indexation. Do not use for words that have a large difference between low and
/// high character of a token.
/// </remarks>
public class WordsTerminal : Terminal
{
    private readonly Node _rootNode;
    private readonly List<string> _words;
    private bool _caseSensitive;

    /// <param name="name">Name of the terminal</param>
    /// <param name="words">The words this terminal matches</param>
    public WordsTerminal(string name, IEnumerable<string> words) : base(name)
    {
        _rootNode = new Node(0);
        _words = new List<string>(words);
    }

    /// <summary>
    /// Builds the word tree; words are upper-cased first when the grammar is not case sensitive.
    /// </summary>
    public override void Init(GrammarData grammarData)
    {
        base.Init(grammarData);
        _caseSensitive = Grammar.CaseSensitive;
        foreach (var word in _words)
        {
            AddWordToTree(_caseSensitive ? word : word.ToUpperInvariant());
        }

        if (EditorInfo == null)
        {
            EditorInfo = new TokenEditorInfo(TokenType.Unknown, TokenColor.Text, TokenTriggers.None);
        }
    }

    public override IList<string> GetFirsts() => _words;

    /// <summary>
    /// Matches the longest word that starts at the current preview position.
    /// </summary>
    /// <returns>A token for the matched word, or null when no word matches</returns>
    public override Token TryMatch(ParsingContext context, ISourceStream source)
    {
        var node = _rootNode;
        // Deepest node so far that completes a word. Remembering it allows falling back to a
        // shorter word when the walk continues into a longer word that is not completed.
        var lastMatch = node.IsTerminal ? node : null;
        var input = source.Text;
        for (var i = source.PreviewPosition; i < input.Length; ++i)
        {
            var c = _caseSensitive ? input[i] : char.ToUpperInvariant(input[i]);
            var nextNode = node[c];
            if (nextNode is null)
            {
                break;
            }

            node = nextNode;
            if (node.IsTerminal)
            {
                lastMatch = node;
            }
        }

        if (lastMatch is null)
        {
            return null;
        }

        // A node's length equals the number of characters on the path leading to it
        source.PreviewPosition += lastMatch.Length;
        return source.CreateToken(OutputTerminal);
    }

    // Adds the word to the tree, one node per character, and marks the last node as a complete word
    private void AddWordToTree(string word)
    {
        var node = _rootNode;
        foreach (var c in word)
        {
            node = node.GetOrAddChild(c);
        }

        node.IsTerminal = true;
    }

    // Tree node whose children are stored in an array covering the characters _lowChar.._highChar
    private class Node
    {
        private char _lowChar = '\0';
        private char _highChar = '\0';
        private Node[] _children;

        public Node(int length)
        {
            Length = length;
        }

        // True when the path to this node spells a complete word
        public bool IsTerminal { get; set; }

        // Depth of the node, i.e. the number of characters on the path from the root
        public int Length { get; }

        // The child for character c, or null when there is none
        public Node this[char c]
        {
            get
            {
                if (_children is null)
                {
                    return null;
                }

                if (c < _lowChar || c > _highChar)
                {
                    return null;
                }

                return _children[c - _lowChar];
            }
        }

        // Returns the child for character c, creating it (and growing the array) when needed
        internal Node GetOrAddChild(char c)
        {
            if (_children is null)
            {
                var node = new Node(Length + 1);
                _children = new[] { node };
                _lowChar = c;
                _highChar = c;
                return node;
            }

            var newLowChar = (char)Math.Min(_lowChar, c);
            if (newLowChar != _lowChar)
            {
                // Grow at the front: enlarge, shift the existing children up and clear the gap
                var newChildrenCount = _highChar - newLowChar + 1;
                Array.Resize(ref _children, newChildrenCount);
                var ofs = _lowChar - newLowChar;
                Array.Copy(_children, 0, _children, ofs, newChildrenCount - ofs);
                Array.Clear(_children, 0, ofs);
                _lowChar = newLowChar;
                return _children[0] = new Node(Length + 1);
            }

            var newHighChar = (char)Math.Max(_highChar, c);
            if (newHighChar != _highChar)
            {
                // Grow at the end: the new character takes the last slot
                var newChildrenCount = newHighChar - _lowChar + 1;
                Array.Resize(ref _children, newChildrenCount);
                _highChar = newHighChar;
                return _children[newChildrenCount - 1] = new Node(Length + 1);
            }

            // Within the current bounds: reuse the slot or fill it
            var charIdx = c - _lowChar;
            var child = _children[charIdx];
            if (child is null)
            {
                return _children[charIdx] = new Node(Length + 1);
            }

            return child;
        }
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v163/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
namespace XLParser.Web.XLParserVersions.v163
{
///
/// Contains the XLParser grammar
///
[Language("Excel Formulas", "1.6.3", "Grammar for Excel Formulas")]
public class ExcelFormulaGrammar : Grammar
{
#region 1-Terminals
#region Symbols and operators
// Terminals for punctuation and operators. Apart from intersectop, every getter calls
// ToTerm with the literal symbol text.
public Terminal at => ToTerm("@");
public Terminal comma => ToTerm(",");
public Terminal colon => ToTerm(":");
public Terminal hash => ToTerm("#");
public Terminal semicolon => ToTerm(";");
public Terminal OpenParen => ToTerm("(");
public Terminal CloseParen => ToTerm(")");
public Terminal CloseSquareParen => ToTerm("]");
public Terminal OpenSquareParen => ToTerm("[");
public Terminal exclamationMark => ToTerm("!");
public Terminal CloseCurlyParen => ToTerm("}");
public Terminal OpenCurlyParen => ToTerm("{");
public Terminal QuoteS => ToTerm("'");
// Arithmetic and text operators
public Terminal mulop => ToTerm("*");
public Terminal plusop => ToTerm("+");
public Terminal divop => ToTerm("/");
public Terminal minop => ToTerm("-");
public Terminal concatop => ToTerm("&");
public Terminal expop => ToTerm("^");
// Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
// Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);
public Terminal percentop => ToTerm("%");
// Comparison operators
public Terminal gtop => ToTerm(">");
public Terminal eqop => ToTerm("=");
public Terminal ltop => ToTerm("<");
public Terminal neqop => ToTerm("<>");
public Terminal gteop => ToTerm(">=");
public Terminal lteop => ToTerm("<=");
#endregion
#region Literals
// Boolean literal: TRUE or FALSE.
// NOTE(review): the trailing "T", "F" arguments are presumably the token prefixes handed to the scanner - confirm against RegexBasedTerminal
public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE", "T", "F")
{
Priority = TerminalPriority.Bool
};
// Numeric literal; integer values are tried as Int32, then Int64, then big integer
public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None)
{
DefaultIntTypes = new[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt }
};
// Double-quoted text; a doubled quote is allowed inside, line breaks are allowed, no escape sequences
public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes);
// Single-quoted string with the same options as TextToken
public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes)
{ Priority = TerminalPriority.SingleQuotedString };
// Error values other than #REF!, which has its own token below
public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!", "#");
// The #REF! error
public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError);
#endregion
#region Functions
private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers
// Content of a regex character class: the special characters plus backslash, dot and word characters
private const string AllUdfChars = SpecialUdfChars + @"\\.\w";
// Optional qualifier in front of a UDF name: a quoted add-in file name ending in .xla followed by "!", or "_xll."
private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)";
// The following regex uses the rather exotic feature Character Class Subtraction
// https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction
// Three alternatives, each followed by "(": a single character other than C, c, R or r;
// a prefix followed by a single character; or an optional prefix followed by 2 to 1023 characters
private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\(";
public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) { Priority = TerminalPriority.UDF };
// INDEX, OFFSET or INDIRECT, including the opening parenthesis
public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(", "I", "O")
{ Priority = TerminalPriority.ExcelRefFunction };
// IF or CHOOSE, including the opening parenthesis
public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(", "I", "C")
{ Priority = TerminalPriority.ExcelRefFunction };
// Every name in excelFunctionList with "(" appended
public Terminal ExcelFunction { get; } = new WordsTerminal(GrammarNames.ExcelFunction, excelFunctionList.Select(f => f + '('))
{ Priority = TerminalPriority.ExcelFunction };
// Using this instead of Empty allows a more accurate tree
public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);
#endregion
#region References and names
// Column letters from A up to and including XFD
private const string ColumnPattern = @"(?:[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D]|[A-Z]{1,2})";
// Row numbers from 1 up to and including 1048576, without leading zeros
private const string RowPattern = @"(?:104857[0-6]|10485[0-6][0-9]|1048[0-4][0-9]{2}|104[0-7][0-9]{3}|10[0-3][0-9]{4}|[1-9][0-9]{1,5}|[1-9])";
// The strings "A".."Z" plus "$"
private static readonly string[] ColumnPrefix = Enumerable.Range('A', 'Z' - 'A' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray();
// The strings "1".."9" plus "$"
private static readonly string[] RowPrefix = Enumerable.Range('1', '9' - '1' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray();
// Column range such as A:B; each side may carry a "$"
public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern, ColumnPrefix);
// Row range such as 1:2; each side may carry a "$"
public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?" + RowPattern + ":[$]?" + RowPattern, RowPrefix);
// Single cell such as A1 or $A$1
private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?" + RowPattern;
public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex, ColumnPrefix)
{ Priority = TerminalPriority.CellToken };
// The Unicode categories that together form the regex class \p{L}
private static readonly HashSet<UnicodeCategory> UnicodeLetterCategories = new HashSet<UnicodeCategory>
{
    UnicodeCategory.UppercaseLetter,
    UnicodeCategory.LowercaseLetter,
    UnicodeCategory.TitlecaseLetter,
    UnicodeCategory.ModifierLetter,
    UnicodeCategory.OtherLetter
};
// 48718 letters. NOTE(review): listing them as prefixes presumably lets the parser skip this
// terminal for tokens starting with digits, parentheses, operators... - confirm
private static readonly string[] UnicodeLetters = Enumerable.Range(0, ushort.MaxValue)
    .Where(codePoint => UnicodeLetterCategories.Contains(CharUnicodeInfo.GetUnicodeCategory((char)codePoint)))
    .Select(codePoint => char.ToString((char)codePoint))
    .ToArray();
// Every letter plus backslash and underscore, mirroring NameStartCharRegex
private static readonly string[] NameStartCharPrefix = UnicodeLetters.Concat(new[] { @"\", "_" }).ToArray();
// Start with a letter, backslash or underscore, continue with word character (letters, numbers and underscore), backslash, dot, question mark or euro sign
private const string NameStartCharRegex = @"[\p{L}\\_]";
private const string NameValidCharacterRegex = @"[\w\\_\.\?€]";
public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*", NameStartCharPrefix)
    { Priority = TerminalPriority.Name };
// Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference.
// If we ever parse R1C1 references, make sure to include them here
// TODO: Add all function names here
private const string NamedRangeCombinationRegex =
    // TRUE or FALSE followed by at least one more name character
    "((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
    // \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
    + "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
    // allow large cell references (e.g. A1048577) as named range
    // The last alternative covers row numbers of eight or more digits; it has to be \d, a bare "d" would match the letter
    + "|(" + ColumnPattern + @"(104857[7-9]|10485[89][0-9]|1048[6-9][0-9]{2}|1049[0-9]{3}|10[5-9][0-9]{4}|1[1-9][0-9]{5}|[2-9][0-9]{6}|\d{8,})" + NameValidCharacterRegex + "*)"
    ;
// To prevent e.g. "A1A1" being parsed as 2 cell tokens
public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NamedRangeCombinationRegex,
        ColumnPrefix.Concat(new[] { "T", "F" }).ToArray())
    { Priority = TerminalPriority.NamedRangeCombination };
// Reserved names: "_xlnm." followed by letters and underscores
public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+", "_")
{ Priority = TerminalPriority.ReservedName };
#region Structured References
// Item specifiers of a structured reference: #All, #Data, #Headers, #Totals or #This Row
private const string SRSpecifierRegex = @"#(All|Data|Headers|Totals|This Row)";
public Terminal SRSpecifierToken = new RegexBasedTerminal(GrammarNames.TokenSRSpecifier, SRSpecifierRegex, "#")
{ Priority = TerminalPriority.StructuredReference };
// Column name: any characters except [ ] ' # @, each of which may appear when preceded by a single quote
private const string SRColumnRegex = @"(?:[^\[\]'#@]|(?:'['\[\]#@]))+";
public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, SRColumnRegex)
{ Priority = TerminalPriority.StructuredReference };
#endregion
#region Prefixes
// Characters that are excluded from an unquoted sheet name (content of a regex character class)
private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
// Characters that are excluded from both unquoted and quoted sheet names (content of a regex character class)
private const string notSheetNameChars = @"'*\[\]\\:/?";
//const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
//const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
// The static fields below are used by the field initialisers that follow them, so their order matters
private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
// Quoted sheet name content; a quote inside the name is written as two quotes
private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*";
//private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";
// Unquoted sheet name including the trailing "!"
public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!")
{ Priority = TerminalPriority.SheetToken };
// Quoted sheet name from after the opening quote up to and including the closing quote and "!"
public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!")
{ Priority = TerminalPriority.SheetQuotedToken };
// Two sheet names separated by ":", including the trailing "!"
private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!";
private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!";
public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
// Bracketed file number such as [1]; the lookahead requires a "!" further on with no brackets in between
private const string fileNameNumericRegex = @"\[[0-9]+\](?=[^\[\]]*!)";
public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex, "[")
{ Priority = TerminalPriority.FileNameNumericToken };
// Bracketed file name such as [Book1.xlsx], with the same lookahead as the numeric variant
private const string fileNameInBracketsRegex = @"\[[^\[\]]+\](?=[^\[\]]*!)";
public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex, "[")
{ Priority = TerminalPriority.FileName };
// Source: https://stackoverflow.com/a/14632579
// File name without brackets: a name part, a dot, then one to four characters
private const string fileNameRegex = @"[^\.\\\[\]]+\..{1,4}";
public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex)
{ Priority = TerminalPriority.FileName };
// Source: http://stackoverflow.com/a/6416209/572635
// A drive letter or a server/share name, followed by zero or more backslash-terminated directories
private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\-.$ @]+)\\(([^<>\"" /\|?*\\']|( |''))*\\)*";
// http(s)://host, an optional port, then zero or more slash-terminated path segments.
// The port digits must be a character class: a group "(0-9)" would only match the literal text "0-9"
private const string urlPathRegex = @"http(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:[0-9]*)*[/]([a-zA-Z0-9\-\.\?\,\'+&%\$#_ ()]*[/])*";
private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")";
public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex)
    { Priority = TerminalPriority.FileNamePath };
#endregion
#endregion
#endregion
#region 2-NonTerminals
// Most non-terminals are first defined here, so they can be used anywhere in the rules
// Otherwise you can only use non-terminals that have been defined previously
// Each non-terminal is named after the matching GrammarNames constant; the rules themselves
// are assigned in the constructor.
public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument);
public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments);
public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns);
public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant);
public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula);
public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows);
public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool);
public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell);
public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant);
public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray);
public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument);
public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error);
public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula);
public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq);
public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall);
public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName);
public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange);
public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp);
public NonTerminal MultiRangeFormula{ get; } = new NonTerminal(GrammarNames.MultiRangeFormula);
public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange);
public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number);
public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix);
public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference);
public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError);
public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName);
public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName);
public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet);
public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart);
public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
public NonTerminal StructuredReferenceColumn { get; } = new NonTerminal(GrammarNames.StructuredReferenceColumn);
public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
public NonTerminal StructuredReferenceSpecifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceSpecifier);
public NonTerminal StructuredReferenceQualifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceQualifier);
public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);
#endregion
/// <summary>
/// Builds the Excel formula grammar: marks punctuation, assigns the production rule of every non-terminal
/// and registers the precedence and associativity of the operators.
/// </summary>
public ExcelFormulaGrammar()
{
#region Punctuation
// Parentheses and curly braces are registered as punctuation.
// NOTE(review): ExcelFormulaParser.Print re-adds these symbols itself, which suggests punctuation is left out of the parse tree - confirm against the Irony documentation.
MarkPunctuation(OpenParen, CloseParen);
MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
#endregion
#region Rules
#region Base rules
// The grammar accepts a formula with or without leading "=", an array formula "{=...}" or a "=" followed by a union of references
Root = Start;
Start.Rule =
FormulaWithEq
| Formula
| ArrayFormula
| MultiRangeFormula
;
MarkTransient(Start);
ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;
MultiRangeFormula.Rule = eqop + Union;
FormulaWithEq.Rule = eqop + Formula;
// ReduceHere() is a grammar hint attached to the Reference alternative.
// NOTE(review): presumably it settles the conflict between finishing a Reference as a Formula and continuing a reference expression - confirm.
Formula.Rule =
Reference + ReduceHere()
| Constant
| FunctionCall
| ConstantArray
| OpenParen + Formula + CloseParen
| ReservedName
;
ReservedName.Rule = ReservedNameToken;
Constant.Rule =
Number
| Text
| Bool
| Error
;
// Thin wrappers: each of these non-terminals consists of exactly one token
Text.Rule = TextToken;
Number.Rule = NumberToken;
Bool.Rule = BoolToken;
Error.Rule = ErrorToken;
RefError.Rule = RefErrorToken;
#endregion
#region Functions
// A FunctionCall is either a named function call or a unary/binary operator application.
// The named-function alternative contains no OpenParen.
// NOTE(review): ExcelFormulaParser.GetFunction strips the final character of the printed function name, so the "(" is presumably part of the function name token - confirm against the terminal definitions.
FunctionCall.Rule =
FunctionName + Arguments + CloseParen
| PrefixOp + Formula
| Formula + PostfixOp
| Formula + InfixOp + Formula
;
FunctionName.Rule = ExcelFunction;
// Comma-separated list of arguments; MakeStarRule also allows an empty list
Arguments.Rule = MakeStarRule(Arguments, comma, Argument);
EmptyArgument.Rule = EmptyArgumentToken;
Argument.Rule = Formula | EmptyArgument;
// All prefix operators get the UnaryPreFix precedence, also "+" and "-" which are registered below with the Addition precedence
PrefixOp.Rule =
ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
MarkTransient(PrefixOp);
InfixOp.Rule =
expop
| mulop
| divop
| plusop
| minop
| concatop
| gtop
| eqop
| ltop
| neqop
| gteop
| lteop;
MarkTransient(InfixOp);
// ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
PostfixOp.Rule = PreferShiftHere() + percentop;
MarkTransient(PostfixOp);
#endregion
#region References
// A Reference is a single reference item (optionally with a sheet/file prefix), a parenthesized reference,
// a reference-returning function/operator call or a DDE link
Reference.Rule =
ReferenceItem
| ReferenceFunctionCall
| OpenParen + Reference + PreferShiftHere() + CloseParen
| Prefix + ReferenceItem
| DynamicDataExchange
;
// Reference operators: range (":"), intersection, parenthesized union, reference-returning functions and the postfix "#" operator
ReferenceFunctionCall.Rule =
Reference + colon + Reference
| Reference + intersectop + Reference
| OpenParen + Union + CloseParen
| RefFunctionName + Arguments + CloseParen
| Reference + hash
;
RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;
// One or more references separated by commas
Union.Rule = MakePlusRule(Union, comma, Reference);
ReferenceItem.Rule =
Cell
| NamedRange
| VRange
| HRange
| RefError
| UDFunctionCall
| StructuredReference
;
MarkTransient(ReferenceItem);
// Like the built-in function rule, the user defined function rule contains no OpenParen
UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
UDFName.Rule = UDFToken;
VRange.Rule = VRangeToken;
HRange.Rule = HRangeToken;
Cell.Rule = CellToken;
File.Rule =
FileNameNumericToken
| FileNameEnclosedInBracketsToken
| FilePathToken + FileNameEnclosedInBracketsToken
| FilePathToken + FileName
;
DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;
NamedRange.Rule = NameToken | NamedRangeCombinationToken;
// Prefix of a reference: sheet, file, file + sheet or multiple sheets, each in an unquoted and a quoted (QuoteS) form.
// A RefErrorToken is accepted as prefix as well.
Prefix.Rule =
SheetToken
| QuoteS + SheetQuotedToken
| File + SheetToken
| QuoteS + File + SheetQuotedToken
| File + exclamationMark
| MultipleSheetsToken
| QuoteS + MultipleSheetsQuotedToken
| File + MultipleSheetsToken
| QuoteS + File + MultipleSheetsQuotedToken
| RefErrorToken
;
// Structured (table) references: optional qualifier name followed by a bracketed expression of specifiers and columns
StructuredReferenceQualifier.Rule = NameToken;
StructuredReferenceSpecifier.Rule =
SRSpecifierToken
| OpenSquareParen + SRSpecifierToken + CloseSquareParen;
StructuredReferenceColumn.Rule =
SRColumnToken
| OpenSquareParen + SRColumnToken + CloseSquareParen;
// Allowed combinations inside the brackets: a column or column range (optionally preceded by "@"),
// or one or two specifiers optionally followed by a column or column range
StructuredReferenceExpression.Rule =
StructuredReferenceColumn
| StructuredReferenceColumn + colon + StructuredReferenceColumn
| at + StructuredReferenceColumn
| at + StructuredReferenceColumn + colon + StructuredReferenceColumn
| StructuredReferenceSpecifier
| StructuredReferenceSpecifier + comma + StructuredReferenceColumn
| StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
| StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier
| StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn
| StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
;
StructuredReference.Rule =
OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
| StructuredReferenceQualifier + OpenSquareParen + CloseSquareParen
| StructuredReferenceQualifier + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
;
#endregion
#region Arrays
// Constant arrays: rows of comma-separated constants, separated from each other by semicolons
ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;
ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);
ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
#endregion
#endregion
#region 5-Operator Precedence
// Some of these operators are neutral associative instead of left associative,
// but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
// structure of the parse tree, we like consistency.
// The numeric precedence levels are defined in the Precedence class.
RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at);
RegisterOperators(Precedence.Union, Associativity.Left, comma);
RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
RegisterOperators(Precedence.Range, Associativity.Left, colon);
#endregion
}
#region Precedence and Priority constants
// Operator precedence levels passed to RegisterOperators and ImplyPrecedenceHere; a higher number is a higher level.
// Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
// Could also be an enum, but this way you don't need int casts
private static class Precedence
{
// Don't use priority 0, Irony seems to view it as no priority set
public const int Comparison = 1;
public const int Concatenation = 2;
public const int Addition = 3;
public const int Multiplication = 4;
public const int Exponentiation = 5;
public const int UnaryPostFix = 6;
public const int UnaryPreFix = 7;
// Level 8 is intentionally left unused (the constant below is commented out)
//public const int Reference = 8;
// Reference operators rank above all arithmetic operators
public const int Union = 9;
public const int Intersection = 10;
public const int Range = 11;
}
// Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
// E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
// E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
// The commented-out constants record Irony's own Low/Normal/High values for orientation.
private static class TerminalPriority
{
// Irony Low value
//public const int Low = -1000;
// Lowest priorities used by this grammar; Name and FileNamePath share the same value
public const int Name = -800;
public const int ReservedName = -700;
// StructuredReference and FileName share the same value
public const int StructuredReference = -500;
public const int FileName = -500;
public const int FileNamePath = -800;
public const int SingleQuotedString = -100;
// Irony Normal value, default value
//public const int Normal = 0;
public const int Bool = 0;
public const int MultipleSheetsToken = 100;
// Irony High value
//public const int High = 1000;
public const int CellToken = 1000;
public const int NamedRangeCombination = 1100;
public const int UDF = 1150;
// Highest priority used by this grammar; shared by the function, numeric file name and sheet tokens
public const int ExcelFunction = 1200;
public const int ExcelRefFunction = 1200;
public const int FileNameNumericToken = 1200;
public const int SheetToken = 1200;
public const int SheetQuotedToken = 1200;
}
#endregion
// Names of the built-in Excel functions. Note: every access re-reads and re-splits the embedded resource.
private static string[] excelFunctionList => GetExcelFunctionList();

// Reads the embedded v163 function list and returns its non-empty lines (split on '\n' and '\r').
private static string[] GetExcelFunctionList()
{
    char[] lineBreaks = { '\n', '\r' };
    string contents;
    using (var reader = new StringReader(Properties.Resources.ExcelBuiltinFunctionList_v163))
    {
        contents = reader.ReadToEnd();
    }
    return contents.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
}
}
#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
#region Non-Terminals
// Names given to the NonTerminal instances of ExcelFormulaGrammar; parse tree nodes are matched against them by name
public const string Argument = "Argument";
public const string Arguments = "Arguments";
public const string ArrayColumns = "ArrayColumns";
public const string ArrayConstant = "ArrayConstant";
public const string ArrayFormula = "ArrayFormula";
public const string ArrayRows = "ArrayRows";
public const string Bool = "Bool";
public const string Cell = "Cell";
public const string Constant = "Constant";
public const string ConstantArray = "ConstantArray";
public const string DynamicDataExchange = "DynamicDataExchange";
public const string EmptyArgument = "EmptyArgument";
public const string Error = "Error";
public const string ExcelFunction = "ExcelFunction";
public const string File = "File";
public const string Formula = "Formula";
public const string FormulaWithEq = "FormulaWithEq";
public const string FunctionCall = "FunctionCall";
public const string FunctionName = "FunctionName";
// Constant name and value differ: HorizontalRange is "HRange"
public const string HorizontalRange = "HRange";
public const string MultiRangeFormula = "MultiRangeFormula";
public const string NamedRange = "NamedRange";
public const string Number = "Number";
public const string Prefix = "Prefix";
public const string QuotedFileSheet = "QuotedFileSheet";
public const string Range = "Range";
public const string Reference = "Reference";
public const string ReferenceFunctionCall = "ReferenceFunctionCall";
public const string RefError = "RefError";
public const string RefFunctionName = "RefFunctionName";
public const string ReservedName = "ReservedName";
public const string Sheet = "Sheet";
public const string StructuredReference = "StructuredReference";
public const string StructuredReferenceColumn = "StructuredReferenceColumn";
public const string StructuredReferenceExpression = "StructuredReferenceExpression";
public const string StructuredReferenceSpecifier = "StructuredReferenceSpecifier";
public const string StructuredReferenceQualifier = "StructuredReferenceQualifier";
public const string Text = "Text";
public const string UDFName = "UDFName";
public const string UDFunctionCall = "UDFunctionCall";
public const string Union = "Union";
// Constant name and value differ: VerticalRange is "VRange"
public const string VerticalRange = "VRange";
#endregion
#region Transient Non-Terminals
// Names of the non-terminals that the grammar marks as transient
public const string TransientStart = "Start";
public const string TransientInfixOp = "InfixOp";
public const string TransientPostfixOp = "PostfixOp";
public const string TransientPrefixOp = "PrefixOp";
public const string TransientReferenceItem = "ReferenceItem";
#endregion
#region Terminals
// Names of the tokens; most values end in "Token", the exceptions are noted below
public const string TokenBool = "BoolToken";
public const string TokenCell = "CellToken";
public const string TokenEmptyArgument = "EmptyArgumentToken";
public const string TokenError = "ErrorToken";
public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
public const string TokenFilePath = "FilePathToken";
public const string TokenFileName = "FileNameToken";
public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
public const string TokenFileNameNumeric = "FileNameNumericToken";
public const string TokenHRange = "HRangeToken";
// Also returned by ExcelFormulaParser.GetFunction as the function name of an intersection
public const string TokenIntersect = "INTERSECT";
public const string TokenMultipleSheets = "MultipleSheetsToken";
public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
public const string TokenName = "NameToken";
public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
public const string TokenNumber = "NumberToken";
public const string TokenRefError = "RefErrorToken";
public const string TokenReservedName = "ReservedNameToken";
// Value has no "Token" suffix
public const string TokenSingleQuotedString = "SingleQuotedString";
// Constant names and values differ: the values contain "SheetName"
public const string TokenSheet = "SheetNameToken";
public const string TokenSheetQuoted = "SheetNameQuotedToken";
public const string TokenSRColumn = "SRColumnToken";
public const string TokenSRSpecifier = "SRSpecifierToken";
public const string TokenText = "TextToken";
public const string TokenUDF = "UDFToken";
// Also returned by ExcelFormulaParser.GetFunction as the function name of a union
public const string TokenUnionOperator = ",";
public const string TokenVRange = "VRangeToken";
#endregion
}
#endregion
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v163/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v163
{
/// <summary>
/// Excel formula parser.
/// Contains the parser and utilities that operate directly on the parse tree or make working with the parse tree easier.
/// </summary>
public static class ExcelFormulaParser
{
/// <summary>
/// Parser instance of the current thread; stays null until <see cref="P"/> is first read on that thread
/// </summary>
[ThreadStatic]
private static Parser _p;

/// <summary>
/// Parser for the calling thread, created on first use so that no instance is shared between threads
/// </summary>
private static Parser P
{
    get
    {
        if (_p == null)
        {
            _p = new Parser(new ExcelFormulaGrammar());
        }

        return _p;
    }
}
/// <summary>
/// Parse a formula and return the root node of the resulting parse tree
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed
/// </exception>
/// <returns>Parse tree root node</returns>
public static ParseTreeNode Parse(string input)
{
    ParseTree tree = ParseToTree(input);
    return tree.Root;
}
/// <summary>
/// Parse a formula and return the complete parse tree
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed
/// </exception>
/// <returns>Parse tree</returns>
public static ParseTree ParseToTree(string input)
{
    ParseTree parsed = P.Parse(input);
    if (parsed.HasErrors())
    {
        throw new ArgumentException("Failed parsing input <<" + input + ">>");
    }

    // First pass: move the span of every intersection token one position back and force its length to 1
    foreach (ParseTreeNode node in parsed.Root.AllNodes())
    {
        if (!node.Is(GrammarNames.TokenIntersect))
        {
            continue;
        }

        int position = node.Span.Location.Position - 1;
        int line = node.Span.Location.Line;
        int column = node.Span.Location.Column - 1;
        node.Span = new SourceSpan(new SourceLocation(position, line, column), 1);
    }

    // Second pass: let PrefixInfo correct quoted sheet name tokens using the original input text
    foreach (ParseTreeNode node in parsed.Root.AllNodes())
    {
        if (node.Is(GrammarNames.TokenSheetQuoted))
        {
            PrefixInfo.FixQuotedSheetNodeForWhitespace(node, input);
        }
    }

    return parsed;
}
/// <summary>
/// All nodes of the tree (terminals and non-terminals) in depth-first pre-order, with a conditional stop
/// </summary>
/// <param name="root">The root node</param>
/// <param name="stopAt">Don't process the children of a node matching this predicate; the matching node itself is still returned</param>
/// <returns>Lazily evaluated sequence that starts with <paramref name="root"/></returns>
// inspiration taken from https://irony.codeplex.com/discussions/213938
public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
{
    // Explicit stack instead of recursion
    var stack = new Stack<ParseTreeNode>();
    stack.Push(root);
    while (stack.Count > 0)
    {
        var node = stack.Pop();
        yield return node;

        // Check if we don't want to process the children of this node
        if (stopAt != null && stopAt(node))
        {
            continue;
        }

        var children = node.ChildNodes;
        // Push children on in reverse order so that they will
        // be evaluated left -> right when popped.
        for (int i = children.Count - 1; i >= 0; i--)
        {
            stack.Push(children[i]);
        }
    }
}
/// <summary>
/// All nodes of the tree (terminals and non-terminals) in depth-first pre-order
/// </summary>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
{
    return AllNodesConditional(root);
}

/// <summary>
/// All nodes of a certain type in depth-first pre-order
/// </summary>
/// <param name="root">The root node</param>
/// <param name="type">Node type to keep, compared with <c>Is</c></param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
{
    return AllNodes(root.AllNodes(), type);
}

/// <summary>
/// Filters an already enumerated node sequence down to the nodes of a certain type
/// </summary>
internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
{
    return allNodes.Where(node => node.Is(type));
}
/// <summary>
/// Find the node that has the given node as a direct child
/// </summary>
/// <remarks>
/// This is an expensive operation, as the whole tree will be searched through
/// </remarks>
/// <exception cref="ArgumentException">If no node of the tree has <paramref name="child"/> as a child</exception>
public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
{
    foreach (ParseTreeNode candidate in treeRoot.AllNodes())
    {
        foreach (ParseTreeNode candidateChild in candidate.ChildNodes)
        {
            if (candidateChild == child)
            {
                return candidate;
            }
        }
    }

    throw new ArgumentException("Child is not part of the tree", nameof(child));
}
/// <summary>
/// Name of the grammar term of this node, i.e. one of the <see cref="GrammarNames"/> values or the name of a token
/// </summary>
public static string Type(this ParseTreeNode node) => node.Term.Name;
/// <summary>
/// True if the type of the node (see <c>Type</c>) equals the given name; the comparison is case-sensitive
/// </summary>
public static bool Is(this ParseTreeNode pt, string type)
{
    string actual = pt.Type();
    return string.Equals(actual, type);
}
/// <summary>
/// True for function call nodes (FunctionCall, ReferenceFunctionCall, UDFunctionCall) and for
/// two-child Reference nodes whose second child is itself a function
/// </summary>
public static Boolean IsFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            return true;
        case GrammarNames.Reference:
            // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
            return input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction();
        default:
            return false;
    }
}
/// <summary>
/// True if this node represents parentheses "(_)": a Formula or Reference node whose only child
/// has the same type as the node itself
/// </summary>
public static bool IsParentheses(this ParseTreeNode input)
{
    string kind = input.Type();
    if (kind != GrammarNames.Formula && kind != GrammarNames.Reference)
    {
        return false;
    }

    var children = input.ChildNodes;
    return children.Count == 1 && children[0].Is(kind);
}
/// <summary>
/// True if this node is a function with exactly three children of which the middle one is an operator
/// </summary>
public static bool IsBinaryOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 3)
    {
        return false;
    }

    TermFlags middleFlags = input.ChildNodes[1].Term.Flags;
    return (middleFlags & TermFlags.IsOperator) == TermFlags.IsOperator;
}
/// <summary>
/// True if this node is a binary operation of type FunctionCall
/// </summary>
public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
{
    if (!input.IsBinaryOperation())
    {
        return false;
    }

    return input.Is(GrammarNames.FunctionCall);
}
/// <summary>
/// True if this node is a binary operation of type ReferenceFunctionCall
/// </summary>
public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
{
    if (!input.IsBinaryOperation())
    {
        return false;
    }

    return input.Is(GrammarNames.ReferenceFunctionCall);
}
/// <summary>
/// True if this node is a unary prefix or a unary postfix operation
/// </summary>
public static bool IsUnaryOperation(this ParseTreeNode input)
{
    if (IsUnaryPrefixOperation(input))
    {
        return true;
    }

    return IsUnaryPostfixOperation(input);
}
/// <summary>
/// True if this node is a function with exactly two children of which the first one is an operator
/// </summary>
public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }

    TermFlags firstFlags = input.ChildNodes[0].Term.Flags;
    return (firstFlags & TermFlags.IsOperator) == TermFlags.IsOperator;
}
/// <summary>
/// True if this node is a function with exactly two children of which the second one is an operator
/// </summary>
public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }

    TermFlags secondFlags = input.ChildNodes[1].Term.Flags;
    return (secondFlags & TermFlags.IsOperator) == TermFlags.IsOperator;
}
// Returns the input without its last character; used by GetFunction to strip the trailing symbol of a function name.
// Throws ArgumentOutOfRangeException for an empty string.
private static string RemoveFinalSymbol(string input)
{
    return input.Remove(input.Length - 1);
}
/// <summary>
/// Get the function or operator name of this function call
/// </summary>
/// <returns>
/// <see cref="GrammarNames.TokenIntersect"/> for intersections, <see cref="GrammarNames.TokenUnionOperator"/> for unions,
/// the printed operator for unary and binary operations, the upper-cased function name (without its final symbol)
/// for named functions and the printed prefix followed by the function name for external user defined functions
/// </returns>
/// <exception cref="ArgumentException">If the node is none of the above</exception>
public static string GetFunction(this ParseTreeNode input)
{
    // Intersection and union are checked first: they have fixed names instead of a printable operator token
    if (input.IsIntersection())
    {
        return GrammarNames.TokenIntersect;
    }

    if (input.IsUnion())
    {
        return GrammarNames.TokenUnionOperator;
    }

    if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
    {
        return input.ChildNodes[1].Print();
    }

    if (input.IsUnaryPrefixOperation())
    {
        return input.ChildNodes[0].Print();
    }

    if (input.IsNamedFunction())
    {
        // Function names are identifiers, not linguistic text: upper-case them independently of the
        // current culture so that e.g. "sin(" yields "SIN" under a Turkish culture as well
        return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
    }

    if (input.IsExternalUDFunction())
    {
        return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
    }

    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// True if this node is a function whose name, as returned by <c>GetFunction</c>, equals the given name exactly
/// </summary>
public static bool MatchFunction(this ParseTreeNode input, string functionName)
{
    if (!IsFunction(input))
    {
        return false;
    }

    return string.Equals(GetFunction(input), functionName);
}
/// <summary>
/// Get all the arguments of a function or operation
/// </summary>
/// <returns>
/// The content of every argument for named and external user defined functions, the operand(s) for
/// operations and the members of the union for a union
/// </returns>
/// <exception cref="ArgumentException">If the node is not a function call</exception>
public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
{
    if (input.IsNamedFunction())
    {
        return input
            .ChildNodes[1] // "Arguments" non-terminal
            .ChildNodes // "Argument" non-terminals
            .Select(node => node.ChildNodes[0]);
    }

    if (input.IsBinaryOperation())
    {
        // The operator itself is the middle child
        return new[] {input.ChildNodes[0], input.ChildNodes[2]};
    }

    if (input.IsUnaryPrefixOperation())
    {
        return new[] {input.ChildNodes[1]};
    }

    if (input.IsUnaryPostfixOperation())
    {
        return new[] {input.ChildNodes[0]};
    }

    if (input.IsUnion())
    {
        return input.ChildNodes[0].ChildNodes;
    }

    if (input.IsExternalUDFunction())
    {
        return input // Reference
            .ChildNodes[1] // UDFunctionCall
            .ChildNodes[1] // Arguments
            .ChildNodes // Argument non-terminals
            .Select(node => node.ChildNodes[0]);
    }

    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// True if this node is a function whose first child is a FunctionName or RefFunctionName node,
/// i.e. a call to a built-in Excel function
/// </summary>
public static bool IsBuiltinFunction(this ParseTreeNode node)
{
    if (!node.IsFunction())
    {
        return false;
    }

    ParseTreeNode head = node.ChildNodes[0];
    return head.Is(GrammarNames.FunctionName) || head.Is(GrammarNames.RefFunctionName);
}
/// <summary>
/// True if this node is a binary operation whose operator is the intersection terminal
/// </summary>
public static bool IsIntersection(this ParseTreeNode input)
{
    if (!IsBinaryOperation(input))
    {
        return false;
    }

    string operatorName = input.ChildNodes[1].Token.Terminal.Name;
    return operatorName == GrammarNames.TokenIntersect;
}
/// <summary>
/// True if this node is a ReferenceFunctionCall whose only child is a Union node
/// </summary>
public static bool IsUnion(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.ReferenceFunctionCall))
    {
        return false;
    }

    var children = input.ChildNodes;
    return children.Count == 1 && children[0].Is(GrammarNames.Union);
}
/// <summary>
/// True if this node is a function call with a name (built-in, reference or user defined function)
/// rather than a unary or binary operation
/// </summary>
public static bool IsNamedFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.FunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.FunctionName);
        case GrammarNames.ReferenceFunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.RefFunctionName);
        case GrammarNames.UDFunctionCall:
            return true;
        default:
            return false;
    }
}
/// <summary>
/// True if this node is a binary or a unary operation
/// </summary>
public static bool IsOperation(this ParseTreeNode input)
{
    if (input.IsBinaryOperation())
    {
        return true;
    }

    return input.IsUnaryOperation();
}
/// <summary>
/// True if this node is a Reference with exactly two children of which the second is a named function,
/// i.e. a function call that carries a prefix
/// </summary>
public static bool IsExternalUDFunction(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.Reference) || input.ChildNodes.Count != 2)
    {
        return false;
    }

    return input.ChildNodes[1].IsNamedFunction();
}
/// <summary>
/// True if this node is a unary prefix operation whose operand is a number constant
/// </summary>
public static bool IsNumberWithSign(this ParseTreeNode input)
{
    if (!IsUnaryPrefixOperation(input))
    {
        return false;
    }

    // input.ChildNodes[1] is the operand; its first child has to be a Constant that wraps a Number
    ParseTreeNode operandContent = input.ChildNodes[1].ChildNodes[0];
    return operandContent.Is(GrammarNames.Constant)
           && operandContent.ChildNodes[0].Is(GrammarNames.Number);
}
/// <summary>
/// Extract the information of a Prefix non-terminal; delegates to <c>PrefixInfo.From</c>
/// </summary>
public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix)
{
    return PrefixInfo.From(prefix);
}
/// <summary>
/// Descend through first children until a node is reached that is not a Formula
/// </summary>
public static ParseTreeNode SkipFormula(this ParseTreeNode input)
{
    ParseTreeNode current = input;
    for (; current.Is(GrammarNames.Formula); current = current.ChildNodes.First())
    {
        // Nothing to do: stepping down happens in the loop header
    }

    return current;
}
/// <summary>
/// Get all child nodes that are references and aren't part of another reference expression
/// </summary>
/// <returns>
/// The outermost Reference nodes below (and including) the input node, each passed through
/// <c>SkipToRelevant</c>
/// </returns>
public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
{
    // The traversal does not descend into a Reference, so nested references are not reported separately
    return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
        .Where(node => node.Is(GrammarNames.Reference))
        .Select(node => node.SkipToRelevant());
}
/// <summary>
/// Gets the ParserReferences from the input parse tree node and its children
/// </summary>
/// <remarks>
/// 5 cases:
/// 1. ReferenceItem node: convert to ParserReference
/// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
///    (to include the references in the arguments of external UDFs)
/// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
/// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
/// 5. Other cases (RefFunctionCall, Union, Arguments): recursive call on the nodes returned from GetReferenceNodes(node)
/// </remarks>
public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
{
    // Unwrap a Reference that has a single child
    if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
    {
        node = node.ChildNodes[0];
    }

    var list = new List<ParserReference>();
    switch (node.Type())
    {
        case GrammarNames.Cell:
        case GrammarNames.NamedRange:
        case GrammarNames.HorizontalRange:
        case GrammarNames.VerticalRange:
        case GrammarNames.StructuredReference:
            list.Add(new ParserReference(node));
            break;
        case GrammarNames.Reference:
            // Prefix + ReferenceItem; the second child is searched as well for references in UDF arguments
            list.Add(new ParserReference(node));
            list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            break;
        default:
            if (node.IsRange())
            {
                var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray();
                var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray();
                if (IsCellReference(rangeStart) && IsCellReference(rangeEnd))
                {
                    // Cell:Cell - reuse the reference of the first cell and widen it to a cell range
                    ParserReference range = rangeStart.First();
                    range.MaxLocation = rangeEnd.First().MinLocation;
                    range.ReferenceType = ReferenceType.CellRange;
                    range.ReferenceNode = node;
                    range.LocationString = node.Print();
                    list.Add(range);
                }
                else if (IsTableReference(rangeStart) && IsTableReference(rangeEnd) && rangeStart.First().Name == rangeEnd.First().Name && rangeStart.First().TableColumns.Length == 1 && rangeEnd.First().TableColumns.Length == 1)
                {
                    // Two single-column references to the same table - merge them into one table reference
                    ParserReference range = rangeStart.First();
                    range.TableColumns = rangeStart.First().TableColumns.Concat(rangeEnd.First().TableColumns).ToArray();
                    // Keep the specifiers only when both limits agree on them
                    range.TableSpecifiers = rangeStart.First().TableSpecifiers.SequenceEqual(rangeEnd.First().TableSpecifiers) ? range.TableSpecifiers : new string[0];
                    range.ReferenceNode = node;
                    range.LocationString = node.Print();
                    list.Add(range);
                }
                else
                {
                    // Complex limits: report the references of both limits separately
                    list.AddRange(rangeStart);
                    list.AddRange(rangeEnd);
                }
            }
            else
            {
                list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            }

            break;
    }

    return list;
}
// True if the list consists of exactly one reference and that reference is a single cell
private static bool IsCellReference(IList<ParserReference> references)
{
    return references.Count == 1 && references[0].ReferenceType == ReferenceType.Cell;
}
// True if the list consists of exactly one reference and that reference is a table (structured) reference
private static bool IsTableReference(IList<ParserReference> references)
{
    return references.Count == 1 && references[0].ReferenceType == ReferenceType.Table;
}
/// <summary>
/// True if this node is a binary reference operation whose operator is ":"
/// </summary>
public static bool IsRange(this ParseTreeNode input)
{
    if (!input.IsBinaryReferenceOperation())
    {
        return false;
    }

    return input.ChildNodes[1].Is(":");
}
/// <summary>
/// Go to the first "relevant" child node, i.e. skips wrapper nodes
/// </summary>
/// <param name="input">The input parse tree node</param>
/// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
/// <remarks>
/// Skips:
/// * FormulaWithEq and ArrayFormula nodes
/// * Formula and Argument nodes with a single child
/// * Parentheses
/// * Reference nodes which are just wrappers
/// </remarks>
public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
{
    ParseTreeNode current = input;
    for (;;)
    {
        string kind = current.Type();
        if (kind == GrammarNames.FormulaWithEq || kind == GrammarNames.ArrayFormula)
        {
            // The first child is the "=" sign, the second one the formula
            current = current.ChildNodes[1];
        }
        else if (kind == GrammarNames.Argument || kind == GrammarNames.Formula)
        {
            if (current.ChildNodes.Count != 1)
            {
                return current;
            }

            current = current.ChildNodes[0];
        }
        else if (kind == GrammarNames.Reference)
        {
            // Skip references which are parentheses
            // Skip references without a prefix (=> they only have one child node) if the option is set
            bool isWrapper = (skipReferencesWithoutPrefix && current.ChildNodes.Count == 1) || current.IsParentheses();
            if (!isWrapper)
            {
                return current;
            }

            current = current.ChildNodes[0];
        }
        else
        {
            return current;
        }
    }
}
/// <summary>
/// Pretty-print a parse tree to a string
/// </summary>
/// <param name="input">Root of the (sub)tree to print</param>
/// <returns>The formula text of the node; symbols that are not part of the tree (parentheses, braces, "!") are added again</returns>
/// <exception cref="ArgumentException">
/// If a function node is of an unknown kind or a non-terminal without print rule has more or less than one child
/// </exception>
public static string Print(this ParseTreeNode input)
{
    // For terminals, just print the token text
    if (input.Term is Terminal)
    {
        return input.Token.Text;
    }

    // (Lazy) enumerable for printed children
    var children = input.ChildNodes.Select(Print);
    // Concrete list when needed
    List<string> childrenList;
    // Switch on non-terminals
    switch (input.Term.Name)
    {
        case GrammarNames.Formula:
            // Check if these are brackets, otherwise print first child
            return IsParentheses(input) ? $"({children.First()})" : children.First();
        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            childrenList = children.ToList();
            if (input.IsNamedFunction())
            {
                // Only the closing parenthesis is added here
                return string.Join("", childrenList) + ")";
            }

            if (input.IsBinaryOperation())
            {
                // format string for "normal" binary operation
                string format = "{0}{1}{2}";
                if (input.IsIntersection())
                {
                    // An intersection is printed as a single space between its operands
                    format = "{0} {2}";
                }

                return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
            }

            if (input.IsUnion())
            {
                return $"({string.Join(",", childrenList)})";
            }

            if (input.IsUnaryOperation())
            {
                return string.Join("", childrenList);
            }

            throw new ArgumentException("Unknown function type.");
        case GrammarNames.Reference:
            return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);
        case GrammarNames.Prefix:
            var ret = string.Join("", children);
            // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
            if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
            {
                ret += "!";
            }

            return ret;
        case GrammarNames.ArrayFormula:
            return "{=" + children.ElementAt(1) + "}";
        // Terms for which to print all child nodes concatenated
        case GrammarNames.ArrayConstant:
        case GrammarNames.DynamicDataExchange:
        case GrammarNames.FormulaWithEq:
        case GrammarNames.File:
        case GrammarNames.MultiRangeFormula:
        case GrammarNames.StructuredReference:
        case GrammarNames.StructuredReferenceColumn:
        case GrammarNames.StructuredReferenceExpression:
        case GrammarNames.StructuredReferenceSpecifier:
            return string.Join("", children);
        // Terms for which we print the children comma-separated
        case GrammarNames.Arguments:
        case GrammarNames.ArrayRows:
        case GrammarNames.Union:
            return string.Join(",", children);
        case GrammarNames.ArrayColumns:
            return string.Join(";", children);
        case GrammarNames.ConstantArray:
            return $"{{{children.First()}}}";
        default:
            // If it is not defined above and the number of children is exactly one, we want to just print the first child
            if (input.ChildNodes.Count == 1)
            {
                return children.First();
            }

            throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine +
                                        "This probably means the Excel grammar was modified without the print function being modified");
    }
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v163/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v163
{
/// <summary>
/// Kind of reference described by a ParserReference
/// </summary>
public enum ReferenceType
{
// Single cell; as the first member this is also the value of a ParserReference whose node type is not handled by InitializeReference
Cell,
// Range between two cells; assigned by ExcelFormulaParser.GetParserReferences, not by InitializeReference
CellRange,
// Named range
UserDefinedName,
// HRange node
HorizontalRange,
// VRange node
VerticalRange,
// RefError node
RefError,
// Structured (table) reference
Table
}
public class ParserReference
{
// Kind of the reference
public ReferenceType ReferenceType { get; set; }
// Parse tree node this reference was created from
public ParseTreeNode ReferenceNode { get; set; }
// Printed text of ReferenceNode
public string LocationString { get; set; }
// Sheet of a prefixed reference: the first sheet of a multi-sheet prefix, or "(Undefined sheet)" if the prefix contains no sheet
public string Worksheet { get; set; }
// Last sheet of a multi-sheet prefix
public string LastWorksheet { get; set; }
// File path taken from the prefix, if it has one
public string FilePath { get; set; }
// File name taken from the prefix; a file number is stored as its string representation
public string FileName { get; set; }
// Name of a named range, or the qualifier of a structured reference (null if the structured reference has no qualifier)
public string Name { get; set; }
// First location: the cell itself for a cell, the part before ':' for horizontal and vertical ranges
public string MinLocation { get; set; }
// Last location: equal to MinLocation for a cell, the part after ':' for horizontal and vertical ranges
public string MaxLocation { get; set; }
// Specifier tokens (including "@") of a structured reference
public string[] TableSpecifiers { get; set; }
// Column tokens of a structured reference
public string[] TableColumns { get; set; }
/// <summary>
/// Creates a reference and fills it from the given parse tree node, see InitializeReference
/// </summary>
public ParserReference(ParseTreeNode node)
{
InitializeReference(node);
}
/// <summary>
/// Initializes the current object based on the input ParseTreeNode
/// </summary>
/// <remarks>
/// For Reference nodes (Prefix ReferenceItem), it initializes the values derived from the Prefix node and
/// is re-invoked for the ReferenceItem node. Node types that are not handled only set
/// ReferenceNode and LocationString.
/// </remarks>
public void InitializeReference(ParseTreeNode node)
{
    string nodeType = node.Type();
    if (nodeType == GrammarNames.Reference)
    {
        PrefixInfo info = node.ChildNodes[0].GetPrefixInfo();
        if (info.HasSheet)
        {
            // Doubled single quotes are the escaped form of a single quote
            Worksheet = info.Sheet.Replace("''", "'");
        }
        else
        {
            Worksheet = "(Undefined sheet)";
        }

        if (info.HasMultipleSheets)
        {
            string[] sheetNames = info.MultipleSheets.Split(':');
            Worksheet = sheetNames[0];
            LastWorksheet = sheetNames[1];
        }

        if (info.HasFilePath)
        {
            FilePath = info.FilePath;
        }

        // A file number takes precedence over a file name
        if (info.HasFileNumber)
        {
            FileName = info.FileNumber.ToString();
        }
        else if (info.HasFileName)
        {
            FileName = info.FileName;
        }

        // Fill in the values of the prefixed item; ReferenceNode and LocationString are overwritten again below
        InitializeReference(node.ChildNodes[1]);
    }
    else if (nodeType == GrammarNames.Cell)
    {
        ReferenceType = ReferenceType.Cell;
        MinLocation = node.ChildNodes[0].Token.ValueString;
        MaxLocation = MinLocation;
    }
    else if (nodeType == GrammarNames.NamedRange)
    {
        ReferenceType = ReferenceType.UserDefinedName;
        Name = node.ChildNodes[0].Token.ValueString;
    }
    else if (nodeType == GrammarNames.StructuredReference)
    {
        ReferenceType = ReferenceType.Table;
        ParseTreeNode qualifier = node.ChildNodes.FirstOrDefault(child => child.Type() == GrammarNames.StructuredReferenceQualifier);
        Name = qualifier?.ChildNodes[0].Token.ValueString;
        TableSpecifiers = node.AllNodes()
            .Where(item => item.Is(GrammarNames.TokenSRSpecifier) || item.Is("@"))
            .Select(item => UnEscape(item.Token.ValueString, "'"))
            .ToArray();
        TableColumns = node.AllNodes()
            .Where(item => item.Is(GrammarNames.TokenSRColumn))
            .Select(item => UnEscape(item.Token.ValueString, "'"))
            .ToArray();
    }
    else if (nodeType == GrammarNames.HorizontalRange || nodeType == GrammarNames.VerticalRange)
    {
        // Both range kinds are a single token of the form "min:max"
        string[] limits = node.ChildNodes[0].Token.ValueString.Split(':');
        ReferenceType = nodeType == GrammarNames.HorizontalRange
            ? ReferenceType.HorizontalRange
            : ReferenceType.VerticalRange;
        MinLocation = limits[0];
        MaxLocation = limits[1];
    }
    else if (nodeType == GrammarNames.RefError)
    {
        ReferenceType = ReferenceType.RefError;
    }

    ReferenceNode = node;
    LocationString = node.Print();
}
private string UnEscape(string value, string escapeCharacter)
{
return System.Text.RegularExpressions.Regex.Replace(value, $"{escapeCharacter}(?!{escapeCharacter})", "");
}
public override string ToString()
{
return LocationString;
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v163/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v163
{
// TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23
/// <summary>
/// Simple data class that holds information about a Prefix.
/// </summary>
/// <remarks>
/// Equality and hashing compare file number, file path, file name, sheet and multiple sheets
/// (strings case-insensitively); <see cref="IsQuoted"/> does not take part in either.
/// </remarks>
public class PrefixInfo : IEquatable<PrefixInfo>
{
    /// <summary>Path of the referenced file, or null when the prefix has none.</summary>
    public string FilePath { get; }
    public bool HasFilePath => FilePath != null;

    private readonly int? _fileNumber;
    /// <summary>Numeric file reference; 0 when <see cref="HasFileNumber"/> is false.</summary>
    public int FileNumber => _fileNumber.GetValueOrDefault();
    public bool HasFileNumber => _fileNumber.HasValue;

    /// <summary>Name of the referenced file, or null when the prefix has none.</summary>
    public string FileName { get; }
    public bool HasFileName => FileName != null;

    /// <summary>True when the prefix refers to a file, by name or by number.</summary>
    public bool HasFile => HasFileName || HasFileNumber;

    /// <summary>Sheet name, or null when the prefix has no single sheet.</summary>
    public string Sheet { get; }
    public bool HasSheet => Sheet != null;

    /// <summary>Multiple-sheet text of the prefix, or null when absent.</summary>
    public string MultipleSheets { get; }
    public bool HasMultipleSheets => MultipleSheets != null;

    /// <summary>True when the prefix started with a single quote.</summary>
    public bool IsQuoted { get; }

    /// <summary>
    /// Creates a prefix description; every part is optional.
    /// </summary>
    public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
    {
        Sheet = sheet;
        _fileNumber = fileNumber;
        FileName = fileName;
        FilePath = filePath;
        MultipleSheets = multipleSheets;
        IsQuoted = isQuoted;
    }

    /// <summary>
    /// Create a PrefixInfo class from a parse tree node
    /// </summary>
    /// <param name="prefix">Node of type <c>GrammarNames.Prefix</c>.</param>
    /// <exception cref="ArgumentException">The node is not a prefix node.</exception>
    internal static PrefixInfo From(ParseTreeNode prefix)
    {
        if (prefix.Type() != GrammarNames.Prefix)
        {
            throw new ArgumentException("Not a prefix", nameof(prefix));
        }

        string filePath = null;
        int? fileNumber = null;
        string fileName = null;
        string sheetName = null;
        string multipleSheets = null;

        // Token number we're processing
        var cur = 0;

        // Check for quotes
        var isQuoted = prefix.ChildNodes[cur].Is("'");
        if (isQuoted)
        {
            cur++;
        }

        // Check and process file
        if (prefix.ChildNodes[cur].Is(GrammarNames.File))
        {
            ParseTreeNode file = prefix.ChildNodes[cur];

            if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
            {
                // Numeric filename: strip the first and last character, then parse what is left
                fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
            }
            else
            {
                // String filename
                var iCur = 0;

                // Check if it includes a path
                if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                {
                    filePath = file.ChildNodes[iCur].Print();
                    iCur++;
                }

                if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                {
                    // Strip the first and last character (the enclosing brackets)
                    fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                }
                else
                {
                    fileName = file.ChildNodes[iCur].Print();
                }
            }

            cur++;
        }

        // Check for a non-quoted sheet
        if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheet))
        {
            // remove the trailing character
            sheetName = Substr(prefix.ChildNodes[cur].Print(), 1);
        }
        // Check for a quoted sheet
        else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheetQuoted))
        {
            // remove quote and !
            sheetName = Substr(prefix.ChildNodes[cur].Print(), 2);

            if (sheetName == "")
            {
                // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                sheetName = " ";
            }
        }
        // Check if multiple sheets
        else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenMultipleSheets))
        {
            multipleSheets = Substr(prefix.ChildNodes[cur].Print(), 1);
        }

        return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
    }

    /// <summary>
    /// Extends the span and token text of a quoted sheet node backwards so that whitespace
    /// directly preceding it in <paramref name="sourceText"/> becomes part of the node.
    /// Does nothing when no whitespace precedes the node.
    /// </summary>
    internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
    {
        var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
        SourceLocation currentLocation = quotedSheetNode.Span.Location;
        if (newPosition == currentLocation.Position)
        {
            return;
        }

        // NOTE(review): the start moves backwards by (Position - newPosition) characters, yet the column is
        // increased by that amount; a decrease looks intended — confirm before changing.
        var newLocation = new SourceLocation(newPosition, currentLocation.Line, currentLocation.Column + currentLocation.Position - newPosition);
        quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

        // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
        typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
            ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
    }

    /// <summary>
    /// Walks backwards from the node's start over any whitespace and returns the index of the first
    /// whitespace character of that run (or the node's own start when there is none).
    /// </summary>
    private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
    {
        var startIndex = nodeSheetQuoted.Span.Location.Position;
        while (startIndex > 0)
        {
            if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
            {
                break;
            }

            startIndex--;
        }

        return startIndex;
    }

    /// <summary>
    /// Returns <paramref name="s"/> without its first <paramref name="removeFirst"/> and last
    /// <paramref name="removeLast"/> characters.
    /// </summary>
    private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
    {
        return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
    }

    public override bool Equals(object other) => Equals(other as PrefixInfo);

    /// <summary>
    /// Value equality on file number, file path, file name, sheet and multiple sheets; strings are
    /// compared with <see cref="StringComparison.OrdinalIgnoreCase"/>.
    /// </summary>
    public bool Equals(PrefixInfo other)
    {
        if (ReferenceEquals(null, other)) return false;
        if (ReferenceEquals(this, other)) return true;
        return _fileNumber == other._fileNumber
               && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase)
               && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase)
               && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase)
               && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>Hash over the same members, with the same case-insensitivity, as <see cref="Equals(PrefixInfo)"/>.</summary>
    public override int GetHashCode()
    {
        unchecked
        {
            var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
            hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
            hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
            hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
            hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
            return hashCode;
        }
    }

    public static bool operator ==(PrefixInfo left, PrefixInfo right)
    {
        return Equals(left, right);
    }

    public static bool operator !=(PrefixInfo left, PrefixInfo right)
    {
        return !Equals(left, right);
    }

    /// <summary>
    /// Rebuilds the prefix text: optional quote, path, bracketed file number and/or file name,
    /// sheet, multiple sheets, optional closing quote, and a final "!".
    /// </summary>
    public override string ToString()
    {
        var res = new StringBuilder();
        if (IsQuoted) res.Append("'");
        if (HasFilePath) res.Append(FilePath);
        if (HasFileNumber) res.Append($"[{FileNumber}]");
        if (HasFileName) res.Append($"[{FileName}]");
        if (HasSheet) res.Append(Sheet);
        if (HasMultipleSheets) res.Append(MultipleSheets);
        if (IsQuoted) res.Append("'");
        res.Append("!");
        return res.ToString();
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v163/WordsTerminal.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;
namespace XLParser.Web.XLParserVersions.v163
{
/// <summary>
/// Terminal that determines whether the input, at the current position, starts with one of the expected words.
/// </summary>
/// <remarks>
/// The words are stored in a character trie. Children of each node are represented as an array to
/// allow direct indexation. Do not use for words that have a large difference between low and high
/// character of a token.
/// </remarks>
public class WordsTerminal : Terminal
{
    private readonly Node _rootNode;
    private readonly List<string> _words;
    private bool _caseSensitive;

    /// <summary>
    /// Creates the terminal; the trie itself is built in <see cref="Init"/>.
    /// </summary>
    /// <param name="name">Terminal name.</param>
    /// <param name="words">Words this terminal should recognize.</param>
    public WordsTerminal(string name, IEnumerable<string> words) : base(name)
    {
        _rootNode = new Node(0);
        _words = new List<string>(words);
    }

    /// <summary>
    /// Builds the trie, upper-casing the words when the grammar is case-insensitive, and makes sure
    /// an <c>EditorInfo</c> is present.
    /// </summary>
    public override void Init(GrammarData grammarData)
    {
        base.Init(grammarData);
        _caseSensitive = Grammar.CaseSensitive;
        foreach (var word in _words)
        {
            AddWordToTree(_caseSensitive ? word : word.ToUpperInvariant());
        }

        if (EditorInfo == null)
        {
            EditorInfo = new TokenEditorInfo(TokenType.Unknown, TokenColor.Text, TokenTriggers.None);
        }
    }

    public override IList<string> GetFirsts() => _words;

    /// <summary>
    /// Matches the longest registered word that the input starts with at the preview position.
    /// </summary>
    /// <returns>A token for the matched word, or null when no word matches.</returns>
    public override Token TryMatch(ParsingContext context, ISourceStream source)
    {
        var node = _rootNode;
        // Deepest word end seen so far. Remembering it lets a word match even when the walk continues
        // into a longer path that turns out to be a dead end (e.g. words "AB" and "ABCD", input "ABCX").
        var lastWordEnd = node.IsTerminal ? node : null;
        var input = source.Text;
        for (var i = source.PreviewPosition; i < input.Length; ++i)
        {
            var c = _caseSensitive ? input[i] : char.ToUpperInvariant(input[i]);
            node = node[c];
            if (node is null)
            {
                break;
            }

            if (node.IsTerminal)
            {
                lastWordEnd = node;
            }
        }

        if (lastWordEnd is null)
        {
            return null;
        }

        // Node.Length is the depth in the trie, i.e. the number of characters of the matched word
        source.PreviewPosition += lastWordEnd.Length;
        return source.CreateToken(OutputTerminal);
    }

    /// <summary>Inserts a word into the trie and marks its last node as a word end.</summary>
    private void AddWordToTree(string word)
    {
        var node = _rootNode;
        foreach (var c in word)
        {
            node = node.GetOrAddChild(c);
        }

        node.IsTerminal = true;
    }

    /// <summary>
    /// Trie node. Children live in an array covering the character range [_lowChar, _highChar];
    /// slots for characters without a child are null.
    /// </summary>
    private class Node
    {
        private char _lowChar = '\0';
        private char _highChar = '\0';
        private Node[] _children;

        /// <param name="length">Depth of the node, i.e. number of characters from the root.</param>
        public Node(int length)
        {
            Length = length;
        }

        /// <summary>True when a word ends at this node.</summary>
        public bool IsTerminal { get; set; }

        /// <summary>Number of characters on the path from the root to this node.</summary>
        public int Length { get; }

        /// <summary>Child for character <paramref name="c"/>, or null when there is none.</summary>
        public Node this[char c]
        {
            get
            {
                if (_children is null)
                {
                    return null;
                }

                if (c < _lowChar || c > _highChar)
                {
                    return null;
                }

                return _children[c - _lowChar];
            }
        }

        /// <summary>
        /// Returns the child for <paramref name="c"/>, creating it (and growing the child array at
        /// the low or high end) when necessary.
        /// </summary>
        internal Node GetOrAddChild(char c)
        {
            if (_children is null)
            {
                var node = new Node(Length + 1);
                _children = new[] { node };
                _lowChar = c;
                _highChar = c;
                return node;
            }

            var newLowChar = (char)Math.Min(_lowChar, c);
            if (newLowChar != _lowChar)
            {
                // Grow at the front: resize, shift the existing children up by the offset and
                // clear the vacated slots; the new child takes slot 0.
                var newChildrenCount = _highChar - newLowChar + 1;
                Array.Resize(ref _children, newChildrenCount);
                var ofs = _lowChar - newLowChar;
                Array.Copy(_children, 0, _children, ofs, newChildrenCount - ofs);
                Array.Clear(_children, 0, ofs);
                _lowChar = newLowChar;
                return _children[0] = new Node(Length + 1);
            }

            var newHighChar = (char)Math.Max(_highChar, c);
            if (newHighChar != _highChar)
            {
                // Grow at the end; the new child takes the last slot.
                var newChildrenCount = newHighChar - _lowChar + 1;
                Array.Resize(ref _children, newChildrenCount);
                _highChar = newHighChar;
                return _children[newChildrenCount - 1] = new Node(Length + 1);
            }

            // Character is inside the current range: reuse or fill its slot
            var charIdx = c - _lowChar;
            var child = _children[charIdx];
            if (child is null)
            {
                return _children[charIdx] = new Node(Length + 1);
            }

            return child;
        }
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v170/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
namespace XLParser.Web.XLParserVersions.v170
{
///
/// Contains the XLParser grammar
///
[Language("Excel Formulas", "1.7.0", "Grammar for Excel Formulas")]
public class ExcelFormulaGrammar : Grammar
{
#region 1-Terminals
#region Symbols and operators
// Punctuation and operator terminals. Each property obtains its terminal from ToTerm for the given symbol text.
public Terminal at => ToTerm("@");
public Terminal comma => ToTerm(",");
public Terminal colon => ToTerm(":");
public Terminal hash => ToTerm("#");
public Terminal semicolon => ToTerm(";");
public Terminal OpenParen => ToTerm("(");
public Terminal CloseParen => ToTerm(")");
public Terminal CloseSquareParen => ToTerm("]");
public Terminal OpenSquareParen => ToTerm("[");
public Terminal exclamationMark => ToTerm("!");
public Terminal CloseCurlyParen => ToTerm("}");
public Terminal OpenCurlyParen => ToTerm("{");
public Terminal QuoteS => ToTerm("'");
// Arithmetic, concatenation and exponentiation operators
public Terminal mulop => ToTerm("*");
public Terminal plusop => ToTerm("+");
public Terminal divop => ToTerm("/");
public Terminal minop => ToTerm("-");
public Terminal concatop => ToTerm("&");
public Terminal expop => ToTerm("^");
// Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
// Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);
// Percent is used as the postfix operator (see PostfixOp rule in the constructor)
public Terminal percentop => ToTerm("%");
// Comparison operators
public Terminal gtop => ToTerm(">");
public Terminal eqop => ToTerm("=");
public Terminal ltop => ToTerm("<");
public Terminal neqop => ToTerm("<>");
public Terminal gteop => ToTerm(">=");
public Terminal lteop => ToTerm("<=");
#endregion
#region Literals
// Boolean literal: TRUE or FALSE. The trailing "T"/"F" arguments are presumably first-character hints for the scanner — confirm against RegexBasedTerminal.
public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE", "T", "F")
{
Priority = TerminalPriority.Bool
};
// Numeric literal; integer values may be typed as Int32, Int64 or big integer
public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None)
{
DefaultIntTypes = new[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt }
};
// Double-quoted text: a doubled quote stands for a quote, line breaks are allowed, no backslash escapes
public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes);
// Single-quoted string with the same options as TextToken (used by the DynamicDataExchange rule)
public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes)
{ Priority = TerminalPriority.SingleQuotedString };
// Error literals other than #REF!, which has its own terminal below
public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!", "#");
// #REF! is separate because it also appears in reference positions (RefError and Prefix rules)
public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError);
#endregion
#region Functions
// Regex building blocks for user-defined function (UDF) names. UdfTokenRegex is a static readonly field
// built from the constants above it, so the declaration order of these members must be kept.
private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers
private const string AllUdfChars = SpecialUdfChars + @"\\.\w";
// Optional UDF prefix: a quoted .xla add-in file name followed by "!", or "_xll."
private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)";
// The following regex uses the rather exotic feature Character Class Subtraction
// https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction
// Alternatives: a single UDF character other than C/c/R/r; a prefix plus a single character;
// or an optional prefix plus 2 to 1023 characters. All must be followed by "(".
private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\(";
public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) { Priority = TerminalPriority.UDF };
// Built-in functions that can return a reference; the opening parenthesis is part of the token
public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(", "I", "O")
{ Priority = TerminalPriority.ExcelRefFunction };
// IF and CHOOSE, which share the priority of the reference functions above
public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(", "I", "C")
{ Priority = TerminalPriority.ExcelRefFunction };
// All other built-in functions: every name in excelFunctionList with "(" appended
public Terminal ExcelFunction { get; } = new WordsTerminal(GrammarNames.ExcelFunction, excelFunctionList.Select(f => f + '('))
{ Priority = TerminalPriority.ExcelFunction };
// Using this instead of Empty allows a more accurate tree
public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);
#endregion
#region References and names
// Column letters from A up to XFD
private const string ColumnPattern = @"(?:[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D]|[A-Z]{1,2})";
// Row numbers from 1 up to 1048576, without leading zeros
private const string RowPattern = @"(?:104857[0-6]|10485[0-6][0-9]|1048[0-4][0-9]{2}|104[0-7][0-9]{3}|10[0-3][0-9]{4}|[1-9][0-9]{1,5}|[1-9])";
// Possible first characters of a column reference ("A".."Z" and "$") and of a row reference ("1".."9" and "$")
private static readonly string[] ColumnPrefix = Enumerable.Range('A', 'Z' - 'A' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray();
private static readonly string[] RowPrefix = Enumerable.Range('1', '9' - '1' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray();
// Whole-column range (e.g. A:C) and whole-row range (e.g. 1:3); each side may carry a "$"
public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern, ColumnPrefix);
public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?" + RowPattern + ":[$]?" + RowPattern, RowPrefix);
// Single cell: optional "$", column, optional "$", row
private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?" + RowPattern;
public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex, ColumnPrefix)
{ Priority = TerminalPriority.CellToken };
// Unicode categories that make up a "letter" (the Lu, Ll, Lt, Lm and Lo categories)
private static readonly HashSet<UnicodeCategory> UnicodeLetterCategories = new HashSet<UnicodeCategory>
{
    UnicodeCategory.UppercaseLetter,
    UnicodeCategory.LowercaseLetter,
    UnicodeCategory.TitlecaseLetter,
    UnicodeCategory.ModifierLetter,
    UnicodeCategory.OtherLetter
};
// 48718 letters, but listing them as prefixes keeps the parser from forming name tokens that start with digits, parentheses, operators...
private static readonly string[] UnicodeLetters = Enumerable.Range(0, ushort.MaxValue).Where(codePoint => UnicodeLetterCategories.Contains(CharUnicodeInfo.GetUnicodeCategory((char)codePoint))).Select(codePoint => char.ToString((char)codePoint)).ToArray();
// A name may start with any letter, a backslash or an underscore
private static readonly string[] NameStartCharPrefix = UnicodeLetters.Concat(new[] { @"\", "_" }).ToArray();
// Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark
private const string NameStartCharRegex = @"[\p{L}\\_]";
private const string NameValidCharacterRegex = @"[\w\\_\.\?€]";
public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*", NameStartCharPrefix)
{ Priority = TerminalPriority.Name };
// Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference.
// If we ever parse R1C1 references, make sure to include them here
// TODO: Add all function names here
private const string NamedRangeCombinationRegex =
    "((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
    // \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
    + "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
    // allow large cell references (e.g. A1048577) as named range
    // The alternatives enumerate row numbers above 1048576; the last one covers any number of 8 or more digits
    // (it must be a digit class: a bare "d" would only match the letter d).
    + "|(" + ColumnPattern + @"(104857[7-9]|10485[89][0-9]|1048[6-9][0-9]{2}|1049[0-9]{3}|10[5-9][0-9]{4}|1[1-9][0-9]{5}|[2-9][0-9]{6}|[0-9]{8,})" + NameValidCharacterRegex + "*)"
    ;
// To prevent e.g. "A1A1" being parsed as 2 cell tokens
public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NamedRangeCombinationRegex,
    ColumnPrefix.Concat(new[] { "T", "F" }).ToArray())
{ Priority = TerminalPriority.NamedRangeCombination };
// Reserved names: "_xlnm." followed by letters or underscores
public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+", "_")
{ Priority = TerminalPriority.ReservedName };
#region Structured References
// Item specifiers of a structured reference: #All, #Data, #Headers, #Totals, #This Row
private const string SRSpecifierRegex = @"#(All|Data|Headers|Totals|This Row)";
public Terminal SRSpecifierToken = new RegexBasedTerminal(GrammarNames.TokenSRSpecifier, SRSpecifierRegex, "#")
{ Priority = TerminalPriority.StructuredReference };
// Column name: any run of characters other than [ ] ' # @, where those characters may appear when preceded by a single quote
private const string SRColumnRegex = @"(?:[^\[\]'#@]|(?:'['\[\]#@]))+";
public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, SRColumnRegex)
{ Priority = TerminalPriority.StructuredReference };
#endregion
#region Prefixes
// Character-class fragments for sheet names. The static readonly regex strings below are built from
// these constants and used by the terminal initializers, so the declaration order must be kept.
// Characters that may only appear in a sheet name when the name is quoted
private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
// Characters that may not appear in a sheet name at all (a quote inside a quoted name is written doubled)
private const string notSheetNameChars = @"'*\[\]\\:/?";
//const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
//const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*";
//private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";
// Unquoted sheet name followed by "!"
public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!")
{ Priority = TerminalPriority.SheetToken };
// Remainder of a quoted sheet name up to and including the closing quote and "!"; the opening quote is a separate token (see Prefix rule)
public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!")
{ Priority = TerminalPriority.SheetQuotedToken };
// Two sheet names separated by ":" (a sheet span), unquoted and quoted variants
private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!";
private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!";
public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
// Numeric file reference such as [1]; must not be followed by a comma and a "!" must appear later in the input
private const string fileNameNumericRegex = @"\[[0-9]+\](?!,)(?=.*!)";
public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex, "[")
{ Priority = TerminalPriority.FileNameNumericToken };
// File name in square brackets, with the same lookahead conditions as the numeric variant
private const string fileNameInBracketsRegex = @"\[[^\[\]]+\](?!,)(?=.*!)";
public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex, "[")
{ Priority = TerminalPriority.FileName };
// Source: https://stackoverflow.com/a/14632579
// Bare file name: characters other than . \ [ ] followed by a dot and a 1 to 4 character extension
private const string fileNameRegex = @"[^\.\\\[\]]+\..{1,4}";
public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex)
{ Priority = TerminalPriority.FileName };
// Source: http://stackoverflow.com/a/6416209/572635
private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\-.$ @]+)\\(([^<>\"" /\|?*\\']|( |''))*\\)*";
// The optional port is ":" followed by digits; it has to be a digit class, a parenthesized "0-9" would only match that literal text
private const string urlPathRegex = @"http(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:[0-9]*)*[/]([a-zA-Z0-9\-\.\?\,\'+&%\$#_ ()]*[/])*";
// A file path is either a Windows path or an http(s) URL path, both ending in a separator
private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")";
public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex)
{ Priority = TerminalPriority.FileNamePath };
#endregion
#endregion
#endregion
#region 2-NonTerminals
// Most non-terminals are first defined here, so they can be used anywhere in the rules
// Otherwise you can only use non-terminals that have been defined previously
// One NonTerminal per grammar symbol, named after the matching GrammarNames constant.
// The rules for these symbols are assigned in the constructor.
public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument);
public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments);
public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns);
public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant);
public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula);
public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows);
public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool);
public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell);
public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant);
public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray);
public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument);
public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error);
public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula);
public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq);
public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall);
public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName);
public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange);
// Marked transient in the constructor, like the other operator non-terminals below
public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp);
public NonTerminal MultiRangeFormula{ get; } = new NonTerminal(GrammarNames.MultiRangeFormula);
public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange);
public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number);
public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix);
public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
// NOTE(review): QuotedFileSheet and Sheet get no rule in the constructor of this class
public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference);
public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError);
public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName);
public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName);
public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet);
// Grammar root (assigned to Root in the constructor)
public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart);
public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
public NonTerminal StructuredReferenceColumn { get; } = new NonTerminal(GrammarNames.StructuredReferenceColumn);
public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
public NonTerminal StructuredReferenceSpecifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceSpecifier);
public NonTerminal StructuredReferenceQualifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceQualifier);
public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);
#endregion
/// <summary>
/// Builds the grammar: marks punctuation, assigns the production rule of every non-terminal,
/// marks the transient non-terminals and registers operator precedence and associativity.
/// </summary>
public ExcelFormulaGrammar()
{
#region Punctuation
// Round and curly parentheses are registered as punctuation
MarkPunctuation(OpenParen, CloseParen);
MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
#endregion
#region Rules
#region Base rules
Root = Start;
// A formula may appear with or without the leading "=", as an array formula in curly braces,
// or as "=" followed by a union of references
Start.Rule =
FormulaWithEq
| Formula
| ArrayFormula
| MultiRangeFormula
;
MarkTransient(Start);
ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;
MultiRangeFormula.Rule = eqop + Union;
FormulaWithEq.Rule = eqop + Formula;
Formula.Rule =
Reference + ReduceHere()
| Constant
| FunctionCall
| ConstantArray
| OpenParen + Formula + CloseParen
| ReservedName
;
ReservedName.Rule = ReservedNameToken;
Constant.Rule =
Number
| Text
| Bool
| Error
;
// Each constant kind wraps exactly one token
Text.Rule = TextToken;
Number.Rule = NumberToken;
Bool.Rule = BoolToken;
Error.Rule = ErrorToken;
RefError.Rule = RefErrorToken;
#endregion
#region Functions
// Operator applications are FunctionCall nodes too, next to named function calls
FunctionCall.Rule =
FunctionName + Arguments + CloseParen
| PrefixOp + Formula
| Formula + PostfixOp
| Formula + InfixOp + Formula
;
// No OpenParen in the call rules: the function tokens already include the "("
FunctionName.Rule = ExcelFunction;
Arguments.Rule = MakeStarRule(Arguments, comma, Argument);
EmptyArgument.Rule = EmptyArgumentToken;
Argument.Rule = Formula | EmptyArgument;
// Unary prefix operators: +, - and @
PrefixOp.Rule =
ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
MarkTransient(PrefixOp);
InfixOp.Rule =
expop
| mulop
| divop
| plusop
| minop
| concatop
| gtop
| eqop
| ltop
| neqop
| gteop
| lteop;
MarkTransient(InfixOp);
// ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
PostfixOp.Rule = PreferShiftHere() + percentop;
MarkTransient(PostfixOp);
#endregion
#region References
Reference.Rule =
ReferenceItem
| ReferenceFunctionCall
| OpenParen + Reference + PreferShiftHere() + CloseParen
| Prefix + ReferenceItem
| DynamicDataExchange
;
// Operations that yield a reference: range (:), intersection (space), parenthesized union,
// reference-returning functions and the postfix "#" operator
ReferenceFunctionCall.Rule =
Reference + colon + Reference
| Reference + intersectop + Reference
| OpenParen + Union + CloseParen
| RefFunctionName + Arguments + CloseParen
| Reference + hash
;
RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;
Union.Rule = MakePlusRule(Union, comma, Reference);
ReferenceItem.Rule =
Cell
| NamedRange
| VRange
| HRange
| RefError
| UDFunctionCall
| StructuredReference
;
MarkTransient(ReferenceItem);
UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
UDFName.Rule = UDFToken;
VRange.Rule = VRangeToken;
HRange.Rule = HRangeToken;
Cell.Rule = CellToken;
// A file is a number in brackets, a name in brackets, or a path followed by a bracketed or bare file name
File.Rule =
FileNameNumericToken
| FileNameEnclosedInBracketsToken
| FilePathToken + FileNameEnclosedInBracketsToken
| FilePathToken + FileName
;
DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;
NamedRange.Rule = NameToken | NamedRangeCombinationToken;
// Prefix forms: sheet, file + sheet, file only, sheet span, each unquoted or starting with a quote; or #REF!
Prefix.Rule =
SheetToken
| QuoteS + SheetQuotedToken
| File + SheetToken
| QuoteS + File + SheetQuotedToken
| File + exclamationMark
| MultipleSheetsToken
| QuoteS + MultipleSheetsQuotedToken
| File + MultipleSheetsToken
| QuoteS + File + MultipleSheetsQuotedToken
| RefErrorToken
;
// Structured (table) references
StructuredReferenceQualifier.Rule = NameToken;
StructuredReferenceSpecifier.Rule =
SRSpecifierToken
| at
| OpenSquareParen + SRSpecifierToken + CloseSquareParen;
StructuredReferenceColumn.Rule =
SRColumnToken
| OpenSquareParen + SRColumnToken + CloseSquareParen;
// Allowed contents of the outer brackets: a column or column range (optionally preceded by "@"),
// or one or two specifiers optionally followed by a column or column range
StructuredReferenceExpression.Rule =
StructuredReferenceColumn
| StructuredReferenceColumn + colon + StructuredReferenceColumn
| at + StructuredReferenceColumn
| at + StructuredReferenceColumn + colon + StructuredReferenceColumn
| StructuredReferenceSpecifier
| StructuredReferenceSpecifier + comma + StructuredReferenceColumn
| StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
| StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier
| StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn
| StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
;
// The table name (qualifier) is optional; with a qualifier the brackets may be empty
StructuredReference.Rule =
OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
| StructuredReferenceQualifier + OpenSquareParen + CloseSquareParen
| StructuredReferenceQualifier + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
;
#endregion
#region Arrays
// Constant arrays: rows are separated by ";", values within a row by ","
ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;
ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);
ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
#endregion
#endregion
#region 5-Operator Precedence
// Some of these operators are neutral associative instead of left associative,
// but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
// structure of the parse tree, we like consistency.
RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at);
RegisterOperators(Precedence.Union, Associativity.Left, comma);
RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
RegisterOperators(Precedence.Range, Associativity.Left, colon);
#endregion
}
#region Precedence and Priority constants
// Operator precedence levels passed to RegisterOperators in the grammar constructor.
// The values increase from comparison up to the range operator, following the
// Excel precedence table linked below.
// Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
// Could also be an enum, but this way you don't need int casts
private static class Precedence
{
// Don't use priority 0, Irony seems to view it as no priority set
public const int Comparison = 1;
public const int Concatenation = 2;
public const int Addition = 3;
public const int Multiplication = 4;
public const int Exponentiation = 5;
public const int UnaryPostFix = 6;
public const int UnaryPreFix = 7;
// Level 8 is intentionally left unused; the reference level is disabled.
//public const int Reference = 8;
public const int Union = 9;
public const int Intersection = 10;
public const int Range = 11;
}
// Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
// E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
// E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
// The commented-out Low/Normal/High entries record Irony's own reference values for orientation.
private static class TerminalPriority
{
// Irony Low value
//public const int Low = -1000;
public const int Name = -800;
public const int ReservedName = -700;
public const int StructuredReference = -500;
public const int FileName = -500;
public const int FileNamePath = -800;
public const int SingleQuotedString = -100;
// Irony Normal value, default value
//public const int Normal = 0;
public const int Bool = 0;
public const int MultipleSheetsToken = 100;
// Irony High value
//public const int High = 1000;
public const int CellToken = 1000;
public const int NamedRangeCombination = 1100;
public const int UDF = 1150;
// The remaining terminals all share the highest priority used in this grammar.
public const int ExcelFunction = 1200;
public const int ExcelRefFunction = 1200;
public const int FileNameNumericToken = 1200;
public const int SheetToken = 1200;
public const int SheetQuotedToken = 1200;
}
#endregion
// Names of the built-in Excel functions; the embedded list is re-read on every access.
private static string[] excelFunctionList
{
    get { return GetExcelFunctionList(); }
}

// Reads the embedded v170 function list resource and splits it into one entry per
// non-empty line (both '\n' and '\r' act as separators).
private static string[] GetExcelFunctionList()
{
    string resource = Properties.Resources.ExcelBuiltinFunctionList_v170;
    char[] lineSeparators = { '\n', '\r' };
    using (var reader = new StringReader(resource))
    {
        string contents = reader.ReadToEnd();
        return contents.Split(lineSeparators, StringSplitOptions.RemoveEmptyEntries);
    }
}
}
#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
// Names of the non-terminals that can appear as node types in the parse tree.
#region Non-Terminals
public const string Argument = "Argument";
public const string Arguments = "Arguments";
public const string ArrayColumns = "ArrayColumns";
public const string ArrayConstant = "ArrayConstant";
public const string ArrayFormula = "ArrayFormula";
public const string ArrayRows = "ArrayRows";
public const string Bool = "Bool";
public const string Cell = "Cell";
public const string Constant = "Constant";
public const string ConstantArray = "ConstantArray";
public const string DynamicDataExchange = "DynamicDataExchange";
public const string EmptyArgument = "EmptyArgument";
public const string Error = "Error";
public const string ExcelFunction = "ExcelFunction";
public const string File = "File";
public const string Formula = "Formula";
public const string FormulaWithEq = "FormulaWithEq";
public const string FunctionCall = "FunctionCall";
public const string FunctionName = "FunctionName";
public const string HorizontalRange = "HRange";
public const string MultiRangeFormula = "MultiRangeFormula";
public const string NamedRange = "NamedRange";
public const string Number = "Number";
public const string Prefix = "Prefix";
public const string QuotedFileSheet = "QuotedFileSheet";
public const string Range = "Range";
public const string Reference = "Reference";
public const string ReferenceFunctionCall = "ReferenceFunctionCall";
public const string RefError = "RefError";
public const string RefFunctionName = "RefFunctionName";
public const string ReservedName = "ReservedName";
public const string Sheet = "Sheet";
public const string StructuredReference = "StructuredReference";
public const string StructuredReferenceColumn = "StructuredReferenceColumn";
public const string StructuredReferenceExpression = "StructuredReferenceExpression";
public const string StructuredReferenceSpecifier = "StructuredReferenceSpecifier";
public const string StructuredReferenceQualifier = "StructuredReferenceQualifier";
public const string Text = "Text";
public const string UDFName = "UDFName";
public const string UDFunctionCall = "UDFunctionCall";
public const string Union = "Union";
public const string VerticalRange = "VRange";
#endregion
// Names of the non-terminals that the grammar declares as transient.
#region Transient Non-Terminals
public const string TransientStart = "Start";
public const string TransientInfixOp = "InfixOp";
public const string TransientPostfixOp = "PostfixOp";
public const string TransientPrefixOp = "PrefixOp";
public const string TransientReferenceItem = "ReferenceItem";
#endregion
// Names of the terminals (tokens) produced by the lexer.
#region Terminals
public const string TokenBool = "BoolToken";
public const string TokenCell = "CellToken";
public const string TokenEmptyArgument = "EmptyArgumentToken";
public const string TokenError = "ErrorToken";
public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
public const string TokenFilePath = "FilePathToken";
public const string TokenFileName = "FileNameToken";
public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
public const string TokenFileNameNumeric = "FileNameNumericToken";
public const string TokenHRange = "HRangeToken";
public const string TokenIntersect = "INTERSECT";
public const string TokenMultipleSheets = "MultipleSheetsToken";
public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
public const string TokenName = "NameToken";
public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
public const string TokenNumber = "NumberToken";
public const string TokenRefError = "RefErrorToken";
public const string TokenReservedName = "ReservedNameToken";
public const string TokenSingleQuotedString = "SingleQuotedString";
public const string TokenSheet = "SheetNameToken";
public const string TokenSheetQuoted = "SheetNameQuotedToken";
public const string TokenSRColumn = "SRColumnToken";
public const string TokenSRSpecifier = "SRSpecifierToken";
public const string TokenText = "TextToken";
public const string TokenUDF = "UDFToken";
public const string TokenUnionOperator = ",";
public const string TokenVRange = "VRangeToken";
#endregion
}
#endregion
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v170/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v170
{
///
/// Excel formula parser
/// Contains parser and utilities that operate directly on the parse tree, or makes working with the parse tree easier.
///
public static class ExcelFormulaParser
{
/// <summary>
/// Parser instance belonging to the current thread; stays null until <see cref="P"/> is first read on that thread.
/// </summary>
[ThreadStatic]
private static Parser _p;

/// <summary>
/// Parser for the calling thread. Because the backing field is thread-static,
/// every thread lazily builds and then reuses its own instance.
/// </summary>
private static Parser P
{
    get
    {
        if (_p == null)
        {
            _p = new Parser(new ExcelFormulaGrammar());
        }

        return _p;
    }
}
/// <summary>
/// Parse a formula and return the root node of the resulting tree.
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed
/// </exception>
/// <returns>Parse tree root node</returns>
public static ParseTreeNode Parse(string input) => ParseToTree(input).Root;
/// <summary>
/// Parse a formula and return the complete parse tree.
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed
/// </exception>
/// <returns>Parse tree</returns>
public static ParseTree ParseToTree(string input)
{
    ParseTree parsed = P.Parse(input);
    if (parsed.HasErrors())
    {
        throw new ArgumentException("Failed parsing input <<" + input + ">>");
    }

    // Intersection tokens: move the span one character to the left and make it one character long.
    foreach (ParseTreeNode candidate in parsed.Root.AllNodes())
    {
        if (!candidate.Is(GrammarNames.TokenIntersect))
        {
            continue;
        }

        SourceLocation reported = candidate.Span.Location;
        var shifted = new SourceLocation(reported.Position - 1, reported.Line, reported.Column - 1);
        candidate.Span = new SourceSpan(shifted, 1);
    }

    // Quoted sheet tokens: let PrefixInfo re-attach whitespace that directly precedes the token.
    foreach (ParseTreeNode candidate in parsed.Root.AllNodes())
    {
        if (candidate.Is(GrammarNames.TokenSheetQuoted))
        {
            PrefixInfo.FixQuotedSheetNodeForWhitespace(candidate, input);
        }
    }

    return parsed;
}
/// <summary>
/// All nodes (terminals included) in depth-first pre-order, with a conditional stop
/// </summary>
/// <param name="root">The root node</param>
/// <param name="stopAt">Don't process the children of a node matching this predicate; the matching node itself is still returned</param>
/// <returns>A lazily evaluated sequence starting with <paramref name="root"/></returns>
// inspiration taken from https://irony.codeplex.com/discussions/213938
public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
{
    var stack = new Stack<ParseTreeNode>();
    stack.Push(root);
    while (stack.Count > 0)
    {
        var node = stack.Pop();
        yield return node;

        // Check if we don't want to process the children of this node
        if (stopAt != null && stopAt(node))
        {
            continue;
        }

        var children = node.ChildNodes;
        // Push children on in reverse order so that they will
        // be evaluated left -> right when popped.
        for (int i = children.Count - 1; i >= 0; i--)
        {
            stack.Push(children[i]);
        }
    }
}
/// <summary>
/// All nodes of the tree below (and including) <paramref name="root"/> in depth-first pre-order
/// </summary>
/// <param name="root">The node to start from</param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
{
    return AllNodesConditional(root);
}
/// <summary>
/// All nodes of a certain type in depth-first pre-order
/// </summary>
/// <param name="root">The node to start from</param>
/// <param name="type">The node type (term name) to keep</param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
{
    return AllNodes(root.AllNodes(), type);
}
/// <summary>
/// Filters an already enumerated node sequence down to the nodes of the given type
/// </summary>
/// <param name="allNodes">The nodes to filter</param>
/// <param name="type">The node type (term name) to keep</param>
internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
{
    return allNodes.Where(node => node.Is(type));
}
/// <summary>
/// Find the node that has <paramref name="child"/> among its direct children
/// </summary>
/// <remarks>
/// This is an expensive operation, as the whole tree will be searched through
/// </remarks>
/// <param name="child">The node whose parent is wanted</param>
/// <param name="treeRoot">Root of the tree to search</param>
/// <exception cref="ArgumentException">If no node below <paramref name="treeRoot"/> has the child</exception>
public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
{
    foreach (ParseTreeNode candidate in treeRoot.AllNodes())
    {
        foreach (ParseTreeNode candidateChild in candidate.ChildNodes)
        {
            if (candidateChild == child)
            {
                return candidate;
            }
        }
    }

    throw new ArgumentException("Child is not part of the tree", nameof(child));
}
/// <summary>
/// The node type, i.e. the name of the grammar term the node was created for
/// </summary>
public static string Type(this ParseTreeNode node) => node.Term.Name;
/// <summary>
/// Whether the node's type equals <paramref name="type"/>
/// </summary>
public static bool Is(this ParseTreeNode pt, string type)
{
    string actualType = pt.Type();
    return actualType == type;
}
/// <summary>
/// Whether this node is a function: a function call, a reference function call, a user-defined
/// function call, or a two-child Reference whose second child is itself a function
/// </summary>
public static Boolean IsFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            return true;
        case GrammarNames.Reference:
            // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
            return input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction();
        default:
            return false;
    }
}
/// <summary>
/// Whether or not this node represents parentheses "(_)": a Formula or Reference node
/// whose only child is a node of the same type
/// </summary>
public static bool IsParentheses(this ParseTreeNode input)
{
    string type = input.Type();
    bool canWrapItself = type == GrammarNames.Formula || type == GrammarNames.Reference;
    if (!canWrapItself)
    {
        return false;
    }

    return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(type);
}
/// <summary>
/// Whether this node is a function with exactly three children whose middle child is an operator
/// </summary>
public static bool IsBinaryOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 3)
    {
        return false;
    }

    return input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Whether this node is a binary operation stored in a FunctionCall node
/// </summary>
public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
{
    if (!input.IsBinaryOperation())
    {
        return false;
    }

    return input.Is(GrammarNames.FunctionCall);
}
/// <summary>
/// Whether this node is a binary operation stored in a ReferenceFunctionCall node
/// </summary>
public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
{
    if (!input.IsBinaryOperation())
    {
        return false;
    }

    return input.Is(GrammarNames.ReferenceFunctionCall);
}
/// <summary>
/// Whether this node is a unary operation, prefix or postfix
/// </summary>
public static bool IsUnaryOperation(this ParseTreeNode input)
{
    if (input.IsUnaryPrefixOperation())
    {
        return true;
    }

    return input.IsUnaryPostfixOperation();
}
/// <summary>
/// Whether this node is a function with exactly two children whose first child is an operator
/// </summary>
public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }

    return input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Whether this node is a function with exactly two children whose second child is an operator
/// </summary>
public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }

    return input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
}
// Returns the input without its last character; throws for an empty string.
private static string RemoveFinalSymbol(string input)
{
    int keptLength = input.Length - 1;
    return input.Substring(0, keptLength);
}
/// <summary>
/// Get the function or operator name of this function call
/// </summary>
/// <param name="input">A node for which <see cref="IsFunction"/> holds</param>
/// <returns>
/// The intersect or union token name, the printed operator, the upper-cased function name,
/// or for external UDFs the printed prefix followed by the function name
/// </returns>
/// <exception cref="ArgumentException">If the node is none of the recognised function shapes</exception>
public static string GetFunction(this ParseTreeNode input)
{
    if (input.IsIntersection())
    {
        return GrammarNames.TokenIntersect;
    }

    if (input.IsUnion())
    {
        return GrammarNames.TokenUnionOperator;
    }

    if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
    {
        return input.ChildNodes[1].Print();
    }

    if (input.IsUnaryPrefixOperation())
    {
        return input.ChildNodes[0].Print();
    }

    if (input.IsNamedFunction())
    {
        // The printed name carries one trailing symbol, which is dropped here.
        // Upper-case with the invariant culture so the result does not depend on the
        // thread's culture (e.g. under tr-TR "if" would otherwise become "İF").
        return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
    }

    if (input.IsExternalUDFunction())
    {
        return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
    }

    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Whether this node is a function whose name, as returned by <see cref="GetFunction"/>, equals <paramref name="functionName"/>
/// </summary>
public static bool MatchFunction(this ParseTreeNode input, string functionName)
{
    if (!IsFunction(input))
    {
        return false;
    }

    return GetFunction(input) == functionName;
}
/// <summary>
/// Get all the arguments of a function or operation
/// </summary>
/// <param name="input">A named function, operator, union or external UDF node</param>
/// <returns>The argument nodes in source order</returns>
/// <exception cref="ArgumentException">If the node is none of the recognised function shapes</exception>
public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
{
    if (input.IsNamedFunction())
    {
        return input
            .ChildNodes[1] // "Arguments" non-terminal
            .ChildNodes    // "Argument" non-terminals
            .Select(node => node.ChildNodes[0])
            ;
    }

    if (input.IsBinaryOperation())
    {
        return new[] {input.ChildNodes[0], input.ChildNodes[2]};
    }

    if (input.IsUnaryPrefixOperation())
    {
        return new[] {input.ChildNodes[1]};
    }

    if (input.IsUnaryPostfixOperation())
    {
        return new[] {input.ChildNodes[0]};
    }

    if (input.IsUnion())
    {
        return input.ChildNodes[0].ChildNodes;
    }

    if (input.IsExternalUDFunction())
    {
        return input       // Reference
            .ChildNodes[1] // UDFunctionCall
            .ChildNodes[1] // Arguments
            .ChildNodes    // Argument non-terminals
            .Select(node => node.ChildNodes[0])
            ;
    }

    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Whether this node is a function whose first child is a built-in function name
/// (FunctionName or RefFunctionName)
/// </summary>
public static bool IsBuiltinFunction(this ParseTreeNode node)
{
    if (!node.IsFunction())
    {
        return false;
    }

    ParseTreeNode head = node.ChildNodes[0];
    return head.Is(GrammarNames.FunctionName) || head.Is(GrammarNames.RefFunctionName);
}
/// <summary>
/// Whether or not this node is a binary operation whose operator is the intersect terminal
/// </summary>
public static bool IsIntersection(this ParseTreeNode input)
{
    if (!IsBinaryOperation(input))
    {
        return false;
    }

    string operatorName = input.ChildNodes[1].Token.Terminal.Name;
    return operatorName == GrammarNames.TokenIntersect;
}
/// <summary>
/// Whether or not this node represents a union: a ReferenceFunctionCall whose single child is a Union node
/// </summary>
public static bool IsUnion(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.ReferenceFunctionCall))
    {
        return false;
    }

    if (input.ChildNodes.Count != 1)
    {
        return false;
    }

    return input.ChildNodes[0].Is(GrammarNames.Union);
}
/// <summary>
/// Whether this node is a function call with a name, and not just a unary or binary operation
/// </summary>
public static bool IsNamedFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.FunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.FunctionName);
        case GrammarNames.ReferenceFunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.RefFunctionName);
        case GrammarNames.UDFunctionCall:
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Whether this node is a binary or unary operation
/// </summary>
public static bool IsOperation(this ParseTreeNode input)
{
    if (input.IsBinaryOperation())
    {
        return true;
    }

    return input.IsUnaryOperation();
}
/// <summary>
/// Whether this node is a two-child Reference whose second child is a named function
/// </summary>
public static bool IsExternalUDFunction(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.Reference))
    {
        return false;
    }

    if (input.ChildNodes.Count != 2)
    {
        return false;
    }

    return input.ChildNodes[1].IsNamedFunction();
}
/// <summary>
/// True if this node is a unary prefix operation applied to a number constant
/// </summary>
public static bool IsNumberWithSign(this ParseTreeNode input)
{
    if (!IsUnaryPrefixOperation(input))
    {
        return false;
    }

    // Operand layout checked here: operand -> Constant -> Number
    ParseTreeNode constantCandidate = input.ChildNodes[1].ChildNodes[0];
    if (!constantCandidate.Is(GrammarNames.Constant))
    {
        return false;
    }

    return constantCandidate.ChildNodes[0].Is(GrammarNames.Number);
}
/// <summary>
/// Extract all of the information from a Prefix non-terminal
/// </summary>
/// <param name="prefix">The Prefix node to read</param>
public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix)
{
    return PrefixInfo.From(prefix);
}
/// <summary>
/// Descend through first children until a node that is not a Formula is reached
/// </summary>
public static ParseTreeNode SkipFormula(this ParseTreeNode input)
{
    ParseTreeNode current = input;
    for (; current.Is(GrammarNames.Formula); current = current.ChildNodes.First())
    {
        // Descending is all that is needed; the loop header does the work.
    }

    return current;
}
/// <summary>
/// Get all child nodes that are references and aren't part of another reference expression
/// </summary>
/// <remarks>
/// The traversal does not descend into a Reference node, and every Reference found is
/// passed through <see cref="SkipToRelevant"/> before being returned.
/// </remarks>
public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
{
    return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
        .Where(node => node.Is(GrammarNames.Reference))
        .Select(node => node.SkipToRelevant())
        ;
}
/// <summary>
/// Gets the ParserReferences from the input parse tree node and its children
/// </summary>
/// <remarks>
/// 5 cases:
/// 1. ReferenceItem node: convert to ParserReference
/// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
///    (to include the references in the arguments of external UDFs)
/// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
/// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
/// 5. Other cases (RefFunctionCall, Union, Arguments):recursive call on the nodes returned from GetReferenceNodes(node)
/// </remarks>
public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
{
    // A Reference with a single child is only a wrapper; work on the wrapped node.
    if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
    {
        node = node.ChildNodes[0];
    }

    var list = new List<ParserReference>();
    switch (node.Type())
    {
        case GrammarNames.Cell:
        case GrammarNames.NamedRange:
        case GrammarNames.HorizontalRange:
        case GrammarNames.VerticalRange:
        case GrammarNames.StructuredReference:
            list.Add(new ParserReference(node));
            break;
        case GrammarNames.Reference:
            list.Add(new ParserReference(node));
            list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            break;
        default:
            if (node.IsRange())
            {
                var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray();
                var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray();
                if (IsCellReference(rangeStart) && IsCellReference(rangeEnd))
                {
                    // Cell:Cell - reuse the start reference and widen it into a cell range.
                    ParserReference range = rangeStart.First();
                    range.MaxLocation = rangeEnd.First().MinLocation;
                    range.ReferenceType = ReferenceType.CellRange;
                    range.ReferenceNode = node;
                    range.LocationString = node.Print();
                    list.Add(range);
                }
                else if (IsTableReference(rangeStart) && IsTableReference(rangeEnd) && rangeStart.First().Name == rangeEnd.First().Name && rangeStart.First().TableColumns.Length == 1 && rangeEnd.First().TableColumns.Length == 1)
                {
                    // Two single-column references into the same table - merge them into one reference.
                    ParserReference range = rangeStart.First();
                    range.TableColumns = rangeStart.First().TableColumns.Concat(rangeEnd.First().TableColumns).ToArray();
                    // Specifiers are only kept when both limits agree on them.
                    range.TableSpecifiers = rangeStart.First().TableSpecifiers.SequenceEqual(rangeEnd.First().TableSpecifiers) ? range.TableSpecifiers : new string[0];
                    range.ReferenceNode = node;
                    range.LocationString = node.Print();
                    list.Add(range);
                }
                else
                {
                    list.AddRange(rangeStart);
                    list.AddRange(rangeEnd);
                }
            }
            else
            {
                list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            }
            break;
    }

    return list;
}
// True when the list holds exactly one reference and that reference is a single cell.
private static bool IsCellReference(IList<ParserReference> references)
{
    return references.Count == 1 && references.First().ReferenceType == ReferenceType.Cell;
}
// True when the list holds exactly one reference and that reference is a table (structured) reference.
private static bool IsTableReference(IList<ParserReference> references)
{
    return references.Count == 1 && references.First().ReferenceType == ReferenceType.Table;
}
/// <summary>
/// Whether or not this node represents a range: a binary reference operation whose operator is ":"
/// </summary>
public static bool IsRange(this ParseTreeNode input)
{
    if (!input.IsBinaryReferenceOperation())
    {
        return false;
    }

    return input.ChildNodes[1].Is(":");
}
/// <summary>
/// Go to the first "relevant" child node, i.e. skips wrapper nodes
/// </summary>
/// <param name="input">The input parse tree node</param>
/// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
/// <remarks>
/// Skips:
/// * FormulaWithEq and ArrayFormula nodes
/// * Formula and Argument nodes with a single child
/// * Parentheses
/// * Reference nodes which are just wrappers
/// </remarks>
public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
{
    ParseTreeNode current = input;
    while (true)
    {
        ParseTreeNode next;
        string type = current.Type();

        if (type == GrammarNames.FormulaWithEq || type == GrammarNames.ArrayFormula)
        {
            // The wrapped formula is the second child.
            next = current.ChildNodes[1];
        }
        else if (type == GrammarNames.Argument || type == GrammarNames.Formula)
        {
            if (current.ChildNodes.Count != 1)
            {
                return current;
            }

            next = current.ChildNodes[0];
        }
        else if (type == GrammarNames.Reference)
        {
            // Skip references which are parentheses
            // Skip references without a prefix (=> they only have one child node) if the option is set
            bool isWrapper = (skipReferencesWithoutPrefix && current.ChildNodes.Count == 1) || current.IsParentheses();
            if (!isWrapper)
            {
                return current;
            }

            next = current.ChildNodes[0];
        }
        else
        {
            return current;
        }

        current = next;
    }
}
/// <summary>
/// Pretty-print a parse tree to a string
/// </summary>
/// <param name="input">The node to print; terminals print their token text</param>
/// <exception cref="ArgumentException">
/// If a function node has an unknown shape, or a non-terminal with more than one child has no printing rule
/// </exception>
public static string Print(this ParseTreeNode input)
{
    // For terminals, just print the token text
    if (input.Term is Terminal)
    {
        return input.Token.Text;
    }

    // (Lazy) enumerable for printed children
    var children = input.ChildNodes.Select(Print);
    // Concrete list when needed
    List<string> childrenList;

    // Switch on non-terminals
    switch (input.Term.Name)
    {
        case GrammarNames.Formula:
            // Check if these are brackets, otherwise print first child
            return IsParentheses(input) ? $"({children.First()})" : children.First();
        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            childrenList = children.ToList();
            if (input.IsNamedFunction())
            {
                // Only the closing parenthesis has to be added after the printed children.
                return string.Join("", childrenList) + ")";
            }

            if (input.IsBinaryOperation())
            {
                // format string for "normal" binary operation
                string format = "{0}{1}{2}";
                if (input.IsIntersection())
                {
                    // An intersection is printed as a single space between its operands.
                    format = "{0} {2}";
                }

                return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
            }

            if (input.IsUnion())
            {
                return $"({string.Join(",", childrenList)})";
            }

            if (input.IsUnaryOperation())
            {
                return string.Join("", childrenList);
            }

            throw new ArgumentException("Unknown function type.");
        case GrammarNames.Reference:
            return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);
        case GrammarNames.Prefix:
            var ret = string.Join("", children);
            // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
            if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
            {
                ret += "!";
            }

            return ret;
        case GrammarNames.ArrayFormula:
            return "{=" + children.ElementAt(1) + "}";
        // Terms for which to print all child nodes concatenated
        case GrammarNames.ArrayConstant:
        case GrammarNames.DynamicDataExchange:
        case GrammarNames.FormulaWithEq:
        case GrammarNames.File:
        case GrammarNames.MultiRangeFormula:
        case GrammarNames.StructuredReference:
        case GrammarNames.StructuredReferenceColumn:
        case GrammarNames.StructuredReferenceExpression:
        case GrammarNames.StructuredReferenceSpecifier:
            return string.Join("", children);
        // Terms for which we print the children comma-separated
        case GrammarNames.Arguments:
        case GrammarNames.ArrayRows:
        case GrammarNames.Union:
            return string.Join(",", children);
        case GrammarNames.ArrayColumns:
            return string.Join(";", children);
        case GrammarNames.ConstantArray:
            return $"{{{children.First()}}}";
        default:
            // If it is not defined above and the number of children is exactly one, we want to just print the first child
            if (input.ChildNodes.Count == 1)
            {
                return children.First();
            }

            throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine +
                                        "This probably means the Excel grammar was modified without the print function being modified");
    }
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v170/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v170
{
/// <summary>
/// The kind of thing a <see cref="ParserReference"/> points at.
/// </summary>
public enum ReferenceType
{
// Set for Cell nodes
Cell,
// Set by the parser utilities when a Cell:Cell range is collapsed into one reference
CellRange,
// Set for NamedRange nodes
UserDefinedName,
// Set for HRange nodes
HorizontalRange,
// Set for VRange nodes
VerticalRange,
// Set for RefError nodes
RefError,
// Set for StructuredReference nodes
Table
}
/// <summary>
/// Flat description of one reference found in a formula, filled in from its parse tree node.
/// </summary>
public class ParserReference
{
    public ReferenceType ReferenceType { get; set; }
    public ParseTreeNode ReferenceNode { get; set; }
    public string LocationString { get; set; }
    public string Worksheet { get; set; }
    public string LastWorksheet { get; set; }
    public string FilePath { get; set; }
    public string FileName { get; set; }
    public string Name { get; set; }
    public string MinLocation { get; set; }
    public string MaxLocation { get; set; }
    public string[] TableSpecifiers { get; set; }
    public string[] TableColumns { get; set; }

    /// <summary>
    /// Creates a reference and initializes it from <paramref name="node"/>.
    /// </summary>
    public ParserReference(ParseTreeNode node)
    {
        InitializeReference(node);
    }

    /// <summary>
    /// Initializes the current object based on the input ParseTreeNode
    /// </summary>
    /// <remarks>
    /// For Reference nodes (Prefix ReferenceItem), it initialize the values derived from the Prefix node and
    /// is re-invoked for the ReferenceItem node. ReferenceNode and LocationString always end up
    /// describing the node passed to the outermost call.
    /// </remarks>
    public void InitializeReference(ParseTreeNode node)
    {
        string nodeType = node.Type();
        if (nodeType == GrammarNames.Reference)
        {
            ReadPrefix(node.ChildNodes[0].GetPrefixInfo());
            InitializeReference(node.ChildNodes[1]);
        }
        else if (nodeType == GrammarNames.Cell)
        {
            ReferenceType = ReferenceType.Cell;
            MinLocation = node.ChildNodes[0].Token.ValueString;
            MaxLocation = MinLocation;
        }
        else if (nodeType == GrammarNames.NamedRange)
        {
            ReferenceType = ReferenceType.UserDefinedName;
            Name = node.ChildNodes[0].Token.ValueString;
        }
        else if (nodeType == GrammarNames.StructuredReference)
        {
            ReferenceType = ReferenceType.Table;
            ParseTreeNode qualifier = node.ChildNodes
                .FirstOrDefault(child => child.Type() == GrammarNames.StructuredReferenceQualifier);
            Name = qualifier?.ChildNodes[0].Token.ValueString;
            TableSpecifiers = node.AllNodes()
                .Where(x => x.Is(GrammarNames.TokenSRSpecifier) || x.Is("@"))
                .Select(x => UnEscape(x.Token.ValueString, "'"))
                .ToArray();
            TableColumns = node.AllNodes()
                .Where(x => x.Is(GrammarNames.TokenSRColumn))
                .Select(x => UnEscape(x.Token.ValueString, "'"))
                .ToArray();
        }
        else if (nodeType == GrammarNames.HorizontalRange)
        {
            ReadRangeLimits(node, ReferenceType.HorizontalRange);
        }
        else if (nodeType == GrammarNames.VerticalRange)
        {
            ReadRangeLimits(node, ReferenceType.VerticalRange);
        }
        else if (nodeType == GrammarNames.RefError)
        {
            ReferenceType = ReferenceType.RefError;
        }

        ReferenceNode = node;
        LocationString = node.Print();
    }

    // Copies worksheet and file information from a prefix onto this reference.
    private void ReadPrefix(PrefixInfo prefix)
    {
        if (prefix.HasSheet)
        {
            Worksheet = prefix.Sheet.Replace("''", "'");
        }
        else
        {
            Worksheet = "(Undefined sheet)";
        }

        if (prefix.HasMultipleSheets)
        {
            string[] sheets = prefix.MultipleSheets.Split(':');
            Worksheet = sheets[0];
            LastWorksheet = sheets[1];
        }

        if (prefix.HasFilePath)
        {
            FilePath = prefix.FilePath;
        }

        // A numeric file reference takes precedence over a file name.
        if (prefix.HasFileNumber)
        {
            FileName = prefix.FileNumber.ToString();
        }
        else if (prefix.HasFileName)
        {
            FileName = prefix.FileName;
        }
    }

    // Splits the "min:max" token of a horizontal or vertical range into its two limits.
    private void ReadRangeLimits(ParseTreeNode rangeNode, ReferenceType rangeType)
    {
        string[] limits = rangeNode.ChildNodes[0].Token.ValueString.Split(':');
        ReferenceType = rangeType;
        MinLocation = limits[0];
        MaxLocation = limits[1];
    }

    // Removes every occurrence of the escape character that is not directly followed by another one.
    private string UnEscape(string value, string escapeCharacter)
    {
        string pattern = escapeCharacter + "(?!" + escapeCharacter + ")";
        return System.Text.RegularExpressions.Regex.Replace(value, pattern, "");
    }

    /// <summary>
    /// Returns the printed location of the reference.
    /// </summary>
    public override string ToString() => LocationString;
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v170/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v170
{
// TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23
/// <summary>
/// Simple data class that holds information about a Prefix.
/// </summary>
public class PrefixInfo : IEquatable
{
/// <summary>Path part of the file, or null when the prefix has none.</summary>
public string FilePath { get; }
public bool HasFilePath { get { return FilePath != null; } }

// Numeric file reference; null when the prefix has none.
private readonly int? _fileNumber;
/// <summary>Numeric file reference, or 0 when <see cref="HasFileNumber"/> is false.</summary>
public int FileNumber { get { return _fileNumber ?? 0; } }
public bool HasFileNumber { get { return _fileNumber != null; } }

/// <summary>File name, or null when the prefix has none.</summary>
public string FileName { get; }
public bool HasFileName { get { return FileName != null; } }

/// <summary>True when the prefix names a file, either by name or by number.</summary>
public bool HasFile { get { return HasFileName || HasFileNumber; } }

/// <summary>Sheet name, or null when the prefix has none.</summary>
public string Sheet { get; }
public bool HasSheet { get { return Sheet != null; } }

/// <summary>Multiple-sheet part of the prefix, or null when the prefix has none.</summary>
public string MultipleSheets { get; }
public bool HasMultipleSheets { get { return MultipleSheets != null; } }

/// <summary>Whether the prefix was quoted.</summary>
public bool IsQuoted { get; }
/// <summary>
/// Creates a prefix description from its parts; every part is optional and defaults to "absent".
/// </summary>
public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
{
    // File part
    FilePath = filePath;
    FileName = fileName;
    _fileNumber = fileNumber;

    // Sheet part
    Sheet = sheet;
    MultipleSheets = multipleSheets;

    IsQuoted = isQuoted;
}
/// <summary>
/// Create a PrefixInfo class from a parse tree node
/// </summary>
/// <param name="prefix">A node of type Prefix</param>
/// <exception cref="ArgumentException">If the node is not a Prefix</exception>
internal static PrefixInfo From(ParseTreeNode prefix)
{
    if (prefix.Type() != GrammarNames.Prefix)
    {
        throw new ArgumentException("Not a prefix", nameof(prefix));
    }

    string filePath = null;
    int? fileNumber = null;
    string fileName = null;
    string sheetName = null;
    string multipleSheets = null;

    // Token number we're processing
    var cur = 0;

    // Check for quotes
    var isQuoted = prefix.ChildNodes[cur].Is("'");
    if (isQuoted)
    {
        cur++;
    }

    // Check and process file
    if (prefix.ChildNodes[cur].Is(GrammarNames.File))
    {
        ParseTreeNode file = prefix.ChildNodes[cur];

        if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
        {
            // Numeric filename; the first and last character of the token are stripped before parsing
            fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
        }
        else
        {
            // String filename
            var iCur = 0;
            // Check if it includes a path
            if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
            {
                filePath = file.ChildNodes[iCur].Print();
                iCur++;
            }

            if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
            {
                fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
            }
            else
            {
                fileName = file.ChildNodes[iCur].Print();
            }
        }

        cur++;
    }

    // A prefix that consists of a file only ("File + !") has no further child node,
    // because the exclamation mark is not part of the parse tree. Without this guard
    // the sheet lookup below would index past the end of the child list.
    if (cur < prefix.ChildNodes.Count)
    {
        ParseTreeNode sheetNode = prefix.ChildNodes[cur];

        // Check for a non-quoted sheet
        if (sheetNode.Is(GrammarNames.TokenSheet))
        {
            sheetName = Substr(sheetNode.Print(), 1);
        }
        // Check for a quoted sheet
        else if (sheetNode.Is(GrammarNames.TokenSheetQuoted))
        {
            // remove quote and !
            sheetName = Substr(sheetNode.Print(), 2);

            if (sheetName == "")
            {
                // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                sheetName = " ";
            }
        }
        // Check if multiple sheets
        else if (sheetNode.Is(GrammarNames.TokenMultipleSheets))
        {
            multipleSheets = Substr(sheetNode.Print(), 1);
        }
        // NOTE(review): quoted multiple-sheet tokens (MultipleSheetsQuotedToken) are not handled here - confirm whether that is intended.
    }

    return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
}
/// <summary>
/// Extends a quoted sheet token to the left so that it also covers whitespace that directly
/// precedes it in the source text, updating both the node's span and its token text.
/// </summary>
/// <param name="quotedSheetNode">The quoted sheet token node to adjust</param>
/// <param name="sourceText">The complete formula text the node was parsed from</param>
internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
{
    var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
    SourceLocation currentLocation = quotedSheetNode.Span.Location;
    if (newPosition == currentLocation.Position)
    {
        return;
    }

    // The start moves left by this many characters, so the column has to move left by the
    // same amount (the previous code added the shift instead of subtracting it).
    // NOTE(review): assumes the skipped whitespace is on the same line as the token - confirm.
    var shift = currentLocation.Position - newPosition;
    var newLocation = new SourceLocation(newPosition, currentLocation.Line, currentLocation.Column - shift);
    quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

    // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
    // The span was replaced above, so Span.Length already is the widened length.
    typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
        ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
}
/// <summary>
/// Walks backwards from the start of the node's span over any whitespace characters and returns
/// the position of the first character of that whitespace run.
/// </summary>
/// <param name="nodeSheetQuoted">The node whose span start is used as starting point.</param>
/// <param name="sourceText">The text the span of the node refers to.</param>
/// <returns>The span start itself when it is not preceded by whitespace, otherwise a smaller position.</returns>
private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
{
    var position = nodeSheetQuoted.Span.Location.Position;
    while (position > 0 && char.IsWhiteSpace(sourceText[position - 1]))
    {
        position--;
    }

    return position;
}
/// <summary>
/// Returns <paramref name="s"/> without its first <paramref name="removeFirst"/> and its last
/// <paramref name="removeLast"/> characters.
/// </summary>
/// <param name="s">The string to trim.</param>
/// <param name="removeLast">Number of characters to drop from the end.</param>
/// <param name="removeFirst">Number of characters to drop from the start.</param>
/// <returns>The remaining middle part of the string.</returns>
private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
{
    var keptLength = s.Length - removeLast - removeFirst;
    return s.Substring(removeFirst, keptLength);
}
/// <summary>
/// Compares this instance with an arbitrary object; anything that is not a <see cref="PrefixInfo"/> is treated like null.
/// </summary>
public override bool Equals(object other)
{
    var otherPrefix = other as PrefixInfo;
    return Equals(otherPrefix);
}
/// <summary>
/// Two prefixes are equal when their file number matches and their file path, file name, sheet
/// and multiple-sheets text match case-insensitively (ordinal).
/// </summary>
/// <param name="other">The prefix to compare with; null is never equal.</param>
public bool Equals(PrefixInfo other)
{
    if (ReferenceEquals(null, other))
    {
        return false;
    }

    if (ReferenceEquals(this, other))
    {
        return true;
    }

    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;
    if (!(_fileNumber == other._fileNumber))
    {
        return false;
    }

    if (!string.Equals(FilePath, other.FilePath, ignoreCase))
    {
        return false;
    }

    if (!string.Equals(FileName, other.FileName, ignoreCase))
    {
        return false;
    }

    if (!string.Equals(Sheet, other.Sheet, ignoreCase))
    {
        return false;
    }

    return string.Equals(MultipleSheets, other.MultipleSheets, ignoreCase);
}
/// <summary>
/// Combines the case-insensitive hashes of sheet, file path, file name and multiple-sheets text with
/// the hash of the file number, using the same members that the equality comparison looks at.
/// </summary>
public override int GetHashCode()
{
    var comparer = StringComparer.OrdinalIgnoreCase;
    var filePathHash = FilePath != null ? comparer.GetHashCode(FilePath) : 0;
    var fileNameHash = FileName != null ? comparer.GetHashCode(FileName) : 0;
    var fileNumberHash = _fileNumber?.GetHashCode() ?? 0;
    var multipleSheetsHash = MultipleSheets != null ? comparer.GetHashCode(MultipleSheets) : 0;

    // Overflow is expected while mixing the hashes, hence the unchecked context.
    unchecked
    {
        // A missing sheet hashes like the empty string.
        var result = comparer.GetHashCode(Sheet ?? "");
        result = (result * 397) ^ filePathHash;
        result = (result * 397) ^ fileNameHash;
        result = (result * 397) ^ fileNumberHash;
        result = (result * 397) ^ multipleSheetsHash;
        return result;
    }
}
/// <summary>
/// Equality operator; delegates to the static two-argument <c>Equals</c>, so null operands are handled.
/// </summary>
public static bool operator ==(PrefixInfo left, PrefixInfo right) => Equals(left, right);
/// <summary>
/// Inequality operator; the negation of the static two-argument <c>Equals</c>.
/// </summary>
public static bool operator !=(PrefixInfo left, PrefixInfo right) => !Equals(left, right);
/// <summary>
/// Renders the prefix as text: an optional opening quote, the file path, the file number and file
/// name (each in square brackets), the sheet, the multiple-sheets text, an optional closing quote
/// and finally an exclamation mark. Parts that are not present are left out.
/// </summary>
public override string ToString()
{
    var quote = IsQuoted ? "'" : string.Empty;

    // Each part is only read when its Has* flag is set, in the same order as it is written.
    var text = new StringBuilder(quote);
    text.Append(HasFilePath ? FilePath : string.Empty);
    text.Append(HasFileNumber ? $"[{FileNumber}]" : string.Empty);
    text.Append(HasFileName ? $"[{FileName}]" : string.Empty);
    text.Append(HasSheet ? Sheet : string.Empty);
    text.Append(HasMultipleSheets ? MultipleSheets : string.Empty);
    text.Append(quote).Append("!");
    return text.ToString();
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v170/WordsTerminal.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;
namespace XLParser.Web.XLParserVersions.v170
{
/// <summary>
/// Terminal that determines whether the input contains one of the expected words.
/// </summary>
/// <remarks>
/// The words are stored in a trie. The children of each node are kept in an array that spans the
/// range between the lowest and the highest child character, which allows direct indexing. Do not
/// use this terminal for words that have a large difference between the low and high character of
/// a token, because the child arrays grow with that range.
/// </remarks>
public class WordsTerminal : Terminal
{
    private readonly Node _rootNode;
    private readonly List<string> _words;
    private bool _caseSensitive;

    /// <summary>
    /// Creates the terminal. The words are copied; the trie itself is built in <see cref="Init"/>.
    /// </summary>
    /// <param name="name">Name of the terminal.</param>
    /// <param name="words">The words this terminal should recognize.</param>
    public WordsTerminal(string name, IEnumerable<string> words) : base(name)
    {
        _rootNode = new Node(0);
        _words = new List<string>(words);
    }

    /// <summary>
    /// Builds the trie. When the grammar is not case sensitive the words are stored upper-cased
    /// (invariant culture), matching the normalization applied to the input in <see cref="TryMatch"/>.
    /// </summary>
    public override void Init(GrammarData grammarData)
    {
        base.Init(grammarData);
        _caseSensitive = Grammar.CaseSensitive;
        foreach (var word in _words)
        {
            AddWordToTree(_caseSensitive ? word : word.ToUpperInvariant());
        }

        if (EditorInfo == null)
        {
            EditorInfo = new TokenEditorInfo(TokenType.Unknown, TokenColor.Text, TokenTriggers.None);
        }
    }

    /// <summary>
    /// Returns the words themselves as the possible starts of this terminal.
    /// </summary>
    public override IList<string> GetFirsts() => _words;

    /// <summary>
    /// Tries to match the longest known word that starts at the preview position of the source.
    /// </summary>
    /// <returns>A token covering the matched word, or null when no word starts at that position.</returns>
    public override Token TryMatch(ParsingContext context, ISourceStream source)
    {
        var node = _rootNode;

        // Remember the deepest node that completes a word. Without this, walking past a complete
        // word into the prefix of a longer word that then fails to match would lose the shorter match.
        var lastWordNode = node.IsTerminal ? node : null;
        var input = source.Text;
        for (var i = source.PreviewPosition; i < input.Length; ++i)
        {
            var c = _caseSensitive ? input[i] : char.ToUpperInvariant(input[i]);
            var nextNode = node[c];
            if (nextNode is null)
            {
                break;
            }

            node = nextNode;
            if (node.IsTerminal)
            {
                lastWordNode = node;
            }
        }

        if (lastWordNode is null)
        {
            return null;
        }

        // The depth of a node equals the number of characters of the word it completes.
        source.PreviewPosition += lastWordNode.Length;
        return source.CreateToken(OutputTerminal);
    }

    /// <summary>
    /// Adds one word to the trie and marks the node of its last character as the end of a word.
    /// </summary>
    private void AddWordToTree(string word)
    {
        var node = _rootNode;
        foreach (var c in word)
        {
            node = node.GetOrAddChild(c);
        }

        node.IsTerminal = true;
    }

    /// <summary>
    /// Trie node. Its children are stored in an array indexed by <c>character - _lowChar</c>.
    /// </summary>
    private class Node
    {
        // Lowest and highest character that currently has a slot in _children.
        private char _lowChar = '\0';
        private char _highChar = '\0';
        private Node[] _children;

        /// <param name="length">Depth of the node, i.e. the number of characters leading to it.</param>
        public Node(int length)
        {
            Length = length;
        }

        /// <summary>True when a word ends at this node.</summary>
        public bool IsTerminal { get; set; }

        /// <summary>Number of characters on the path from the root to this node.</summary>
        public int Length { get; }

        /// <summary>
        /// Gets the child for character <paramref name="c"/>, or null when there is none.
        /// </summary>
        public Node this[char c]
        {
            get
            {
                if (_children is null)
                {
                    return null;
                }

                if (c < _lowChar || c > _highChar)
                {
                    return null;
                }

                // The slot may still be empty when only other characters of the range were added.
                return _children[c - _lowChar];
            }
        }

        /// <summary>
        /// Returns the child for character <paramref name="c"/>, creating it (and growing the
        /// child array at the low or high end when necessary) if it does not exist yet.
        /// </summary>
        internal Node GetOrAddChild(char c)
        {
            if (_children is null)
            {
                var node = new Node(Length + 1);
                _children = new[] { node };
                _lowChar = c;
                _highChar = c;
                return node;
            }

            var newLowChar = (char)Math.Min(_lowChar, c);
            if (newLowChar != _lowChar)
            {
                // Grow at the low end: enlarge the array, shift the existing children up by the
                // number of new slots and clear the slots that were freed at the start.
                var newChildrenCount = _highChar - newLowChar + 1;
                Array.Resize(ref _children, newChildrenCount);
                var ofs = _lowChar - newLowChar;
                Array.Copy(_children, 0, _children, ofs, newChildrenCount - ofs);
                Array.Clear(_children, 0, ofs);
                _lowChar = newLowChar;
                return _children[0] = new Node(Length + 1);
            }

            var newHighChar = (char)Math.Max(_highChar, c);
            if (newHighChar != _highChar)
            {
                // Grow at the high end: existing indices stay valid, the new child takes the last slot.
                var newChildrenCount = newHighChar - _lowChar + 1;
                Array.Resize(ref _children, newChildrenCount);
                _highChar = newHighChar;
                return _children[newChildrenCount - 1] = new Node(Length + 1);
            }

            // The character is inside the current range; fill its slot on first use.
            var charIdx = c - _lowChar;
            var child = _children[charIdx];
            if (child is null)
            {
                return _children[charIdx] = new Node(Length + 1);
            }

            return child;
        }
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v171/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
namespace XLParser.Web.XLParserVersions.v171
{
/// <summary>
/// Contains the XLParser grammar
/// </summary>
/// <remarks>
/// The class first declares the terminals (region 1) and non-terminals (region 2) and then wires
/// them together into rules and operator precedences in the constructor.
/// </remarks>
[Language("Excel Formulas", "1.7.1", "Grammar for Excel Formulas")]
public class ExcelFormulaGrammar : Grammar
{
    #region 1-Terminals

    #region Symbols and operators

    public Terminal at => ToTerm("@");
    public Terminal comma => ToTerm(",");
    public Terminal colon => ToTerm(":");
    public Terminal hash => ToTerm("#");
    public Terminal semicolon => ToTerm(";");
    public Terminal OpenParen => ToTerm("(");
    public Terminal CloseParen => ToTerm(")");
    public Terminal CloseSquareParen => ToTerm("]");
    public Terminal OpenSquareParen => ToTerm("[");
    public Terminal exclamationMark => ToTerm("!");
    public Terminal CloseCurlyParen => ToTerm("}");
    public Terminal OpenCurlyParen => ToTerm("{");
    public Terminal QuoteS => ToTerm("'");

    public Terminal mulop => ToTerm("*");
    public Terminal plusop => ToTerm("+");
    public Terminal divop => ToTerm("/");
    public Terminal minop => ToTerm("-");
    public Terminal concatop => ToTerm("&");
    public Terminal expop => ToTerm("^");

    // Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
    // Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
    public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);

    public Terminal percentop => ToTerm("%");

    public Terminal gtop => ToTerm(">");
    public Terminal eqop => ToTerm("=");
    public Terminal ltop => ToTerm("<");
    public Terminal neqop => ToTerm("<>");
    public Terminal gteop => ToTerm(">=");
    public Terminal lteop => ToTerm("<=");

    #endregion

    #region Literals

    public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE", "T", "F")
    {
        Priority = TerminalPriority.Bool
    };

    public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None)
    {
        DefaultIntTypes = new[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt }
    };

    public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"",
        StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes);

    public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'",
        StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes)
    { Priority = TerminalPriority.SingleQuotedString };

    public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!", "#");
    public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError);

    #endregion

    #region Functions

    private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers
    private const string AllUdfChars = SpecialUdfChars + @"\\.\w";
    private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)";

    // The following regex uses the rather exotic feature Character Class Subtraction
    // https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction
    private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\(";

    public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) { Priority = TerminalPriority.UDF };

    public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(", "I", "O")
    { Priority = TerminalPriority.ExcelRefFunction };

    public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(", "I", "C")
    { Priority = TerminalPriority.ExcelRefFunction };

    // Every built-in function name is matched together with its opening parenthesis
    public Terminal ExcelFunction { get; } = new WordsTerminal(GrammarNames.ExcelFunction, excelFunctionList.Select(f => f + '('))
    { Priority = TerminalPriority.ExcelFunction };

    // Using this instead of Empty allows a more accurate tree
    public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);

    #endregion

    #region References and names

    private const string ColumnPattern = @"(?:[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D]|[A-Z]{1,2})";
    private const string RowPattern = @"(?:104857[0-6]|10485[0-6][0-9]|1048[0-4][0-9]{2}|104[0-7][0-9]{3}|10[0-3][0-9]{4}|[1-9][0-9]{1,5}|[1-9])";

    // Possible first characters of column and row based tokens: the letters A-Z respectively the digits 1-9, plus "$"
    private static readonly string[] ColumnPrefix = Enumerable.Range('A', 'Z' - 'A' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray();
    private static readonly string[] RowPrefix = Enumerable.Range('1', '9' - '1' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray();

    public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern, ColumnPrefix);
    public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?" + RowPattern + ":[$]?" + RowPattern, RowPrefix);

    private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?" + RowPattern;
    public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex, ColumnPrefix)
    { Priority = TerminalPriority.CellToken };

    private static readonly HashSet<UnicodeCategory> UnicodeLetterCategories = new HashSet<UnicodeCategory>
    {
        UnicodeCategory.UppercaseLetter,
        UnicodeCategory.LowercaseLetter,
        UnicodeCategory.TitlecaseLetter,
        UnicodeCategory.ModifierLetter,
        UnicodeCategory.OtherLetter
    };

    // 48718 letters; passing them as prefixes presumably lets the parser skip the name terminal for
    // tokens starting with digits, parentheses, operators...
    private static readonly string[] UnicodeLetters = Enumerable.Range(0, ushort.MaxValue).Where(codePoints => UnicodeLetterCategories.Contains(CharUnicodeInfo.GetUnicodeCategory((char)codePoints))).Select(codePoint => char.ToString((char)codePoint)).ToArray();
    private static readonly string[] NameStartCharPrefix = UnicodeLetters.Concat(new[] { @"\", "_" }).ToArray();

    // Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark
    private const string NameStartCharRegex = @"[\p{L}\\_]";
    private const string NameValidCharacterRegex = @"[\w\\_\.\?€]";

    public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*", NameStartCharPrefix)
    { Priority = TerminalPriority.Name };

    // Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference.
    // If we ever parse R1C1 references, make sure to include them here
    // TODO: Add all function names here
    private const string NamedRangeCombinationRegex =
        "((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
        // \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
        + "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
        // allow large cell references (e.g. A1048577) as named range
        // The last alternative covers row numbers with 8 or more digits; it used to read "d{8,}",
        // which matched the literal letter 'd' instead of digits.
        + "|(" + ColumnPattern + @"(104857[7-9]|10485[89][0-9]|1048[6-9][0-9]{2}|1049[0-9]{3}|10[5-9][0-9]{4}|1[1-9][0-9]{5}|[2-9][0-9]{6}|[1-9][0-9]{7,})" + NameValidCharacterRegex + "*)"
        ;

    // To prevent e.g. "A1A1" being parsed as 2 cell tokens
    public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NamedRangeCombinationRegex,
        ColumnPrefix.Concat(new[] { "T", "F" }).ToArray())
    { Priority = TerminalPriority.NamedRangeCombination };

    public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+", "_")
    { Priority = TerminalPriority.ReservedName };

    #region Structured References

    private const string SRSpecifierRegex = @"#(All|Data|Headers|Totals|This Row)";
    public Terminal SRSpecifierToken = new RegexBasedTerminal(GrammarNames.TokenSRSpecifier, SRSpecifierRegex, "#")
    { Priority = TerminalPriority.StructuredReference };

    private const string SRColumnRegex = @"(?:[^\[\]'#@]|(?:'['\[\]#@]))+";
    public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, SRColumnRegex)
    { Priority = TerminalPriority.StructuredReference };

    #endregion

    #region Prefixes

    private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
    private const string notSheetNameChars = @"'*\[\]\\:/?";
    //const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
    //const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
    private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
    private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*";
    //private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";

    public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!")
    { Priority = TerminalPriority.SheetToken };

    public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!")
    { Priority = TerminalPriority.SheetQuotedToken };

    private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!";
    private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!";

    public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex)
    { Priority = TerminalPriority.MultipleSheetsToken };

    public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex)
    { Priority = TerminalPriority.MultipleSheetsToken };

    private const string fileNameNumericRegex = @"\[[0-9]+\](?!,)(?=.*!)";
    public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex, "[")
    { Priority = TerminalPriority.FileNameNumericToken };

    private const string fileNameInBracketsRegex = @"\[[^\[\]]+\](?!,)(?=.*!)";
    public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex, "[")
    { Priority = TerminalPriority.FileName };

    // Source: https://stackoverflow.com/a/14632579
    private const string fileNameRegex = @"[^\.\\\[\]]+\..{1,4}";
    public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex)
    { Priority = TerminalPriority.FileName };

    // Source: http://stackoverflow.com/a/6416209/572635
    private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\-.$ @]+)\\(([^<>\"" /\|?*\\']|( |''))*\\)*";
    // The optional port is ":" followed by digits; it used to read "(0-9)", which only matched the literal text "0-9".
    private const string urlPathRegex = @"http(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:[0-9]*)*[/]([a-zA-Z0-9\-\.\?\,\'+&%\$#_ ()]*[/])*";
    private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")";
    public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex)
    { Priority = TerminalPriority.FileNamePath };

    #endregion

    #endregion

    #endregion

    #region 2-NonTerminals

    // Most non-terminals are first defined here, so they can be used anywhere in the rules
    // Otherwise you can only use non-terminals that have been defined previously
    public NonTerminal Argument { get; } = new NonTerminal(GrammarNames.Argument);
    public NonTerminal Arguments { get; } = new NonTerminal(GrammarNames.Arguments);
    public NonTerminal ArrayColumns { get; } = new NonTerminal(GrammarNames.ArrayColumns);
    public NonTerminal ArrayConstant { get; } = new NonTerminal(GrammarNames.ArrayConstant);
    public NonTerminal ArrayFormula { get; } = new NonTerminal(GrammarNames.ArrayFormula);
    public NonTerminal ArrayRows { get; } = new NonTerminal(GrammarNames.ArrayRows);
    public NonTerminal Bool { get; } = new NonTerminal(GrammarNames.Bool);
    public NonTerminal Cell { get; } = new NonTerminal(GrammarNames.Cell);
    public NonTerminal Constant { get; } = new NonTerminal(GrammarNames.Constant);
    public NonTerminal ConstantArray { get; } = new NonTerminal(GrammarNames.ConstantArray);
    public NonTerminal DynamicDataExchange { get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
    public NonTerminal EmptyArgument { get; } = new NonTerminal(GrammarNames.EmptyArgument);
    public NonTerminal Error { get; } = new NonTerminal(GrammarNames.Error);
    public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
    public NonTerminal Formula { get; } = new NonTerminal(GrammarNames.Formula);
    public NonTerminal FormulaWithEq { get; } = new NonTerminal(GrammarNames.FormulaWithEq);
    public NonTerminal FunctionCall { get; } = new NonTerminal(GrammarNames.FunctionCall);
    public NonTerminal FunctionName { get; } = new NonTerminal(GrammarNames.FunctionName);
    public NonTerminal HRange { get; } = new NonTerminal(GrammarNames.HorizontalRange);
    public NonTerminal InfixOp { get; } = new NonTerminal(GrammarNames.TransientInfixOp);
    public NonTerminal MultiRangeFormula { get; } = new NonTerminal(GrammarNames.MultiRangeFormula);
    public NonTerminal NamedRange { get; } = new NonTerminal(GrammarNames.NamedRange);
    public NonTerminal Number { get; } = new NonTerminal(GrammarNames.Number);
    public NonTerminal PostfixOp { get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
    public NonTerminal Prefix { get; } = new NonTerminal(GrammarNames.Prefix);
    public NonTerminal PrefixOp { get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
    public NonTerminal QuotedFileSheet { get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
    public NonTerminal Reference { get; } = new NonTerminal(GrammarNames.Reference);
    public NonTerminal ReferenceItem { get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
    public NonTerminal ReferenceFunctionCall { get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
    public NonTerminal RefError { get; } = new NonTerminal(GrammarNames.RefError);
    public NonTerminal RefFunctionName { get; } = new NonTerminal(GrammarNames.RefFunctionName);
    public NonTerminal ReservedName { get; } = new NonTerminal(GrammarNames.ReservedName);
    public NonTerminal Sheet { get; } = new NonTerminal(GrammarNames.Sheet);
    public NonTerminal Start { get; } = new NonTerminal(GrammarNames.TransientStart);
    public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
    public NonTerminal StructuredReferenceColumn { get; } = new NonTerminal(GrammarNames.StructuredReferenceColumn);
    public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
    public NonTerminal StructuredReferenceSpecifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceSpecifier);
    public NonTerminal StructuredReferenceQualifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceQualifier);
    public NonTerminal Text { get; } = new NonTerminal(GrammarNames.Text);
    public NonTerminal UDFName { get; } = new NonTerminal(GrammarNames.UDFName);
    public NonTerminal UDFunctionCall { get; } = new NonTerminal(GrammarNames.UDFunctionCall);
    public NonTerminal Union { get; } = new NonTerminal(GrammarNames.Union);
    public NonTerminal VRange { get; } = new NonTerminal(GrammarNames.VerticalRange);

    #endregion

    /// <summary>
    /// Defines the punctuation, the grammar rules and the operator precedences.
    /// </summary>
    public ExcelFormulaGrammar()
    {
        #region Punctuation

        MarkPunctuation(OpenParen, CloseParen);
        MarkPunctuation(OpenCurlyParen, CloseCurlyParen);

        #endregion

        #region Rules

        #region Base rules

        Root = Start;

        Start.Rule =
            FormulaWithEq
            | Formula
            | ArrayFormula
            | MultiRangeFormula
            ;
        MarkTransient(Start);

        ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;

        MultiRangeFormula.Rule = eqop + Union;

        FormulaWithEq.Rule = eqop + Formula;

        Formula.Rule =
            Reference + ReduceHere()
            | Constant
            | FunctionCall
            | ConstantArray
            | OpenParen + Formula + CloseParen
            | ReservedName
            ;

        ReservedName.Rule = ReservedNameToken;

        Constant.Rule =
            Number
            | Text
            | Bool
            | Error
            ;

        Text.Rule = TextToken;
        Number.Rule = NumberToken;
        Bool.Rule = BoolToken;
        Error.Rule = ErrorToken;
        RefError.Rule = RefErrorToken;

        #endregion

        #region Functions

        FunctionCall.Rule =
            FunctionName + Arguments + CloseParen
            | PrefixOp + Formula
            | Formula + PostfixOp
            | Formula + InfixOp + Formula
            ;

        FunctionName.Rule = ExcelFunction;

        Arguments.Rule = MakeStarRule(Arguments, comma, Argument);

        EmptyArgument.Rule = EmptyArgumentToken;
        Argument.Rule = Formula | EmptyArgument;

        PrefixOp.Rule =
            ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
            | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
            | ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
        MarkTransient(PrefixOp);

        InfixOp.Rule =
            expop
            | mulop
            | divop
            | plusop
            | minop
            | concatop
            | gtop
            | eqop
            | ltop
            | neqop
            | gteop
            | lteop;
        MarkTransient(InfixOp);

        // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
        PostfixOp.Rule = PreferShiftHere() + percentop;
        MarkTransient(PostfixOp);

        #endregion

        #region References

        Reference.Rule =
            ReferenceItem
            | ReferenceFunctionCall
            | OpenParen + Reference + PreferShiftHere() + CloseParen
            | Prefix + ReferenceItem
            | DynamicDataExchange
            ;

        ReferenceFunctionCall.Rule =
            Reference + colon + Reference
            | Reference + intersectop + Reference
            | OpenParen + Union + CloseParen
            | RefFunctionName + Arguments + CloseParen
            | Reference + hash
            ;

        RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;

        Union.Rule = MakePlusRule(Union, comma, Reference);

        ReferenceItem.Rule =
            Cell
            | NamedRange
            | VRange
            | HRange
            | RefError
            | UDFunctionCall
            | StructuredReference
            ;
        MarkTransient(ReferenceItem);

        UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
        UDFName.Rule = UDFToken;

        VRange.Rule = VRangeToken;
        HRange.Rule = HRangeToken;

        Cell.Rule = CellToken;

        File.Rule =
            FileNameNumericToken
            | FileNameEnclosedInBracketsToken
            | FilePathToken + FileNameEnclosedInBracketsToken
            | FilePathToken + FileName
            ;

        DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;

        NamedRange.Rule = NameToken | NamedRangeCombinationToken;

        Prefix.Rule =
            SheetToken
            | QuoteS + SheetQuotedToken
            | File + SheetToken
            | QuoteS + File + SheetQuotedToken
            | File + exclamationMark
            | MultipleSheetsToken
            | QuoteS + MultipleSheetsQuotedToken
            | File + MultipleSheetsToken
            | QuoteS + File + MultipleSheetsQuotedToken
            | RefErrorToken
            ;

        StructuredReferenceQualifier.Rule = NameToken;

        StructuredReferenceSpecifier.Rule =
            SRSpecifierToken
            | at
            | OpenSquareParen + SRSpecifierToken + CloseSquareParen;

        StructuredReferenceColumn.Rule =
            SRColumnToken
            | OpenSquareParen + SRColumnToken + CloseSquareParen;

        StructuredReferenceExpression.Rule =
            StructuredReferenceColumn
            | StructuredReferenceColumn + colon + StructuredReferenceColumn
            | at + StructuredReferenceColumn
            | at + StructuredReferenceColumn + colon + StructuredReferenceColumn
            | StructuredReferenceSpecifier
            | StructuredReferenceSpecifier + comma + StructuredReferenceColumn
            | StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
            | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier
            | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn
            | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
            ;

        StructuredReference.Rule =
            OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
            | StructuredReferenceQualifier + OpenSquareParen + CloseSquareParen
            | StructuredReferenceQualifier + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
            ;

        #endregion

        #region Arrays

        ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;

        ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
        ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);

        ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;

        #endregion

        #endregion

        #region 5-Operator Precedence

        // Some of these operators are neutral associative instead of left associative,
        // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
        // structure of the parse tree, we like consistency.
        RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
        RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
        RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
        RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
        RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
        RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
        RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at);
        RegisterOperators(Precedence.Union, Associativity.Left, comma);
        RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
        RegisterOperators(Precedence.Range, Associativity.Left, colon);

        #endregion
    }

    #region Precedence and Priority constants

    // Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
    // Could also be an enum, but this way you don't need int casts
    private static class Precedence
    {
        // Don't use priority 0, Irony seems to view it as no priority set
        public const int Comparison = 1;
        public const int Concatenation = 2;
        public const int Addition = 3;
        public const int Multiplication = 4;
        public const int Exponentiation = 5;
        public const int UnaryPostFix = 6;
        public const int UnaryPreFix = 7;
        //public const int Reference = 8;
        public const int Union = 9;
        public const int Intersection = 10;
        public const int Range = 11;
    }

    // Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
    // E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
    // E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
    private static class TerminalPriority
    {
        // Irony Low value
        //public const int Low = -1000;

        public const int Name = -800;
        public const int ReservedName = -700;

        public const int StructuredReference = -500;

        public const int FileName = -500;
        public const int FileNamePath = -800;

        public const int SingleQuotedString = -100;

        // Irony Normal value, default value
        //public const int Normal = 0;
        public const int Bool = 0;

        public const int MultipleSheetsToken = 100;

        // Irony High value
        //public const int High = 1000;

        public const int CellToken = 1000;

        public const int NamedRangeCombination = 1100;

        public const int UDF = 1150;

        public const int ExcelFunction = 1200;
        public const int ExcelRefFunction = 1200;
        public const int FileNameNumericToken = 1200;
        public const int SheetToken = 1200;
        public const int SheetQuotedToken = 1200;
    }

    #endregion

    // Names of the built-in Excel functions, read from the embedded resource on every access
    private static string[] excelFunctionList => GetExcelFunctionList();

    /// <summary>
    /// Reads the built-in function list resource of this grammar version and splits it into
    /// one entry per non-empty line.
    /// </summary>
    private static string[] GetExcelFunctionList()
    {
        var resource = Properties.Resources.ExcelBuiltinFunctionList_v171;
        using (var sr = new StringReader(resource))
        {
            return sr.ReadToEnd().Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
        }
    }
}
#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
#region Non-Terminals
// Names passed to the NonTerminal constructors of the grammar (plus the name of the ExcelFunction terminal).
public const string Argument = "Argument";
public const string Arguments = "Arguments";
public const string ArrayColumns = "ArrayColumns";
public const string ArrayConstant = "ArrayConstant";
public const string ArrayFormula = "ArrayFormula";
public const string ArrayRows = "ArrayRows";
public const string Bool = "Bool";
public const string Cell = "Cell";
public const string Constant = "Constant";
public const string ConstantArray = "ConstantArray";
public const string DynamicDataExchange = "DynamicDataExchange";
public const string EmptyArgument = "EmptyArgument";
public const string Error = "Error";
public const string ExcelFunction = "ExcelFunction";
public const string File = "File";
public const string Formula = "Formula";
public const string FormulaWithEq = "FormulaWithEq";
public const string FunctionCall = "FunctionCall";
public const string FunctionName = "FunctionName";
public const string HorizontalRange = "HRange";
public const string MultiRangeFormula = "MultiRangeFormula";
public const string NamedRange = "NamedRange";
public const string Number = "Number";
public const string Prefix = "Prefix";
public const string QuotedFileSheet = "QuotedFileSheet";
public const string Range = "Range";
public const string Reference = "Reference";
public const string ReferenceFunctionCall = "ReferenceFunctionCall";
public const string RefError = "RefError";
public const string RefFunctionName = "RefFunctionName";
public const string ReservedName = "ReservedName";
public const string Sheet = "Sheet";
public const string StructuredReference = "StructuredReference";
public const string StructuredReferenceColumn = "StructuredReferenceColumn";
public const string StructuredReferenceExpression = "StructuredReferenceExpression";
public const string StructuredReferenceSpecifier = "StructuredReferenceSpecifier";
public const string StructuredReferenceQualifier = "StructuredReferenceQualifier";
public const string Text = "Text";
public const string UDFName = "UDFName";
public const string UDFunctionCall = "UDFunctionCall";
public const string Union = "Union";
public const string VerticalRange = "VRange";
#endregion
#region Transient Non-Terminals
// Names of the non-terminals that the grammar marks as transient (MarkTransient).
public const string TransientStart = "Start";
public const string TransientInfixOp = "InfixOp";
public const string TransientPostfixOp = "PostfixOp";
public const string TransientPrefixOp = "PrefixOp";
public const string TransientReferenceItem = "ReferenceItem";
#endregion
#region Terminals
// Names passed to the terminal constructors of the grammar.
public const string TokenBool = "BoolToken";
public const string TokenCell = "CellToken";
public const string TokenEmptyArgument = "EmptyArgumentToken";
public const string TokenError = "ErrorToken";
public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
public const string TokenFilePath = "FilePathToken";
public const string TokenFileName = "FileNameToken";
public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
public const string TokenFileNameNumeric = "FileNameNumericToken";
public const string TokenHRange = "HRangeToken";
// Name of the implied symbol terminal that the grammar uses for the intersect operator.
public const string TokenIntersect = "INTERSECT";
public const string TokenMultipleSheets = "MultipleSheetsToken";
public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
public const string TokenName = "NameToken";
public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
public const string TokenNumber = "NumberToken";
public const string TokenRefError = "RefErrorToken";
public const string TokenReservedName = "ReservedNameToken";
public const string TokenSingleQuotedString = "SingleQuotedString";
public const string TokenSheet = "SheetNameToken";
public const string TokenSheetQuoted = "SheetNameQuotedToken";
public const string TokenSRColumn = "SRColumnToken";
public const string TokenSRSpecifier = "SRSpecifierToken";
public const string TokenText = "TextToken";
public const string TokenUDF = "UDFToken";
// Same text as the comma terminal, which separates the references of a Union in the grammar.
public const string TokenUnionOperator = ",";
public const string TokenVRange = "VRangeToken";
#endregion
}
#endregion
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v171/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v171
{
///
/// Excel formula parser
/// Contains parser and utilities that operate directly on the parse tree, or makes working with the parse tree easier.
///
public static class ExcelFormulaParser
{
/// <summary>
/// Parser instance belonging to the current thread. Because the field is marked
/// <see cref="ThreadStaticAttribute"/>, every thread sees its own copy.
/// </summary>
[ThreadStatic]
private static Parser _p;

/// <summary>
/// The parser of the calling thread; it is created from a new
/// <see cref="ExcelFormulaGrammar"/> the first time a thread asks for it.
/// </summary>
private static Parser P
{
    get
    {
        if (_p == null)
        {
            _p = new Parser(new ExcelFormulaGrammar());
        }

        return _p;
    }
}
/// <summary>
/// Parse a formula and return the root node of the resulting parse tree.
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">If the formula could not be parsed.</exception>
/// <returns>Parse tree root node.</returns>
public static ParseTreeNode Parse(string input)
{
    ParseTree tree = ParseToTree(input);
    return tree.Root;
}
/// <summary>
/// Parse a formula and return the complete parse tree.
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">If the formula could not be parsed.</exception>
/// <returns>Parse tree, with the spans of intersect and quoted-sheet tokens adjusted.</returns>
public static ParseTree ParseToTree(string input)
{
    ParseTree parsed = P.Parse(input);
    if (parsed.HasErrors())
    {
        throw new ArgumentException($"Failed parsing input <<{input}>>");
    }

    ParseTreeNode root = parsed.Root;

    // Every intersect token gets a span of length 1 that starts one character earlier.
    // NOTE(review): presumably the implied intersect terminal reports the position just after
    // the space it stands for - confirm against the grammar.
    foreach (ParseTreeNode intersectNode in root.AllNodes(GrammarNames.TokenIntersect))
    {
        SourceLocation reported = intersectNode.Span.Location;
        var shifted = new SourceLocation(reported.Position - 1, reported.Line, reported.Column - 1);
        intersectNode.Span = new SourceSpan(shifted, 1);
    }

    // Quoted sheet tokens are extended to cover whitespace directly in front of them.
    foreach (ParseTreeNode sheetNode in root.AllNodes(GrammarNames.TokenSheetQuoted))
    {
        PrefixInfo.FixQuotedSheetNodeForWhitespace(sheetNode, input);
    }

    return parsed;
}
/// <summary>
/// All nodes below (and including) <paramref name="root"/> in depth-first pre-order, with a conditional stop.
/// </summary>
/// <remarks>
/// Terminal nodes are returned as well; nothing is filtered out here.
/// A node matching <paramref name="stopAt"/> is still returned, only its children are skipped.
/// </remarks>
/// <param name="root">The root node</param>
/// <param name="stopAt">Don't process the children of a node matching this predicate</param>
// inspiration taken from https://irony.codeplex.com/discussions/213938
public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
{
    // Explicit stack instead of recursion, so deep trees cannot overflow the call stack.
    var stack = new Stack<ParseTreeNode>();
    stack.Push(root);
    while (stack.Count > 0)
    {
        ParseTreeNode node = stack.Pop();
        yield return node;

        // Check if we don't want to process the children of this node
        if (stopAt != null && stopAt(node))
        {
            continue;
        }

        var children = node.ChildNodes;
        // Push children on in reverse order so that they will
        // be evaluated left -> right when popped.
        for (int i = children.Count - 1; i >= 0; i--)
        {
            stack.Push(children[i]);
        }
    }
}
/// <summary>
/// All nodes (terminals included) in depth-first pre-order
/// </summary>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
{
    return AllNodesConditional(root);
}

/// <summary>
/// All nodes of a certain type in depth-first pre-order
/// </summary>
/// <param name="root">The root node</param>
/// <param name="type">Node type name, compared with <see cref="Is"/></param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
{
    return AllNodes(root.AllNodes(), type);
}

/// <summary>
/// Filters an already enumerated node sequence down to the nodes of one type.
/// </summary>
internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
{
    return allNodes.Where(node => node.Is(type));
}
/// <summary>
/// Get the parent node of a node
/// </summary>
/// <remarks>
/// This is an expensive operation, as the whole tree will be searched through
/// </remarks>
/// <param name="child">Node whose parent is wanted</param>
/// <param name="treeRoot">Root of the tree to search</param>
/// <exception cref="ArgumentException">If no node of the tree has <paramref name="child"/> as a direct child</exception>
public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
{
    foreach (ParseTreeNode candidate in treeRoot.AllNodes())
    {
        if (candidate.ChildNodes.Any(c => c == child))
        {
            return candidate;
        }
    }

    throw new ArgumentException("Child is not part of the tree", nameof(child));
}
/// <summary>
/// The node type/name, i.e. the name of the grammar term the node was created for
/// </summary>
public static string Type(this ParseTreeNode node) => node.Term.Name;
/// <summary>
/// Check if a node is of a particular type
/// </summary>
/// <param name="pt">Node to inspect</param>
/// <param name="type">Expected type name (exact, case-sensitive comparison)</param>
public static bool Is(this ParseTreeNode pt, string type) => string.Equals(pt.Type(), type);
/// <summary>
/// Checks whether this node is a function
/// </summary>
public static Boolean IsFunction(this ParseTreeNode input)
{
    if (input.Is(GrammarNames.FunctionCall)
        || input.Is(GrammarNames.ReferenceFunctionCall)
        || input.Is(GrammarNames.UDFunctionCall))
    {
        return true;
    }

    // A two-child Reference whose second child is a function also counts.
    // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
    if (!input.Is(GrammarNames.Reference))
    {
        return false;
    }

    return input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction();
}
/// <summary>
/// Whether or not this node represents parentheses "(_)"
/// </summary>
/// <remarks>
/// True for a Formula whose only child is a Formula, and for a Reference whose only child is a Reference.
/// </remarks>
public static bool IsParentheses(this ParseTreeNode input)
{
    string type = input.Type();
    if (type != GrammarNames.Formula && type != GrammarNames.Reference)
    {
        return false;
    }

    // The single child must have the same type as the wrapping node.
    return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(type);
}
/// <summary>
/// Whether this node is a function with three children whose middle child is an operator
/// </summary>
public static bool IsBinaryOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 3)
    {
        return false;
    }

    return input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Whether this node is a binary operation of type FunctionCall
/// </summary>
public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input) =>
    input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);
/// <summary>
/// Whether this node is a binary operation of type ReferenceFunctionCall
/// </summary>
public static bool IsBinaryReferenceOperation(this ParseTreeNode input) =>
    input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
/// <summary>
/// Whether this node is a unary prefix or unary postfix operation
/// </summary>
public static bool IsUnaryOperation(this ParseTreeNode input) =>
    input.IsUnaryPrefixOperation() || input.IsUnaryPostfixOperation();
/// <summary>
/// Whether this node is a function with two children whose first child is an operator
/// </summary>
public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }

    return input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Whether this node is a function with two children whose second child is an operator
/// </summary>
public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }

    return input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Returns <paramref name="input"/> without its last character.
/// </summary>
/// <exception cref="ArgumentOutOfRangeException">If <paramref name="input"/> is empty</exception>
private static string RemoveFinalSymbol(string input)
{
    return input.Remove(input.Length - 1);
}
/// <summary>
/// Get the function or operator name of this function call
/// </summary>
/// <returns>
/// The intersect or union token name for intersections and unions, the printed operator for
/// unary/binary operations, the upper-cased name for named functions, and the printed prefix followed
/// by the function name for external UDFs.
/// </returns>
/// <exception cref="ArgumentException">If the node is none of the above</exception>
public static string GetFunction(this ParseTreeNode input)
{
    if (input.IsIntersection())
    {
        return GrammarNames.TokenIntersect;
    }

    if (input.IsUnion())
    {
        return GrammarNames.TokenUnionOperator;
    }

    if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
    {
        return input.ChildNodes[1].Print();
    }

    if (input.IsUnaryPrefixOperation())
    {
        return input.ChildNodes[0].Print();
    }

    if (input.IsNamedFunction())
    {
        // The last character of the printed name child is dropped before upper-casing.
        // Function names are identifiers, not user-facing text, so the casing must not depend on the
        // current culture (ToUpper() turns "if" into "İF" under tr-TR).
        return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
    }

    if (input.IsExternalUDFunction())
    {
        return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
    }

    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Check if this node is a specific function
/// </summary>
/// <param name="input">Node to inspect</param>
/// <param name="functionName">Name to compare with the result of <see cref="GetFunction"/> (exact match)</param>
public static bool MatchFunction(this ParseTreeNode input, string functionName) =>
    input.IsFunction() && input.GetFunction() == functionName;
/// <summary>
/// Get all the arguments of a function or operation
/// </summary>
/// <returns>
/// The operand nodes for operations, the first child of every argument node for named functions and
/// external UDFs, and the children of the Union node for unions.
/// </returns>
/// <exception cref="ArgumentException">If the node is not a function call</exception>
public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
{
    if (input.IsNamedFunction())
    {
        return input
            .ChildNodes[1] // "Arguments" non-terminal
            .ChildNodes // "Argument" non-terminals
            .Select(node => node.ChildNodes[0]);
    }

    if (input.IsBinaryOperation())
    {
        // Child 1 is the operator itself
        return new[] {input.ChildNodes[0], input.ChildNodes[2]};
    }

    if (input.IsUnaryPrefixOperation())
    {
        return new[] {input.ChildNodes[1]};
    }

    if (input.IsUnaryPostfixOperation())
    {
        return new[] {input.ChildNodes[0]};
    }

    if (input.IsUnion())
    {
        return input.ChildNodes[0].ChildNodes;
    }

    if (input.IsExternalUDFunction())
    {
        return input // Reference
            .ChildNodes[1] // UDFunctionCall
            .ChildNodes[1] // Arguments
            .ChildNodes // Argument non-terminals
            .Select(node => node.ChildNodes[0]);
    }

    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Checks whether this node is a built-in excel function
/// </summary>
/// <remarks>
/// True when the node is a function and its first child is a FunctionName or RefFunctionName node.
/// </remarks>
public static bool IsBuiltinFunction(this ParseTreeNode node)
{
    if (!node.IsFunction())
    {
        return false;
    }

    ParseTreeNode nameNode = node.ChildNodes[0];
    return nameNode.Is(GrammarNames.FunctionName) || nameNode.Is(GrammarNames.RefFunctionName);
}
/// <summary>
/// Whether or not this node represents an intersection
/// </summary>
/// <remarks>
/// True for a binary operation whose operator token was produced by the intersect terminal.
/// </remarks>
public static bool IsIntersection(this ParseTreeNode input)
{
    if (!input.IsBinaryOperation())
    {
        return false;
    }

    return input.ChildNodes[1].Token.Terminal.Name == GrammarNames.TokenIntersect;
}
/// <summary>
/// Whether or not this node represents a union
/// </summary>
/// <remarks>
/// True for a ReferenceFunctionCall whose only child is a Union node.
/// </remarks>
public static bool IsUnion(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.ReferenceFunctionCall) || input.ChildNodes.Count != 1)
    {
        return false;
    }

    return input.ChildNodes[0].Is(GrammarNames.Union);
}
/// <summary>
/// Checks whether this node is a function call with name, and not just a unary or binary operation
/// </summary>
public static bool IsNamedFunction(this ParseTreeNode input)
{
    // Every UDF call is a named function; its children are not inspected.
    if (input.Is(GrammarNames.UDFunctionCall))
    {
        return true;
    }

    if (input.Is(GrammarNames.FunctionCall))
    {
        return input.ChildNodes[0].Is(GrammarNames.FunctionName);
    }

    if (input.Is(GrammarNames.ReferenceFunctionCall))
    {
        return input.ChildNodes[0].Is(GrammarNames.RefFunctionName);
    }

    return false;
}
/// <summary>
/// Whether this node is a binary or unary operation
/// </summary>
public static bool IsOperation(this ParseTreeNode input) =>
    input.IsBinaryOperation() || input.IsUnaryOperation();
/// <summary>
/// Whether this node is a Reference with exactly two children of which the second is a named function
/// </summary>
public static bool IsExternalUDFunction(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.Reference))
    {
        return false;
    }

    var parts = input.ChildNodes;
    return parts.Count == 2 && parts[1].IsNamedFunction();
}
/// <summary>
/// True if this node presents a number constant with a sign
/// </summary>
/// <remarks>
/// That is: a unary prefix operation whose operand's first child is a Constant that in turn starts with a Number.
/// </remarks>
public static bool IsNumberWithSign(this ParseTreeNode input)
{
    if (!IsUnaryPrefixOperation(input))
    {
        return false;
    }

    ParseTreeNode operand = input.ChildNodes[1].ChildNodes[0];
    return operand.Is(GrammarNames.Constant) && operand.ChildNodes[0].Is(GrammarNames.Number);
}
/// <summary>
/// Extract all of the information from a Prefix non-terminal
/// </summary>
/// <param name="prefix">A node of type Prefix</param>
public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix)
{
    return PrefixInfo.From(prefix);
}
/// <summary>
/// Go to the first non-formula child node
/// </summary>
/// <remarks>
/// Descends through the first child for as long as the current node is a Formula.
/// </remarks>
public static ParseTreeNode SkipFormula(this ParseTreeNode input)
{
    ParseTreeNode current = input;
    while (current.Is(GrammarNames.Formula))
    {
        current = current.ChildNodes.First();
    }

    return current;
}
/// <summary>
/// Get all child nodes that are references and aren't part of another reference expression
/// </summary>
/// <remarks>
/// The traversal does not descend into a Reference node, so references nested inside another
/// reference are not returned. Each result is passed through <see cref="SkipToRelevant"/>.
/// </remarks>
public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
{
    return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
        .Where(node => node.Is(GrammarNames.Reference))
        .Select(node => node.SkipToRelevant());
}
/// <summary>
/// Gets the ParserReferences from the input parse tree node and its children
/// </summary>
/// <remarks>
/// 5 cases:
/// 1. ReferenceItem node: convert to ParserReference
/// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
///    (to include the references in the arguments of external UDFs)
/// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
/// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
/// 5. Other cases (RefFunctionCall, Union, Arguments): recursive call on the nodes returned from GetReferenceNodes(node)
/// </remarks>
public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
{
    // A Reference with a single child is only a wrapper; look at what it wraps
    if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
    {
        node = node.ChildNodes[0];
    }

    var list = new List<ParserReference>();
    switch (node.Type())
    {
        case GrammarNames.Cell:
        case GrammarNames.NamedRange:
        case GrammarNames.HorizontalRange:
        case GrammarNames.VerticalRange:
        case GrammarNames.StructuredReference:
            list.Add(new ParserReference(node));
            break;
        case GrammarNames.Reference:
            // Prefixed reference: the reference itself plus everything referenced below its second child
            list.Add(new ParserReference(node));
            list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            break;
        default:
            if (node.IsRange())
            {
                var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).First();
                var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).First();
                if (rangeStart.ReferenceType == ReferenceType.Cell && rangeEnd.ReferenceType == ReferenceType.Cell)
                {
                    // Cell:Cell - reuse the start reference object and widen it into a cell range
                    ParserReference range = rangeStart;
                    range.MaxLocation = rangeEnd.MinLocation;
                    range.ReferenceType = ReferenceType.CellRange;
                    range.ReferenceNode = node;
                    range.LocationString = node.Print();
                    list.Add(range);
                    break;
                }

                if (rangeStart.ReferenceType == ReferenceType.Table && rangeEnd.ReferenceType == ReferenceType.Table && rangeStart.Name == rangeEnd.Name && rangeStart.TableColumns.Length == 1 && rangeEnd.TableColumns.Length == 1)
                {
                    // Two single-column references into the same table - merge them into one reference.
                    // The specifiers are only kept when both ends agree on them.
                    ParserReference range = rangeStart;
                    range.TableColumns = rangeStart.TableColumns.Concat(rangeEnd.TableColumns).ToArray();
                    range.TableSpecifiers = rangeStart.TableSpecifiers.SequenceEqual(rangeEnd.TableSpecifiers) ? range.TableSpecifiers : new string[0];
                    range.ReferenceNode = node;
                    range.LocationString = node.Print();
                    list.Add(range);
                    break;
                }
            }

            // Anything else (including ranges with other limits): collect from the contained references
            list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            break;
    }

    return list;
}
/// <summary>
/// Whether or not this node represents a range
/// </summary>
/// <remarks>
/// True for a binary reference operation with the ":" operator whose two operands end in a child of the same type.
/// </remarks>
public static bool IsRange(this ParseTreeNode input)
{
    if (!input.IsBinaryReferenceOperation() || !input.ChildNodes[1].Is(":"))
    {
        return false;
    }

    string startType = input.ChildNodes[0].ChildNodes.Last().Type();
    string endType = input.ChildNodes[2].ChildNodes.Last().Type();
    return startType == endType;
}
/// <summary>
/// Go to the first "relevant" child node, i.e. skips wrapper nodes
/// </summary>
/// <param name="input">The input parse tree node</param>
/// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
/// <remarks>
/// Skips:
/// * FormulaWithEq and ArrayFormula nodes
/// * Formula and Argument nodes with exactly one child
/// * Parentheses
/// * Reference nodes which are just wrappers
/// </remarks>
public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
{
    ParseTreeNode current = input;
    for (;;)
    {
        string type = current.Type();

        if (type == GrammarNames.FormulaWithEq || type == GrammarNames.ArrayFormula)
        {
            // The wrapped formula is the second child
            current = current.ChildNodes[1];
            continue;
        }

        if (type == GrammarNames.Argument || type == GrammarNames.Formula)
        {
            if (current.ChildNodes.Count != 1)
            {
                return current;
            }

            current = current.ChildNodes[0];
            continue;
        }

        if (type == GrammarNames.Reference)
        {
            // Skip references which are parentheses
            // Skip references without a prefix (=> they only have one child node) if the option is set
            bool isWrapper = (skipReferencesWithoutPrefix && current.ChildNodes.Count == 1) || current.IsParentheses();
            if (!isWrapper)
            {
                return current;
            }

            current = current.ChildNodes[0];
            continue;
        }

        return current;
    }
}
/// <summary>
/// Pretty-print a parse tree to a string
/// </summary>
/// <param name="input">Root of the (sub)tree to print</param>
/// <returns>The token text for terminals, otherwise the children's output combined according to the node type</returns>
/// <exception cref="ArgumentException">
/// For a function node of unknown shape, or for a node type that is not handled and does not have exactly one child
/// </exception>
public static string Print(this ParseTreeNode input)
{
    // For terminals, just print the token text
    if (input.Term is Terminal)
    {
        return input.Token.Text;
    }

    // (Lazy) enumerable for printed children
    var children = input.ChildNodes.Select(Print);
    // Concrete list when needed
    List<string> childrenList;

    // Switch on non-terminals
    switch (input.Term.Name)
    {
        case GrammarNames.Formula:
            // Check if these are brackets, otherwise print first child
            return IsParentheses(input) ? $"({children.First()})" : children.First();

        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            childrenList = children.ToList();
            if (input.IsNamedFunction())
            {
                // Only the closing parenthesis is appended here
                return string.Join("", childrenList) + ")";
            }

            if (input.IsBinaryOperation())
            {
                // format string for "normal" binary operation
                string format = "{0}{1}{2}";
                if (input.IsIntersection())
                {
                    // The intersect operator is printed as a single space
                    format = "{0} {2}";
                }

                return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
            }

            if (input.IsUnion())
            {
                return $"({string.Join(",", childrenList)})";
            }

            if (input.IsUnaryOperation())
            {
                return string.Join("", childrenList);
            }

            throw new ArgumentException("Unknown function type.");

        case GrammarNames.Reference:
            return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);

        case GrammarNames.Prefix:
            var ret = string.Join("", children);
            // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
            if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
            {
                ret += "!";
            }

            return ret;

        case GrammarNames.ArrayFormula:
            return "{=" + children.ElementAt(1) + "}";

        // Terms for which to print all child nodes concatenated
        case GrammarNames.ArrayConstant:
        case GrammarNames.DynamicDataExchange:
        case GrammarNames.FormulaWithEq:
        case GrammarNames.File:
        case GrammarNames.MultiRangeFormula:
        case GrammarNames.StructuredReference:
        case GrammarNames.StructuredReferenceColumn:
        case GrammarNames.StructuredReferenceExpression:
        case GrammarNames.StructuredReferenceSpecifier:
            return string.Join("", children);

        // Terms for which we print the children comma-separated
        case GrammarNames.Arguments:
        case GrammarNames.ArrayRows:
        case GrammarNames.Union:
            return string.Join(",", children);

        case GrammarNames.ArrayColumns:
            return string.Join(";", children);

        case GrammarNames.ConstantArray:
            return $"{{{children.First()}}}";

        default:
            // If it is not defined above and the number of children is exactly one, we want to just print the first child
            if (input.ChildNodes.Count == 1)
            {
                return children.First();
            }

            throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine +
                                        "This probably means the Excel grammar was modified without the print function being modified");
    }
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v171/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v171
{
/// <summary>
/// The kind of reference described by a <see cref="ParserReference"/>.
/// </summary>
public enum ReferenceType
{
// Set for Cell nodes. As the first member it is also the value a ParserReference keeps
// when its node type is not recognised.
Cell,
// Not set by ParserReference itself; assigned when a range whose two ends are both cells is merged.
CellRange,
// Set for NamedRange nodes.
UserDefinedName,
// Set for HorizontalRange nodes.
HorizontalRange,
// Set for VerticalRange nodes.
VerticalRange,
// Set for RefError nodes.
RefError,
// Set for StructuredReference nodes.
Table,
// Set for UDFunctionCall nodes.
UserDefinedFunction
}
/// <summary>
/// Describes one reference of a formula, filled in from a parse tree node.
/// </summary>
public class ParserReference
{
    /// <summary>Kind of reference; keeps its default value for node types that are not recognised.</summary>
    public ReferenceType ReferenceType { get; set; }

    /// <summary>The node this reference was initialized from.</summary>
    public ParseTreeNode ReferenceNode { get; set; }

    /// <summary>Printed form of <see cref="ReferenceNode"/>; for user defined functions cut off before the opening parenthesis.</summary>
    public string LocationString { get; set; }

    /// <summary>Sheet name from the prefix, the first sheet of a multi-sheet prefix, or "(Undefined sheet)".</summary>
    public string Worksheet { get; set; }

    /// <summary>Second sheet of a multi-sheet prefix.</summary>
    public string LastWorksheet { get; set; }

    /// <summary>File path from the prefix, if it has one.</summary>
    public string FilePath { get; set; }

    /// <summary>File number (as text) or file name from the prefix, if it has one.</summary>
    public string FileName { get; set; }

    /// <summary>Name of the named range, table or user defined function.</summary>
    public string Name { get; set; }

    /// <summary>Cell address, or first limit of a horizontal/vertical range.</summary>
    public string MinLocation { get; set; }

    /// <summary>Cell address, or second limit of a horizontal/vertical range.</summary>
    public string MaxLocation { get; set; }

    /// <summary>Specifier tokens (including "@") of a structured reference.</summary>
    public string[] TableSpecifiers { get; set; }

    /// <summary>Column tokens of a structured reference.</summary>
    public string[] TableColumns { get; set; }

    /// <summary>
    /// Creates a reference and initializes it from <paramref name="node"/>.
    /// </summary>
    public ParserReference(ParseTreeNode node) => InitializeReference(node);

    /// <summary>
    /// Initializes the current object based on the input ParseTreeNode
    /// </summary>
    /// <remarks>
    /// For Reference nodes (Prefix ReferenceItem), it initializes the values derived from the Prefix node and
    /// is re-invoked for the ReferenceItem node.
    /// </remarks>
    public void InitializeReference(ParseTreeNode node)
    {
        string kind = node.Type();

        if (kind == GrammarNames.Reference)
        {
            ApplyPrefix(node.ChildNodes[0].GetPrefixInfo());
            InitializeReference(node.ChildNodes[1]);
        }
        else if (kind == GrammarNames.Cell)
        {
            ReferenceType = ReferenceType.Cell;
            MinLocation = node.ChildNodes[0].Token.ValueString;
            MaxLocation = MinLocation;
        }
        else if (kind == GrammarNames.NamedRange)
        {
            ReferenceType = ReferenceType.UserDefinedName;
            Name = node.ChildNodes[0].Token.ValueString;
        }
        else if (kind == GrammarNames.StructuredReference)
        {
            ReferenceType = ReferenceType.Table;
            ParseTreeNode qualifier = node.ChildNodes.FirstOrDefault(child => child.Type() == GrammarNames.StructuredReferenceQualifier);
            Name = qualifier?.ChildNodes[0].Token.ValueString;
            TableSpecifiers = node.AllNodes()
                .Where(item => item.Is(GrammarNames.TokenSRSpecifier) || item.Is("@"))
                .Select(item => UnEscape(item.Token.ValueString, "'"))
                .ToArray();
            TableColumns = node.AllNodes()
                .Where(item => item.Is(GrammarNames.TokenSRColumn))
                .Select(item => UnEscape(item.Token.ValueString, "'"))
                .ToArray();
        }
        else if (kind == GrammarNames.HorizontalRange || kind == GrammarNames.VerticalRange)
        {
            // The token holds both limits, separated by a colon
            string[] limits = node.ChildNodes[0].Token.ValueString.Split(':');
            if (kind == GrammarNames.HorizontalRange)
            {
                ReferenceType = ReferenceType.HorizontalRange;
            }
            else
            {
                ReferenceType = ReferenceType.VerticalRange;
            }

            MinLocation = limits[0];
            MaxLocation = limits[1];
        }
        else if (kind == GrammarNames.RefError)
        {
            ReferenceType = ReferenceType.RefError;
        }
        else if (kind == GrammarNames.UDFunctionCall)
        {
            ReferenceType = ReferenceType.UserDefinedFunction;
            Name = node.ChildNodes[0].ChildNodes[0].Token.ValueString.TrimEnd('(');
        }

        // Runs for every node type, so after the recursion above the outer node wins
        ReferenceNode = node;
        LocationString = node.Print();

        if (ReferenceType == ReferenceType.UserDefinedFunction && Name != null)
        {
            // Keep only the text up to the first '(' at or after the last occurrence of the name
            int nameStart = LocationString.LastIndexOf(Name, System.StringComparison.Ordinal);
            int argumentsStart = LocationString.IndexOf('(', nameStart);
            LocationString = LocationString.Substring(0, argumentsStart);
        }
    }

    /// <summary>
    /// Copies sheet and file information from a prefix into this reference.
    /// </summary>
    private void ApplyPrefix(PrefixInfo info)
    {
        if (info.HasSheet)
        {
            Worksheet = info.Sheet.Replace("''", "'");
        }
        else
        {
            Worksheet = "(Undefined sheet)";
        }

        if (info.HasMultipleSheets)
        {
            string[] sheetNames = info.MultipleSheets.Split(':');
            Worksheet = sheetNames[0];
            LastWorksheet = sheetNames[1];
        }

        if (info.HasFilePath)
        {
            FilePath = info.FilePath;
        }

        if (info.HasFileNumber)
        {
            FileName = info.FileNumber.ToString();
        }
        else if (info.HasFileName)
        {
            FileName = info.FileName;
        }
    }

    /// <summary>
    /// Removes every occurrence of <paramref name="escapeCharacter"/> that is not directly followed by another one.
    /// </summary>
    /// <remarks>
    /// <paramref name="escapeCharacter"/> is inserted into a regular expression as-is.
    /// </remarks>
    private string UnEscape(string value, string escapeCharacter)
    {
        string pattern = escapeCharacter + "(?!" + escapeCharacter + ")";
        return System.Text.RegularExpressions.Regex.Replace(value, pattern, "");
    }

    /// <summary>
    /// Returns <see cref="LocationString"/>.
    /// </summary>
    public override string ToString() => LocationString;
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v171/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v171
{
// TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23
/// <summary>
/// Simple data class that holds information about a Prefix.
/// </summary>
/// <remarks>
/// Equality and hash code compare file number, file path, file name, sheet and multiple sheets
/// (strings case-insensitively); <see cref="IsQuoted"/> does not take part in the comparison.
/// </remarks>
public class PrefixInfo : IEquatable<PrefixInfo>
{
    public string FilePath { get; }
    public bool HasFilePath => FilePath != null;

    private readonly int? _fileNumber;

    /// <summary>The file number, or 0 when there is none (check <see cref="HasFileNumber"/>).</summary>
    public int FileNumber => _fileNumber.GetValueOrDefault();
    public bool HasFileNumber => _fileNumber.HasValue;

    public string FileName { get; }
    public bool HasFileName => FileName != null;

    public bool HasFile => HasFileName || HasFileNumber;

    public string Sheet { get; }
    public bool HasSheet => Sheet != null;

    public string MultipleSheets { get; }
    public bool HasMultipleSheets => MultipleSheets != null;

    public bool IsQuoted { get; }

    public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
    {
        Sheet = sheet;
        _fileNumber = fileNumber;
        FileName = fileName;
        FilePath = filePath;
        MultipleSheets = multipleSheets;
        IsQuoted = isQuoted;
    }

    /// <summary>
    /// Create a PrefixInfo class from a parse tree node
    /// </summary>
    /// <param name="prefix">A node of type Prefix</param>
    /// <exception cref="ArgumentException">If <paramref name="prefix"/> is not a Prefix node</exception>
    internal static PrefixInfo From(ParseTreeNode prefix)
    {
        if (prefix.Type() != GrammarNames.Prefix)
        {
            throw new ArgumentException("Not a prefix", nameof(prefix));
        }

        string filePath = null;
        int? fileNumber = null;
        string fileName = null;
        string sheetName = null;
        string multipleSheets = null;

        // Token number we're processing
        var cur = 0;

        // Check for quotes
        var isQuoted = prefix.ChildNodes[cur].Is("'");
        if (isQuoted)
        {
            cur++;
        }

        // Check and process file
        if (prefix.ChildNodes[cur].Is(GrammarNames.File))
        {
            ParseTreeNode file = prefix.ChildNodes[cur];
            if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
            {
                // Numeric filename: strip the first and last character before parsing the number
                fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
            }
            else
            {
                // String filename
                var iCur = 0;
                // Check if it includes a path
                if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                {
                    filePath = file.ChildNodes[iCur].Print();
                    iCur++;
                }

                if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                {
                    // Strip the first and last character (the enclosing brackets)
                    fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                }
                else
                {
                    fileName = file.ChildNodes[iCur].Print();
                }
            }

            cur++;
        }

        // A prefix can consist of a file only (no sheet token follows); indexing past the last
        // child would throw, so only look for a sheet when there is a node left.
        ParseTreeNode sheetNode = cur < prefix.ChildNodes.Count ? prefix.ChildNodes[cur] : null;
        if (sheetNode != null)
        {
            // Check for a non-quoted sheet
            if (sheetNode.Is(GrammarNames.TokenSheet))
            {
                // remove the last character
                sheetName = Substr(sheetNode.Print(), 1);
            }
            // Check for a quoted sheet
            else if (sheetNode.Is(GrammarNames.TokenSheetQuoted))
            {
                // remove quote and !
                sheetName = Substr(sheetNode.Print(), 2);
                if (sheetName == "")
                {
                    // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                    // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                    sheetName = " ";
                }
            }
            // Check if multiple sheets
            else if (sheetNode.Is(GrammarNames.TokenMultipleSheets))
            {
                multipleSheets = Substr(sheetNode.Print(), 1);
            }
        }

        return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
    }

    /// <summary>
    /// Extends the span and text of a quoted sheet token to the left so that it covers the
    /// whitespace directly in front of it in <paramref name="sourceText"/>.
    /// </summary>
    internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
    {
        var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
        SourceLocation currentLocation = quotedSheetNode.Span.Location;
        if (newPosition == currentLocation.Position)
        {
            return;
        }

        // The start moves left, so the column has to move left by the same number of characters.
        var shift = currentLocation.Position - newPosition;
        var newLocation = new SourceLocation(newPosition, currentLocation.Line, currentLocation.Column - shift);
        // Span still holds the old end position here, so this is the new length
        quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

        // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
        typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
            ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
    }

    /// <summary>
    /// Walks back from the start of the token over any whitespace and returns the resulting position.
    /// </summary>
    private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
    {
        var startIndex = nodeSheetQuoted.Span.Location.Position;
        while (startIndex > 0)
        {
            if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
            {
                break;
            }

            startIndex--;
        }

        return startIndex;
    }

    /// <summary>
    /// Returns <paramref name="s"/> without its first <paramref name="removeFirst"/> and last <paramref name="removeLast"/> characters.
    /// </summary>
    private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
    {
        return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
    }

    public override bool Equals(object other) => Equals(other as PrefixInfo);

    public bool Equals(PrefixInfo other)
    {
        if (ReferenceEquals(null, other)) return false;
        if (ReferenceEquals(this, other)) return true;
        return _fileNumber == other._fileNumber && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase) && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase) && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase) && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
    }

    public override int GetHashCode()
    {
        unchecked
        {
            var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
            hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
            hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
            hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
            hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
            return hashCode;
        }
    }

    public static bool operator ==(PrefixInfo left, PrefixInfo right)
    {
        return Equals(left, right);
    }

    public static bool operator !=(PrefixInfo left, PrefixInfo right)
    {
        return !Equals(left, right);
    }

    /// <summary>
    /// Rebuilds the prefix text: optional quote, file path, file number/name in brackets,
    /// sheet or multiple sheets, optional closing quote and a trailing "!".
    /// </summary>
    public override string ToString()
    {
        var res = new StringBuilder();
        if (IsQuoted) res.Append("'");
        if (HasFilePath) res.Append(FilePath);
        if (HasFileNumber) res.Append($"[{FileNumber}]");
        if (HasFileName) res.Append($"[{FileName}]");
        if (HasSheet) res.Append(Sheet);
        if (HasMultipleSheets) res.Append(MultipleSheets);
        if (IsQuoted) res.Append("'");
        res.Append("!");
        return res.ToString();
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v171/WordsTerminal.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;
namespace XLParser.Web.XLParserVersions.v171
{
/// <summary>
/// Terminal that matches when the input at the current position starts with one of the expected words.
/// If several words match, the longest one wins.
/// </summary>
/// <remarks>
/// The words are stored in a character tree. Children of each node are represented as an array to allow
/// direct indexation. Do not use for words that have a large difference between low and high character
/// of a token, because every node allocates a slot for each character between its lowest and highest child.
/// </remarks>
public class WordsTerminal : Terminal
{
    private readonly Node _rootNode;
    private readonly List<string> _words;
    private bool _caseSensitive;

    /// <param name="name">Name of the terminal</param>
    /// <param name="words">The words to recognise; they are copied</param>
    public WordsTerminal(string name, IEnumerable<string> words) : base(name)
    {
        _rootNode = new Node(0);
        _words = new List<string>(words);
    }

    /// <summary>
    /// Builds the character tree. For a grammar that is not case sensitive the words are stored upper-cased.
    /// </summary>
    public override void Init(GrammarData grammarData)
    {
        base.Init(grammarData);
        _caseSensitive = Grammar.CaseSensitive;
        foreach (var word in _words)
        {
            AddWordToTree(_caseSensitive ? word : word.ToUpperInvariant());
        }

        if (EditorInfo == null)
        {
            EditorInfo = new TokenEditorInfo(TokenType.Unknown, TokenColor.Text, TokenTriggers.None);
        }
    }

    public override IList<string> GetFirsts() => _words;

    /// <summary>
    /// Tries to match one of the words at the preview position of <paramref name="source"/>.
    /// </summary>
    /// <returns>A token for the longest matching word, or null if no word matches</returns>
    public override Token TryMatch(ParsingContext context, ISourceStream source)
    {
        var node = _rootNode;
        // Deepest node seen so far that ends a word. Without it, input that continues along a
        // longer word and then diverges would be rejected even though a shorter word had matched.
        Node lastMatch = node.IsTerminal ? node : null;
        var input = source.Text;
        for (var i = source.PreviewPosition; i < input.Length; ++i)
        {
            var c = _caseSensitive ? input[i] : char.ToUpperInvariant(input[i]);
            var nextNode = node[c];
            if (nextNode is null)
            {
                break;
            }

            node = nextNode;
            if (node.IsTerminal)
            {
                lastMatch = node;
            }
        }

        if (lastMatch is null)
        {
            return null;
        }

        // The depth of a node equals the number of characters consumed to reach it
        source.PreviewPosition += lastMatch.Length;
        return source.CreateToken(OutputTerminal);
    }

    private void AddWordToTree(string word)
    {
        var node = _rootNode;
        foreach (var c in word)
        {
            node = node.GetOrAddChild(c);
        }

        node.IsTerminal = true;
    }

    /// <summary>
    /// Node of the character tree. The children cover the contiguous character range
    /// [_lowChar, _highChar]; slots without a child are null.
    /// </summary>
    private class Node
    {
        private char _lowChar = '\0';
        private char _highChar = '\0';
        private Node[] _children;

        public Node(int length)
        {
            Length = length;
        }

        /// <summary>True if a word ends at this node.</summary>
        public bool IsTerminal { get; set; }

        /// <summary>Depth of the node, i.e. the number of characters on the path from the root.</summary>
        public int Length { get; }

        /// <summary>The child for <paramref name="c"/>, or null if there is none.</summary>
        public Node this[char c]
        {
            get
            {
                if (_children is null)
                {
                    return null;
                }

                if (c < _lowChar || c > _highChar)
                {
                    return null;
                }

                return _children[c - _lowChar];
            }
        }

        /// <summary>
        /// Returns the child for <paramref name="c"/>, creating it (and growing the child array) if necessary.
        /// </summary>
        internal Node GetOrAddChild(char c)
        {
            if (_children is null)
            {
                var node = new Node(Length + 1);
                _children = new[] { node };
                _lowChar = c;
                _highChar = c;
                return node;
            }

            var newLowChar = (char)Math.Min(_lowChar, c);
            if (newLowChar != _lowChar)
            {
                // Grow at the front: enlarge, shift the existing children right, clear the freed slots
                var newChildrenCount = _highChar - newLowChar + 1;
                Array.Resize(ref _children, newChildrenCount);
                var ofs = _lowChar - newLowChar;
                Array.Copy(_children, 0, _children, ofs, newChildrenCount - ofs);
                Array.Clear(_children, 0, ofs);
                _lowChar = newLowChar;
                return _children[0] = new Node(Length + 1);
            }

            var newHighChar = (char)Math.Max(_highChar, c);
            if (newHighChar != _highChar)
            {
                // Grow at the end: the new child takes the last slot
                var newChildrenCount = newHighChar - _lowChar + 1;
                Array.Resize(ref _children, newChildrenCount);
                _highChar = newHighChar;
                return _children[newChildrenCount - 1] = new Node(Length + 1);
            }

            // Within the current range: reuse the child or fill the empty slot
            var charIdx = c - _lowChar;
            var child = _children[charIdx];
            if (child is null)
            {
                return _children[charIdx] = new Node(Length + 1);
            }

            return child;
        }
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v172/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
namespace XLParser.Web.XLParserVersions.v172
{
/// <summary>
/// Contains the XLParser grammar
/// </summary>
[Language("Excel Formulas", "1.7.2", "Grammar for Excel Formulas")]
public class ExcelFormulaGrammar : Grammar
{
#region 1-Terminals
#region Symbols and operators
// Punctuation and operator terminals of the formula language.
// Each expression-bodied property calls ToTerm again on every access; only intersectop is
// created once and stored.
// NOTE(review): presumably Irony's ToTerm hands back the same key term for the same text - confirm.
public Terminal at => ToTerm("@");
public Terminal comma => ToTerm(",");
public Terminal colon => ToTerm(":");
public Terminal hash => ToTerm("#");
public Terminal semicolon => ToTerm(";");
public Terminal OpenParen => ToTerm("(");
public Terminal CloseParen => ToTerm(")");
public Terminal CloseSquareParen => ToTerm("]");
public Terminal OpenSquareParen => ToTerm("[");
public Terminal exclamationMark => ToTerm("!");
public Terminal CloseCurlyParen => ToTerm("}");
public Terminal OpenCurlyParen => ToTerm("{");
public Terminal QuoteS => ToTerm("'");
public Terminal mulop => ToTerm("*");
public Terminal plusop => ToTerm("+");
public Terminal divop => ToTerm("/");
public Terminal minop => ToTerm("-");
public Terminal concatop => ToTerm("&");
public Terminal expop => ToTerm("^");
// Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
// Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);
public Terminal percentop => ToTerm("%");
// Comparison operators
public Terminal gtop => ToTerm(">");
public Terminal eqop => ToTerm("=");
public Terminal ltop => ToTerm("<");
public Terminal neqop => ToTerm("<>");
public Terminal gteop => ToTerm(">=");
public Terminal lteop => ToTerm("<=");
#endregion
#region Literals
// Terminals for constant values: booleans, numbers, double-quoted text, single-quoted strings and error values.
// NOTE(review): the trailing string arguments of RegexBasedTerminal ("T", "F", "#", ...) look like
// first-character prefixes used by the scanner to pre-select terminals - confirm against Irony.
public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE", "T", "F")
{
Priority = TerminalPriority.Bool
};
// Integers are tried as Int32, then Int64, then big integer
public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None)
{
DefaultIntTypes = new[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt }
};
// Double-quoted text: a doubled quote stands for a literal quote, line breaks are allowed, no backslash escapes
public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes);
// Same options as TextToken but delimited by single quotes; used by the DynamicDataExchange rule
public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes)
{ Priority = TerminalPriority.SingleQuotedString };
// All error literals except #REF!, which has its own terminal below because it is also usable as a prefix and a reference
public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!", "#");
public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError);
#endregion
#region Functions
// Function-name terminals. Every pattern here includes the opening parenthesis, so a function token
// always ends in "(".
private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers
private const string AllUdfChars = SpecialUdfChars + @"\\.\w";
// Optional add-in prefix: a quoted ".xla" file name followed by "!", or "_xll."
private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)";
// The following regex uses the rather exotic feature Character Class Subtraction
// https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction
// Three alternatives, each followed by "(":
//   1. a single UDF character other than C, c, R or r
//   2. a prefix followed by a single UDF character
//   3. an optional prefix followed by 2 to 1023 UDF characters
private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\(";
public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) { Priority = TerminalPriority.UDF };
// Built-in functions that get their own terminals because they are used by the RefFunctionName rule
public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(", "I", "O")
{ Priority = TerminalPriority.ExcelRefFunction };
public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(", "I", "C")
{ Priority = TerminalPriority.ExcelRefFunction };
// Every name from the built-in function list with "(" appended
public Terminal ExcelFunction { get; } = new WordsTerminal(GrammarNames.ExcelFunction, excelFunctionList.Select(f => f + '('))
{ Priority = TerminalPriority.ExcelFunction };
// Using this instead of Empty allows a more accurate tree
public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);
#endregion
#region References and names
// Column letters A through XFD: two or three letters starting with A-W, XA?-XE?, XFA-XFD, or one or two letters
private const string ColumnPattern = @"(?:[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D]|[A-Z]{1,2})";
// Row numbers 1 through 1048576 without leading zeros
private const string RowPattern = @"(?:104857[0-6]|10485[0-6][0-9]|1048[0-4][0-9]{2}|104[0-7][0-9]{3}|10[0-3][0-9]{4}|[1-9][0-9]{1,5}|[1-9])";
// "A".."Z" plus "$"
private static readonly string[] ColumnPrefix = Enumerable.Range('A', 'Z' - 'A' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray();
// "1".."9" plus "$"
private static readonly string[] RowPrefix = Enumerable.Range('1', '9' - '1' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray();
// Whole-column range such as A:C and whole-row range such as 1:5, each part optionally preceded by "$"
public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern, ColumnPrefix);
public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?" + RowPattern + ":[$]?" + RowPattern, RowPrefix);
// Single cell such as A1, $A1, A$1 or $A$1
private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?" + RowPattern;
public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex, ColumnPrefix)
{ Priority = TerminalPriority.CellToken };
// The Unicode general categories that together form the letter class (\p{L})
private static readonly HashSet<UnicodeCategory> UnicodeLetterCategories = new HashSet<UnicodeCategory>
{
    UnicodeCategory.UppercaseLetter,
    UnicodeCategory.LowercaseLetter,
    UnicodeCategory.TitlecaseLetter,
    UnicodeCategory.ModifierLetter,
    UnicodeCategory.OtherLetter
};
// Every letter among the first ushort.MaxValue UTF-16 code units, each as a one-character string.
// That is a long list (48718 entries according to the original author), but it is passed to NameToken as its prefix list.
// NOTE(review): presumably so the scanner can skip NameToken for input starting with digits, parentheses, operators... - confirm.
private static readonly string[] UnicodeLetters = Enumerable.Range(0, ushort.MaxValue)
    .Where(codePoint => UnicodeLetterCategories.Contains(CharUnicodeInfo.GetUnicodeCategory((char)codePoint)))
    .Select(codePoint => char.ToString((char)codePoint))
    .ToArray();
// A name may also start with a backslash or an underscore
private static readonly string[] NameStartCharPrefix = UnicodeLetters.Concat(new[] { @"\", "_" }).ToArray();
// Start with a letter, backslash or underscore, continue with word character (letters, numbers and underscore),
// backslash, dot, question mark or euro sign
private const string NameStartCharRegex = @"[\p{L}\\_]";
private const string NameValidCharacterRegex = @"[\w\\_\.\?€]";
// One start character followed by any number of valid characters; given the lowest terminal priority
// together with FilePathToken (see TerminalPriority)
public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*", NameStartCharPrefix)
{ Priority = TerminalPriority.Name };
// Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference.
// If we ever parse R1C1 references, make sure to include them here
// TODO: Add all function names here
// The pattern has three alternatives:
//   1. TRUE or FALSE followed by at least one more name character
//   2. a cell reference followed by a non-digit name character and any further name characters
//   3. column letters followed by a row number beyond the last valid row (1048576)
private const string NamedRangeCombinationRegex =
    "((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
    // \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
    + "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
    // allow large cell references (e.g. A1048577) as named range
    // The last alternative covers numbers of eight or more digits; it used to read "d{8,}", which
    // only matched eight literal 'd' characters, so names such as A10000000 were not recognised.
    + "|(" + ColumnPattern + @"(104857[7-9]|10485[89][0-9]|1048[6-9][0-9]{2}|1049[0-9]{3}|10[5-9][0-9]{4}|1[1-9][0-9]{5}|[2-9][0-9]{6}|[0-9]{8,})" + NameValidCharacterRegex + "*)"
    ;
// To prevent e.g. "A1A1" being parsed as 2 cell tokens
public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NamedRangeCombinationRegex,
    ColumnPrefix.Concat(new[] { "T", "F" }).ToArray())
{ Priority = TerminalPriority.NamedRangeCombination };
// Reserved names: "_xlnm." followed by letters or underscores
public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+", "_")
{ Priority = TerminalPriority.ReservedName };
#region Structured References
// Item specifiers of a structured reference, e.g. #All or #This Row
private const string SRSpecifierRegex = @"#(All|Data|Headers|Totals|This Row)";
public Terminal SRSpecifierToken = new RegexBasedTerminal(GrammarNames.TokenSRSpecifier, SRSpecifierRegex, "#")
{ Priority = TerminalPriority.StructuredReference };
// Column name: any run of characters other than [ ] ' # @, where those characters may appear when preceded by a single quote
private const string SRColumnRegex = @"(?:[^\[\]'#@]|(?:'['\[\]#@]))+";
public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, SRColumnRegex)
{ Priority = TerminalPriority.StructuredReference };
#endregion
#region Prefixes
// Terminals for the part of a reference in front of the cell/name: sheet names, sheet ranges and workbook file names.
// Characters that force a sheet name to be written between single quotes
private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
// Characters that are excluded from sheet names by the patterns below
private const string notSheetNameChars = @"'*\[\]\\:/?";
//const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
//const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
// Unquoted sheet name: one or more characters that are in neither exclusion list
private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
// Content of a quoted sheet name: any allowed character, or a doubled single quote
private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*";
//private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";
public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!")
{ Priority = TerminalPriority.SheetToken };
// The pattern has no opening quote: the Prefix rule places QuoteS in front of this token
public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!")
{ Priority = TerminalPriority.SheetQuotedToken };
// Two sheet names separated by a colon
private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!";
private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!";
public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
// A number between square brackets that is not followed by a comma and has a "!" somewhere after it
private const string fileNameNumericRegex = @"\[[0-9]+\](?!,)(?=.*!)";
public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex, "[")
{ Priority = TerminalPriority.FileNameNumericToken };
// Any bracket-free text between square brackets, with the same two lookaheads as above
private const string fileNameInBracketsRegex = @"\[[^\[\]]+\](?!,)(?=.*!)";
public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex, "[")
{ Priority = TerminalPriority.FileName };
// Source: https://stackoverflow.com/a/14632579
// Base name without dot, backslash or brackets, then a dot and an extension of 1 to 4 characters
private const string fileNameRegex = @"[^\.\\\[\]]+\..{1,4}";
public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex)
{ Priority = TerminalPriority.FileName };
// Source: http://stackoverflow.com/a/6416209/572635
// Drive letter or (optionally UNC-prefixed) host/share name, a backslash, then any number of folders each ending in a backslash
private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\-.$ @]+)\\(([^<>\"" /\|?*\\']|( |''))*\\)*";
// http(s) URL up to and including the last slash. The port part must be a digit class: it used to be
// written "(0-9)", a group matching the literal text "0-9", so URLs with a port such as
// http://host:8080/ never matched.
private const string urlPathRegex = @"http(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:[0-9]*)*[/]([a-zA-Z0-9\-\.\?\,\'+&%\$#_ ()]*[/])*";
private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")";
// Folder part of an external workbook location (Windows path or http(s) URL); shares the lowest
// terminal priority with NameToken (see TerminalPriority)
public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex)
{ Priority = TerminalPriority.FileNamePath };
#endregion
#endregion
#endregion
#region 2-NonTerminals
// Most non-terminals are first defined here, so they can be used anywhere in the rules
// Otherwise you can only use non-terminals that have been defined previously
// Each non-terminal is created with its name from GrammarNames; the rules themselves are assigned in the constructor.
public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument);
public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments);
public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns);
public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant);
public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula);
public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows);
public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool);
public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell);
public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant);
public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray);
public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument);
public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error);
public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula);
public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq);
public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall);
public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName);
public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange);
public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp);
public NonTerminal MultiRangeFormula{ get; } = new NonTerminal(GrammarNames.MultiRangeFormula);
public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange);
public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number);
public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix);
public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
// NOTE(review): QuotedFileSheet and Sheet receive no rule in the constructor of this class
public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference);
public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError);
public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName);
public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName);
public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet);
public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart);
public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
public NonTerminal StructuredReferenceColumn { get; } = new NonTerminal(GrammarNames.StructuredReferenceColumn);
public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
public NonTerminal StructuredReferenceSpecifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceSpecifier);
public NonTerminal StructuredReferenceQualifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceQualifier);
public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);
#endregion
// Builds the grammar: marks punctuation, assigns a rule to each non-terminal declared above
// and registers operator precedence and associativity.
public ExcelFormulaGrammar()
{
#region Punctuation
// NOTE(review): Irony presumably leaves punctuation terms out of the parse tree - confirm
MarkPunctuation(OpenParen, CloseParen);
MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
#endregion
#region Rules
#region Base rules
Root = Start;
// A formula may be given with or without leading "=", as an array formula "{=...}" or as "=" followed by a union of references
Start.Rule =
FormulaWithEq
| Formula
| ArrayFormula
| MultiRangeFormula
;
MarkTransient(Start);
ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;
MultiRangeFormula.Rule = eqop + Union;
FormulaWithEq.Rule = eqop + Formula;
Formula.Rule =
Reference + ReduceHere()
| Constant
| FunctionCall
| ConstantArray
| OpenParen + Formula + CloseParen
| ReservedName
;
ReservedName.Rule = ReservedNameToken;
Constant.Rule =
Number
| Text
| Bool
| Error
;
// Thin wrappers that give each literal token its own named node
Text.Rule = TextToken;
Number.Rule = NumberToken;
Bool.Rule = BoolToken;
Error.Rule = ErrorToken;
RefError.Rule = RefErrorToken;
#endregion
#region Functions
// Operators are modelled as function calls too: prefix, postfix and infix applications share the FunctionCall node
FunctionCall.Rule =
FunctionName + Arguments + CloseParen
| PrefixOp + Formula
| Formula + PostfixOp
| Formula + InfixOp + Formula
;
FunctionName.Rule = ExcelFunction;
// Zero or more comma-separated arguments; an argument may be empty
Arguments.Rule = MakeStarRule(Arguments, comma, Argument);
EmptyArgument.Rule = EmptyArgumentToken;
Argument.Rule = Formula | EmptyArgument;
PrefixOp.Rule =
ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
MarkTransient(PrefixOp);
InfixOp.Rule =
expop
| mulop
| divop
| plusop
| minop
| concatop
| gtop
| eqop
| ltop
| neqop
| gteop
| lteop;
MarkTransient(InfixOp);
// ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
PostfixOp.Rule = PreferShiftHere() + percentop;
MarkTransient(PostfixOp);
#endregion
#region References
Reference.Rule =
ReferenceItem
| ReferenceFunctionCall
| OpenParen + Reference + PreferShiftHere() + CloseParen
| Prefix + ReferenceItem
| DynamicDataExchange
;
// Operations that yield a reference: range (:), intersection (space), parenthesised union,
// reference-returning functions and the "#" postfix
ReferenceFunctionCall.Rule =
Reference + colon + Reference
| Reference + intersectop + Reference
| OpenParen + Union + CloseParen
| RefFunctionName + Arguments + CloseParen
| Reference + hash
;
RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;
// One or more comma-separated references
Union.Rule = MakePlusRule(Union, comma, Reference);
ReferenceItem.Rule =
Cell
| NamedRange
| VRange
| HRange
| RefError
| UDFunctionCall
| StructuredReference
;
MarkTransient(ReferenceItem);
UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
UDFName.Rule = UDFToken;
VRange.Rule = VRangeToken;
HRange.Rule = HRangeToken;
Cell.Rule = CellToken;
// External workbook: numeric index or name in brackets, optionally preceded by a path
File.Rule =
FileNameNumericToken
| FileNameEnclosedInBracketsToken
| FilePathToken + FileNameEnclosedInBracketsToken
| FilePathToken + FileName
;
DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;
NamedRange.Rule = NameToken | NamedRangeCombinationToken;
// Quoted variants start with QuoteS because the quoted sheet patterns only contain the closing quote
Prefix.Rule =
SheetToken
| QuoteS + SheetQuotedToken
| File + SheetToken
| QuoteS + File + SheetQuotedToken
| File + exclamationMark
| MultipleSheetsToken
| QuoteS + MultipleSheetsQuotedToken
| File + MultipleSheetsToken
| QuoteS + File + MultipleSheetsQuotedToken
| RefErrorToken
;
StructuredReferenceQualifier.Rule = NameToken;
StructuredReferenceSpecifier.Rule =
SRSpecifierToken
| at
| OpenSquareParen + SRSpecifierToken + CloseSquareParen;
StructuredReferenceColumn.Rule =
SRColumnToken
| OpenSquareParen + SRColumnToken + CloseSquareParen;
// Accepted shapes inside the outer brackets: a column or column range (optionally preceded by "@"),
// or one or two specifiers optionally followed by a column or column range
StructuredReferenceExpression.Rule =
StructuredReferenceColumn
| StructuredReferenceColumn + colon + StructuredReferenceColumn
| at + StructuredReferenceColumn
| at + StructuredReferenceColumn + colon + StructuredReferenceColumn
| StructuredReferenceSpecifier
| StructuredReferenceSpecifier + comma + StructuredReferenceColumn
| StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
| StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier
| StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn
| StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
;
// With or without a qualifier name in front of the brackets; a qualifier may be followed by empty brackets
StructuredReference.Rule =
OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
| StructuredReferenceQualifier + OpenSquareParen + CloseSquareParen
| StructuredReferenceQualifier + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
;
#endregion
#region Arrays
// Array constant: rows of comma-separated constants, rows separated by semicolons.
// Note the naming: ArrayColumns is the semicolon-separated list, ArrayRows the comma-separated list.
ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;
ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);
ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
#endregion
#endregion
#region 5-Operator Precedence
// Some of these operators are neutral associative instead of left associative,
// but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
// structure of the parse tree, we like consistency.
RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at);
RegisterOperators(Precedence.Union, Associativity.Left, comma);
RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
RegisterOperators(Precedence.Range, Associativity.Left, colon);
#endregion
}
#region Precedence and Priority constants
// Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
// Could also be an enum, but this way you don't need int casts
// Operator precedence levels passed to RegisterOperators and ImplyPrecedenceHere; a higher number binds tighter.
private static class Precedence
{
// Don't use priority 0, Irony seems to view it as no priority set
public const int Comparison = 1;
public const int Concatenation = 2;
public const int Addition = 3;
public const int Multiplication = 4;
public const int Exponentiation = 5;
public const int UnaryPostFix = 6;
public const int UnaryPreFix = 7;
// Level 8 is intentionally left unused
//public const int Reference = 8;
public const int Union = 9;
public const int Intersection = 10;
public const int Range = 11;
}
// Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
// E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
// E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
// Several terminals deliberately share a value (e.g. StructuredReference and FileName, Name and FileNamePath).
private static class TerminalPriority
{
// Irony Low value
//public const int Low = -1000;
public const int Name = -800;
public const int ReservedName = -700;
public const int StructuredReference = -500;
public const int FileName = -500;
public const int FileNamePath = -800;
public const int SingleQuotedString = -100;
// Irony Normal value, default value
//public const int Normal = 0;
public const int Bool = 0;
public const int MultipleSheetsToken = 100;
// Irony High value
//public const int High = 1000;
public const int CellToken = 1000;
public const int NamedRangeCombination = 1100;
public const int UDF = 1150;
public const int ExcelFunction = 1200;
public const int ExcelRefFunction = 1200;
public const int FileNameNumericToken = 1200;
public const int SheetToken = 1200;
public const int SheetQuotedToken = 1200;
}
#endregion
// The built-in function list is split once and then reused; previously every access to
// excelFunctionList read and split the embedded resource again.
private static readonly Lazy<string[]> CachedExcelFunctionList = new Lazy<string[]>(GetExcelFunctionList);

/// <summary>
/// Names of the built-in Excel functions known to this grammar version, one entry per
/// non-empty line of the embedded function list resource.
/// </summary>
private static string[] excelFunctionList => CachedExcelFunctionList.Value;

/// <summary>
/// Splits the embedded v172 function list resource on line breaks, dropping empty lines.
/// </summary>
private static string[] GetExcelFunctionList()
{
    // The resource is already a string, so it can be split directly without a reader.
    var resource = Properties.Resources.ExcelBuiltinFunctionList_v172;
    return resource.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
}
}
#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
#region Non-Terminals
// Names of the nodes that appear in the parse tree for grammar rules
public const string Argument = "Argument";
public const string Arguments = "Arguments";
public const string ArrayColumns = "ArrayColumns";
public const string ArrayConstant = "ArrayConstant";
public const string ArrayFormula = "ArrayFormula";
public const string ArrayRows = "ArrayRows";
public const string Bool = "Bool";
public const string Cell = "Cell";
public const string Constant = "Constant";
public const string ConstantArray = "ConstantArray";
public const string DynamicDataExchange = "DynamicDataExchange";
public const string EmptyArgument = "EmptyArgument";
public const string Error = "Error";
public const string ExcelFunction = "ExcelFunction";
public const string File = "File";
public const string Formula = "Formula";
public const string FormulaWithEq = "FormulaWithEq";
public const string FunctionCall = "FunctionCall";
public const string FunctionName = "FunctionName";
public const string HorizontalRange = "HRange";
public const string MultiRangeFormula = "MultiRangeFormula";
public const string NamedRange = "NamedRange";
public const string Number = "Number";
public const string Prefix = "Prefix";
public const string QuotedFileSheet = "QuotedFileSheet";
public const string Range = "Range";
public const string Reference = "Reference";
public const string ReferenceFunctionCall = "ReferenceFunctionCall";
public const string RefError = "RefError";
public const string RefFunctionName = "RefFunctionName";
public const string ReservedName = "ReservedName";
public const string Sheet = "Sheet";
public const string StructuredReference = "StructuredReference";
public const string StructuredReferenceColumn = "StructuredReferenceColumn";
public const string StructuredReferenceExpression = "StructuredReferenceExpression";
public const string StructuredReferenceSpecifier = "StructuredReferenceSpecifier";
public const string StructuredReferenceQualifier = "StructuredReferenceQualifier";
public const string Text = "Text";
public const string UDFName = "UDFName";
public const string UDFunctionCall = "UDFunctionCall";
public const string Union = "Union";
public const string VerticalRange = "VRange";
#endregion
#region Transient Non-Terminals
// Names of the non-terminals that the grammar marks as transient
public const string TransientStart = "Start";
public const string TransientInfixOp = "InfixOp";
public const string TransientPostfixOp = "PostfixOp";
public const string TransientPrefixOp = "PrefixOp";
public const string TransientReferenceItem = "ReferenceItem";
#endregion
#region Terminals
// Names of the tokens produced by the scanner
public const string TokenBool = "BoolToken";
public const string TokenCell = "CellToken";
public const string TokenEmptyArgument = "EmptyArgumentToken";
public const string TokenError = "ErrorToken";
public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
public const string TokenFilePath = "FilePathToken";
public const string TokenFileName = "FileNameToken";
public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
public const string TokenFileNameNumeric = "FileNameNumericToken";
public const string TokenHRange = "HRangeToken";
public const string TokenIntersect = "INTERSECT";
public const string TokenMultipleSheets = "MultipleSheetsToken";
public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
public const string TokenName = "NameToken";
public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
public const string TokenNumber = "NumberToken";
public const string TokenRefError = "RefErrorToken";
public const string TokenReservedName = "ReservedNameToken";
public const string TokenSingleQuotedString = "SingleQuotedString";
public const string TokenSheet = "SheetNameToken";
public const string TokenSheetQuoted = "SheetNameQuotedToken";
public const string TokenSRColumn = "SRColumnToken";
public const string TokenSRSpecifier = "SRSpecifierToken";
public const string TokenText = "TextToken";
public const string TokenUDF = "UDFToken";
public const string TokenUnionOperator = ",";
public const string TokenVRange = "VRangeToken";
#endregion
}
#endregion
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v172/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v172
{
/// <summary>
/// Excel formula parser <br/>
/// Contains the parser and utilities that operate directly on the parse tree, or make working with the parse tree easier.
/// </summary>
public static class ExcelFormulaParser
{
/// <summary>
/// Parser instance of the current thread; [ThreadStatic] gives every thread its own slot.
/// </summary>
[ThreadStatic] private static Parser _p;

/// <summary>
/// The calling thread's parser, created for the v172 grammar on first use.
/// </summary>
private static Parser P
{
    get
    {
        if (_p is null)
        {
            _p = new Parser(new ExcelFormulaGrammar());
        }

        return _p;
    }
}
/// <summary>
/// Parse a formula and return the root node of the resulting tree
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed
/// </exception>
/// <returns>Parse tree root node</returns>
public static ParseTreeNode Parse(string input)
{
    var tree = ParseToTree(input);
    return tree.Root;
}
/// <summary>
/// Parse a formula and return the complete parse tree
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed
/// </exception>
/// <returns>Parse tree</returns>
public static ParseTree ParseToTree(string input)
{
    var parsed = P.Parse(input);
    if (parsed.HasErrors())
    {
        throw new ArgumentException("Failed parsing input <<" + input + ">>");
    }

    // Move every intersect token one position (and one column) back and give it a length of 1.
    foreach (ParseTreeNode intersectNode in parsed.Root.AllNodes(GrammarNames.TokenIntersect))
    {
        var reported = intersectNode.Span.Location;
        var shifted = new SourceLocation(reported.Position - 1, reported.Line, reported.Column - 1);
        intersectNode.Span = new SourceSpan(shifted, 1);
    }

    // Second pass: let PrefixInfo correct quoted sheet name nodes using the original input text.
    foreach (ParseTreeNode sheetNode in parsed.Root.AllNodes(GrammarNames.TokenSheetQuoted))
    {
        PrefixInfo.FixQuotedSheetNodeForWhitespace(sheetNode, input);
    }

    return parsed;
}
/// <summary>
/// All nodes (terminals included) in depth-first pre-order, with a conditional stop
/// </summary>
/// <param name="root">The root node</param>
/// <param name="stopAt">Don't process the children of a node matching this predicate; the matching node itself is still returned</param>
/// <returns>A lazily evaluated sequence starting with <paramref name="root"/></returns>
// inspiration taken from https://irony.codeplex.com/discussions/213938
public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
{
    // Explicit stack instead of recursion, so deep trees cannot overflow the call stack.
    var stack = new Stack<ParseTreeNode>();
    stack.Push(root);
    while (stack.Count > 0)
    {
        var node = stack.Pop();
        yield return node;

        // Check if we don't want to process the children of this node
        if (stopAt != null && stopAt(node))
        {
            continue;
        }

        var children = node.ChildNodes;
        // Push children on in reverse order so that they will
        // be evaluated left -> right when popped.
        for (int i = children.Count - 1; i >= 0; i--)
        {
            stack.Push(children[i]);
        }
    }
}
/// <summary>
/// All nodes (terminals included) in depth-first pre-order
/// </summary>
/// <param name="root">The root node</param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
{
    return AllNodesConditional(root);
}
/// <summary>
/// All nodes of a certain type in depth-first pre-order
/// </summary>
/// <param name="root">The root node</param>
/// <param name="type">The node type/name to keep, see <see cref="GrammarNames"/></param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
{
    return AllNodes(root.AllNodes(), type);
}
/// <summary>
/// Filters an already enumerated node sequence down to the nodes of the given type
/// </summary>
internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
{
    return allNodes.Where(node => node.Is(type));
}
/// <summary>
/// Get the parent node of a node
/// </summary>
/// <remarks>
/// This is an expensive operation, as the whole tree will be searched through
/// </remarks>
/// <param name="child">The node whose parent is wanted</param>
/// <param name="treeRoot">Root of the tree that is searched</param>
/// <exception cref="ArgumentException">If no node in the tree has <paramref name="child"/> among its children</exception>
public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
{
    foreach (ParseTreeNode candidate in treeRoot.AllNodes())
    {
        if (candidate.ChildNodes.Any(c => c == child))
        {
            return candidate;
        }
    }

    throw new ArgumentException("Child is not part of the tree", nameof(child));
}
/// <summary>
/// The node type/name, i.e. the name of the grammar term the node was created for
/// </summary>
public static string Type(this ParseTreeNode node) => node.Term.Name;
/// <summary>
/// Check if a node is of a particular type
/// </summary>
/// <param name="pt">The node to check</param>
/// <param name="type">The expected type/name, see <see cref="GrammarNames"/></param>
public static bool Is(this ParseTreeNode pt, string type)
{
    var actualType = pt.Type();
    return actualType == type;
}
/// <summary>
/// Checks whether this node is a function: a function call, a reference function call,
/// a UDF call, or a two-child reference whose second child is itself a function
/// </summary>
public static Boolean IsFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            return true;
        case GrammarNames.Reference:
            // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
            return input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction();
        default:
            return false;
    }
}
/// <summary>
/// Whether or not this node represents parentheses "(_)": a Formula or Reference node
/// whose only child has the same type as the node itself
/// </summary>
public static bool IsParentheses(this ParseTreeNode input)
{
    var type = input.Type();
    if (type != GrammarNames.Formula && type != GrammarNames.Reference)
    {
        return false;
    }

    return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(type);
}
/// <summary>
/// Tells whether this node is a function node with three children whose middle child is an operator term.
/// </summary>
public static bool IsBinaryOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 3)
    {
        return false;
    }

    var middleTerm = input.ChildNodes[1].Term;
    return middleTerm.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Tells whether this node is a binary operation held by a FunctionCall node.
/// </summary>
public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
    => input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);
/// <summary>
/// Tells whether this node is a binary operation held by a ReferenceFunctionCall node.
/// </summary>
public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
    => input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
/// <summary>
/// Tells whether this node is a unary operation, with the operator either before or after its operand.
/// </summary>
public static bool IsUnaryOperation(this ParseTreeNode input)
    => input.IsUnaryPrefixOperation() || input.IsUnaryPostfixOperation();
/// <summary>
/// Tells whether this node is a function node with two children whose first child is an operator term.
/// </summary>
public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }

    var leadingTerm = input.ChildNodes[0].Term;
    return leadingTerm.Flags.HasFlag(TermFlags.IsOperator);
}
/// <summary>
/// Tells whether this node is a function node with two children whose second child is an operator term.
/// </summary>
public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
{
    if (!input.IsFunction() || input.ChildNodes.Count != 2)
    {
        return false;
    }

    var trailingTerm = input.ChildNodes[1].Term;
    return trailingTerm.Flags.HasFlag(TermFlags.IsOperator);
}
// Returns the input without its last character; used to drop the "(" that closes a function name token.
private static string RemoveFinalSymbol(string input)
{
    var lengthWithoutLast = input.Length - 1;
    return input.Substring(0, lengthWithoutLast);
}
/// <summary>
/// Gets the function name or operator symbol of this function call.
/// </summary>
/// <param name="input">Node to inspect; it has to be a function call.</param>
/// <returns>
/// The intersect or union token name for intersections and unions, the printed operator for binary and
/// unary operations, the upper-cased name without its trailing "(" for named functions, and the printed
/// prefix followed by the function name for external user-defined functions.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="input"/> is none of the above.</exception>
public static string GetFunction(this ParseTreeNode input)
{
    if (input.IsIntersection())
    {
        return GrammarNames.TokenIntersect;
    }
    if (input.IsUnion())
    {
        return GrammarNames.TokenUnionOperator;
    }
    // Binary and postfix operations both carry their operator as second child
    if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
    {
        return input.ChildNodes[1].Print();
    }
    if (input.IsUnaryPrefixOperation())
    {
        return input.ChildNodes[0].Print();
    }
    if (input.IsNamedFunction())
    {
        // Upper-case with the invariant culture: the result is compared against fixed function names
        // (see MatchFunction), so it must not depend on the current thread culture
        // (e.g. the Turkish dotted/dotless i).
        return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
    }
    if (input.IsExternalUDFunction())
    {
        // Printed prefix followed by the name of the wrapped function call
        return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
    }
    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Tells whether this node is a function call whose name, as returned by <c>GetFunction</c>,
/// equals <paramref name="functionName"/>.
/// </summary>
public static bool MatchFunction(this ParseTreeNode input, string functionName)
    => input.IsFunction() && input.GetFunction() == functionName;
/// <summary>
/// Gets the argument nodes of a function call or the operand nodes of an operation.
/// </summary>
/// <param name="input">Function call or operation node.</param>
/// <returns>
/// The first child of every Argument for named and external user-defined functions, both operands of
/// a binary operation, the single operand of a unary operation, or the children of the Union node.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="input"/> is none of the above.</exception>
public static IEnumerable GetFunctionArguments(this ParseTreeNode input)
{
    if (input.IsNamedFunction())
    {
        // Second child is the "Arguments" non-terminal; each of its children is an "Argument"
        var argumentList = input.ChildNodes[1];
        return argumentList.ChildNodes.Select(argument => argument.ChildNodes[0]);
    }

    if (input.IsBinaryOperation())
    {
        var leftOperand = input.ChildNodes[0];
        var rightOperand = input.ChildNodes[2];
        return new[] {leftOperand, rightOperand};
    }

    if (input.IsUnaryPrefixOperation())
    {
        // Operator first, operand second
        return new[] {input.ChildNodes[1]};
    }

    if (input.IsUnaryPostfixOperation())
    {
        // Operand first, operator second
        return new[] {input.ChildNodes[0]};
    }

    if (input.IsUnion())
    {
        var unionNode = input.ChildNodes[0];
        return unionNode.ChildNodes;
    }

    if (input.IsExternalUDFunction())
    {
        // Reference -> UDFunctionCall -> Arguments -> Argument non-terminals
        var wrappedCall = input.ChildNodes[1];
        var wrappedArguments = wrappedCall.ChildNodes[1];
        return wrappedArguments.ChildNodes.Select(argument => argument.ChildNodes[0]);
    }

    throw new ArgumentException("Not a function call", nameof(input));
}
/// <summary>
/// Tells whether this node is a function whose first child is a FunctionName or RefFunctionName node,
/// i.e. a built-in Excel function.
/// </summary>
public static bool IsBuiltinFunction(this ParseTreeNode node)
{
    if (!node.IsFunction())
    {
        return false;
    }

    var nameNode = node.ChildNodes[0];
    return nameNode.Is(GrammarNames.FunctionName) || nameNode.Is(GrammarNames.RefFunctionName);
}
/// <summary>
/// Tells whether this node is a binary operation whose operator token comes from the intersect terminal.
/// </summary>
public static bool IsIntersection(this ParseTreeNode input)
{
    if (!input.IsBinaryOperation())
    {
        return false;
    }

    var operatorToken = input.ChildNodes[1].Token;
    return operatorToken.Terminal.Name == GrammarNames.TokenIntersect;
}
/// <summary>
/// Tells whether this node is a union: a ReferenceFunctionCall whose only child is a Union node.
/// </summary>
public static bool IsUnion(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.ReferenceFunctionCall) || input.ChildNodes.Count != 1)
    {
        return false;
    }

    return input.ChildNodes[0].Is(GrammarNames.Union);
}
/// <summary>
/// Tells whether this node is a function call that has a name, as opposed to a unary or binary operation.
/// </summary>
/// <remarks>
/// A FunctionCall qualifies when its first child is a FunctionName, a ReferenceFunctionCall when its
/// first child is a RefFunctionName, and a UDFunctionCall always qualifies.
/// </remarks>
public static bool IsNamedFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.FunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.FunctionName);
        case GrammarNames.ReferenceFunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.RefFunctionName);
        case GrammarNames.UDFunctionCall:
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Tells whether this node is a binary or a unary operation.
/// </summary>
public static bool IsOperation(this ParseTreeNode input)
    => input.IsBinaryOperation() || input.IsUnaryOperation();
/// <summary>
/// Tells whether this node is a two-child Reference whose second child is a named function,
/// i.e. a function call preceded by a prefix.
/// </summary>
public static bool IsExternalUDFunction(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.Reference) || input.ChildNodes.Count != 2)
    {
        return false;
    }

    return input.ChildNodes[1].IsNamedFunction();
}
/// <summary>
/// Tells whether this node is a unary prefix operation applied directly to a number constant.
/// </summary>
public static bool IsNumberWithSign(this ParseTreeNode input)
{
    if (!input.IsUnaryPrefixOperation())
    {
        return false;
    }

    // The operand is the second child; a signed number has Constant -> Number directly beneath it
    var operandContent = input.ChildNodes[1].ChildNodes[0];
    return operandContent.Is(GrammarNames.Constant)
           && operandContent.ChildNodes[0].Is(GrammarNames.Number);
}
/// <summary>
/// Reads the information held by a Prefix non-terminal into a <see cref="PrefixInfo"/>.
/// </summary>
/// <param name="prefix">The Prefix node; handed to <c>PrefixInfo.From</c> unchanged.</param>
public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix)
{
    return PrefixInfo.From(prefix);
}
/// <summary>
/// Descends through first children for as long as the current node is a Formula and returns the
/// first node that is not.
/// </summary>
public static ParseTreeNode SkipFormula(this ParseTreeNode input)
{
    var current = input;
    for (; current.Is(GrammarNames.Formula); current = current.ChildNodes.First())
    {
        // Nothing to do here: the step expression performs the descent
    }

    return current;
}
/// <summary>
/// Gets the Reference nodes at or below this node that are not nested inside another Reference,
/// each one unwrapped through <c>SkipToRelevant</c>.
/// </summary>
public static IEnumerable GetReferenceNodes(this ParseTreeNode input)
{
    // The traversal does not descend below a Reference, so nested references are never visited
    var visitedNodes = input.AllNodesConditional(visited => visited.Is(GrammarNames.Reference));

    return from visited in visitedNodes
           where visited.Is(GrammarNames.Reference)
           select visited.SkipToRelevant();
}
/// <summary>
/// Gets the ParserReferences from the input parse tree node and its children
/// </summary>
/// <remarks>
/// A Reference node with a single child is first replaced by that child. Then 5 cases:
/// 1. ReferenceItem node (Cell, NamedRange, HorizontalRange, VerticalRange, StructuredReference, RefError): convert to ParserReference
/// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes
/// of its second child (to include the references in the arguments of external UDFs)
/// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange.
/// Two single-column references into the same table are merged into one Table reference in the same way.
/// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
/// 5. Other cases (RefFunctionCall, Union, Arguments): recursive call on the nodes returned from GetReferenceNodes(node)
/// </remarks>
/// <param name="node">Node to collect the references from.</param>
/// <returns>The collected references, built eagerly into a list.</returns>
public static IEnumerable GetParserReferences(this ParseTreeNode node)
{
// Unwrap a Reference that only wraps a single child
if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
node = node.ChildNodes[0];
var list = new List();
switch (node.Type())
{
// Case 1: plain reference items map to exactly one ParserReference
case GrammarNames.Cell:
case GrammarNames.NamedRange:
case GrammarNames.HorizontalRange:
case GrammarNames.VerticalRange:
case GrammarNames.StructuredReference:
case GrammarNames.RefError:
list.Add(new ParserReference(node));
break;
// Case 2: the reference itself, plus every reference found below its second child
case GrammarNames.Reference:
list.Add(new ParserReference(node));
list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
break;
default:
if (node.IsRange())
{
// Only the first reference of each limit is considered
var rangeStart = GetParserReferences(node.ChildNodes[0]).FirstOrDefault();
var rangeEnd = GetParserReferences(node.ChildNodes[2]).FirstOrDefault();
// Case 3: both limits are cells -> one CellRange
if (rangeStart?.ReferenceType == ReferenceType.Cell && rangeEnd?.ReferenceType == ReferenceType.Cell)
{
// No copy is made: the start reference object is updated in place and becomes the range
ParserReference range = rangeStart;
range.MaxLocation = rangeEnd.MinLocation;
range.ReferenceType = ReferenceType.CellRange;
range.ReferenceNode = node;
range.LocationString = node.Print();
list.Add(range);
// Leaves the switch, skipping the generic handling below
break;
}
// Both limits are single-column references into the same table -> one Table reference with both columns
if (rangeStart?.ReferenceType == ReferenceType.Table && rangeEnd?.ReferenceType == ReferenceType.Table && rangeStart.Name == rangeEnd.Name && rangeStart.TableColumns.Length == 1 && rangeEnd.TableColumns.Length == 1)
{
// Again the start reference object is reused and updated in place
ParserReference range = rangeStart;
range.TableColumns = rangeStart.TableColumns.Concat(rangeEnd.TableColumns).ToArray();
// Specifiers are kept only when both limits carry the same ones, otherwise they are dropped
range.TableSpecifiers = rangeStart.TableSpecifiers.SequenceEqual(rangeEnd.TableSpecifiers) ? range.TableSpecifiers : new string[0];
range.ReferenceNode = node;
range.LocationString = node.Print();
list.Add(range);
// Leaves the switch, skipping the generic handling below
break;
}
}
// Cases 4 and 5: collect from every reference node below this node
list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
break;
}
return list;
}
/// <summary>
/// Tells whether this node is a range: a binary reference operation with the ":" operator whose two
/// operands end in a child of the same type.
/// </summary>
public static bool IsRange(this ParseTreeNode input)
{
    if (!input.IsBinaryReferenceOperation() || !input.ChildNodes[1].Is(":"))
    {
        return false;
    }

    var startKind = input.ChildNodes[0].ChildNodes.Last().Type();
    var endKind = input.ChildNodes[2].ChildNodes.Last().Type();
    return startKind == endKind;
}
/// <summary>
/// Descends to the first "relevant" node by stepping over wrapper nodes.
/// </summary>
/// <param name="input">The input parse tree node</param>
/// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
/// <remarks>
/// Stepped over:
/// * FormulaWithEq and ArrayFormula nodes (continues with their second child)
/// * Argument and Formula nodes that have exactly one child
/// * Reference nodes that are parentheses and, if requested, Reference nodes with exactly one child
/// </remarks>
/// <returns>The first node reached that is not stepped over.</returns>
public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
{
    var current = input;
    while (true)
    {
        var kind = current.Type();

        if (kind == GrammarNames.FormulaWithEq || kind == GrammarNames.ArrayFormula)
        {
            current = current.ChildNodes[1];
            continue;
        }

        bool isWrapper;
        if (kind == GrammarNames.Argument || kind == GrammarNames.Formula)
        {
            isWrapper = current.ChildNodes.Count == 1;
        }
        else if (kind == GrammarNames.Reference)
        {
            // Skip references which are parentheses
            // Skip references without a prefix (=> they only have one child node) if the option is set
            isWrapper = (skipReferencesWithoutPrefix && current.ChildNodes.Count == 1) || current.IsParentheses();
        }
        else
        {
            isWrapper = false;
        }

        if (!isWrapper)
        {
            return current;
        }

        current = current.ChildNodes[0];
    }
}
/// <summary>
/// Renders a parse tree node, together with everything below it, back to formula text.
/// </summary>
/// <param name="input">Node to print.</param>
/// <returns>The token text for terminals, otherwise the text assembled from the printed children.</returns>
/// <exception cref="ArgumentException">
/// The node is a function node of an unknown shape, or a non-terminal that has no printing rule and
/// does not have exactly one child.
/// </exception>
public static string Print(this ParseTreeNode input)
{
    // Terminals simply carry their own text
    if (input.Term is Terminal)
    {
        return input.Token.Text;
    }

    // Lazy: a child is only printed once one of the branches below asks for it
    var printed = input.ChildNodes.Select(child => Print(child));

    switch (input.Term.Name)
    {
        case GrammarNames.Formula:
            // Either a pair of brackets around the first child, or just the first child
            if (IsParentheses(input))
            {
                return "(" + printed.First() + ")";
            }
            return printed.First();

        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
        {
            // All children are printed up front, before the kind of call is determined
            var parts = printed.ToList();

            if (input.IsNamedFunction())
            {
                // The name token already ends with "(", only the closing bracket is missing
                return string.Join("", parts) + ")";
            }
            if (input.IsBinaryOperation())
            {
                // An intersection prints a single space in place of its operator
                return input.IsIntersection()
                    ? parts[0] + " " + parts[2]
                    : parts[0] + parts[1] + parts[2];
            }
            if (input.IsUnion())
            {
                return "(" + string.Join(",", parts) + ")";
            }
            if (input.IsUnaryOperation())
            {
                return string.Join("", parts);
            }
            throw new ArgumentException("Unknown function type.");
        }

        case GrammarNames.Reference:
            if (IsParentheses(input))
            {
                return "(" + printed.First() + ")";
            }
            return string.Concat(printed);

        case GrammarNames.Prefix:
        {
            var prefixText = string.Join("", printed);
            // The exclamation mark token is not included in the parse tree, so it has to be added back
            // when the prefix consists of a single file
            var isSingleFile = input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File);
            return isSingleFile ? prefixText + "!" : prefixText;
        }

        case GrammarNames.ArrayFormula:
            return "{=" + printed.ElementAt(1) + "}";

        // Non-terminals whose children are printed one after another without a separator
        case GrammarNames.ArrayConstant:
        case GrammarNames.DynamicDataExchange:
        case GrammarNames.FormulaWithEq:
        case GrammarNames.File:
        case GrammarNames.MultiRangeFormula:
        case GrammarNames.StructuredReference:
        case GrammarNames.StructuredReferenceColumn:
        case GrammarNames.StructuredReferenceExpression:
        case GrammarNames.StructuredReferenceSpecifier:
            return string.Join("", printed);

        // Non-terminals whose children are printed comma-separated
        case GrammarNames.Arguments:
        case GrammarNames.ArrayRows:
        case GrammarNames.Union:
            return string.Join(",", printed);

        case GrammarNames.ArrayColumns:
            return string.Join(";", printed);

        case GrammarNames.ConstantArray:
            return "{" + printed.First() + "}";

        default:
            // Anything without a rule of its own is printed as its only child, if it has exactly one
            if (input.ChildNodes.Count == 1)
            {
                return printed.First();
            }
            throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine +
                                        "This probably means the Excel grammar was modified without the print function being modified");
    }
}
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v172/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v172
{
/// <summary>
/// The kinds of reference a <see cref="ParserReference"/> can describe.
/// </summary>
public enum ReferenceType
{
// A single cell. As the first member it is also the default value, so a ParserReference built
// from a node type that InitializeReference does not handle reports Cell.
Cell,
// A range between two cells
CellRange,
// A named range
UserDefinedName,
// A horizontal range token (two limits separated by ':')
HorizontalRange,
// A vertical range token (two limits separated by ':')
VerticalRange,
// A reference error
RefError,
// A structured (table) reference
Table,
// A call to a user-defined function
UserDefinedFunction
}
/// <summary>
/// Flat description of a single reference found in a formula parse tree: its kind, the node it was
/// read from, its printed text and, where present, file, worksheet, name, location and table details.
/// </summary>
public class ParserReference
{
    /// <summary>Kind of reference; see <see cref="ReferenceType"/>.</summary>
    public ReferenceType ReferenceType { get; set; }
    /// <summary>Parse tree node this reference was initialized from.</summary>
    public ParseTreeNode ReferenceNode { get; set; }
    /// <summary>Printed text of the reference; for user-defined functions it ends right after the function name.</summary>
    public string LocationString { get; set; }
    /// <summary>Worksheet from the prefix, or the first worksheet when the prefix spans multiple sheets.</summary>
    public string Worksheet { get; set; }
    /// <summary>Last worksheet when the prefix spans multiple sheets.</summary>
    public string LastWorksheet { get; set; }
    /// <summary>File path from the prefix, when it has one.</summary>
    public string FilePath { get; set; }
    /// <summary>File number (as text) or file name from the prefix, when it has one.</summary>
    public string FileName { get; set; }
    /// <summary>Name of the named range, table or user-defined function.</summary>
    public string Name { get; set; }
    /// <summary>Cell location, or first limit of a horizontal/vertical range.</summary>
    public string MinLocation { get; set; }
    /// <summary>Cell location, or second limit of a horizontal/vertical range.</summary>
    public string MaxLocation { get; set; }
    /// <summary>Specifiers of a structured reference.</summary>
    public string[] TableSpecifiers { get; set; }
    /// <summary>Columns of a structured reference.</summary>
    public string[] TableColumns { get; set; }

    /// <summary>
    /// Creates a reference and initializes it from <paramref name="node"/>.
    /// </summary>
    public ParserReference(ParseTreeNode node)
    {
        InitializeReference(node);
    }

    /// <summary>
    /// Initializes the current object based on the input ParseTreeNode
    /// </summary>
    /// <remarks>
    /// For Reference nodes (Prefix ReferenceItem), it initializes the values derived from the Prefix node and
    /// is re-invoked for the ReferenceItem node. Node types without a case leave the type-specific
    /// properties untouched; <see cref="ReferenceNode"/> and <see cref="LocationString"/> are always set.
    /// </remarks>
    /// <param name="node">Node to read the reference from.</param>
    public void InitializeReference(ParseTreeNode node)
    {
        switch (node.Type())
        {
            case GrammarNames.Reference:
                PrefixInfo prefix = node.ChildNodes[0].GetPrefixInfo();
                // Doubled quotes inside a sheet name are an escaped single quote
                Worksheet = prefix.HasSheet ? prefix.Sheet.Replace("''", "'") : "(Undefined sheet)";
                if (prefix.HasMultipleSheets)
                {
                    string[] sheets = prefix.MultipleSheets.Split(':');
                    Worksheet = sheets[0];
                    LastWorksheet = sheets[1];
                }
                if (prefix.HasFilePath)
                {
                    FilePath = prefix.FilePath;
                }
                // A numeric file reference takes precedence over a file name
                if (prefix.HasFileNumber)
                {
                    FileName = prefix.FileNumber.ToString();
                }
                else if (prefix.HasFileName)
                {
                    FileName = prefix.FileName;
                }
                // Fill in everything that depends on the referenced item itself
                InitializeReference(node.ChildNodes[1]);
                break;
            case GrammarNames.Cell:
                ReferenceType = ReferenceType.Cell;
                MinLocation = node.ChildNodes[0].Token.ValueString;
                MaxLocation = MinLocation;
                break;
            case GrammarNames.NamedRange:
                ReferenceType = ReferenceType.UserDefinedName;
                Name = node.ChildNodes[0].Token.ValueString;
                break;
            case GrammarNames.StructuredReference:
                ReferenceType = ReferenceType.Table;
                Name = node.ChildNodes.FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceQualifier)?.ChildNodes[0].Token.ValueString;
                TableSpecifiers = node.AllNodes().Where(x => x.Is(GrammarNames.TokenSRSpecifier) || x.Is("@")).Select(x => UnEscape(x.Token.ValueString, "'")).ToArray();
                TableColumns = node.AllNodes().Where(x => x.Is(GrammarNames.TokenSRColumn)).Select(x => UnEscape(x.Token.ValueString, "'")).ToArray();
                break;
            case GrammarNames.HorizontalRange:
                string[] horizontalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                ReferenceType = ReferenceType.HorizontalRange;
                MinLocation = horizontalLimits[0];
                MaxLocation = horizontalLimits[1];
                break;
            case GrammarNames.VerticalRange:
                string[] verticalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                ReferenceType = ReferenceType.VerticalRange;
                MinLocation = verticalLimits[0];
                MaxLocation = verticalLimits[1];
                break;
            case GrammarNames.RefError:
                ReferenceType = ReferenceType.RefError;
                break;
            case GrammarNames.UDFunctionCall:
                ReferenceType = ReferenceType.UserDefinedFunction;
                // The name token ends with the opening parenthesis of the call
                Name = node.ChildNodes[0].ChildNodes[0].Token.ValueString.TrimEnd('(');
                break;
        }

        // Runs for the outer Reference node after the recursive call as well, so the outermost node wins
        ReferenceNode = node;
        LocationString = node.Print();
        if (ReferenceType == ReferenceType.UserDefinedFunction && Name != null)
        {
            // Keep only "<prefix><function name>". The search starts right after the printed prefix and
            // looks for the FIRST occurrence of the name: searching for the last occurrence would pick up
            // the name again inside the arguments (e.g. FOO(FOO(1)) would be cut to "FOO(FOO").
            var searchFrom = node.Is(GrammarNames.Reference) ? node.ChildNodes[0].Print().Length : 0;
            var nameStart = LocationString.IndexOf(Name, searchFrom, System.StringComparison.Ordinal);
            // When the name cannot be found in the printed text, the full text is kept
            if (nameStart >= 0)
            {
                LocationString = LocationString.Substring(0, nameStart + Name.Length);
            }
        }
    }

    // Removes every escape character that is not directly followed by another escape character,
    // so a doubled escape character collapses to a single one.
    private string UnEscape(string value, string escapeCharacter)
    {
        return System.Text.RegularExpressions.Regex.Replace(value, $"{escapeCharacter}(?!{escapeCharacter})", "");
    }

    /// <summary>
    /// Returns <see cref="LocationString"/>.
    /// </summary>
    public override string ToString()
    {
        return LocationString;
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v172/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v172
{
// TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23
/// <summary>
/// Simple data class that holds information about a Prefix.
/// </summary>
/// <remarks>
/// Equality and hash code compare file number, file path, file name, sheet and multiple sheets
/// (strings case-insensitively); <see cref="IsQuoted"/> does not take part in either.
/// </remarks>
public class PrefixInfo : IEquatable
{
    /// <summary>Path of the file, or null.</summary>
    public string FilePath { get; }
    public bool HasFilePath => FilePath != null;

    private readonly int? _fileNumber;
    /// <summary>Numeric file reference; 0 when there is none (check <see cref="HasFileNumber"/>).</summary>
    public int FileNumber => _fileNumber.GetValueOrDefault();
    public bool HasFileNumber => _fileNumber.HasValue;

    /// <summary>Name of the file, or null.</summary>
    public string FileName { get; }
    public bool HasFileName => FileName != null;

    /// <summary>True when the prefix names a file, either by name or by number.</summary>
    public bool HasFile => HasFileName || HasFileNumber;

    /// <summary>Name of the sheet, or null.</summary>
    public string Sheet { get; }
    public bool HasSheet => Sheet != null;

    /// <summary>Text of a multiple-sheets prefix, or null.</summary>
    public string MultipleSheets { get; }
    public bool HasMultipleSheets => MultipleSheets != null;

    /// <summary>True when the prefix started with a single quote.</summary>
    public bool IsQuoted { get; }

    /// <summary>
    /// Creates a prefix description from its parts; every part is optional.
    /// </summary>
    public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
    {
        Sheet = sheet;
        _fileNumber = fileNumber;
        FileName = fileName;
        FilePath = filePath;
        MultipleSheets = multipleSheets;
        IsQuoted = isQuoted;
    }

    /// <summary>
    /// Create a PrefixInfo class from a parse tree node
    /// </summary>
    /// <param name="prefix">A Prefix node.</param>
    /// <returns>The file, sheet and quoting information found in the children of the node.</returns>
    /// <exception cref="ArgumentException"><paramref name="prefix"/> is not a Prefix node.</exception>
    internal static PrefixInfo From(ParseTreeNode prefix)
    {
        if (prefix.Type() != GrammarNames.Prefix)
        {
            throw new ArgumentException("Not a prefix", nameof(prefix));
        }

        string filePath = null;
        int? fileNumber = null;
        string fileName = null;
        string sheetName = null;
        string multipleSheets = null;

        // Token number we're processing
        var cur = 0;

        // Check for quotes
        var isQuoted = prefix.ChildNodes[cur].Is("'");
        if (isQuoted)
        {
            cur++;
        }

        // Check and process file
        if (prefix.ChildNodes[cur].Is(GrammarNames.File))
        {
            ParseTreeNode file = prefix.ChildNodes[cur];

            if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
            {
                // Numeric filename: drop the first and the last character, then parse what is left
                fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
            }
            else
            {
                // String filename
                var iCur = 0;
                // Check if it includes a path
                if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                {
                    filePath = file.ChildNodes[iCur].Print();
                    iCur++;
                }

                if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                {
                    // Drop the first and the last character (the enclosing brackets)
                    fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                }
                else
                {
                    fileName = file.ChildNodes[iCur].Print();
                }
            }

            cur++;
        }

        // A prefix may consist of a file only (the '!' is not part of the parse tree); in that case
        // there is no further child to inspect and indexing it would be out of range.
        if (cur < prefix.ChildNodes.Count)
        {
            ParseTreeNode sheetNode = prefix.ChildNodes[cur];

            // Check for a non-quoted sheet
            if (sheetNode.Is(GrammarNames.TokenSheet))
            {
                // remove the trailing !
                sheetName = Substr(sheetNode.Print(), 1);
            }
            // Check for a quoted sheet
            else if (sheetNode.Is(GrammarNames.TokenSheetQuoted))
            {
                // remove quote and !
                sheetName = Substr(sheetNode.Print(), 2);

                if (sheetName == "")
                {
                    // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                    // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                    sheetName = " ";
                }
            }
            // Check if multiple sheets
            else if (sheetNode.Is(GrammarNames.TokenMultipleSheets))
            {
                multipleSheets = Substr(sheetNode.Print(), 1);
            }
        }

        return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
    }

    /// <summary>
    /// Extends a quoted sheet node backwards over the whitespace that directly precedes it in
    /// <paramref name="sourceText"/>, updating both the span and the token text of the node.
    /// </summary>
    /// <param name="quotedSheetNode">Node whose span and token text are adjusted in place.</param>
    /// <param name="sourceText">Text the node positions refer to.</param>
    internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
    {
        var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
        SourceLocation currentLocation = quotedSheetNode.Span.Location;
        if (newPosition == currentLocation.Position)
        {
            // No whitespace in front of the node: nothing to fix
            return;
        }

        // NOTE(review): the column is increased by the distance the start moved backwards
        // (Column + Position - newPosition); a start that moves to the left would normally get a
        // smaller column - confirm this is intended. Left unchanged here.
        var newLocation = new SourceLocation(newPosition, currentLocation.Line, currentLocation.Column + currentLocation.Position - newPosition);
        quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

        // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
        typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
            ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
    }

    // Walks backwards from the start of the node for as long as the preceding character is whitespace
    // and returns the position reached.
    private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
    {
        var startIndex = nodeSheetQuoted.Span.Location.Position;
        while (startIndex > 0)
        {
            if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
            {
                break;
            }

            startIndex--;
        }

        return startIndex;
    }

    // Returns s without its first removeFirst and its last removeLast characters.
    private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
    {
        return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
    }

    public override bool Equals(object other) => Equals(other as PrefixInfo);

    /// <summary>
    /// Compares file number, file path, file name, sheet and multiple sheets; strings are compared
    /// ordinally and case-insensitively.
    /// </summary>
    public bool Equals(PrefixInfo other)
    {
        if (ReferenceEquals(null, other)) return false;
        if (ReferenceEquals(this, other)) return true;
        return _fileNumber == other._fileNumber && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase) && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase) && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase) && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
    }

    // Built from the same fields, with the same case-insensitive comparer, as Equals
    public override int GetHashCode()
    {
        unchecked
        {
            var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
            hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
            hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
            hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
            hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
            return hashCode;
        }
    }

    public static bool operator ==(PrefixInfo left, PrefixInfo right)
    {
        return Equals(left, right);
    }

    public static bool operator !=(PrefixInfo left, PrefixInfo right)
    {
        return !Equals(left, right);
    }

    /// <summary>
    /// Builds the prefix text: optional quote, file path, bracketed file number and/or file name,
    /// sheet or multiple sheets, optional closing quote, and a final "!".
    /// </summary>
    public override string ToString()
    {
        var res = new StringBuilder();
        if (IsQuoted) res.Append("'");
        if (HasFilePath) res.Append(FilePath);
        if (HasFileNumber) res.Append($"[{FileNumber}]");
        if (HasFileName) res.Append($"[{FileName}]");
        if (HasSheet) res.Append(Sheet);
        if (HasMultipleSheets) res.Append(MultipleSheets);
        if (IsQuoted) res.Append("'");
        res.Append("!");
        return res.ToString();
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v172/WordsTerminal.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;
namespace XLParser.Web.XLParserVersions.v172
{
/// <summary>
/// Terminal that matches the input against a fixed set of expected words.
/// </summary>
/// <remarks>
/// The words are stored in a character tree. Every node keeps its children in an array that spans
/// the range from its lowest to its highest child character, which allows direct indexing. Do not
/// use it for words with a large difference between the low and the high character of a token,
/// because the array covers that whole range.
/// </remarks>
public class WordsTerminal : Terminal
{
    private readonly Node _rootNode;
    private readonly List _words;
    private bool _caseSensitive;

    /// <summary>
    /// Creates the terminal; the word tree itself is only built in <see cref="Init"/>.
    /// </summary>
    public WordsTerminal(string name, IEnumerable words) : base(name)
    {
        _rootNode = new Node(0);
        _words = new List(words);
    }

    /// <summary>
    /// Builds the word tree, upper-casing every word when the grammar is not case sensitive, and
    /// supplies a default <c>EditorInfo</c> when none was set.
    /// </summary>
    public override void Init(GrammarData grammarData)
    {
        base.Init(grammarData);

        _caseSensitive = Grammar.CaseSensitive;
        foreach (var word in _words)
        {
            var key = _caseSensitive ? word : word.ToUpperInvariant();
            AddWordToTree(key);
        }

        if (EditorInfo == null)
        {
            EditorInfo = new TokenEditorInfo(TokenType.Unknown, TokenColor.Text, TokenTriggers.None);
        }
    }

    public override IList GetFirsts() => _words;

    /// <summary>
    /// Walks the word tree along the input, starting at the preview position, for as long as the next
    /// character has a child node. A token is produced only if the node reached ends a word.
    /// </summary>
    /// <returns>The token for the matched word, or null when the node reached does not end a word.</returns>
    public override Token TryMatch(ParsingContext context, ISourceStream source)
    {
        var text = source.Text;
        var current = _rootNode;

        var position = source.PreviewPosition;
        while (position < text.Length)
        {
            var ch = text[position];
            if (!_caseSensitive)
            {
                ch = char.ToUpperInvariant(ch);
            }

            var next = current[ch];
            if (next is null)
            {
                break;
            }

            current = next;
            position++;
        }

        if (!current.IsTerminal)
        {
            return null;
        }

        // The depth of the node equals the number of characters consumed
        source.PreviewPosition += current.Length;
        return source.CreateToken(OutputTerminal);
    }

    // Adds one word to the tree, creating nodes as needed, and marks the node of its last character.
    private void AddWordToTree(string word)
    {
        var current = _rootNode;
        for (var i = 0; i < word.Length; i++)
        {
            current = current.GetOrAddChild(word[i]);
        }

        current.IsTerminal = true;
    }

    // One node of the word tree; Length is its depth, i.e. the number of characters leading to it.
    private class Node
    {
        private char _lowChar = '\0';
        private char _highChar = '\0';
        private Node[] _children;

        public Node(int length)
        {
            Length = length;
        }

        public bool IsTerminal { get; set; }

        public int Length { get; }

        // Child for character c, or null when there is none
        public Node this[char c]
        {
            get
            {
                var outsideRange = _children is null || c < _lowChar || c > _highChar;
                return outsideRange ? null : _children[c - _lowChar];
            }
        }

        // Returns the child for character c, creating it (and growing the child array) when needed.
        internal Node GetOrAddChild(char c)
        {
            if (_children is null)
            {
                // First child: the array covers exactly this one character
                var first = new Node(Length + 1);
                _children = new[] { first };
                _lowChar = c;
                _highChar = c;
                return first;
            }

            if (c < _lowChar)
            {
                // Grow at the low end: enlarge, shift the existing children up and blank the freed slots
                var sizeAfterGrow = _highChar - c + 1;
                Array.Resize(ref _children, sizeAfterGrow);
                var shift = _lowChar - c;
                Array.Copy(_children, 0, _children, shift, sizeAfterGrow - shift);
                Array.Clear(_children, 0, shift);
                _lowChar = c;

                var lowest = new Node(Length + 1);
                _children[0] = lowest;
                return lowest;
            }

            if (c > _highChar)
            {
                // Grow at the high end: the new child takes the last slot
                var sizeAfterAppend = c - _lowChar + 1;
                Array.Resize(ref _children, sizeAfterAppend);
                _highChar = c;

                var highest = new Node(Length + 1);
                _children[sizeAfterAppend - 1] = highest;
                return highest;
            }

            // Inside the covered range: reuse the child or fill the empty slot
            var slot = c - _lowChar;
            if (_children[slot] is null)
            {
                _children[slot] = new Node(Length + 1);
            }

            return _children[slot];
        }
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v173/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
namespace XLParser.Web.XLParserVersions.v173
{
///
/// Contains the XLParser grammar
///
[Language("Excel Formulas", "1.7.3", "Grammar for Excel Formulas")]
public class ExcelFormulaGrammar : Grammar
{
#region 1-Terminals
#region Symbols and operators
// Terminals for the fixed symbols and operators of the formula language.
// Each of these is an expression-bodied property, so ToTerm is called again on every access.
// Punctuation and brackets
public Terminal at => ToTerm("@");
public Terminal comma => ToTerm(",");
public Terminal colon => ToTerm(":");
public Terminal hash => ToTerm("#");
public Terminal semicolon => ToTerm(";");
public Terminal OpenParen => ToTerm("(");
public Terminal CloseParen => ToTerm(")");
public Terminal CloseSquareParen => ToTerm("]");
public Terminal OpenSquareParen => ToTerm("[");
public Terminal exclamationMark => ToTerm("!");
public Terminal CloseCurlyParen => ToTerm("}");
public Terminal OpenCurlyParen => ToTerm("{");
public Terminal QuoteS => ToTerm("'");
// Arithmetic, concatenation and exponentiation operators
public Terminal mulop => ToTerm("*");
public Terminal plusop => ToTerm("+");
public Terminal divop => ToTerm("/");
public Terminal minop => ToTerm("-");
public Terminal concatop => ToTerm("&");
public Terminal expop => ToTerm("^");
// Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
// Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);
// Percent and comparison operators
public Terminal percentop => ToTerm("%");
public Terminal gtop => ToTerm(">");
public Terminal eqop => ToTerm("=");
public Terminal ltop => ToTerm("<");
public Terminal neqop => ToTerm("<>");
public Terminal gteop => ToTerm(">=");
public Terminal lteop => ToTerm("<=");
#endregion
#region Literals
// Boolean literal: TRUE or FALSE. The trailing "T"/"F" arguments are presumably the characters a
// match can start with - verify against RegexBasedTerminal.
public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE", "T", "F")
{
Priority = TerminalPriority.Bool
};
// Number literal that may start or end with a dot; integers are tried as Int32, Int64 and big integer
public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.AllowStartEndDot)
{
DefaultIntTypes = new[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt }
};
// Text literal in double quotes; doubled quotes and line breaks are allowed, escapes are not processed
public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes);
// Same as the text literal, but delimited by single quotes
public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'",
StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes)
{ Priority = TerminalPriority.SingleQuotedString };
// Error literals other than #REF!, which has a terminal of its own below
public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!", "#");
// The #REF! error
public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError);
#endregion
#region Functions
private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers
// Content of a regex character class: the special characters above, backslash, dot and word characters
private const string AllUdfChars = SpecialUdfChars + @"\\.\w";
// Optional add-in prefix in front of a function name: a quoted name ending in .xla followed by '!', or "_xll."
private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)";
// The following regex uses the rather exotic feature Character Class Subtraction
// https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction
// Three alternatives, each followed by an opening parenthesis:
// * a single name character other than C, c, R or r
// * an add-in prefix followed by a single name character
// * an optional add-in prefix followed by 2 to 1023 name characters
private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\(";
// User-defined function name including its opening parenthesis
public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) { Priority = TerminalPriority.UDF };
// INDEX, OFFSET and INDIRECT, including the opening parenthesis
public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(", "I", "O")
{ Priority = TerminalPriority.ExcelRefFunction };
// IF and CHOOSE, including the opening parenthesis; shares the priority of the terminal above
public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(", "I", "C")
{ Priority = TerminalPriority.ExcelRefFunction };
// Every name of excelFunctionList with an opening parenthesis appended, matched by a WordsTerminal
public Terminal ExcelFunction { get; } = new WordsTerminal(GrammarNames.ExcelFunction, excelFunctionList.Select(f => f + '('))
{ Priority = TerminalPriority.ExcelFunction };
// Using this instead of Empty allows a more accurate tree
public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);
#endregion
#region References and names
// Column letters from A up to XFD
private const string ColumnPattern = @"(?:[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D]|[A-Z]{1,2})";
// Row numbers from 1 up to 1048576, without leading zeros
private const string RowPattern = @"(?:104857[0-6]|10485[0-6][0-9]|1048[0-4][0-9]{2}|104[0-7][0-9]{3}|10[0-3][0-9]{4}|[1-9][0-9]{1,5}|[1-9])";
// The letters A-Z plus "$"; passed as last argument to the column based terminals below
// (presumably the characters a match can start with - verify against RegexBasedTerminal)
private static readonly string[] ColumnPrefix = Enumerable.Range('A', 'Z' - 'A' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray();
// The digits 1-9 plus "$"; passed as last argument to the row based terminal below
private static readonly string[] RowPrefix = Enumerable.Range('1', '9' - '1' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray();
// Two columns separated by ':', each with an optional "$"
public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern, ColumnPrefix);
// Two rows separated by ':', each with an optional "$"
public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?" + RowPattern + ":[$]?" + RowPattern, RowPrefix);
// A column followed by a row, each with an optional "$"
private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?" + RowPattern;
public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex, ColumnPrefix)
{ Priority = TerminalPriority.CellToken };
// The Unicode general categories that together form \p{L} ("any letter"), which is what
// NameStartCharRegex accepts as the first character of a name.
private static readonly HashSet<UnicodeCategory> UnicodeLetterCategories = new HashSet<UnicodeCategory>
{
    UnicodeCategory.UppercaseLetter,
    UnicodeCategory.LowercaseLetter,
    UnicodeCategory.TitlecaseLetter,
    UnicodeCategory.ModifierLetter,
    UnicodeCategory.OtherLetter
};
// Every UTF-16 code unit in [0, ushort.MaxValue) that belongs to a letter category, as a one-character string.
// The list is long (the original author counted 48718 entries), but handing it to the terminal as its
// prefix list presumably lets the parser rule out a name token for input starting with digits,
// parentheses, operators... - confirm against RegexBasedTerminal.
private static readonly string[] UnicodeLetters = Enumerable.Range(0, ushort.MaxValue)
    .Where(codePoint => UnicodeLetterCategories.Contains(CharUnicodeInfo.GetUnicodeCategory((char)codePoint)))
    .Select(codePoint => char.ToString((char)codePoint))
    .ToArray();
// A name may also start with a backslash or an underscore (see NameStartCharRegex).
private static readonly string[] NameStartCharPrefix = UnicodeLetters.Concat(new[] { @"\", "_" }).ToArray();
// Start with a letter, backslash or underscore; continue with word characters (letters, numbers and underscore),
// backslash, dot, question mark or the euro sign
private const string NameStartCharRegex = @"[\p{L}\\_]";
private const string NameValidCharacterRegex = @"[\w\\_\.\?€]";
// Defined name: one start character followed by any number of valid name characters.
public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*", NameStartCharPrefix)
{ Priority = TerminalPriority.Name };
// Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference.
// If we ever parse R1C1 references, make sure to include them here
// TODO: Add all function names here
//
// The pattern has three alternatives:
//   1. TRUE/FALSE followed by at least one more name character (e.g. "TRUE1"),
//   2. a cell address followed by a non-digit name character and then any name characters (e.g. "A1A1"),
//   3. a column followed by a row number larger than the maximum row 1048576 (e.g. "A1048577").
private const string NamedRangeCombinationRegex =
    "((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
    // \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
    + "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
    // allow large cell references (e.g. A1048577) as named range.
    // The final alternative covers row numbers of eight or more digits. It must be a digit class:
    // written as a bare "d" it only matched the literal letter, so a name such as A10000000
    // (whose first seven digits, 1000000, are still a valid row) was not recognised here.
    + "|(" + ColumnPattern + @"(104857[7-9]|10485[89][0-9]|1048[6-9][0-9]{2}|1049[0-9]{3}|10[5-9][0-9]{4}|1[1-9][0-9]{5}|[2-9][0-9]{6}|[0-9]{8,})" + NameValidCharacterRegex + "*)"
    ;
// To prevent e.g. "A1A1" being parsed as 2 cell tokens
// Prefix list: the column letters and "$" from ColumnPrefix, plus "T" and "F" (already contained in A-Z)
// for the TRUE/FALSE alternative.
public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NamedRangeCombinationRegex,
ColumnPrefix.Concat(new[] { "T", "F" }).ToArray())
{ Priority = TerminalPriority.NamedRangeCombination };
// Reserved names: "_xlnm." followed by one or more ASCII letters or underscores.
// Note: declared as a public field, unlike the get-only properties above.
public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+", "_")
{ Priority = TerminalPriority.ReservedName };
#region Structured References
// Item specifiers of a structured (table) reference: #All, #Data, #Headers, #Totals, #This Row.
private const string SRSpecifierRegex = @"#(All|Data|Headers|Totals|This Row)";
public Terminal SRSpecifierToken = new RegexBasedTerminal(GrammarNames.TokenSRSpecifier, SRSpecifierRegex, "#")
{ Priority = TerminalPriority.StructuredReference };
// Column name inside a structured reference: one or more characters, where [ ] ' # @ may only
// appear when escaped by a preceding single quote (e.g. '[ or '').
private const string SRColumnRegex = @"(?:[^\[\]'#@]|(?:'['\[\]#@]))+";
public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, SRColumnRegex)
{ Priority = TerminalPriority.StructuredReference };
#endregion
#region Prefixes
// Characters excluded from an unquoted sheet name (see normalSheetName) but permitted in a quoted one.
private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
// Characters excluded from both unquoted and quoted sheet names (a quote inside a quoted name is written as '').
private const string notSheetNameChars = @"'*\[\]\\:/?";
//const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
//const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*";
//private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";
// Unquoted sheet prefix, including the trailing '!'.
public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!")
{ Priority = TerminalPriority.SheetToken };
// Quoted sheet prefix WITHOUT its opening quote: the grammar rules match the opening quote separately (QuoteS),
// this terminal matches the name, the closing quote and the '!'.
public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!")
{ Priority = TerminalPriority.SheetQuotedToken };
// Multi-sheet prefixes of the form first:last! (unquoted) and first:last'! (quoted, opening quote again matched separately).
private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!";
private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!";
public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex)
{ Priority = TerminalPriority.MultipleSheetsToken };
// Numeric file reference such as [1]: digits in square brackets, not directly followed by a comma,
// and only when a '!' occurs somewhere later in the input (lookahead).
private const string fileNameNumericRegex = @"\[[0-9]+\](?!,)(?=.*!)";
public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex, "[")
{ Priority = TerminalPriority.FileNameNumericToken };
// File name in square brackets such as [Book1.xlsx], with the same comma / '!' lookaheads as above.
private const string fileNameInBracketsRegex = @"\[[^\[\]]+\](?!,)(?=.*!)";
public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex, "[")
{ Priority = TerminalPriority.FileName };
// Source: https://stackoverflow.com/a/14632579
// Bare file name: characters other than . \ [ ] followed by a dot and 1 to 4 further characters.
private const string fileNameRegex = @"[^\.\\\[\]]+\..{1,4}";
public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex)
{ Priority = TerminalPriority.FileName };
// Source: http://stackoverflow.com/a/6416209/572635
// A path is either a Windows path (drive letter or a name optionally preceded by backslashes, then
// backslash-terminated directories) or an http/https URL up to and including its last '/'.
private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\-.$ @]+)\\(([^<>\"" /\|?*\\']|( |''))*\\)*";
private const string urlPathRegex = @"http(s?)\://([\p{L}\p{N}-_]+\.[\p{L}\p{N}-_]*)+(:[0-9]+)?/([\p{L}\p{N}\-\.\?\,\'+&%\$#_ ()]*/)*";
private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")";
public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex)
{ Priority = TerminalPriority.FileNamePath };
#endregion
#endregion
#endregion
#region 2-NonTerminals
// Most non-terminals are first defined here, so they can be used anywhere in the rules
// Otherwise you can only use non-terminals that have been defined previously
// Each property wraps a NonTerminal named after the matching GrammarNames constant; the rules
// themselves are assigned in the constructor. The list is kept in alphabetical order.
public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument);
public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments);
public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns);
public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant);
public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula);
public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows);
public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool);
public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell);
public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant);
public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray);
public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument);
public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error);
public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula);
public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq);
public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall);
public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName);
public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange);
public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp);
public NonTerminal MultiRangeFormula{ get; } = new NonTerminal(GrammarNames.MultiRangeFormula);
public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange);
public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number);
public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix);
public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
// NOTE(review): QuotedFileSheet and Sheet are declared here but no rule is assigned to them in the constructor below.
public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference);
public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError);
public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName);
public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName);
public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet);
public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart);
public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
public NonTerminal StructuredReferenceColumn { get; } = new NonTerminal(GrammarNames.StructuredReferenceColumn);
public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
public NonTerminal StructuredReferenceSpecifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceSpecifier);
public NonTerminal StructuredReferenceQualifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceQualifier);
public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);
#endregion
// Builds the grammar: marks punctuation, assigns the production rule of every non-terminal
// declared above, and registers operator precedence and associativity.
public ExcelFormulaGrammar()
{
#region Punctuation
// Parentheses and curly braces are registered as punctuation.
MarkPunctuation(OpenParen, CloseParen);
MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
#endregion
#region Rules
#region Base rules
Root = Start;
// Accepted inputs: "=formula", a bare formula, "{=formula}" (array formula) and "=" followed by a union of references.
Start.Rule =
FormulaWithEq
| Formula
| ArrayFormula
| MultiRangeFormula
;
MarkTransient(Start);
ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;
MultiRangeFormula.Rule = eqop + Union;
FormulaWithEq.Rule = eqop + Formula;
Formula.Rule =
Reference + ReduceHere()
| Constant
| FunctionCall
| ConstantArray
| OpenParen + Formula + CloseParen
| ReservedName
;
ReservedName.Rule = ReservedNameToken;
Constant.Rule =
Number
| Text
| Bool
| Error
;
Text.Rule = TextToken;
Number.Rule = NumberToken;
Bool.Rule = BoolToken;
Error.Rule = ErrorToken;
RefError.Rule = RefErrorToken;
#endregion
#region Functions
// Operator applications (prefix, postfix, infix) are FunctionCall alternatives, next to named built-in function calls.
// The opening parenthesis belongs to the function-name token, so only CloseParen appears in the rule.
FunctionCall.Rule =
FunctionName + Arguments + CloseParen
| PrefixOp + Formula
| Formula + PostfixOp
| Formula + InfixOp + Formula
;
FunctionName.Rule = ExcelFunction;
Arguments.Rule = MakeStarRule(Arguments, comma, Argument);
EmptyArgument.Rule = EmptyArgumentToken;
Argument.Rule = Formula | EmptyArgument;
PrefixOp.Rule =
ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
| ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
MarkTransient(PrefixOp);
InfixOp.Rule =
expop
| mulop
| divop
| plusop
| minop
| concatop
| gtop
| eqop
| ltop
| neqop
| gteop
| lteop;
MarkTransient(InfixOp);
// ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
PostfixOp.Rule = PreferShiftHere() + percentop;
MarkTransient(PostfixOp);
#endregion
#region References
Reference.Rule =
ReferenceItem
| ReferenceFunctionCall
| OpenParen + Reference + PreferShiftHere() + CloseParen
| Prefix + ReferenceItem
| DynamicDataExchange
;
// Reference-producing operations: range (:), intersection, parenthesised union,
// reference-returning functions and the postfix '#' operator.
ReferenceFunctionCall.Rule =
Reference + colon + Reference
| Reference + intersectop + Reference
| OpenParen + Union + CloseParen
| RefFunctionName + Arguments + CloseParen
| Reference + hash
;
RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;
Union.Rule = MakePlusRule(Union, comma, Reference);
ReferenceItem.Rule =
Cell
| NamedRange
| VRange
| HRange
| RefError
| UDFunctionCall
| StructuredReference
;
MarkTransient(ReferenceItem);
UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
UDFName.Rule = UDFToken;
VRange.Rule = VRangeToken;
HRange.Rule = HRangeToken;
Cell.Rule = CellToken;
// A file is a bracketed numeric index, a bracketed file name, or a path followed by a (bracketed or bare) file name.
File.Rule =
FileNameNumericToken
| FileNameEnclosedInBracketsToken
| FilePathToken + FileNameEnclosedInBracketsToken
| FilePathToken + FileName
;
DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;
NamedRange.Rule = NameToken | NamedRangeCombinationToken;
// Everything that can precede a reference item: sheet, file + sheet, file only, sheet ranges, or a #REF! error.
// Quoted variants start with QuoteS because the quoted sheet terminals do not include their opening quote.
Prefix.Rule =
SheetToken
| QuoteS + SheetQuotedToken
| File + SheetToken
| QuoteS + File + SheetQuotedToken
| File + exclamationMark
| MultipleSheetsToken
| QuoteS + MultipleSheetsQuotedToken
| File + MultipleSheetsToken
| QuoteS + File + MultipleSheetsQuotedToken
| RefErrorToken
;
StructuredReferenceQualifier.Rule = NameToken;
StructuredReferenceSpecifier.Rule =
SRSpecifierToken
| at
| OpenSquareParen + SRSpecifierToken + CloseSquareParen;
StructuredReferenceColumn.Rule =
SRColumnToken
| OpenSquareParen + SRColumnToken + CloseSquareParen;
// Contents of the outer brackets of a structured reference: columns or column ranges, optionally
// preceded by '@' or by one or two comma-separated specifiers.
StructuredReferenceExpression.Rule =
StructuredReferenceColumn
| StructuredReferenceColumn + colon + StructuredReferenceColumn
| at + StructuredReferenceColumn
| at + StructuredReferenceColumn + colon + StructuredReferenceColumn
| StructuredReferenceSpecifier
| StructuredReferenceSpecifier + comma + StructuredReferenceColumn
| StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
| StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier
| StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn
| StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
;
StructuredReference.Rule =
OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
| StructuredReferenceQualifier + OpenSquareParen + CloseSquareParen
| StructuredReferenceQualifier + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
;
#endregion
#region Arrays
// Array constant: rows separated by ';', values within a row separated by ','.
// Values are constants, signed numbers or #REF! errors.
ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;
ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);
ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
#endregion
#endregion
#region 5-Operator Precedence
// Some of these operators are neutral associative instead of left associative,
// but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
// structure of the parse tree, we like consistency.
// Registered from lowest to highest precedence value (see the Precedence constants).
RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at);
RegisterOperators(Precedence.Union, Associativity.Left, comma);
RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
RegisterOperators(Precedence.Range, Associativity.Left, colon);
#endregion
}
#region Precedence and Priority constants
// Operator precedence levels passed to RegisterOperators / ImplyPrecedenceHere; a larger value binds tighter.
// Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
// Could also be an enum, but this way you don't need int casts
private static class Precedence
{
// Don't use precedence 0, Irony seems to view it as no precedence set
public const int Comparison = 1;
public const int Concatenation = 2;
public const int Addition = 3;
public const int Multiplication = 4;
public const int Exponentiation = 5;
public const int UnaryPostFix = 6;
public const int UnaryPreFix = 7;
// Value 8 is intentionally left unused (reserved for the commented-out Reference level).
//public const int Reference = 8;
public const int Union = 9;
public const int Intersection = 10;
public const int Range = 11;
}
// Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
// E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
// E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
// Several terminals deliberately share a value (e.g. StructuredReference and FileName, Name and FileNamePath).
private static class TerminalPriority
{
// Irony Low value
//public const int Low = -1000;
public const int Name = -800;
public const int ReservedName = -700;
public const int StructuredReference = -500;
public const int FileName = -500;
public const int FileNamePath = -800;
public const int SingleQuotedString = -100;
// Irony Normal value, default value
//public const int Normal = 0;
public const int Bool = 0;
public const int MultipleSheetsToken = 100;
// Irony High value
//public const int High = 1000;
public const int CellToken = 1000;
public const int NamedRangeCombination = 1100;
public const int UDF = 1150;
public const int ExcelFunction = 1200;
public const int ExcelRefFunction = 1200;
public const int FileNameNumericToken = 1200;
public const int SheetToken = 1200;
public const int SheetQuotedToken = 1200;
}
#endregion
// Names of the built-in Excel functions; every access re-reads the embedded resource.
private static string[] excelFunctionList => GetExcelFunctionList();

// Reads the embedded v173 function list and returns its non-empty lines
// (the text is split on both '\n' and '\r', so any line-ending style works).
private static string[] GetExcelFunctionList()
{
    using (var reader = new StringReader(Properties.Resources.ExcelBuiltinFunctionList_v173))
    {
        string listing = reader.ReadToEnd();
        char[] lineBreaks = { '\n', '\r' };
        return listing.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
    }
}
}
#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
#region Non-Terminals
public const string Argument = "Argument";
public const string Arguments = "Arguments";
public const string ArrayColumns = "ArrayColumns";
public const string ArrayConstant = "ArrayConstant";
public const string ArrayFormula = "ArrayFormula";
public const string ArrayRows = "ArrayRows";
public const string Bool = "Bool";
public const string Cell = "Cell";
public const string Constant = "Constant";
public const string ConstantArray = "ConstantArray";
public const string DynamicDataExchange = "DynamicDataExchange";
public const string EmptyArgument = "EmptyArgument";
public const string Error = "Error";
// Listed with the non-terminals, but the grammar uses this name for the ExcelFunction terminal.
public const string ExcelFunction = "ExcelFunction";
public const string File = "File";
public const string Formula = "Formula";
public const string FormulaWithEq = "FormulaWithEq";
public const string FunctionCall = "FunctionCall";
public const string FunctionName = "FunctionName";
public const string HorizontalRange = "HRange";
public const string MultiRangeFormula = "MultiRangeFormula";
public const string NamedRange = "NamedRange";
public const string Number = "Number";
public const string Prefix = "Prefix";
public const string QuotedFileSheet = "QuotedFileSheet";
public const string Range = "Range";
public const string Reference = "Reference";
public const string ReferenceFunctionCall = "ReferenceFunctionCall";
public const string RefError = "RefError";
public const string RefFunctionName = "RefFunctionName";
public const string ReservedName = "ReservedName";
public const string Sheet = "Sheet";
public const string StructuredReference = "StructuredReference";
public const string StructuredReferenceColumn = "StructuredReferenceColumn";
public const string StructuredReferenceExpression = "StructuredReferenceExpression";
public const string StructuredReferenceSpecifier = "StructuredReferenceSpecifier";
public const string StructuredReferenceQualifier = "StructuredReferenceQualifier";
public const string Text = "Text";
public const string UDFName = "UDFName";
public const string UDFunctionCall = "UDFunctionCall";
public const string Union = "Union";
public const string VerticalRange = "VRange";
#endregion
#region Transient Non-Terminals
// Names of the non-terminals that the grammar marks as transient (MarkTransient).
public const string TransientStart = "Start";
public const string TransientInfixOp = "InfixOp";
public const string TransientPostfixOp = "PostfixOp";
public const string TransientPrefixOp = "PrefixOp";
public const string TransientReferenceItem = "ReferenceItem";
#endregion
#region Terminals
// Note: the constant name and its string value differ for some entries
// (e.g. TokenSheet = "SheetNameToken", TokenIntersect = "INTERSECT", TokenUnionOperator = ",").
public const string TokenBool = "BoolToken";
public const string TokenCell = "CellToken";
public const string TokenEmptyArgument = "EmptyArgumentToken";
public const string TokenError = "ErrorToken";
public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
public const string TokenFilePath = "FilePathToken";
public const string TokenFileName = "FileNameToken";
public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
public const string TokenFileNameNumeric = "FileNameNumericToken";
public const string TokenHRange = "HRangeToken";
public const string TokenIntersect = "INTERSECT";
public const string TokenMultipleSheets = "MultipleSheetsToken";
public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
public const string TokenName = "NameToken";
public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
public const string TokenNumber = "NumberToken";
public const string TokenRefError = "RefErrorToken";
public const string TokenReservedName = "ReservedNameToken";
public const string TokenSingleQuotedString = "SingleQuotedString";
public const string TokenSheet = "SheetNameToken";
public const string TokenSheetQuoted = "SheetNameQuotedToken";
public const string TokenSRColumn = "SRColumnToken";
public const string TokenSRSpecifier = "SRSpecifierToken";
public const string TokenText = "TextToken";
public const string TokenUDF = "UDFToken";
public const string TokenUnionOperator = ",";
public const string TokenVRange = "VRangeToken";
#endregion
}
#endregion
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v173/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using Irony.Parsing;
namespace XLParser.Web.XLParserVersions.v173
{
/// <summary>
/// Excel formula parser.
/// Contains the parser and utilities that operate directly on the parse tree or make working with the parse tree easier.
/// </summary>
public static class ExcelFormulaParser
{
/// <summary>
/// Backing field for <see cref="P"/>; marked [ThreadStatic], so every thread holds its own parser instance.
/// </summary>
[ThreadStatic] private static Parser _p;

/// <summary>
/// Parser for the calling thread, created on first use from a new <see cref="ExcelFormulaGrammar"/>.
/// </summary>
private static Parser P
{
    get
    {
        if (_p == null)
        {
            _p = new Parser(new ExcelFormulaGrammar());
        }

        return _p;
    }
}
/// <summary>
/// Parse a formula and return the root node of the resulting tree.
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed
/// </exception>
/// <returns>Parse tree root node</returns>
public static ParseTreeNode Parse(string input)
{
    ParseTree parsed = ParseToTree(input);
    return parsed.Root;
}
/// <summary>
/// Parse a formula and return the complete parse tree.
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed
/// </exception>
/// <returns>Parse tree</returns>
public static ParseTree ParseToTree(string input)
{
    ParseTree parsed = P.Parse(input);
    if (parsed.HasErrors())
    {
        throw new ArgumentException("Failed parsing input <<" + input + ">>");
    }

    // Re-anchor every intersect token: one position and one column earlier than reported, with length 1.
    foreach (ParseTreeNode intersectNode in parsed.Root.AllNodes(GrammarNames.TokenIntersect))
    {
        var reported = intersectNode.Span.Location;
        var shifted = new SourceLocation(reported.Position - 1, reported.Line, reported.Column - 1);
        intersectNode.Span = new SourceSpan(shifted, 1);
    }

    // Quoted sheet names get a whitespace correction that needs the original input text.
    foreach (ParseTreeNode sheetNode in parsed.Root.AllNodes(GrammarNames.TokenSheetQuoted))
    {
        PrefixInfo.FixQuotedSheetNodeForWhitespace(sheetNode, input);
    }

    return parsed;
}
/// <summary>
/// All nodes (the root included, terminals as well as non-terminals) in depth-first pre-order, with a conditional stop
/// </summary>
/// <param name="root">The root node</param>
/// <param name="stopAt">Don't process the children of a node matching this predicate; the matching node itself is still returned</param>
/// <returns>A lazily evaluated sequence of nodes, children visited left to right</returns>
// inspiration taken from https://irony.codeplex.com/discussions/213938
public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
{
    var stack = new Stack<ParseTreeNode>();
    stack.Push(root);
    while (stack.Count > 0)
    {
        var node = stack.Pop();
        yield return node;

        // Check if we don't want to process the children of this node
        if (stopAt != null && stopAt(node))
        {
            continue;
        }

        var children = node.ChildNodes;
        // Push children on in reverse order so that they will
        // be evaluated left -> right when popped.
        for (int i = children.Count - 1; i >= 0; i--)
        {
            stack.Push(children[i]);
        }
    }
}

/// <summary>
/// All nodes in depth-first pre-order
/// </summary>
/// <param name="root">The root node</param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
{
    return AllNodesConditional(root);
}

/// <summary>
/// All nodes of a certain type in depth-first pre-order
/// </summary>
/// <param name="root">The root node</param>
/// <param name="type">Name of the grammar construct to keep, see <see cref="GrammarNames"/></param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
{
    return AllNodes(root.AllNodes(), type);
}

// Filters an already-enumerated node sequence down to the nodes of the given type.
internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
{
    return allNodes.Where(node => node.Is(type));
}
///