Repository: spreadsheetlab/XLParser Branch: master Commit: 8b7d1c684b37 Files: 177 Total size: 25.9 MB Directory structure: gitextract_hbenmd2r/ ├── .gitattributes ├── .gitignore ├── CHANGELOG.md ├── LICENSE.md ├── README.md ├── app/ │ ├── XLParser apps.sln │ ├── XLParser.Web/ │ │ ├── .gitignore │ │ ├── Default.aspx │ │ ├── Default.aspx.cs │ │ ├── Default.aspx.designer.cs │ │ ├── Parse.cs │ │ ├── Properties/ │ │ │ ├── AssemblyInfo.cs │ │ │ ├── Resources.Designer.cs │ │ │ └── Resources.resx │ │ ├── Resources/ │ │ │ ├── ExcelBuiltinFunctionList-v120.txt │ │ │ ├── ExcelBuiltinFunctionList-v139.txt │ │ │ ├── ExcelBuiltinFunctionList-v141.txt │ │ │ ├── ExcelBuiltinFunctionList-v142.txt │ │ │ ├── ExcelBuiltinFunctionList-v150.txt │ │ │ ├── ExcelBuiltinFunctionList-v151.txt │ │ │ ├── ExcelBuiltinFunctionList-v152.txt │ │ │ ├── ExcelBuiltinFunctionList-v160.txt │ │ │ ├── ExcelBuiltinFunctionList-v161.txt │ │ │ ├── ExcelBuiltinFunctionList-v162.txt │ │ │ ├── ExcelBuiltinFunctionList-v163.txt │ │ │ ├── ExcelBuiltinFunctionList-v170.txt │ │ │ ├── ExcelBuiltinFunctionList-v171.txt │ │ │ ├── ExcelBuiltinFunctionList-v172.txt │ │ │ ├── ExcelBuiltinFunctionList-v173.txt │ │ │ ├── ExcelBuiltinFunctionList-v174.txt │ │ │ └── ExcelBuiltinFunctionList-v175.txt │ │ ├── Web.Debug.config │ │ ├── Web.Release.config │ │ ├── Web.config │ │ ├── XLParser Web.csproj │ │ ├── XLParserVersions/ │ │ │ ├── v100/ │ │ │ │ ├── ExcelFormulaGrammar.cs │ │ │ │ └── ExcelFormulaParser.cs │ │ │ ├── v114/ │ │ │ │ ├── ExcelFormulaGrammar.cs │ │ │ │ └── ExcelFormulaParser.cs │ │ │ ├── v120/ │ │ │ │ ├── ExcelFormulaGrammar.cs │ │ │ │ ├── ExcelFormulaParser.cs │ │ │ │ └── PrefixInfo.cs │ │ │ ├── v139/ │ │ │ │ ├── ExcelFormulaGrammar.cs │ │ │ │ ├── ExcelFormulaParser.cs │ │ │ │ ├── ParserReference.cs │ │ │ │ └── PrefixInfo.cs │ │ │ ├── v141/ │ │ │ │ ├── ExcelFormulaGrammar.cs │ │ │ │ ├── ExcelFormulaParser.cs │ │ │ │ ├── ParserReference.cs │ │ │ │ └── PrefixInfo.cs │ │ │ ├── v142/ │ │ │ │ ├── 
ExcelFormulaGrammar.cs │ │ │ │ ├── ExcelFormulaParser.cs │ │ │ │ ├── ParserReference.cs │ │ │ │ └── PrefixInfo.cs │ │ │ ├── v150/ │ │ │ │ ├── ExcelFormulaGrammar.cs │ │ │ │ ├── ExcelFormulaParser.cs │ │ │ │ ├── ParserReference.cs │ │ │ │ └── PrefixInfo.cs │ │ │ ├── v151/ │ │ │ │ ├── ExcelFormulaGrammar.cs │ │ │ │ ├── ExcelFormulaParser.cs │ │ │ │ ├── ParserReference.cs │ │ │ │ └── PrefixInfo.cs │ │ │ ├── v152/ │ │ │ │ ├── ExcelFormulaGrammar.cs │ │ │ │ ├── ExcelFormulaParser.cs │ │ │ │ ├── ParserReference.cs │ │ │ │ └── PrefixInfo.cs │ │ │ ├── v160/ │ │ │ │ ├── ExcelFormulaGrammar.cs │ │ │ │ ├── ExcelFormulaParser.cs │ │ │ │ ├── ParserReference.cs │ │ │ │ └── PrefixInfo.cs │ │ │ ├── v161/ │ │ │ │ ├── ExcelFormulaGrammar.cs │ │ │ │ ├── ExcelFormulaParser.cs │ │ │ │ ├── ParserReference.cs │ │ │ │ └── PrefixInfo.cs │ │ │ ├── v162/ │ │ │ │ ├── ExcelFormulaGrammar.cs │ │ │ │ ├── ExcelFormulaParser.cs │ │ │ │ ├── ParserReference.cs │ │ │ │ ├── PrefixInfo.cs │ │ │ │ └── WordsTerminal.cs │ │ │ ├── v163/ │ │ │ │ ├── ExcelFormulaGrammar.cs │ │ │ │ ├── ExcelFormulaParser.cs │ │ │ │ ├── ParserReference.cs │ │ │ │ ├── PrefixInfo.cs │ │ │ │ └── WordsTerminal.cs │ │ │ ├── v170/ │ │ │ │ ├── ExcelFormulaGrammar.cs │ │ │ │ ├── ExcelFormulaParser.cs │ │ │ │ ├── ParserReference.cs │ │ │ │ ├── PrefixInfo.cs │ │ │ │ └── WordsTerminal.cs │ │ │ ├── v171/ │ │ │ │ ├── ExcelFormulaGrammar.cs │ │ │ │ ├── ExcelFormulaParser.cs │ │ │ │ ├── ParserReference.cs │ │ │ │ ├── PrefixInfo.cs │ │ │ │ └── WordsTerminal.cs │ │ │ ├── v172/ │ │ │ │ ├── ExcelFormulaGrammar.cs │ │ │ │ ├── ExcelFormulaParser.cs │ │ │ │ ├── ParserReference.cs │ │ │ │ ├── PrefixInfo.cs │ │ │ │ └── WordsTerminal.cs │ │ │ ├── v173/ │ │ │ │ ├── ExcelFormulaGrammar.cs │ │ │ │ ├── ExcelFormulaParser.cs │ │ │ │ ├── ParserReference.cs │ │ │ │ ├── PrefixInfo.cs │ │ │ │ └── WordsTerminal.cs │ │ │ ├── v174/ │ │ │ │ ├── ExcelFormulaGrammar.cs │ │ │ │ ├── ExcelFormulaParser.cs │ │ │ │ ├── ParserReference.cs │ │ │ │ ├── PrefixInfo.cs │ │ │ │ 
└── WordsTerminal.cs │ │ │ └── v175/ │ │ │ ├── ExcelFormulaGrammar.cs │ │ │ ├── ExcelFormulaParser.cs │ │ │ ├── ParserReference.cs │ │ │ ├── PrefixInfo.cs │ │ │ └── WordsTerminal.cs │ │ ├── d3vizsvg.js │ │ ├── packages.config │ │ └── xlparser-web.css │ └── lib/ │ └── .gitignore ├── doc/ │ ├── README.md │ └── api/ │ ├── Doxyfile │ └── README.MD ├── lib/ │ └── IronyExplorer/ │ ├── .gitignore │ ├── IronyExplorer.sln │ ├── LICENSE │ ├── README.md │ ├── libs/ │ │ └── FastColoredTextBox/ │ │ ├── FastColoredTextBox.XML │ │ └── about.txt │ └── src/ │ └── IronyExplorer.GrammarExplorer/ │ ├── App.config │ ├── GrammarItemList.cs │ ├── GrammarLoader.cs │ ├── Highlighter/ │ │ ├── AboutCodeHighlighter.txt │ │ ├── EditorAdapter.cs │ │ ├── EditorViewAdapter.cs │ │ ├── FastColoredTextBoxHighlighter.cs │ │ ├── RichTextBoxHighlighter.cs │ │ └── WavyLineStyle.cs │ ├── IronyExplorer.GrammarExplorer.csproj │ ├── Program.cs │ ├── Properties/ │ │ ├── AssemblyInfo.cs │ │ ├── Resources.Designer.cs │ │ ├── Resources.resx │ │ ├── Settings.Designer.cs │ │ └── Settings.settings │ ├── fmGrammarExplorer.Designer.cs │ ├── fmGrammarExplorer.cs │ ├── fmGrammarExplorer.resx │ ├── fmSelectGrammars.Designer.cs │ ├── fmSelectGrammars.cs │ ├── fmSelectGrammars.resx │ ├── fmShowException.Designer.cs │ ├── fmShowException.cs │ ├── fmShowException.resx │ └── packages.config └── src/ ├── XLParser/ │ ├── ExcelFormulaGrammar.cs │ ├── ExcelFormulaParser.cs │ ├── FormulaAnalyzer.cs │ ├── ParserReference.cs │ ├── PrefixInfo.cs │ ├── Resources/ │ │ └── ExcelBuiltinFunctionList.txt │ ├── WordsTerminal.cs │ ├── XLParser.csproj │ └── XLParser.nuspec ├── XLParser.Tests/ │ ├── DatasetTests.cs │ ├── FormulaAnalysisTest.cs │ ├── ParserTests.cs │ ├── PrefixInfoTests.cs │ ├── PrintTests.cs │ ├── WordsTerminalTests.cs │ ├── XLParser.Tests.csproj │ └── data/ │ ├── enron/ │ │ ├── formulas.txt │ │ └── knownfails.txt │ ├── euses/ │ │ ├── formulas.txt │ │ └── knownfails.txt │ └── testformulas/ │ ├── structured_references.txt │ 
└── user_contributed.txt ├── XLParser.sln └── signing.snk ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitattributes ================================================ ############################################################################### # Set default behavior to automatically normalize line endings. ############################################################################### * text=auto *.cs eol=crlf *.fs eol=crlf *.rc eol=crlf *.vcproj eol=crlf *.sln eol=crlf *.nuspec eol=crlf *.config eol=crlf *.md eol=lf *.html eol=lf *.css eol=lf ############################################################################### # Set default behavior for command prompt diff. # # This is need for earlier builds of msysgit that does not have it on by # default for csharp files. # Note: This is only used by command line ############################################################################### #*.cs diff=csharp ############################################################################### # Set the merge driver for project and solution files # # Merging from the command prompt will add diff markers to the files if there # are conflicts (Merging from VS is not affected by the settings below, in VS # the diff markers are never inserted). Diff markers may cause the following # file extensions to fail to load in VS. An alternative would be to treat # these files as binary and thus will always conflict and require user # intervention with every merge. 
To do so, just uncomment the entries below ############################################################################### #*.sln merge=binary #*.csproj merge=binary #*.vbproj merge=binary #*.vcxproj merge=binary #*.vcproj merge=binary #*.dbproj merge=binary #*.fsproj merge=binary #*.lsproj merge=binary #*.wixproj merge=binary #*.modelproj merge=binary #*.sqlproj merge=binary #*.wwaproj merge=binary ############################################################################### # behavior for image files # # image files are treated as binary by default. ############################################################################### #*.jpg binary #*.png binary #*.gif binary ############################################################################### # diff behavior for common document formats # # Convert binary document formats to text before diffing them. This feature # is only available from the command line. Turn it on by uncommenting the # entries below. ############################################################################### #*.doc diff=astextplain #*.DOC diff=astextplain #*.docx diff=astextplain #*.DOCX diff=astextplain #*.dot diff=astextplain #*.DOT diff=astextplain #*.pdf diff=astextplain #*.PDF diff=astextplain #*.rtf diff=astextplain #*.RTF diff=astextplain ================================================ FILE: .gitignore ================================================ ## Ignore Visual Studio temporary files, build results, and ## files generated by popular Visual Studio add-ons. 
# User-specific files *.suo *.user *.userosscache *.sln.docstates # User-specific files (MonoDevelop/Xamarin Studio) *.userprefs # Build results [Dd]ebug/ [Dd]ebugPublic/ [Rr]elease/ [Rr]eleases/ x64/ x86/ build/ bld/ [Bb]in/ [Oo]bj/ # Visual Studo 2015 cache/options directory .vs/ # MSTest test Results [Tt]est[Rr]esult*/ [Bb]uild[Ll]og.* # NUNIT *.VisualState.xml TestResult.xml # Build Results of an ATL Project [Dd]ebugPS/ [Rr]eleasePS/ dlldata.c *_i.c *_p.c *_i.h *.ilk *.meta *.obj *.pch *.pdb *.pgc *.pgd *.rsp *.sbr *.tlb *.tli *.tlh *.tmp *.tmp_proj *.log *.vspscc *.vssscc .builds *.pidb *.svclog *.scc # Chutzpah Test files _Chutzpah* # Visual C++ cache files ipch/ *.aps *.ncb *.opensdf *.sdf *.cachefile # Visual Studio profiler *.psess *.vsp *.vspx # TFS 2012 Local Workspace $tf/ # Guidance Automation Toolkit *.gpState # ReSharper is a .NET coding add-in _ReSharper*/ *.[Rr]e[Ss]harper *.DotSettings.user # JustCode is a .NET coding addin-in .JustCode # TeamCity is a build add-in _TeamCity* # DotCover is a Code Coverage Tool *.dotCover # NCrunch _NCrunch_* .*crunch*.local.xml # MightyMoose *.mm.* AutoTest.Net/ # Web workbench (sass) .sass-cache/ # Installshield output folder [Ee]xpress/ # DocProject is a documentation generator add-in DocProject/buildhelp/ DocProject/Help/*.HxT DocProject/Help/*.HxC DocProject/Help/*.hhc DocProject/Help/*.hhk DocProject/Help/*.hhp DocProject/Help/Html2 DocProject/Help/html # Click-Once directory publish/ # Publish Web Output *.[Pp]ublish.xml *.azurePubxml # TODO: Comment the next line if you want to checkin your web deploy settings # but database connection strings (with potential passwords) will be unencrypted *.pubxml *.publishproj # NuGet Packages *.nupkg # The packages folder can be ignored because of Package Restore **/packages/* # except build/, which is used as an MSBuild target. 
!**/packages/build/ # Uncomment if necessary however generally it will be regenerated when needed #!**/packages/repositories.config # Windows Azure Build Output csx/ *.build.csdef # Windows Store app package directory AppPackages/ # Others *.[Cc]ache ClientBin/ [Ss]tyle[Cc]op.* ~$* *~ *.dbmdl *.dbproj.schemaview *.pfx *.publishsettings node_modules/ bower_components/ # RIA/Silverlight projects Generated_Code/ # Backup & report files from converting an old project file # to a newer Visual Studio version. Backup files are not needed, # because we have git ;-) _UpgradeReport_Files/ Backup*/ UpgradeLog*.XML UpgradeLog*.htm # SQL Server files *.mdf *.ldf # Business Intelligence projects *.rdl.data *.bim.layout *.bim_*.settings # Microsoft Fakes FakesAssemblies/ # Node.js Tools for Visual Studio .ntvs_analysis.dat # Visual Studio 6 build log *.plg # Visual Studio 6 workspace options file *.opt # Private file /nuget-sign.bat ================================================ FILE: CHANGELOG.md ================================================ # Changelog ## 1.3.0 * Build for .NET 4.5.2, 4.6.1 and standard 1.6, thanks [igitur](https://github.com/spreadsheetlab/XLParser/pull/61). * Remove embedded Irony dependency in favor of [daxnet](https://github.com/daxnet)s [updated fork](https://github.com/daxnet/irony). ## 1.2.4 Reference implementation of the Excel grammar published in the Journal of Systems and Software SCAM special issue paper "A Grammar for Spreadsheet Formulas Evaluated on Two Large Datasets" by E. Aivaloglou, D. Hoepelman and F. Hermans. * Fixed several errors in which names/named ranges were allowed * Question marks are now allowed * Can now start with all unicode letters (e.g. `=äbc`) * Corrected characters which are allowed if the name starts with a cell name or TRUE/FALSE (e.g. `=A1.MYNAME`) * Allow for whitespace-only sheetnames (e.g. 
`=' '!A1`), although they will always be returned as `" "` by `PrefixInfo` * Made some corrections in how multiple sheet references (`=Sheet1:Sheet3!A1`) are parsed * Removed escape sequences in strings (e.g. `"Line1\nLine2"`) as these are not part of the Excel formula language * Added support for structured references to a complete table (e.g. `=MyTable[]`) ## 1.2.3 * Adds support for special characters in structured references. ## 1.2.2 * Adds equality to `PrefixInfo` class * Fixes parse error if external reference file path contains a space (`='C:\My Dir\[file.xlsx]Sheet'!A1`) * `ExcelFormulaParser.SkipToRelevant` no longer skips references without a prefix.
This is a breaking change, but the old behavior is arguably a bug. An argument is added to restore old behavior, defaults to new behavior. ## 1.2.1 * Adds `GetReferenceNodes` method to `ExcelFormulaParser` ## 1.2 Fixes [#16](https://github.com/PerfectXL/XLParser/issues/16), [#17](https://github.com/PerfectXL/XLParser/issues/17), [#19](https://github.com/PerfectXL/XLParser/issues/3) * Made it easier to modify the grammar in your own class by extending the grammar class * Can now parse non-numeric filenames (`=[file]Sheet!A1`) * Parsing of the `Prefix` nonterminal is changed and is now a little bit more uniform. `ExcelFormulaParser.GetPrefixInfo` gives prefix information in an easy to use format. * Can now parse [Structured References](https://support.office.com/en-us/article/Using-structured-references-with-Excel-tables-f5ed2452-2337-4f71-bed3-c8ae6d2b276e). See [#16](https://github.com/PerfectXL/XLParser/issues/16) for caveats. * You can now select the XLParser version to use in the web demo ## 1.1.4 * Added some missing methods that test for specific types of operators * Added tests and fixes if necessary for methods that were missing tests ## 1.1.3 Reference implementation of the Excel grammar published in the paper "A Grammar for Spreadsheet Formulas Evaluated on Two Large Datasets" by E. Aivaloglou, D. Hoepelman and F. Hermans. * Added all formulas from EUSES and Enron datasets and tests to check if they all parse * Made parser thread safe * Fixed [#9](https://github.com/PerfectXL/XLParser/issues/9): some tokens which would not accept all unicode characters (like UDF) now do so * `'Sheet1:Sheet5'` will now correctly parse as `MULTIPLESHEETS` instead of a single sheet ## 1.1.2 Fixed [#1](https://github.com/PerfectXL/XLParser/issues/1), [#2](https://github.com/PerfectXL/XLParser/issues/2), [#4](https://github.com/PerfectXL/XLParser/issues/4). 
* Added a web demo in app/XLParser.Web which generates parse tree images * All UDF's now use the same nonterminal * Non-Prefixed UDFs can now be part of a reference expression * IF and CHOOSE functions can now be part of a reference expression * Reference functions INDEX,OFFSET and INDIRECT can no longer have a prefix * Operator precedence for reference operators (: , and intersection) is now correct * Fixed printing of reference operators ## 1.0.0 First public release. Corresponds to pre-print/reviewer version of the paper ================================================ FILE: LICENSE.md ================================================ Mozilla Public License, version 2.0 1. Definitions 1.1. "Contributor" means each individual or legal entity that creates, contributes to the creation of, or owns Covered Software. 1.2. "Contributor Version" means the combination of the Contributions of others (if any) used by a Contributor and that particular Contributor's Contribution. 1.3. "Contribution" means Covered Software of a particular Contributor. 1.4. "Covered Software" means Source Code Form to which the initial Contributor has attached the notice in Exhibit A, the Executable Form of such Source Code Form, and Modifications of such Source Code Form, in each case including portions thereof. 1.5. "Incompatible With Secondary Licenses" means a. that the initial Contributor has attached the notice described in Exhibit B to the Covered Software; or b. that the Covered Software was made available under the terms of version 1.1 or earlier of the License, but not also under the terms of a Secondary License. 1.6. "Executable Form" means any form of the work other than Source Code Form. 1.7. "Larger Work" means a work that combines Covered Software with other material, in a separate file or files, that is not Covered Software. 1.8. "License" means this document. 1.9. 
"Licensable" means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently, any and all of the rights conveyed by this License. 1.10. "Modifications" means any of the following: a. any file in Source Code Form that results from an addition to, deletion from, or modification of the contents of Covered Software; or b. any new file in Source Code Form that contains any Covered Software. 1.11. "Patent Claims" of a Contributor means any patent claim(s), including without limitation, method, process, and apparatus claims, in any patent Licensable by such Contributor that would be infringed, but for the grant of the License, by the making, using, selling, offering for sale, having made, import, or transfer of either its Contributions or its Contributor Version. 1.12. "Secondary License" means either the GNU General Public License, Version 2.0, the GNU Lesser General Public License, Version 2.1, the GNU Affero General Public License, Version 3.0, or any later versions of those licenses. 1.13. "Source Code Form" means the form of the work preferred for making modifications. 1.14. "You" (or "Your") means an individual or a legal entity exercising rights under this License. For legal entities, "You" includes any entity that controls, is controlled by, or is under common control with You. For purposes of this definition, "control" means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. 2. License Grants and Conditions 2.1. Grants Each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license: a. 
under intellectual property rights (other than patent or trademark) Licensable by such Contributor to use, reproduce, make available, modify, display, perform, distribute, and otherwise exploit its Contributions, either on an unmodified basis, with Modifications, or as part of a Larger Work; and b. under Patent Claims of such Contributor to make, use, sell, offer for sale, have made, import, and otherwise transfer either its Contributions or its Contributor Version. 2.2. Effective Date The licenses granted in Section 2.1 with respect to any Contribution become effective for each Contribution on the date the Contributor first distributes such Contribution. 2.3. Limitations on Grant Scope The licenses granted in this Section 2 are the only rights granted under this License. No additional rights or licenses will be implied from the distribution or licensing of Covered Software under this License. Notwithstanding Section 2.1(b) above, no patent license is granted by a Contributor: a. for any code that a Contributor has removed from Covered Software; or b. for infringements caused by: (i) Your and any other third party's modifications of Covered Software, or (ii) the combination of its Contributions with other software (except as part of its Contributor Version); or c. under Patent Claims infringed by Covered Software in the absence of its Contributions. This License does not grant any rights in the trademarks, service marks, or logos of any Contributor (except as may be necessary to comply with the notice requirements in Section 3.4). 2.4. Subsequent Licenses No Contributor makes additional grants as a result of Your choice to distribute the Covered Software under a subsequent version of this License (see Section 10.2) or under the terms of a Secondary License (if permitted under the terms of Section 3.3). 2.5. 
Representation Each Contributor represents that the Contributor believes its Contributions are its original creation(s) or it has sufficient rights to grant the rights to its Contributions conveyed by this License. 2.6. Fair Use This License is not intended to limit any rights You have under applicable copyright doctrines of fair use, fair dealing, or other equivalents. 2.7. Conditions Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in Section 2.1. 3. Responsibilities 3.1. Distribution of Source Form All distribution of Covered Software in Source Code Form, including any Modifications that You create or to which You contribute, must be under the terms of this License. You must inform recipients that the Source Code Form of the Covered Software is governed by the terms of this License, and how they can obtain a copy of this License. You may not attempt to alter or restrict the recipients' rights in the Source Code Form. 3.2. Distribution of Executable Form If You distribute Covered Software in Executable Form then: a. such Covered Software must also be made available in Source Code Form, as described in Section 3.1, and You must inform recipients of the Executable Form how they can obtain a copy of such Source Code Form by reasonable means in a timely manner, at a charge no more than the cost of distribution to the recipient; and b. You may distribute such Executable Form under the terms of this License, or sublicense it under different terms, provided that the license for the Executable Form does not attempt to limit or alter the recipients' rights in the Source Code Form under this License. 3.3. Distribution of a Larger Work You may create and distribute a Larger Work under terms of Your choice, provided that You also comply with the requirements of this License for the Covered Software. 
If the Larger Work is a combination of Covered Software with a work governed by one or more Secondary Licenses, and the Covered Software is not Incompatible With Secondary Licenses, this License permits You to additionally distribute such Covered Software under the terms of such Secondary License(s), so that the recipient of the Larger Work may, at their option, further distribute the Covered Software under the terms of either this License or such Secondary License(s). 3.4. Notices You may not remove or alter the substance of any license notices (including copyright notices, patent notices, disclaimers of warranty, or limitations of liability) contained within the Source Code Form of the Covered Software, except that You may alter any license notices to the extent required to remedy known factual inaccuracies. 3.5. Application of Additional Terms You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, You may do so only on Your own behalf, and not on behalf of any Contributor. You must make it absolutely clear that any such warranty, support, indemnity, or liability obligation is offered by You alone, and You hereby agree to indemnify every Contributor for any liability incurred by such Contributor as a result of warranty, support, indemnity or liability terms You offer. You may include additional disclaimers of warranty and limitations of liability specific to any jurisdiction. 4. Inability to Comply Due to Statute or Regulation If it is impossible for You to comply with any of the terms of this License with respect to some or all of the Covered Software due to statute, judicial order, or regulation then You must: (a) comply with the terms of this License to the maximum extent possible; and (b) describe the limitations and the code they affect. 
Such description must be placed in a text file included with all distributions of the Covered Software under this License. Except to the extent prohibited by statute or regulation, such description must be sufficiently detailed for a recipient of ordinary skill to be able to understand it. 5. Termination 5.1. The rights granted under this License will terminate automatically if You fail to comply with any of its terms. However, if You become compliant, then the rights granted under this License from a particular Contributor are reinstated (a) provisionally, unless and until such Contributor explicitly and finally terminates Your grants, and (b) on an ongoing basis, if such Contributor fails to notify You of the non-compliance by some reasonable means prior to 60 days after You have come back into compliance. Moreover, Your grants from a particular Contributor are reinstated on an ongoing basis if such Contributor notifies You of the non-compliance by some reasonable means, this is the first time You have received notice of non-compliance with this License from such Contributor, and You become compliant prior to 30 days after Your receipt of the notice. 5.2. If You initiate litigation against any entity by asserting a patent infringement claim (excluding declaratory judgment actions, counter-claims, and cross-claims) alleging that a Contributor Version directly or indirectly infringes any patent, then the rights granted to You by any and all Contributors for the Covered Software under Section 2.1 of this License shall terminate. 5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user license agreements (excluding distributors and resellers) which have been validly granted by You or Your distributors under this License prior to termination shall survive termination. 6. 
Disclaimer of Warranty Covered Software is provided under this License on an "as is" basis, without warranty of any kind, either expressed, implied, or statutory, including, without limitation, warranties that the Covered Software is free of defects, merchantable, fit for a particular purpose or non-infringing. The entire risk as to the quality and performance of the Covered Software is with You. Should any Covered Software prove defective in any respect, You (not any Contributor) assume the cost of any necessary servicing, repair, or correction. This disclaimer of warranty constitutes an essential part of this License. No use of any Covered Software is authorized under this License except under this disclaimer. 7. Limitation of Liability Under no circumstances and under no legal theory, whether tort (including negligence), contract, or otherwise, shall any Contributor, or anyone who distributes Covered Software as permitted above, be liable to You for any direct, indirect, special, incidental, or consequential damages of any character including, without limitation, damages for lost profits, loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses, even if such party shall have been informed of the possibility of such damages. This limitation of liability shall not apply to liability for death or personal injury resulting from such party's negligence to the extent applicable law prohibits such limitation. Some jurisdictions do not allow the exclusion or limitation of incidental or consequential damages, so this exclusion and limitation may not apply to You. 8. Litigation Any litigation relating to this License may be brought only in the courts of a jurisdiction where the defendant maintains its principal place of business and such litigation shall be governed by laws of that jurisdiction, without reference to its conflict-of-law provisions. 
Nothing in this Section shall prevent a party's ability to bring cross-claims or counter-claims. 9. Miscellaneous This License represents the complete agreement concerning the subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not be used to construe this License against a Contributor. 10. Versions of the License 10.1. New Versions Mozilla Foundation is the license steward. Except as provided in Section 10.3, no one other than the license steward has the right to modify or publish new versions of this License. Each version will be given a distinguishing version number. 10.2. Effect of New Versions You may distribute the Covered Software under the terms of the version of the License under which You originally received the Covered Software, or under the terms of any subsequent version published by the license steward. 10.3. Modified Versions If you create software not governed by this License, and you want to create a new license for such software, you may create and use a modified version of this License if you rename the license and remove any references to the name of the license steward (except to note that such modified license differs from this License). 10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses If You choose to distribute Source Code Form that is Incompatible With Secondary Licenses under the terms of this version of the License, the notice described in Exhibit B of this License must be attached. Exhibit A - Source Code Form License Notice This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
If it is not possible or desirable to put the notice in a particular file, then You may include the notice in a location (such as a LICENSE file in a relevant directory) where a recipient would be likely to look for such a notice. You may add additional accurate notices of copyright ownership. ================================================ FILE: README.md ================================================ # XLParser A C# Excel formula parser with the following properties: * **High compatibility**
XLParser has been tested on over a million real-world formulas and has a 99.9% successful parse rate. * **Compact parse trees**
XLParser was designed for and is used in research of Excel spreadsheets and refactoring, all of which are easier if parse trees are smaller. * **Compact grammar**
[Our grammar](https://github.com/PerfectXL/XLParser/blob/master/src/XLParser/ExcelFormulaGrammar.cs) contains less than 100 tokens and production rules, and is thus easy to implement in another language or parser generator. ### Quickstart You can parse formulas and view parse trees in [the online demo](https://xlparser.perfectxl.nl/demo). **or** Use [the NuGet package](https://www.nuget.org/packages/XLParser/). **or** 1. Download the [latest release](https://github.com/PerfectXL/XLParser/releases/latest) 2. Extract somewhere convenient 3. Build Irony GrammarExplorer, see the Debugging section below, and open it 4. Click on the `...` button at the top and select `Add Grammar` 5. Point to the `XLParser.dll` file in the folder you extracted and click ok 6. You can now parse formulas and see the trees in the `Test` tab ## Background XLParser is the reference implementation of the Excel grammar published in the paper ["A Grammar for Spreadsheet Formulas Evaluated on Two Large Datasets" by Efthimia Aivaloglou, David Hoepelman and Felienne Hermans](https://fenia266781730.files.wordpress.com/2019/01/07335408.pdf). XLParser can parse Excel formulas and is intended to facilitate the analysis of spreadsheet formulas, and for that purpose produces compact parse trees. XLParser has a 99.99% success rate on the [Enron](http://www.felienne.com/archives/3634) and [EUSES](http://eusesconsortium.org/resources.php) datasets. Note however that XLParser is not very restrictive, and thus might parse formulas that Excel would reject as invalid, keep this in mind when parsing user input with XLParser. XLParser is based on the C# [Irony parser framework](https://github.com/IronyProject/Irony). ## Using XLParser ### Building Open the `XLParser.sln` file in `src/` in Visual Studio and press build. The dependencies are already included in compiled form in this repository. 
### Using in your project The easiest way to add the dependency to your project is with [NuGet](https://www.nuget.org/packages/XLParser/). The `ExcelFormulaParser` class is your main entry point. You can parse a formula through `ExcelFormulaParser.Parse("yourformula")`. `ExcelFormulaParser` has several useful methods that operate directly on the parse tree like `AllNodes` to traverse the whole tree or `GetFunction` to get the function name of a node that represents a function call. You can `Print` any node. `FormulaAnalyzer` provides functionality for analyzing the parse tree. For example, `FormulaAnalyzer.ParserReferences()` returns all references (to cells, cell ranges, named ranges, horizontal or vertical ranges) that the formula contains. ### Debugging Irony, the parser framework XLParser uses, includes a tool called the "grammar explorer". This is a great way to play around with the grammar and parse trees. To use this tool, you first need to build it once by opening the IronyExplorer solution (`lib/IronyExplorer/IronyExplorer.sln`) and building it with release configuration. After that you can use the binary in `lib/IronyExplorer/src/IronyExplorer.GrammarExplorer/bin/Release/IronyExplorer.GrammarExplorer.exe`. To load the XLParser grammar, first make sure you have built XLParser. Then open the GrammarExplorer and add the grammar (`...` button) from `src/XLParser/bin/Debug/XLParser.dll`. In Visual Studio you can see the printed version of any node during debugging by adding `yournode.Print(),ac` in the watch window. ## Documentation Browse the [API documentation](http://spreadsheetlab.github.io/XLParser/api/index.html). See [the doc folder](doc) for further documentation. ## License All files of XLParser are released under the [Mozilla Public License 2.0](LICENSE.md). 
================================================ FILE: app/XLParser apps.sln ================================================  Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 14 VisualStudioVersion = 14.0.23107.0 MinimumVisualStudioVersion = 10.0.40219.1 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "XLParser Web", "XLParser.Web\XLParser Web.csproj", "{A24BD58E-5958-4990-8836-958C52A6AA02}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU Release|Any CPU = Release|Any CPU EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {A24BD58E-5958-4990-8836-958C52A6AA02}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {A24BD58E-5958-4990-8836-958C52A6AA02}.Debug|Any CPU.Build.0 = Debug|Any CPU {A24BD58E-5958-4990-8836-958C52A6AA02}.Release|Any CPU.ActiveCfg = Release|Any CPU {A24BD58E-5958-4990-8836-958C52A6AA02}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection EndGlobal ================================================ FILE: app/XLParser.Web/.gitignore ================================================ deploy ================================================ FILE: app/XLParser.Web/Default.aspx ================================================ <%@ Page Language="C#" AutoEventWireup="true" CodeBehind="Default.aspx.cs" Inherits="XLParser.Web.Default" %> XLParser web demo

XLParser web demo

Formula: Version:
Found a bug?

TU Delft logo
Spreadsheet lab logo
Infotron logo

Parse Tree (SVG, PNG):

================================================ FILE: app/XLParser.Web/Default.aspx.cs ================================================
using System;
using System.Web.UI;

namespace XLParser.Web
{
    // Code-behind for the demo page; the page needs no server-side work on load.
    public partial class Default : Page
    {
        protected void Page_Load(object sender, EventArgs e)
        {
        }
    }
}

================================================ FILE: app/XLParser.Web/Default.aspx.designer.cs ================================================
//------------------------------------------------------------------------------
//
// This code was generated by a tool.
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
//
//------------------------------------------------------------------------------

namespace XLParser.Web
{
    public partial class Default
    {
    }
}

================================================ FILE: app/XLParser.Web/Parse.cs ================================================
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Linq;
using System.Net;
using System.Text.RegularExpressions;
using System.Web;
using Irony.Parsing;
using Newtonsoft.Json;
using XLParser.Web.XLParserVersions.v100;

namespace XLParser.Web
{
    // HTTP handler that parses the "formula" request parameter with the XLParser version named by
    // the "version" request parameter (defaulting to LatestVersion) and writes the resulting parse
    // tree, or a description of the parse error, to the response as JSON.
    public class Parse : IHttpHandler
    {
        // Per-request state. It is (re)assigned at the start of every ProcessRequest call.
        // NOTE(review): the handler reports IsReusable = true while keeping request state in instance
        // fields; this is only safe if an instance never serves two requests concurrently - confirm.
        private HttpContext _httpContext;

        // Writes a string to the response body of the current request.
        private void WriteResponse(string s)
        {
            _httpContext.Response.Write(s);
        }

        // Response caching is switched off in DEBUG builds.
        private static readonly bool DisableCache =
#if(DEBUG)
            true;
#else
            false;
#endif

        private const string LatestVersion = "175";

        // Single source of truth for how long a response may be cached, so that the Expires header
        // and Cache-Control max-age cannot disagree with each other.
        private static readonly TimeSpan CacheDuration = TimeSpan.FromMinutes(5);

        // Entry point of the handler.
        // Responds with:
        //   400 when the version parameter is malformed or no formula is supplied,
        //   404 when the requested version is not bundled,
        //   415 when the request path has an extension other than .json,
        //   422 when the formula cannot be parsed,
        //   and the JSON parse tree otherwise.
        public void ProcessRequest(HttpContext context)
        {
            _httpContext = context;

            if (!DisableCache && context.Request.Params["nocache"] != "true")
            {
                context.Response.Cache.SetCacheability(HttpCacheability.Public);
                context.Response.Cache.SetExpires(DateTime.Now.Add(CacheDuration));
                context.Response.Cache.SetMaxAge(CacheDuration);
            }

            context.Response.AddHeader("Access-Control-Allow-Origin", "*");

            // Dynamically load a library version
            var xlParserVersion = context.Request.Params["version"] ?? LatestVersion;
            if (!Regex.IsMatch(xlParserVersion, @"^[0-9]{3,4}[\-a-z0-9]*$"))
            {
                context.Response.StatusCode = (int) HttpStatusCode.BadRequest;
                _httpContext.Response.ContentType = "text/plain";
                WriteResponse("Invalid version");
                context.Response.End();
                return;
            }

            try
            {
                LoadXlParserVersion(xlParserVersion);
            }
            catch (ArgumentException)
            {
                context.Response.StatusCode = (int) HttpStatusCode.NotFound;
                _httpContext.Response.ContentType = "text/plain";
                WriteResponse("Version doesn't exist");
                context.Response.End();
                return;
            }

            // We want to actually give meaningful HTTP error codes and not have IIS interfere
            context.Response.TrySkipIisCustomErrors = true;

            // Check file extension for format. Path.GetExtension returns an empty string (not null)
            // when the path has no extension, so both cases have to fall back to JSON.
            var extension = Path.GetExtension(context.Request.FilePath);
            var format = string.IsNullOrEmpty(extension) ? "json" : extension.TrimStart('.');

            var formula = context.Request.Unvalidated["formula"];

            switch (format)
            {
                case "json":
                    ParseToJson(formula);
                    break;
                default:
                    context.Response.StatusCode = 415;
                    _httpContext.Response.ContentType = "text/plain";
                    WriteResponse($"Format '{format}' not supported.");
                    context.Response.End();
                    break;
            }
        }

        // Parses the formula with the currently loaded parser version and writes the outcome as JSON:
        // the indented parse tree on success, or an error object (400 for a missing formula,
        // 422 for a formula that does not parse).
        private void ParseToJson(string formula)
        {
            _httpContext.Response.ContentType = "application/json";

            if (formula == null)
            {
                _httpContext.Response.StatusCode = 400;
                WriteResponse(JsonConvert.SerializeObject(new {error = "no formula supplied"}));
                _httpContext.Response.End();
                return;
            }

            ParseTreeNode root;
            try
            {
                //root = XLParser.ExcelFormulaParser.Parse(formula);
                root = _parse(formula);
            }
            catch (ArgumentException)
            {
                // Parse error, return 422 - Unprocessable Entity
                _httpContext.Response.StatusCode = 422;

                // Parse again with a bare Irony parser for the same grammar to obtain the parser
                // messages; only the first message is reported.
                ParseTree r = new Parser((Grammar) Activator.CreateInstance(_grammar)).Parse(formula);
                WriteResponse(JsonConvert.SerializeObject(new
                {
                    error = "Parse error",
                    formula,
                    message = r.ParserMessages.Select(m => new
                    {
                        level = m.Level.ToString(),
                        line = m.Location.Line + 1,
                        column = m.Location.Column + 1,
                        msg = m.Message
                    }).FirstOrDefault()
                }));
                _httpContext.Response.End();
                return;
            }

            WriteResponse(JsonConvert.SerializeObject(
                ToJson(root),
                Formatting.Indented,
                new JsonSerializerSettings
                {
                    NullValueHandling = NullValueHandling.Ignore
                }));
            _httpContext.Response.End();
        }

        // Converts a parse tree node and, recursively, its children into the serializable form.
        // Leaves get children = null, which the serializer settings used above omit from the output.
        private JsonNode ToJson(ParseTreeNode node)
        {
            return new JsonNode
            {
                name = NodeText(node),
                children = node.ChildNodes.Count == 0 ? null : node.ChildNodes.Select(ToJson)
            };
        }

        // Serialization shape of one tree node; the lower-case member names are the JSON keys.
        [SuppressMessage("ReSharper", "InconsistentNaming")]
        private class JsonNode
        {
            public IEnumerable<JsonNode> children;
            public string name;
        }

        // Returns the label shown for a node in the JSON tree.
        private string NodeText(ParseTreeNode node)
        {
            if (node.Term is NonTerminal)
            {
                return node.Term.Name;
            }

            // These are simple terminals like + or =, just print them
            if (node.Term.Name.Length <= 2)
            {
                return _print(node);
            }

            // For other terminals, print the terminal name + contents
            return $"{node.Term.Name}[\"{_print(node)}\"]";
        }

        // Parse and print functions plus the grammar type of the version selected for this request.
        private Func<string, ParseTreeNode> _parse;
        private Func<ParseTreeNode, string> _print;
        private Type _grammar;

        // Yes, this is f-ugly. Better solutions were tried (dynamically loading through reflection, extern alias and separate AppDomains) but failed.
        // Mainly this is because .NET is very very picky about loading multiple versions of libraries with the same name
        //
        // Selects the parser, printer and grammar of the given version.
        // Throws ArgumentException when the version is not bundled with the web app.
        private void LoadXlParserVersion(string version)
        {
            switch (version)
            {
                case "100":
                    _parse = ExcelFormulaParser.Parse;
                    _print = ExcelFormulaParser.Print;
                    _grammar = typeof(ExcelFormulaGrammar);
                    break;
                case "114":
                    _parse = XLParserVersions.v114.ExcelFormulaParser.Parse;
                    _print = XLParserVersions.v114.ExcelFormulaParser.Print;
                    _grammar = typeof(XLParserVersions.v114.ExcelFormulaGrammar);
                    break;
                case "120":
                    _parse = XLParserVersions.v120.ExcelFormulaParser.Parse;
                    _print = XLParserVersions.v120.ExcelFormulaParser.Print;
                    _grammar = typeof(XLParserVersions.v120.ExcelFormulaGrammar);
                    break;
                case "139":
                    _parse = XLParserVersions.v139.ExcelFormulaParser.Parse;
                    _print = XLParserVersions.v139.ExcelFormulaParser.Print;
                    _grammar = typeof(XLParserVersions.v139.ExcelFormulaGrammar);
                    break;
                case "141":
                    _parse = XLParserVersions.v141.ExcelFormulaParser.Parse;
                    _print = XLParserVersions.v141.ExcelFormulaParser.Print;
                    _grammar = typeof(XLParserVersions.v141.ExcelFormulaGrammar);
                    break;
                case "142":
                    _parse = XLParserVersions.v142.ExcelFormulaParser.Parse;
                    _print = XLParserVersions.v142.ExcelFormulaParser.Print;
                    _grammar = typeof(XLParserVersions.v142.ExcelFormulaGrammar);
                    break;
                case "150":
                    _parse = XLParserVersions.v150.ExcelFormulaParser.Parse;
                    _print = XLParserVersions.v150.ExcelFormulaParser.Print;
                    _grammar = typeof(XLParserVersions.v150.ExcelFormulaGrammar);
                    break;
                case "151":
                    _parse = XLParserVersions.v151.ExcelFormulaParser.Parse;
                    _print = XLParserVersions.v151.ExcelFormulaParser.Print;
                    _grammar = typeof(XLParserVersions.v151.ExcelFormulaGrammar);
                    break;
                case "152":
                    _parse = XLParserVersions.v152.ExcelFormulaParser.Parse;
                    _print = XLParserVersions.v152.ExcelFormulaParser.Print;
                    _grammar = typeof(XLParserVersions.v152.ExcelFormulaGrammar);
                    break;
                case "160":
                    _parse = XLParserVersions.v160.ExcelFormulaParser.Parse;
                    _print = XLParserVersions.v160.ExcelFormulaParser.Print;
                    _grammar = typeof(XLParserVersions.v160.ExcelFormulaGrammar);
                    break;
                case "161":
                    _parse = XLParserVersions.v161.ExcelFormulaParser.Parse;
                    _print = XLParserVersions.v161.ExcelFormulaParser.Print;
                    _grammar = typeof(XLParserVersions.v161.ExcelFormulaGrammar);
                    break;
                case "162":
                    _parse = XLParserVersions.v162.ExcelFormulaParser.Parse;
                    _print = XLParserVersions.v162.ExcelFormulaParser.Print;
                    _grammar = typeof(XLParserVersions.v162.ExcelFormulaGrammar);
                    break;
                case "163":
                    _parse = XLParserVersions.v163.ExcelFormulaParser.Parse;
                    _print = XLParserVersions.v163.ExcelFormulaParser.Print;
                    _grammar = typeof(XLParserVersions.v163.ExcelFormulaGrammar);
                    break;
                case "170":
                    _parse = XLParserVersions.v170.ExcelFormulaParser.Parse;
                    _print = XLParserVersions.v170.ExcelFormulaParser.Print;
                    _grammar = typeof(XLParserVersions.v170.ExcelFormulaGrammar);
                    break;
                case "171":
                    _parse = XLParserVersions.v171.ExcelFormulaParser.Parse;
                    _print = XLParserVersions.v171.ExcelFormulaParser.Print;
                    _grammar = typeof(XLParserVersions.v171.ExcelFormulaGrammar);
                    break;
                case "172":
                    _parse = XLParserVersions.v172.ExcelFormulaParser.Parse;
                    _print = XLParserVersions.v172.ExcelFormulaParser.Print;
                    _grammar = typeof(XLParserVersions.v172.ExcelFormulaGrammar);
                    break;
                case "173":
                    _parse = XLParserVersions.v173.ExcelFormulaParser.Parse;
                    _print = XLParserVersions.v173.ExcelFormulaParser.Print;
                    _grammar = typeof(XLParserVersions.v173.ExcelFormulaGrammar);
                    break;
                case "174":
                    _parse = XLParserVersions.v174.ExcelFormulaParser.Parse;
                    _print = XLParserVersions.v174.ExcelFormulaParser.Print;
                    _grammar = typeof(XLParserVersions.v174.ExcelFormulaGrammar);
                    break;
                case "175":
                    _parse = XLParserVersions.v175.ExcelFormulaParser.Parse;
                    _print = XLParserVersions.v175.ExcelFormulaParser.Print;
                    _grammar = typeof(XLParserVersions.v175.ExcelFormulaGrammar);
                    break;
                default:
                    throw new ArgumentException($"Version {version} doesn't exist");
            }
        }

        public bool IsReusable => true;
    }
}

================================================ FILE:
app/XLParser.Web/Properties/AssemblyInfo.cs ================================================ using System.Reflection; using System.Runtime.InteropServices; [assembly: ComVisible(false)] [assembly: Guid("ffb6e35e-f708-485b-996e-f1f0a54fffb7")] [assembly: AssemblyVersion("1.7.5.0")] ================================================ FILE: app/XLParser.Web/Properties/Resources.Designer.cs ================================================ //------------------------------------------------------------------------------ // // This code was generated by a tool. // Runtime Version:4.0.30319.42000 // // Changes to this file may cause incorrect behavior and will be lost if // the code is regenerated. // //------------------------------------------------------------------------------ namespace XLParser.Web.Properties { using System; /// /// A strongly-typed resource class, for looking up localized strings, etc. /// // This class was auto-generated by the StronglyTypedResourceBuilder // class via a tool like ResGen or Visual Studio. // To add or remove a member, edit your .ResX file then rerun ResGen // with the /str option, or rebuild your VS project. [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "17.0.0.0")] [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] internal class Resources { private static global::System.Resources.ResourceManager resourceMan; private static global::System.Globalization.CultureInfo resourceCulture; [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")] internal Resources() { } /// /// Returns the cached ResourceManager instance used by this class. 
/// [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] internal static global::System.Resources.ResourceManager ResourceManager { get { if (object.ReferenceEquals(resourceMan, null)) { global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("XLParser.Web.Properties.Resources", typeof(Resources).Assembly); resourceMan = temp; } return resourceMan; } } /// /// Overrides the current thread's CurrentUICulture property for all /// resource lookups using this strongly typed resource class. /// [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] internal static global::System.Globalization.CultureInfo Culture { get { return resourceCulture; } set { resourceCulture = value; } } /// /// Looks up a localized string similar to ABS ///ACCRINT ///ACCRINTM ///ACOS ///ACOSH ///ADDRESS ///AMORDEGRC ///AMORLINC ///AND ///AREAS ///ASC ///ASIN ///ASINH ///ATAN ///ATAN2 ///ATANH ///AVEDEV ///AVERAGE ///AVERAGEA ///AVERAGEIF ///AVERAGEIFS ///BAHTTEXT ///BESSELI ///BESSELJ ///BESSELK ///BESSELY ///BETADIST ///BETAINV ///BIN2DEC ///BIN2HEX ///BIN2OCT ///BINOMDIST ///CALL ///CEILING ///CELL ///CHAR ///CHIDIST ///CHIINV ///CHITEST ///CLEAN ///CODE ///COLUMN ///COLUMNS ///COMBIN ///COMPLEX ///CONCATENATE ///CONFIDENCE ///CONVERT ///CORREL ///COS ///COSH ///COUNT ///COUNTA ///COUNTBLANK ///COUNTIF ///COUNTIFS ///COUPDAYBS ///COUPDAYS ///COUPDAYSNC ///COUPNCD ///C [rest of string was truncated]";. 
/// internal static string ExcelBuiltinFunctionList_v120 { get { return ResourceManager.GetString("ExcelBuiltinFunctionList-v120", resourceCulture); } } /// /// Looks up a localized string similar to ABS ///ACCRINT ///ACCRINTM ///ACOS ///ACOSH ///ACOT ///ACOTH ///ADDRESS ///AGGREGATE ///AMORDEGRC ///AMORLINC ///AND ///ARABIC ///AREAS ///ASC ///ASIN ///ASINH ///ATAN ///ATAN2 ///ATANH ///AVEDEV ///AVERAGE ///AVERAGEA ///AVERAGEIF ///AVERAGEIFS ///BAHTTEXT ///BASE ///BESSELI ///BESSELJ ///BESSELK ///BESSELY ///BETA.DIST ///BETA.INV ///BETADIST ///BETAINV ///BIN2DEC ///BIN2HEX ///BIN2OCT ///BINOM.DIST ///BINOM.DIST.RANGE ///BINOM.INV ///BINOMDIST ///BITAND ///BITLSHIFT ///BITOR ///BITRSHIFT ///BITXOR ///CALL ///CEILING ///CEILING.MATH ///CEILING.PRECISE ///CELL ///CHAR ///CHIDIST ///CHIINV ///CHISQ.DIST ///CHISQ.DIST [rest of string was truncated]";. /// internal static string ExcelBuiltinFunctionList_v139 { get { return ResourceManager.GetString("ExcelBuiltinFunctionList-v139", resourceCulture); } } /// /// Looks up a localized string similar to ABS ///ACCRINT ///ACCRINTM ///ACOS ///ACOSH ///ACOT ///ACOTH ///ADDRESS ///AGGREGATE ///AMORDEGRC ///AMORLINC ///AND ///ARABIC ///AREAS ///ASC ///ASIN ///ASINH ///ATAN ///ATAN2 ///ATANH ///AVEDEV ///AVERAGE ///AVERAGEA ///AVERAGEIF ///AVERAGEIFS ///BAHTTEXT ///BASE ///BESSELI ///BESSELJ ///BESSELK ///BESSELY ///BETA.DIST ///BETA.INV ///BETADIST ///BETAINV ///BIN2DEC ///BIN2HEX ///BIN2OCT ///BINOM.DIST ///BINOM.DIST.RANGE ///BINOM.INV ///BINOMDIST ///BITAND ///BITLSHIFT ///BITOR ///BITRSHIFT ///BITXOR ///CALL ///CEILING ///CEILING.MATH ///CEILING.PRECISE ///CELL ///CHAR ///CHIDIST ///CHIINV ///CHISQ.DIST ///CHISQ.DIST [rest of string was truncated]";. 
/// internal static string ExcelBuiltinFunctionList_v141 { get { return ResourceManager.GetString("ExcelBuiltinFunctionList_v141", resourceCulture); } } /// /// Looks up a localized string similar to ABS ///ACCRINT ///ACCRINTM ///ACOS ///ACOSH ///ACOT ///ACOTH ///ADDRESS ///AGGREGATE ///AMORDEGRC ///AMORLINC ///AND ///ARABIC ///AREAS ///ASC ///ASIN ///ASINH ///ATAN ///ATAN2 ///ATANH ///AVEDEV ///AVERAGE ///AVERAGEA ///AVERAGEIF ///AVERAGEIFS ///BAHTTEXT ///BASE ///BESSELI ///BESSELJ ///BESSELK ///BESSELY ///BETA.DIST ///BETA.INV ///BETADIST ///BETAINV ///BIN2DEC ///BIN2HEX ///BIN2OCT ///BINOM.DIST ///BINOM.DIST.RANGE ///BINOM.INV ///BINOMDIST ///BITAND ///BITLSHIFT ///BITOR ///BITRSHIFT ///BITXOR ///CALL ///CEILING ///CEILING.MATH ///CEILING.PRECISE ///CELL ///CHAR ///CHIDIST ///CHIINV ///CHISQ.DIST ///CHISQ.DIST [rest of string was truncated]";. /// internal static string ExcelBuiltinFunctionList_v142 { get { return ResourceManager.GetString("ExcelBuiltinFunctionList_v142", resourceCulture); } } /// /// Looks up a localized string similar to ABS ///ACCRINT ///ACCRINTM ///ACOS ///ACOSH ///ACOT ///ACOTH ///ADDRESS ///AGGREGATE ///AMORDEGRC ///AMORLINC ///AND ///ARABIC ///AREAS ///ARRAYTOTEXT ///ASC ///ASIN ///ASINH ///ATAN ///ATAN2 ///ATANH ///AVEDEV ///AVERAGE ///AVERAGEA ///AVERAGEIF ///AVERAGEIFS ///BAHTTEXT ///BASE ///BESSELI ///BESSELJ ///BESSELK ///BESSELY ///BETA.DIST ///BETA.INV ///BETADIST ///BETAINV ///BIN2DEC ///BIN2HEX ///BIN2OCT ///BINOM.DIST ///BINOM.DIST.RANGE ///BINOM.INV ///BINOMDIST ///BITAND ///BITLSHIFT ///BITOR ///BITRSHIFT ///BITXOR ///CALL ///CEILING ///CEILING.MATH ///CEILING.PRECISE ///CELL ///CHAR ///CHIDIST ///CHIINV ///CHISQ.DIS [rest of string was truncated]";. 
/// internal static string ExcelBuiltinFunctionList_v150 { get { return ResourceManager.GetString("ExcelBuiltinFunctionList_v150", resourceCulture); } } /// /// Looks up a localized string similar to ABS ///ACCRINT ///ACCRINTM ///ACOS ///ACOSH ///ACOT ///ACOTH ///ADDRESS ///AGGREGATE ///AMORDEGRC ///AMORLINC ///AND ///ARABIC ///AREAS ///ARRAYTOTEXT ///ASC ///ASIN ///ASINH ///ATAN ///ATAN2 ///ATANH ///AVEDEV ///AVERAGE ///AVERAGEA ///AVERAGEIF ///AVERAGEIFS ///BAHTTEXT ///BASE ///BESSELI ///BESSELJ ///BESSELK ///BESSELY ///BETA.DIST ///BETA.INV ///BETADIST ///BETAINV ///BIN2DEC ///BIN2HEX ///BIN2OCT ///BINOM.DIST ///BINOM.DIST.RANGE ///BINOM.INV ///BINOMDIST ///BITAND ///BITLSHIFT ///BITOR ///BITRSHIFT ///BITXOR ///CALL ///CEILING ///CEILING.MATH ///CEILING.PRECISE ///CELL ///CHAR ///CHIDIST ///CHIINV ///CHISQ.DIS [rest of string was truncated]";. /// internal static string ExcelBuiltinFunctionList_v151 { get { return ResourceManager.GetString("ExcelBuiltinFunctionList_v151", resourceCulture); } } /// /// Looks up a localized string similar to ABS ///ACCRINT ///ACCRINTM ///ACOS ///ACOSH ///ACOT ///ACOTH ///ADDRESS ///AGGREGATE ///AMORDEGRC ///AMORLINC ///AND ///ARABIC ///AREAS ///ARRAYTOTEXT ///ASC ///ASIN ///ASINH ///ATAN ///ATAN2 ///ATANH ///AVEDEV ///AVERAGE ///AVERAGEA ///AVERAGEIF ///AVERAGEIFS ///BAHTTEXT ///BASE ///BESSELI ///BESSELJ ///BESSELK ///BESSELY ///BETA.DIST ///BETA.INV ///BETADIST ///BETAINV ///BIN2DEC ///BIN2HEX ///BIN2OCT ///BINOM.DIST ///BINOM.DIST.RANGE ///BINOM.INV ///BINOMDIST ///BITAND ///BITLSHIFT ///BITOR ///BITRSHIFT ///BITXOR ///CALL ///CEILING ///CEILING.MATH ///CEILING.PRECISE ///CELL ///CHAR ///CHIDIST ///CHIINV ///CHISQ.DIS [rest of string was truncated]";. 
/// internal static string ExcelBuiltinFunctionList_v152 { get { return ResourceManager.GetString("ExcelBuiltinFunctionList_v152", resourceCulture); } } /// /// Looks up a localized string similar to ABS ///ACCRINT ///ACCRINTM ///ACOS ///ACOSH ///ACOT ///ACOTH ///ADDRESS ///AGGREGATE ///AMORDEGRC ///AMORLINC ///AND ///ARABIC ///AREAS ///ARRAYTOTEXT ///ASC ///ASIN ///ASINH ///ATAN ///ATAN2 ///ATANH ///AVEDEV ///AVERAGE ///AVERAGEA ///AVERAGEIF ///AVERAGEIFS ///BAHTTEXT ///BASE ///BESSELI ///BESSELJ ///BESSELK ///BESSELY ///BETA.DIST ///BETA.INV ///BETADIST ///BETAINV ///BIN2DEC ///BIN2HEX ///BIN2OCT ///BINOM.DIST ///BINOM.DIST.RANGE ///BINOM.INV ///BINOMDIST ///BITAND ///BITLSHIFT ///BITOR ///BITRSHIFT ///BITXOR ///CALL ///CEILING ///CEILING.MATH ///CEILING.PRECISE ///CELL ///CHAR ///CHIDIST ///CHIINV ///CHISQ.DIS [rest of string was truncated]";. /// internal static string ExcelBuiltinFunctionList_v160 { get { return ResourceManager.GetString("ExcelBuiltinFunctionList_v160", resourceCulture); } } /// /// Looks up a localized string similar to ABS ///ACCRINT ///ACCRINTM ///ACOS ///ACOSH ///ACOT ///ACOTH ///ADDRESS ///AGGREGATE ///AMORDEGRC ///AMORLINC ///AND ///ARABIC ///AREAS ///ARRAYTOTEXT ///ASC ///ASIN ///ASINH ///ATAN ///ATAN2 ///ATANH ///AVEDEV ///AVERAGE ///AVERAGEA ///AVERAGEIF ///AVERAGEIFS ///BAHTTEXT ///BASE ///BESSELI ///BESSELJ ///BESSELK ///BESSELY ///BETA.DIST ///BETA.INV ///BETADIST ///BETAINV ///BIN2DEC ///BIN2HEX ///BIN2OCT ///BINOM.DIST ///BINOM.DIST.RANGE ///BINOM.INV ///BINOMDIST ///BITAND ///BITLSHIFT ///BITOR ///BITRSHIFT ///BITXOR ///CALL ///CEILING ///CEILING.MATH ///CEILING.PRECISE ///CELL ///CHAR ///CHIDIST ///CHIINV ///CHISQ.DIS [rest of string was truncated]";. 
/// internal static string ExcelBuiltinFunctionList_v161 { get { return ResourceManager.GetString("ExcelBuiltinFunctionList_v161", resourceCulture); } } /// /// Looks up a localized string similar to ABS ///ACCRINT ///ACCRINTM ///ACOS ///ACOSH ///ACOT ///ACOTH ///ADDRESS ///AGGREGATE ///AMORDEGRC ///AMORLINC ///AND ///ARABIC ///AREAS ///ARRAYTOTEXT ///ASC ///ASIN ///ASINH ///ATAN ///ATAN2 ///ATANH ///AVEDEV ///AVERAGE ///AVERAGEA ///AVERAGEIF ///AVERAGEIFS ///BAHTTEXT ///BASE ///BESSELI ///BESSELJ ///BESSELK ///BESSELY ///BETA.DIST ///BETA.INV ///BETADIST ///BETAINV ///BIN2DEC ///BIN2HEX ///BIN2OCT ///BINOM.DIST ///BINOM.DIST.RANGE ///BINOM.INV ///BINOMDIST ///BITAND ///BITLSHIFT ///BITOR ///BITRSHIFT ///BITXOR ///CALL ///CEILING ///CEILING.MATH ///CEILING.PRECISE ///CELL ///CHAR ///CHIDIST ///CHIINV ///CHISQ.DIS [rest of string was truncated]";. /// internal static string ExcelBuiltinFunctionList_v162 { get { return ResourceManager.GetString("ExcelBuiltinFunctionList_v162", resourceCulture); } } /// /// Looks up a localized string similar to ABS ///ACCRINT ///ACCRINTM ///ACOS ///ACOSH ///ACOT ///ACOTH ///ADDRESS ///AGGREGATE ///AMORDEGRC ///AMORLINC ///AND ///ARABIC ///AREAS ///ARRAYTOTEXT ///ASC ///ASIN ///ASINH ///ATAN ///ATAN2 ///ATANH ///AVEDEV ///AVERAGE ///AVERAGEA ///AVERAGEIF ///AVERAGEIFS ///BAHTTEXT ///BASE ///BESSELI ///BESSELJ ///BESSELK ///BESSELY ///BETA.DIST ///BETA.INV ///BETADIST ///BETAINV ///BIN2DEC ///BIN2HEX ///BIN2OCT ///BINOM.DIST ///BINOM.DIST.RANGE ///BINOM.INV ///BINOMDIST ///BITAND ///BITLSHIFT ///BITOR ///BITRSHIFT ///BITXOR ///CALL ///CEILING ///CEILING.MATH ///CEILING.PRECISE ///CELL ///CHAR ///CHIDIST ///CHIINV ///CHISQ.DIS [rest of string was truncated]";. 
/// internal static string ExcelBuiltinFunctionList_v163 { get { return ResourceManager.GetString("ExcelBuiltinFunctionList_v163", resourceCulture); } } /// /// Looks up a localized string similar to ABS ///ACCRINT ///ACCRINTM ///ACOS ///ACOSH ///ACOT ///ACOTH ///ADDRESS ///AGGREGATE ///AMORDEGRC ///AMORLINC ///AND ///ARABIC ///AREAS ///ARRAYTOTEXT ///ASC ///ASIN ///ASINH ///ATAN ///ATAN2 ///ATANH ///AVEDEV ///AVERAGE ///AVERAGEA ///AVERAGEIF ///AVERAGEIFS ///BAHTTEXT ///BASE ///BESSELI ///BESSELJ ///BESSELK ///BESSELY ///BETA.DIST ///BETA.INV ///BETADIST ///BETAINV ///BIN2DEC ///BIN2HEX ///BIN2OCT ///BINOM.DIST ///BINOM.DIST.RANGE ///BINOM.INV ///BINOMDIST ///BITAND ///BITLSHIFT ///BITOR ///BITRSHIFT ///BITXOR ///CALL ///CEILING ///CEILING.MATH ///CEILING.PRECISE ///CELL ///CHAR ///CHIDIST ///CHIINV ///CHISQ.DIS [rest of string was truncated]";. /// internal static string ExcelBuiltinFunctionList_v170 { get { return ResourceManager.GetString("ExcelBuiltinFunctionList_v170", resourceCulture); } } /// /// Looks up a localized string similar to ABS ///ACCRINT ///ACCRINTM ///ACOS ///ACOSH ///ACOT ///ACOTH ///ADDRESS ///AGGREGATE ///AMORDEGRC ///AMORLINC ///AND ///ARABIC ///AREAS ///ARRAYTOTEXT ///ASC ///ASIN ///ASINH ///ATAN ///ATAN2 ///ATANH ///AVEDEV ///AVERAGE ///AVERAGEA ///AVERAGEIF ///AVERAGEIFS ///BAHTTEXT ///BASE ///BESSELI ///BESSELJ ///BESSELK ///BESSELY ///BETA.DIST ///BETA.INV ///BETADIST ///BETAINV ///BIN2DEC ///BIN2HEX ///BIN2OCT ///BINOM.DIST ///BINOM.DIST.RANGE ///BINOM.INV ///BINOMDIST ///BITAND ///BITLSHIFT ///BITOR ///BITRSHIFT ///BITXOR ///CALL ///CEILING ///CEILING.MATH ///CEILING.PRECISE ///CELL ///CHAR ///CHIDIST ///CHIINV ///CHISQ.DIS [rest of string was truncated]";. 
/// internal static string ExcelBuiltinFunctionList_v171 { get { return ResourceManager.GetString("ExcelBuiltinFunctionList_v171", resourceCulture); } } /// /// Looks up a localized string similar to ABS ///ACCRINT ///ACCRINTM ///ACOS ///ACOSH ///ACOT ///ACOTH ///ADDRESS ///AGGREGATE ///AMORDEGRC ///AMORLINC ///AND ///ARABIC ///AREAS ///ARRAYTOTEXT ///ASC ///ASIN ///ASINH ///ATAN ///ATAN2 ///ATANH ///AVEDEV ///AVERAGE ///AVERAGEA ///AVERAGEIF ///AVERAGEIFS ///BAHTTEXT ///BASE ///BESSELI ///BESSELJ ///BESSELK ///BESSELY ///BETA.DIST ///BETA.INV ///BETADIST ///BETAINV ///BIN2DEC ///BIN2HEX ///BIN2OCT ///BINOM.DIST ///BINOM.DIST.RANGE ///BINOM.INV ///BINOMDIST ///BITAND ///BITLSHIFT ///BITOR ///BITRSHIFT ///BITXOR ///CALL ///CEILING ///CEILING.MATH ///CEILING.PRECISE ///CELL ///CHAR ///CHIDIST ///CHIINV ///CHISQ.DIS [rest of string was truncated]";. /// internal static string ExcelBuiltinFunctionList_v172 { get { return ResourceManager.GetString("ExcelBuiltinFunctionList_v172", resourceCulture); } } /// /// Looks up a localized string similar to ABS ///ACCRINT ///ACCRINTM ///ACOS ///ACOSH ///ACOT ///ACOTH ///ADDRESS ///AGGREGATE ///AMORDEGRC ///AMORLINC ///AND ///ARABIC ///AREAS ///ARRAYTOTEXT ///ASC ///ASIN ///ASINH ///ATAN ///ATAN2 ///ATANH ///AVEDEV ///AVERAGE ///AVERAGEA ///AVERAGEIF ///AVERAGEIFS ///BAHTTEXT ///BASE ///BESSELI ///BESSELJ ///BESSELK ///BESSELY ///BETA.DIST ///BETA.INV ///BETADIST ///BETAINV ///BIN2DEC ///BIN2HEX ///BIN2OCT ///BINOM.DIST ///BINOM.DIST.RANGE ///BINOM.INV ///BINOMDIST ///BITAND ///BITLSHIFT ///BITOR ///BITRSHIFT ///BITXOR ///CALL ///CEILING ///CEILING.MATH ///CEILING.PRECISE ///CELL ///CHAR ///CHIDIST ///CHIINV ///CHISQ.DIS [rest of string was truncated]";. 
/// internal static string ExcelBuiltinFunctionList_v173 { get { return ResourceManager.GetString("ExcelBuiltinFunctionList_v173", resourceCulture); } } /// /// Looks up a localized string similar to ABS ///ACCRINT ///ACCRINTM ///ACOS ///ACOSH ///ACOT ///ACOTH ///ADDRESS ///AGGREGATE ///AMORDEGRC ///AMORLINC ///AND ///ARABIC ///AREAS ///ARRAYTOTEXT ///ASC ///ASIN ///ASINH ///ATAN ///ATAN2 ///ATANH ///AVEDEV ///AVERAGE ///AVERAGEA ///AVERAGEIF ///AVERAGEIFS ///BAHTTEXT ///BASE ///BESSELI ///BESSELJ ///BESSELK ///BESSELY ///BETA.DIST ///BETA.INV ///BETADIST ///BETAINV ///BIN2DEC ///BIN2HEX ///BIN2OCT ///BINOM.DIST ///BINOM.DIST.RANGE ///BINOM.INV ///BINOMDIST ///BITAND ///BITLSHIFT ///BITOR ///BITRSHIFT ///BITXOR ///CALL ///CEILING ///CEILING.MATH ///CEILING.PRECISE ///CELL ///CHAR ///CHIDIST ///CHIINV ///CHISQ.DIS [rest of string was truncated]";. /// internal static string ExcelBuiltinFunctionList_v174 { get { return ResourceManager.GetString("ExcelBuiltinFunctionList_v174", resourceCulture); } } /// /// Looks up a localized string similar to ABS ///ACCRINT ///ACCRINTM ///ACOS ///ACOSH ///ACOT ///ACOTH ///ADDRESS ///AGGREGATE ///AMORDEGRC ///AMORLINC ///AND ///ARABIC ///AREAS ///ARRAYTOTEXT ///ASC ///ASIN ///ASINH ///ATAN ///ATAN2 ///ATANH ///AVEDEV ///AVERAGE ///AVERAGEA ///AVERAGEIF ///AVERAGEIFS ///BAHTTEXT ///BASE ///BESSELI ///BESSELJ ///BESSELK ///BESSELY ///BETA.DIST ///BETA.INV ///BETADIST ///BETAINV ///BIN2DEC ///BIN2HEX ///BIN2OCT ///BINOM.DIST ///BINOM.DIST.RANGE ///BINOM.INV ///BINOMDIST ///BITAND ///BITLSHIFT ///BITOR ///BITRSHIFT ///BITXOR ///CALL ///CEILING ///CEILING.MATH ///CEILING.PRECISE ///CELL ///CHAR ///CHIDIST ///CHIINV ///CHISQ.DIS [rest of string was truncated]";. 
/// internal static string ExcelBuiltinFunctionList_v175 { get { return ResourceManager.GetString("ExcelBuiltinFunctionList_v175", resourceCulture); } } } } ================================================ FILE: app/XLParser.Web/Properties/Resources.resx ================================================  text/microsoft-resx 2.0 System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 ..\Resources\ExcelBuiltinFunctionList-v120.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252 ..\resources\excelbuiltinfunctionlist-v139.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252 ..\Resources\ExcelBuiltinFunctionList-v141.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252 ..\Resources\ExcelBuiltinFunctionList-v142.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252 ..\Resources\ExcelBuiltinFunctionList-v150.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252 ..\Resources\ExcelBuiltinFunctionList-v151.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252 ..\Resources\ExcelBuiltinFunctionList-v152.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252 ..\resources\excelbuiltinfunctionlist-v160.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252 ..\resources\excelbuiltinfunctionlist-v161.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252 ..\resources\excelbuiltinfunctionlist-v162.txt;System.String, 
mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;iso-8859-1 ..\resources\excelbuiltinfunctionlist-v163.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252 ..\Resources\ExcelBuiltinFunctionList-v170.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252 ..\Resources\ExcelBuiltinFunctionList-v171.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252 ..\Resources\ExcelBuiltinFunctionList-v172.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252 ..\Resources\ExcelBuiltinFunctionList-v173.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252 ..\Resources\ExcelBuiltinFunctionList-v174.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252 ..\Resources\ExcelBuiltinFunctionList-v175.txt;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;Windows-1252 ================================================ FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v120.txt ================================================ ABS ACCRINT ACCRINTM ACOS ACOSH ADDRESS AMORDEGRC AMORLINC AND AREAS ASC ASIN ASINH ATAN ATAN2 ATANH AVEDEV AVERAGE AVERAGEA AVERAGEIF AVERAGEIFS BAHTTEXT BESSELI BESSELJ BESSELK BESSELY BETADIST BETAINV BIN2DEC BIN2HEX BIN2OCT BINOMDIST CALL CEILING CELL CHAR CHIDIST CHIINV CHITEST CLEAN CODE COLUMN COLUMNS COMBIN COMPLEX CONCATENATE CONFIDENCE CONVERT CORREL COS COSH COUNT COUNTA COUNTBLANK COUNTIF COUNTIFS COUPDAYBS COUPDAYS COUPDAYSNC COUPNCD COUPNUM COUPPCD COVAR CRITBINOM CUBEKPIMEMBER CUBEMEMBER CUBEMEMBERPROPERTY CUBERANKEDMEMBER CUBESET CUBESETCOUNT CUBEVALUE CUMIPMT CUMPRINC DATE DATEVALUE DAVERAGE DAY DAYS360 DB DCOUNT DCOUNTA DDB DEC2BIN DEC2HEX DEC2OCT DEGREES DELTA 
DEVSQ DGET DISC DMAX DMIN DOLLAR DOLLARDE DOLLARFR DPRODUCT DSTDEV DSTDEVP DSUM DURATION DVAR DVARP EDATEEFFECT EOMONTH ERF ERFC ERROR.TYPE EUROCONVERT EVEN EXACT EXP EXPONDIST FACT FACTDOUBLE FALSE FDIST FIND FINV FISHER FISHERINV FIXED FLOOR FORECAST FREQUENCY FTEST FV FVSCHEDULE GAMMADIST GAMMAINV GAMMALN GCD GEOMEAN GESTEP GETPIVOTDATA GROWTH HARMEAN HEX2BIN HEX2DEC HEX2OCT HLOOKUP HOUR HYPERLINK HYPGEOMDIST ISBLANK IFERROR IMABS IMAGINARY IMARGUMENT IMCONJUGATE IMCOS IMDIV IMEXP IMLN IMLOG10 IMLOG2 IMPOWER IMPRODUCT IMREAL IMSIN IMSQRT IMSUB IMSUM INFO INT INTERCEPT INTRATE IPMT IRR IS ISB ISERROR ISNA ISNUMBER ISPMT JIS KURT LARGE LCM LEFT LEFTB LEN LENB LINEST LN LOG LOG10 LOGEST LOGINV LOGNORMDIST LOOKUP LOWER MATCH MAX MAXA MDETERM MDURATION MEDIAN MID MIDB MIN MINA MINUTE MINVERSE MIRR MMULT MOD MODE MONTH MROUND MULTINOMIAL N NA NEGBINOMDIST NETWORKDAYS NOMINAL NORMDIST NORMINV NORMSDIST NORMSINV NOT NOW NPER NPV OCT2BIN OCT2DEC OCT2HEX ODD ODDFPRICE ODDFYIELD ODDLPRICE ODDLYIELD OR PEARSON PERCENTILE PERCENTRANK PERMUT PHONETIC PI PMT POISSON POWER PPMT PRICE PRICEDISC PRICEMAT PROB PRODUCT PROPER PV QUARTILE QUOTIENT RADIANS RAND RANDBETWEEN RANK RATE RECEIVED REGISTER.ID REPLACE REPLACEB REPT RIGHT RIGHTB ROMAN ROUND ROUNDDOWN ROUNDUP ROW ROWS RSQ RTD SEARCH SEARCHB SECOND SERIESSUM SIGN SIN SINH SKEW SLN SLOPE SMALL SQL.REQUEST SQRT SQRTPI STANDARDIZE STDEV STDEVA STDEVP STDEVPA STEYX SUBSTITUTE SUBTOTAL SUM SUMIF SUMIFS SUMPRODUCT SUMSQ SUMX2MY2 SUMX2PY2 SUMXMY2 SYD T TAN TANH TBILLEQ TBILLPRICE TBILLYIELD TDIST TEXT TIME TIMEVALUE TINV TODAY TRANSPOSE TREND TRIM TRIMMEAN TRUE TRUNC TTEST TYPE UPPER VALUE VAR VARA VARP VARPA VDB VLOOKUP WEEKDAY WEEKNUM WEIBULL WORKDAY XIRR XNPV YEAR YEARFRAC YIELD YIELDDISC YIELDMAT ZTEST ================================================ FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v139.txt ================================================ ABS ACCRINT ACCRINTM ACOS ACOSH ACOT ACOTH ADDRESS AGGREGATE 
AMORDEGRC AMORLINC AND ARABIC AREAS ASC ASIN ASINH ATAN ATAN2 ATANH AVEDEV AVERAGE AVERAGEA AVERAGEIF AVERAGEIFS BAHTTEXT BASE BESSELI BESSELJ BESSELK BESSELY BETA.DIST BETA.INV BETADIST BETAINV BIN2DEC BIN2HEX BIN2OCT BINOM.DIST BINOM.DIST.RANGE BINOM.INV BINOMDIST BITAND BITLSHIFT BITOR BITRSHIFT BITXOR CALL CEILING CEILING.MATH CEILING.PRECISE CELL CHAR CHIDIST CHIINV CHISQ.DIST CHISQ.DIST.RT CHISQ.INV CHISQ.INV.RT CHISQ.TEST CHITEST CLEAN CODE COLUMN COLUMNS COMBIN COMBINA COMPLEX CONCAT CONCATENATE CONFIDENCE CONFIDENCE.NORM CONFIDENCE.T CONVERT CORREL COS COSH COT COTH COUNT COUNTA COUNTBLANK COUNTIF COUNTIFS COUPDAYBS COUPDAYS COUPDAYSNC COUPNCD COUPNUM COUPPCD COVAR COVARIANCE.P COVARIANCE.S CRITBINOM CSC CSCH CUBEKPIMEMBER CUBEMEMBER CUBEMEMBERPROPERTY CUBERANKEDMEMBER CUBESET CUBESETCOUNT CUBEVALUE CUMIPMT CUMPRINC DATE DATEDIF DATEVALUE DAVERAGE DAY DAYS DAYS360 DB DBCS DCOUNT DCOUNTA DDB DEC2BIN DEC2HEX DEC2OCT DECIMAL DEGREES DELTA DEVSQ DGET DISC DMAX DMIN DOLLAR DOLLARDE DOLLARFR DPRODUCT DSTDEV DSTDEVP DSUM DURATION DVAR DVARP EDATE EFFECT ENCODEURL EOMONTH ERF ERF.PRECISE ERFC ERFC.PRECISE ERROR.TYPE EUROCONVERT EVEN EXACT EXP EXPON.DIST EXPONDIST F.DIST F.DIST.RT F.INV F.INV.RT F.TEST FACT FACTDOUBLE FALSE FDIST FILTERXML FIND FINDB FINV FISHER FISHERINV FIXED FLOOR FLOOR.MATH FLOOR.PRECISE FORECAST FORECAST.ETS FORECAST.ETS.CONFINT FORECAST.ETS.SEASONALITY FORECAST.ETS.STAT FORECAST.LINEAR FORMULATEXT FREQUENCY FTEST FV FVSCHEDULE GAMMA GAMMA.DIST GAMMA.INV GAMMADIST GAMMAINV GAMMALN GAMMALN.PRECISE GAUSS GCD GEOMEAN GESTEP GETPIVOTDATA GROWTH HARMEAN HEX2BIN HEX2DEC HEX2OCT HLOOKUP HOUR HYPERLINK HYPGEOM.DIST HYPGEOMDIST ISBLANK IFERROR IFNA IFS IMABS IMAGINARY IMARGUMENT IMCONJUGATE IMCOS IMCOSH IMCOT IMCSC IMCSCH IMDIV IMEXP IMLN IMLOG10 IMLOG2 IMPOWER IMPRODUCT IMREAL IMSEC IMSECH IMSIN IMSINH IMSQRT IMSUB IMSUM IMTAN INFO INT INTERCEPT INTRATE IPMT IRR IS ISB ISERR ISERROR ISEVEN ISFORMULA ISLOGICAL ISNA ISNONTEXT ISNUMBER ISO.CEILING ISODD 
ISOWEEKNUM ISPMT ISREF ISTEXT JIS KURT LARGE LCM LEFT LEFTB LEN LENB LINEST LN LOG LOG10 LOGEST LOGINV LOGNORM.DIST LOGNORM.INV LOGNORMDIST LOOKUP LOWER MATCH MAX MAXA MAXIFS MDETERM MDURATION MEDIAN MID MIDB MIN MINA MINIFS MINUTE MINVERSE MIRR MMULT MOD MODE MODE.MULT MODE.SNGL MONTH MROUND MULTINOMIAL MUNIT N NA NEGBINOM.DIST NEGBINOMDIST NETWORKDAYS NETWORKDAYS.INTL NOMINAL NORM.DIST NORM.INV NORM.S.DIST NORM.S.INV NORMDIST NORMINV NORMSDIST NORMSINV NOT NOW NPER NPV NUMBERVALUE OCT2BIN OCT2DEC OCT2HEX ODD ODDFPRICE ODDFYIELD ODDLPRICE ODDLYIELD OR PDURATION PEARSON PERCENTILE PERCENTILE.EXC PERCENTILE.INC PERCENTRANK PERCENTRANK.EXC PERCENTRANK.INC PERMUT PERMUTATIONA PHI PHONETIC PI PMT POISSON POISSON.DIST POWER PPMT PRICE PRICEDISC PRICEMAT PROB PRODUCT PROPER PV QUARTILE QUARTILE.EXC QUARTILE.INC QUOTIENT RADIANS RAND RANDBETWEEN RANK RANK.AVG RANK.EQ RATE RECEIVED REGISTER.ID REPLACE REPLACEB REPT RIGHT RIGHTB ROMAN ROUND ROUNDDOWN ROUNDUP ROW ROWS RRI RSQ RTD SEARCH SEARCHB SEC SECH SECOND SERIESSUM SHEET SHEETS SIGN SIN SINH SKEW SKEW.P SLN SLOPE SMALL SQL.REQUEST SQRT SQRTPI STANDARDIZE STDEV STDEV.P STDEV.S STDEVA STDEVP STDEVPA STEYX SUBSTITUTE SUBTOTAL SUM SUMIF SUMIFS SUMPRODUCT SUMSQ SUMX2MY2 SUMX2PY2 SUMXMY2 SWITCH SYD T T.DIST T.DIST.2T T.DIST.RT T.INV T.INV.2T T.TEST TAN TANH TBILLEQ TBILLPRICE TBILLYIELD TDIST TEXT TEXTJOIN TIME TIMEVALUE TINV TODAY TRANSPOSE TREND TRIM TRIMMEAN TRUE TRUNC TTEST TYPE UNICHAR UNICODE UPPER VALUE VAR VAR.P VAR.S VARA VARP VARPA VDB VLOOKUP WEBSERVICE WEEKDAY WEEKNUM WEIBULL WEIBULL.DIST WORKDAY WORKDAY.INTL XIRR XNPV XOR YEAR YEARFRAC YIELD YIELDDISC YIELDMAT Z.TEST ZTEST ================================================ FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v141.txt ================================================ ABS ACCRINT ACCRINTM ACOS ACOSH ACOT ACOTH ADDRESS AGGREGATE AMORDEGRC AMORLINC AND ARABIC AREAS ASC ASIN ASINH ATAN ATAN2 ATANH AVEDEV AVERAGE AVERAGEA AVERAGEIF AVERAGEIFS 
BAHTTEXT BASE BESSELI BESSELJ BESSELK BESSELY BETA.DIST BETA.INV BETADIST BETAINV BIN2DEC BIN2HEX BIN2OCT BINOM.DIST BINOM.DIST.RANGE BINOM.INV BINOMDIST BITAND BITLSHIFT BITOR BITRSHIFT BITXOR CALL CEILING CEILING.MATH CEILING.PRECISE CELL CHAR CHIDIST CHIINV CHISQ.DIST CHISQ.DIST.RT CHISQ.INV CHISQ.INV.RT CHISQ.TEST CHITEST CLEAN CODE COLUMN COLUMNS COMBIN COMBINA COMPLEX CONCAT CONCATENATE CONFIDENCE CONFIDENCE.NORM CONFIDENCE.T CONVERT CORREL COS COSH COT COTH COUNT COUNTA COUNTBLANK COUNTIF COUNTIFS COUPDAYBS COUPDAYS COUPDAYSNC COUPNCD COUPNUM COUPPCD COVAR COVARIANCE.P COVARIANCE.S CRITBINOM CSC CSCH CUBEKPIMEMBER CUBEMEMBER CUBEMEMBERPROPERTY CUBERANKEDMEMBER CUBESET CUBESETCOUNT CUBEVALUE CUMIPMT CUMPRINC DATE DATEDIF DATEVALUE DAVERAGE DAY DAYS DAYS360 DB DBCS DCOUNT DCOUNTA DDB DEC2BIN DEC2HEX DEC2OCT DECIMAL DEGREES DELTA DEVSQ DGET DISC DMAX DMIN DOLLAR DOLLARDE DOLLARFR DPRODUCT DSTDEV DSTDEVP DSUM DURATION DVAR DVARP EDATE EFFECT ENCODEURL EOMONTH ERF ERF.PRECISE ERFC ERFC.PRECISE ERROR.TYPE EUROCONVERT EVEN EXACT EXP EXPON.DIST EXPONDIST F.DIST F.DIST.RT F.INV F.INV.RT F.TEST FACT FACTDOUBLE FALSE FDIST FILTERXML FIND FINDB FINV FISHER FISHERINV FIXED FLOOR FLOOR.MATH FLOOR.PRECISE FORECAST FORECAST.ETS FORECAST.ETS.CONFINT FORECAST.ETS.SEASONALITY FORECAST.ETS.STAT FORECAST.LINEAR FORMULATEXT FREQUENCY FTEST FV FVSCHEDULE GAMMA GAMMA.DIST GAMMA.INV GAMMADIST GAMMAINV GAMMALN GAMMALN.PRECISE GAUSS GCD GEOMEAN GESTEP GETPIVOTDATA GROWTH HARMEAN HEX2BIN HEX2DEC HEX2OCT HLOOKUP HOUR HYPERLINK HYPGEOM.DIST HYPGEOMDIST ISBLANK IFERROR IFNA IFS IMABS IMAGINARY IMARGUMENT IMCONJUGATE IMCOS IMCOSH IMCOT IMCSC IMCSCH IMDIV IMEXP IMLN IMLOG10 IMLOG2 IMPOWER IMPRODUCT IMREAL IMSEC IMSECH IMSIN IMSINH IMSQRT IMSUB IMSUM IMTAN INFO INT INTERCEPT INTRATE IPMT IRR IS ISB ISERR ISERROR ISEVEN ISFORMULA ISLOGICAL ISNA ISNONTEXT ISNUMBER ISO.CEILING ISODD ISOWEEKNUM ISPMT ISREF ISTEXT JIS KURT LARGE LCM LEFT LEFTB LEN LENB LINEST LN LOG LOG10 LOGEST LOGINV 
LOGNORM.DIST LOGNORM.INV LOGNORMDIST LOOKUP LOWER MATCH MAX MAXA MAXIFS MDETERM MDURATION MEDIAN MID MIDB MIN MINA MINIFS MINUTE MINVERSE MIRR MMULT MOD MODE MODE.MULT MODE.SNGL MONTH MROUND MULTINOMIAL MUNIT N NA NEGBINOM.DIST NEGBINOMDIST NETWORKDAYS NETWORKDAYS.INTL NOMINAL NORM.DIST NORM.INV NORM.S.DIST NORM.S.INV NORMDIST NORMINV NORMSDIST NORMSINV NOT NOW NPER NPV NUMBERVALUE OCT2BIN OCT2DEC OCT2HEX ODD ODDFPRICE ODDFYIELD ODDLPRICE ODDLYIELD OR PDURATION PEARSON PERCENTILE PERCENTILE.EXC PERCENTILE.INC PERCENTRANK PERCENTRANK.EXC PERCENTRANK.INC PERMUT PERMUTATIONA PHI PHONETIC PI PMT POISSON POISSON.DIST POWER PPMT PRICE PRICEDISC PRICEMAT PROB PRODUCT PROPER PV QUARTILE QUARTILE.EXC QUARTILE.INC QUOTIENT RADIANS RAND RANDBETWEEN RANK RANK.AVG RANK.EQ RATE RECEIVED REGISTER.ID REPLACE REPLACEB REPT RIGHT RIGHTB ROMAN ROUND ROUNDDOWN ROUNDUP ROW ROWS RRI RSQ RTD SEARCH SEARCHB SEC SECH SECOND SERIESSUM SHEET SHEETS SIGN SIN SINH SKEW SKEW.P SLN SLOPE SMALL SQL.REQUEST SQRT SQRTPI STANDARDIZE STDEV STDEV.P STDEV.S STDEVA STDEVP STDEVPA STEYX SUBSTITUTE SUBTOTAL SUM SUMIF SUMIFS SUMPRODUCT SUMSQ SUMX2MY2 SUMX2PY2 SUMXMY2 SWITCH SYD T T.DIST T.DIST.2T T.DIST.RT T.INV T.INV.2T T.TEST TAN TANH TBILLEQ TBILLPRICE TBILLYIELD TDIST TEXT TEXTJOIN TIME TIMEVALUE TINV TODAY TRANSPOSE TREND TRIM TRIMMEAN TRUE TRUNC TTEST TYPE UNICHAR UNICODE UPPER VALUE VAR VAR.P VAR.S VARA VARP VARPA VDB VLOOKUP WEBSERVICE WEEKDAY WEEKNUM WEIBULL WEIBULL.DIST WORKDAY WORKDAY.INTL XIRR XLOOKUP XMATCH XNPV XOR YEAR YEARFRAC YIELD YIELDDISC YIELDMAT Z.TEST ZTEST ================================================ FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v142.txt ================================================ ABS ACCRINT ACCRINTM ACOS ACOSH ACOT ACOTH ADDRESS AGGREGATE AMORDEGRC AMORLINC AND ARABIC AREAS ASC ASIN ASINH ATAN ATAN2 ATANH AVEDEV AVERAGE AVERAGEA AVERAGEIF AVERAGEIFS BAHTTEXT BASE BESSELI BESSELJ BESSELK BESSELY BETA.DIST BETA.INV BETADIST BETAINV BIN2DEC 
BIN2HEX BIN2OCT BINOM.DIST BINOM.DIST.RANGE BINOM.INV BINOMDIST BITAND BITLSHIFT BITOR BITRSHIFT BITXOR CALL CEILING CEILING.MATH CEILING.PRECISE CELL CHAR CHIDIST CHIINV CHISQ.DIST CHISQ.DIST.RT CHISQ.INV CHISQ.INV.RT CHISQ.TEST CHITEST CLEAN CODE COLUMN COLUMNS COMBIN COMBINA COMPLEX CONCAT CONCATENATE CONFIDENCE CONFIDENCE.NORM CONFIDENCE.T CONVERT CORREL COS COSH COT COTH COUNT COUNTA COUNTBLANK COUNTIF COUNTIFS COUPDAYBS COUPDAYS COUPDAYSNC COUPNCD COUPNUM COUPPCD COVAR COVARIANCE.P COVARIANCE.S CRITBINOM CSC CSCH CUBEKPIMEMBER CUBEMEMBER CUBEMEMBERPROPERTY CUBERANKEDMEMBER CUBESET CUBESETCOUNT CUBEVALUE CUMIPMT CUMPRINC DATE DATEDIF DATEVALUE DAVERAGE DAY DAYS DAYS360 DB DBCS DCOUNT DCOUNTA DDB DEC2BIN DEC2HEX DEC2OCT DECIMAL DEGREES DELTA DEVSQ DGET DISC DMAX DMIN DOLLAR DOLLARDE DOLLARFR DPRODUCT DSTDEV DSTDEVP DSUM DURATION DVAR DVARP EDATE EFFECT ENCODEURL EOMONTH ERF ERF.PRECISE ERFC ERFC.PRECISE ERROR.TYPE EUROCONVERT EVEN EXACT EXP EXPON.DIST EXPONDIST F.DIST F.DIST.RT F.INV F.INV.RT F.TEST FACT FACTDOUBLE FALSE FDIST FILTERXML FIND FINDB FINV FISHER FISHERINV FIXED FLOOR FLOOR.MATH FLOOR.PRECISE FORECAST FORECAST.ETS FORECAST.ETS.CONFINT FORECAST.ETS.SEASONALITY FORECAST.ETS.STAT FORECAST.LINEAR FORMULATEXT FREQUENCY FTEST FV FVSCHEDULE GAMMA GAMMA.DIST GAMMA.INV GAMMADIST GAMMAINV GAMMALN GAMMALN.PRECISE GAUSS GCD GEOMEAN GESTEP GETPIVOTDATA GROWTH HARMEAN HEX2BIN HEX2DEC HEX2OCT HLOOKUP HOUR HYPERLINK HYPGEOM.DIST HYPGEOMDIST ISBLANK IFERROR IFNA IFS IMABS IMAGINARY IMARGUMENT IMCONJUGATE IMCOS IMCOSH IMCOT IMCSC IMCSCH IMDIV IMEXP IMLN IMLOG10 IMLOG2 IMPOWER IMPRODUCT IMREAL IMSEC IMSECH IMSIN IMSINH IMSQRT IMSUB IMSUM IMTAN INFO INT INTERCEPT INTRATE IPMT IRR IS ISB ISERR ISERROR ISEVEN ISFORMULA ISLOGICAL ISNA ISNONTEXT ISNUMBER ISO.CEILING ISODD ISOWEEKNUM ISPMT ISREF ISTEXT JIS KURT LARGE LCM LEFT LEFTB LEN LENB LINEST LN LOG LOG10 LOGEST LOGINV LOGNORM.DIST LOGNORM.INV LOGNORMDIST LOOKUP LOWER MATCH MAX MAXA MAXIFS MDETERM MDURATION MEDIAN MID 
MIDB MIN MINA MINIFS MINUTE MINVERSE MIRR MMULT MOD MODE MODE.MULT MODE.SNGL MONTH MROUND MULTINOMIAL MUNIT N NA NEGBINOM.DIST NEGBINOMDIST NETWORKDAYS NETWORKDAYS.INTL NOMINAL NORM.DIST NORM.INV NORM.S.DIST NORM.S.INV NORMDIST NORMINV NORMSDIST NORMSINV NOT NOW NPER NPV NUMBERVALUE OCT2BIN OCT2DEC OCT2HEX ODD ODDFPRICE ODDFYIELD ODDLPRICE ODDLYIELD OR PDURATION PEARSON PERCENTILE PERCENTILE.EXC PERCENTILE.INC PERCENTRANK PERCENTRANK.EXC PERCENTRANK.INC PERMUT PERMUTATIONA PHI PHONETIC PI PMT POISSON POISSON.DIST POWER PPMT PRICE PRICEDISC PRICEMAT PROB PRODUCT PROPER PV QUARTILE QUARTILE.EXC QUARTILE.INC QUOTIENT RADIANS RAND RANDBETWEEN RANK RANK.AVG RANK.EQ RATE RECEIVED REGISTER.ID REPLACE REPLACEB REPT RIGHT RIGHTB ROMAN ROUND ROUNDDOWN ROUNDUP ROW ROWS RRI RSQ RTD SEARCH SEARCHB SEC SECH SECOND SERIESSUM SHEET SHEETS SIGN SIN SINH SKEW SKEW.P SLN SLOPE SMALL SQL.REQUEST SQRT SQRTPI STANDARDIZE STDEV STDEV.P STDEV.S STDEVA STDEVP STDEVPA STEYX SUBSTITUTE SUBTOTAL SUM SUMIF SUMIFS SUMPRODUCT SUMSQ SUMX2MY2 SUMX2PY2 SUMXMY2 SWITCH SYD T T.DIST T.DIST.2T T.DIST.RT T.INV T.INV.2T T.TEST TAN TANH TBILLEQ TBILLPRICE TBILLYIELD TDIST TEXT TEXTJOIN TIME TIMEVALUE TINV TODAY TRANSPOSE TREND TRIM TRIMMEAN TRUE TRUNC TTEST TYPE UNICHAR UNICODE UPPER VALUE VAR VAR.P VAR.S VARA VARP VARPA VDB VLOOKUP WEBSERVICE WEEKDAY WEEKNUM WEIBULL WEIBULL.DIST WORKDAY WORKDAY.INTL XIRR XLOOKUP XMATCH XNPV XOR YEAR YEARFRAC YIELD YIELDDISC YIELDMAT Z.TEST ZTEST ================================================ FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v150.txt ================================================ ABS ACCRINT ACCRINTM ACOS ACOSH ACOT ACOTH ADDRESS AGGREGATE AMORDEGRC AMORLINC AND ARABIC AREAS ARRAYTOTEXT ASC ASIN ASINH ATAN ATAN2 ATANH AVEDEV AVERAGE AVERAGEA AVERAGEIF AVERAGEIFS BAHTTEXT BASE BESSELI BESSELJ BESSELK BESSELY BETA.DIST BETA.INV BETADIST BETAINV BIN2DEC BIN2HEX BIN2OCT BINOM.DIST BINOM.DIST.RANGE BINOM.INV BINOMDIST BITAND BITLSHIFT BITOR 
BITRSHIFT BITXOR CALL CEILING CEILING.MATH CEILING.PRECISE CELL CHAR CHIDIST CHIINV CHISQ.DIST CHISQ.DIST.RT CHISQ.INV CHISQ.INV.RT CHISQ.TEST CHITEST CLEAN CODE COLUMN COLUMNS COMBIN COMBINA COMPLEX CONCAT CONCATENATE CONFIDENCE CONFIDENCE.NORM CONFIDENCE.T CONVERT CORREL COS COSH COT COTH COUNT COUNTA COUNTBLANK COUNTIF COUNTIFS COUPDAYBS COUPDAYS COUPDAYSNC COUPNCD COUPNUM COUPPCD COVAR COVARIANCE.P COVARIANCE.S CRITBINOM CSC CSCH CUBEKPIMEMBER CUBEMEMBER CUBEMEMBERPROPERTY CUBERANKEDMEMBER CUBESET CUBESETCOUNT CUBEVALUE CUMIPMT CUMPRINC DATE DATEDIF DATEVALUE DAVERAGE DAY DAYS DAYS360 DB DBCS DCOUNT DCOUNTA DDB DEC2BIN DEC2HEX DEC2OCT DECIMAL DEGREES DELTA DEVSQ DGET DISC DMAX DMIN DOLLAR DOLLARDE DOLLARFR DPRODUCT DSTDEV DSTDEVP DSUM DURATION DVAR DVARP EDATE EFFECT ENCODEURL EOMONTH ERF ERF.PRECISE ERFC ERFC.PRECISE ERROR.TYPE EUROCONVERT EVEN EXACT EXP EXPON.DIST EXPONDIST F.DIST F.DIST.RT F.INV F.INV.RT F.TEST FACT FACTDOUBLE FALSE FDIST FILTER FILTERXML FIND FINDB FINV FISHER FISHERINV FIXED FLOOR FLOOR.MATH FLOOR.PRECISE FORECAST FORECAST.ETS FORECAST.ETS.CONFINT FORECAST.ETS.SEASONALITY FORECAST.ETS.STAT FORECAST.LINEAR FORMULATEXT FREQUENCY FTEST FV FVSCHEDULE GAMMA GAMMA.DIST GAMMA.INV GAMMADIST GAMMAINV GAMMALN GAMMALN.PRECISE GAUSS GCD GEOMEAN GESTEP GETPIVOTDATA GROWTH HARMEAN HEX2BIN HEX2DEC HEX2OCT HLOOKUP HOUR HYPERLINK HYPGEOM.DIST HYPGEOMDIST IFERROR IFNA IFS IMABS IMAGINARY IMARGUMENT IMCONJUGATE IMCOS IMCOSH IMCOT IMCSC IMCSCH IMDIV IMEXP IMLN IMLOG10 IMLOG2 IMPOWER IMPRODUCT IMREAL IMSEC IMSECH IMSIN IMSINH IMSQRT IMSUB IMSUM IMTAN INFO INT INTERCEPT INTRATE IPMT IRR ISBLANK ISERR ISERROR ISEVEN ISFORMULA ISLOGICAL ISNA ISNONTEXT ISNUMBER ISO.CEILING ISODD ISOWEEKNUM ISPMT ISREF ISTEXT JIS KURT LARGE LCM LEFT LEFTB LEN LENB LET LINEST LN LOG LOG10 LOGEST LOGINV LOGNORM.DIST LOGNORM.INV LOGNORMDIST LOOKUP LOWER MATCH MAX MAXA MAXIFS MDETERM MDURATION MEDIAN MID MIDB MIN MINA MINIFS MINUTE MINVERSE MIRR MMULT MOD MODE MODE.MULT MODE.SNGL MONTH 
MROUND MULTINOMIAL MUNIT N NA NEGBINOM.DIST NEGBINOMDIST NETWORKDAYS NETWORKDAYS.INTL NOMINAL NORM.DIST NORM.INV NORM.S.DIST NORM.S.INV NORMDIST NORMINV NORMSDIST NORMSINV NOT NOW NPER NPV NUMBERVALUE OCT2BIN OCT2DEC OCT2HEX ODD ODDFPRICE ODDFYIELD ODDLPRICE ODDLYIELD OR PDURATION PEARSON PERCENTILE PERCENTILE.EXC PERCENTILE.INC PERCENTRANK PERCENTRANK.EXC PERCENTRANK.INC PERMUT PERMUTATIONA PHI PHONETIC PI PMT POISSON POISSON.DIST POWER PPMT PRICE PRICEDISC PRICEMAT PROB PRODUCT PROPER PV QUARTILE QUARTILE.EXC QUARTILE.INC QUOTIENT RADIANS RAND RANDARRAY RANDBETWEEN RANK RANK.AVG RANK.EQ RATE RECEIVED REGISTER.ID REPLACE REPLACEB REPT RIGHT RIGHTB ROMAN ROUND ROUNDDOWN ROUNDUP ROW ROWS RRI RSQ RTD SEARCH SEARCHB SEC SECH SECOND SEQUENCE SERIESSUM SHEET SHEETS SIGN SIN SINH SKEW SKEW.P SLN SLOPE SMALL SORT SORTBY SQRT SQRTPI STANDARDIZE STDEV STDEV.P STDEV.S STDEVA STDEVP STDEVPA STEYX SUBSTITUTE SUBTOTAL SUM SUMIF SUMIFS SUMPRODUCT SUMSQ SUMX2MY2 SUMX2PY2 SUMXMY2 SWITCH SYD T T.DIST T.DIST.2T T.DIST.RT T.INV T.INV.2T T.TEST TAN TANH TBILLEQ TBILLPRICE TBILLYIELD TDIST TEXT TEXTJOIN TIME TIMEVALUE TINV TODAY TRANSPOSE TREND TRIM TRIMMEAN TRUE TRUNC TTEST TYPE UNICHAR UNICODE UNIQUE UPPER VALUE VALUETOTEXT VAR VAR.P VAR.S VARA VARP VARPA VDB VLOOKUP WEBSERVICE WEEKDAY WEEKNUM WEIBULL WEIBULL.DIST WORKDAY WORKDAY.INTL XIRR XLOOKUP XMATCH XNPV XOR YEAR YEARFRAC YIELD YIELDDISC YIELDMAT Z.TEST ZTEST ================================================ FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v151.txt ================================================ ABS ACCRINT ACCRINTM ACOS ACOSH ACOT ACOTH ADDRESS AGGREGATE AMORDEGRC AMORLINC AND ARABIC AREAS ARRAYTOTEXT ASC ASIN ASINH ATAN ATAN2 ATANH AVEDEV AVERAGE AVERAGEA AVERAGEIF AVERAGEIFS BAHTTEXT BASE BESSELI BESSELJ BESSELK BESSELY BETA.DIST BETA.INV BETADIST BETAINV BIN2DEC BIN2HEX BIN2OCT BINOM.DIST BINOM.DIST.RANGE BINOM.INV BINOMDIST BITAND BITLSHIFT BITOR BITRSHIFT BITXOR CALL CEILING CEILING.MATH 
CEILING.PRECISE CELL CHAR CHIDIST CHIINV CHISQ.DIST CHISQ.DIST.RT CHISQ.INV CHISQ.INV.RT CHISQ.TEST CHITEST CLEAN CODE COLUMN COLUMNS COMBIN COMBINA COMPLEX CONCAT CONCATENATE CONFIDENCE CONFIDENCE.NORM CONFIDENCE.T CONVERT CORREL COS COSH COT COTH COUNT COUNTA COUNTBLANK COUNTIF COUNTIFS COUPDAYBS COUPDAYS COUPDAYSNC COUPNCD COUPNUM COUPPCD COVAR COVARIANCE.P COVARIANCE.S CRITBINOM CSC CSCH CUBEKPIMEMBER CUBEMEMBER CUBEMEMBERPROPERTY CUBERANKEDMEMBER CUBESET CUBESETCOUNT CUBEVALUE CUMIPMT CUMPRINC DATE DATEDIF DATEVALUE DAVERAGE DAY DAYS DAYS360 DB DBCS DCOUNT DCOUNTA DDB DEC2BIN DEC2HEX DEC2OCT DECIMAL DEGREES DELTA DEVSQ DGET DISC DMAX DMIN DOLLAR DOLLARDE DOLLARFR DPRODUCT DSTDEV DSTDEVP DSUM DURATION DVAR DVARP EDATE EFFECT ENCODEURL EOMONTH ERF ERF.PRECISE ERFC ERFC.PRECISE ERROR.TYPE EUROCONVERT EVEN EXACT EXP EXPON.DIST EXPONDIST F.DIST F.DIST.RT F.INV F.INV.RT F.TEST FACT FACTDOUBLE FALSE FDIST FILTER FILTERXML FIND FINDB FINV FISHER FISHERINV FIXED FLOOR FLOOR.MATH FLOOR.PRECISE FORECAST FORECAST.ETS FORECAST.ETS.CONFINT FORECAST.ETS.SEASONALITY FORECAST.ETS.STAT FORECAST.LINEAR FORMULATEXT FREQUENCY FTEST FV FVSCHEDULE GAMMA GAMMA.DIST GAMMA.INV GAMMADIST GAMMAINV GAMMALN GAMMALN.PRECISE GAUSS GCD GEOMEAN GESTEP GETPIVOTDATA GROWTH HARMEAN HEX2BIN HEX2DEC HEX2OCT HLOOKUP HOUR HYPERLINK HYPGEOM.DIST HYPGEOMDIST IFERROR IFNA IFS IMABS IMAGINARY IMARGUMENT IMCONJUGATE IMCOS IMCOSH IMCOT IMCSC IMCSCH IMDIV IMEXP IMLN IMLOG10 IMLOG2 IMPOWER IMPRODUCT IMREAL IMSEC IMSECH IMSIN IMSINH IMSQRT IMSUB IMSUM IMTAN INFO INT INTERCEPT INTRATE IPMT IRR ISBLANK ISERR ISERROR ISEVEN ISFORMULA ISLOGICAL ISNA ISNONTEXT ISNUMBER ISO.CEILING ISODD ISOWEEKNUM ISPMT ISREF ISTEXT JIS KURT LARGE LCM LEFT LEFTB LEN LENB LET LINEST LN LOG LOG10 LOGEST LOGINV LOGNORM.DIST LOGNORM.INV LOGNORMDIST LOOKUP LOWER MATCH MAX MAXA MAXIFS MDETERM MDURATION MEDIAN MID MIDB MIN MINA MINIFS MINUTE MINVERSE MIRR MMULT MOD MODE MODE.MULT MODE.SNGL MONTH MROUND MULTINOMIAL MUNIT N NA 
NEGBINOM.DIST NEGBINOMDIST NETWORKDAYS NETWORKDAYS.INTL NOMINAL NORM.DIST NORM.INV NORM.S.DIST NORM.S.INV NORMDIST NORMINV NORMSDIST NORMSINV NOT NOW NPER NPV NUMBERVALUE OCT2BIN OCT2DEC OCT2HEX ODD ODDFPRICE ODDFYIELD ODDLPRICE ODDLYIELD OR PDURATION PEARSON PERCENTILE PERCENTILE.EXC PERCENTILE.INC PERCENTRANK PERCENTRANK.EXC PERCENTRANK.INC PERMUT PERMUTATIONA PHI PHONETIC PI PMT POISSON POISSON.DIST POWER PPMT PRICE PRICEDISC PRICEMAT PROB PRODUCT PROPER PV QUARTILE QUARTILE.EXC QUARTILE.INC QUOTIENT RADIANS RAND RANDARRAY RANDBETWEEN RANK RANK.AVG RANK.EQ RATE RECEIVED REGISTER.ID REPLACE REPLACEB REPT RIGHT RIGHTB ROMAN ROUND ROUNDDOWN ROUNDUP ROW ROWS RRI RSQ RTD SEARCH SEARCHB SEC SECH SECOND SEQUENCE SERIESSUM SHEET SHEETS SIGN SIN SINH SKEW SKEW.P SLN SLOPE SMALL SORT SORTBY SQRT SQRTPI STANDARDIZE STDEV STDEV.P STDEV.S STDEVA STDEVP STDEVPA STEYX SUBSTITUTE SUBTOTAL SUM SUMIF SUMIFS SUMPRODUCT SUMSQ SUMX2MY2 SUMX2PY2 SUMXMY2 SWITCH SYD T T.DIST T.DIST.2T T.DIST.RT T.INV T.INV.2T T.TEST TAN TANH TBILLEQ TBILLPRICE TBILLYIELD TDIST TEXT TEXTJOIN TIME TIMEVALUE TINV TODAY TRANSPOSE TREND TRIM TRIMMEAN TRUE TRUNC TTEST TYPE UNICHAR UNICODE UNIQUE UPPER VALUE VALUETOTEXT VAR VAR.P VAR.S VARA VARP VARPA VDB VLOOKUP WEBSERVICE WEEKDAY WEEKNUM WEIBULL WEIBULL.DIST WORKDAY WORKDAY.INTL XIRR XLOOKUP XMATCH XNPV XOR YEAR YEARFRAC YIELD YIELDDISC YIELDMAT Z.TEST ZTEST ================================================ FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v152.txt ================================================ ABS ACCRINT ACCRINTM ACOS ACOSH ACOT ACOTH ADDRESS AGGREGATE AMORDEGRC AMORLINC AND ARABIC AREAS ARRAYTOTEXT ASC ASIN ASINH ATAN ATAN2 ATANH AVEDEV AVERAGE AVERAGEA AVERAGEIF AVERAGEIFS BAHTTEXT BASE BESSELI BESSELJ BESSELK BESSELY BETA.DIST BETA.INV BETADIST BETAINV BIN2DEC BIN2HEX BIN2OCT BINOM.DIST BINOM.DIST.RANGE BINOM.INV BINOMDIST BITAND BITLSHIFT BITOR BITRSHIFT BITXOR CALL CEILING CEILING.MATH CEILING.PRECISE CELL CHAR CHIDIST 
CHIINV CHISQ.DIST CHISQ.DIST.RT CHISQ.INV CHISQ.INV.RT CHISQ.TEST CHITEST CLEAN CODE COLUMN COLUMNS COMBIN COMBINA COMPLEX CONCAT CONCATENATE CONFIDENCE CONFIDENCE.NORM CONFIDENCE.T CONVERT CORREL COS COSH COT COTH COUNT COUNTA COUNTBLANK COUNTIF COUNTIFS COUPDAYBS COUPDAYS COUPDAYSNC COUPNCD COUPNUM COUPPCD COVAR COVARIANCE.P COVARIANCE.S CRITBINOM CSC CSCH CUBEKPIMEMBER CUBEMEMBER CUBEMEMBERPROPERTY CUBERANKEDMEMBER CUBESET CUBESETCOUNT CUBEVALUE CUMIPMT CUMPRINC DATE DATEDIF DATEVALUE DAVERAGE DAY DAYS DAYS360 DB DBCS DCOUNT DCOUNTA DDB DEC2BIN DEC2HEX DEC2OCT DECIMAL DEGREES DELTA DEVSQ DGET DISC DMAX DMIN DOLLAR DOLLARDE DOLLARFR DPRODUCT DSTDEV DSTDEVP DSUM DURATION DVAR DVARP EDATE EFFECT ENCODEURL EOMONTH ERF ERF.PRECISE ERFC ERFC.PRECISE ERROR.TYPE EUROCONVERT EVEN EXACT EXP EXPON.DIST EXPONDIST F.DIST F.DIST.RT F.INV F.INV.RT F.TEST FACT FACTDOUBLE FALSE FDIST FILTER FILTERXML FIND FINDB FINV FISHER FISHERINV FIXED FLOOR FLOOR.MATH FLOOR.PRECISE FORECAST FORECAST.ETS FORECAST.ETS.CONFINT FORECAST.ETS.SEASONALITY FORECAST.ETS.STAT FORECAST.LINEAR FORMULATEXT FREQUENCY FTEST FV FVSCHEDULE GAMMA GAMMA.DIST GAMMA.INV GAMMADIST GAMMAINV GAMMALN GAMMALN.PRECISE GAUSS GCD GEOMEAN GESTEP GETPIVOTDATA GROWTH HARMEAN HEX2BIN HEX2DEC HEX2OCT HLOOKUP HOUR HYPERLINK HYPGEOM.DIST HYPGEOMDIST IFERROR IFNA IFS IMABS IMAGINARY IMARGUMENT IMCONJUGATE IMCOS IMCOSH IMCOT IMCSC IMCSCH IMDIV IMEXP IMLN IMLOG10 IMLOG2 IMPOWER IMPRODUCT IMREAL IMSEC IMSECH IMSIN IMSINH IMSQRT IMSUB IMSUM IMTAN INFO INT INTERCEPT INTRATE IPMT IRR ISBLANK ISERR ISERROR ISEVEN ISFORMULA ISLOGICAL ISNA ISNONTEXT ISNUMBER ISO.CEILING ISODD ISOWEEKNUM ISPMT ISREF ISTEXT JIS KURT LARGE LCM LEFT LEFTB LEN LENB LET LINEST LN LOG LOG10 LOGEST LOGINV LOGNORM.DIST LOGNORM.INV LOGNORMDIST LOOKUP LOWER MATCH MAX MAXA MAXIFS MDETERM MDURATION MEDIAN MID MIDB MIN MINA MINIFS MINUTE MINVERSE MIRR MMULT MOD MODE MODE.MULT MODE.SNGL MONTH MROUND MULTINOMIAL MUNIT N NA NEGBINOM.DIST NEGBINOMDIST NETWORKDAYS 
NETWORKDAYS.INTL NOMINAL NORM.DIST NORM.INV NORM.S.DIST NORM.S.INV NORMDIST NORMINV NORMSDIST NORMSINV NOT NOW NPER NPV NUMBERVALUE OCT2BIN OCT2DEC OCT2HEX ODD ODDFPRICE ODDFYIELD ODDLPRICE ODDLYIELD OR PDURATION PEARSON PERCENTILE PERCENTILE.EXC PERCENTILE.INC PERCENTRANK PERCENTRANK.EXC PERCENTRANK.INC PERMUT PERMUTATIONA PHI PHONETIC PI PMT POISSON POISSON.DIST POWER PPMT PRICE PRICEDISC PRICEMAT PROB PRODUCT PROPER PV QUARTILE QUARTILE.EXC QUARTILE.INC QUOTIENT RADIANS RAND RANDARRAY RANDBETWEEN RANK RANK.AVG RANK.EQ RATE RECEIVED REGISTER.ID REPLACE REPLACEB REPT RIGHT RIGHTB ROMAN ROUND ROUNDDOWN ROUNDUP ROW ROWS RRI RSQ RTD SEARCH SEARCHB SEC SECH SECOND SEQUENCE SERIESSUM SHEET SHEETS SIGN SIN SINH SKEW SKEW.P SLN SLOPE SMALL SORT SORTBY SQRT SQRTPI STANDARDIZE STDEV STDEV.P STDEV.S STDEVA STDEVP STDEVPA STEYX SUBSTITUTE SUBTOTAL SUM SUMIF SUMIFS SUMPRODUCT SUMSQ SUMX2MY2 SUMX2PY2 SUMXMY2 SWITCH SYD T T.DIST T.DIST.2T T.DIST.RT T.INV T.INV.2T T.TEST TAN TANH TBILLEQ TBILLPRICE TBILLYIELD TDIST TEXT TEXTJOIN TIME TIMEVALUE TINV TODAY TRANSPOSE TREND TRIM TRIMMEAN TRUE TRUNC TTEST TYPE UNICHAR UNICODE UNIQUE UPPER VALUE VALUETOTEXT VAR VAR.P VAR.S VARA VARP VARPA VDB VLOOKUP WEBSERVICE WEEKDAY WEEKNUM WEIBULL WEIBULL.DIST WORKDAY WORKDAY.INTL XIRR XLOOKUP XMATCH XNPV XOR YEAR YEARFRAC YIELD YIELDDISC YIELDMAT Z.TEST ZTEST ================================================ FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v160.txt ================================================ ABS ACCRINT ACCRINTM ACOS ACOSH ACOT ACOTH ADDRESS AGGREGATE AMORDEGRC AMORLINC AND ARABIC AREAS ARRAYTOTEXT ASC ASIN ASINH ATAN ATAN2 ATANH AVEDEV AVERAGE AVERAGEA AVERAGEIF AVERAGEIFS BAHTTEXT BASE BESSELI BESSELJ BESSELK BESSELY BETA.DIST BETA.INV BETADIST BETAINV BIN2DEC BIN2HEX BIN2OCT BINOM.DIST BINOM.DIST.RANGE BINOM.INV BINOMDIST BITAND BITLSHIFT BITOR BITRSHIFT BITXOR CALL CEILING CEILING.MATH CEILING.PRECISE CELL CHAR CHIDIST CHIINV CHISQ.DIST CHISQ.DIST.RT CHISQ.INV 
CHISQ.INV.RT CHISQ.TEST CHITEST CLEAN CODE COLUMN COLUMNS COMBIN COMBINA COMPLEX CONCAT CONCATENATE CONFIDENCE CONFIDENCE.NORM CONFIDENCE.T CONVERT CORREL COS COSH COT COTH COUNT COUNTA COUNTBLANK COUNTIF COUNTIFS COUPDAYBS COUPDAYS COUPDAYSNC COUPNCD COUPNUM COUPPCD COVAR COVARIANCE.P COVARIANCE.S CRITBINOM CSC CSCH CUBEKPIMEMBER CUBEMEMBER CUBEMEMBERPROPERTY CUBERANKEDMEMBER CUBESET CUBESETCOUNT CUBEVALUE CUMIPMT CUMPRINC DATE DATEDIF DATEVALUE DAVERAGE DAY DAYS DAYS360 DB DBCS DCOUNT DCOUNTA DDB DEC2BIN DEC2HEX DEC2OCT DECIMAL DEGREES DELTA DEVSQ DGET DISC DMAX DMIN DOLLAR DOLLARDE DOLLARFR DPRODUCT DSTDEV DSTDEVP DSUM DURATION DVAR DVARP EDATE EFFECT ENCODEURL EOMONTH ERF ERF.PRECISE ERFC ERFC.PRECISE ERROR.TYPE EUROCONVERT EVEN EXACT EXP EXPON.DIST EXPONDIST F.DIST F.DIST.RT F.INV F.INV.RT F.TEST FACT FACTDOUBLE FALSE FDIST FILTER FILTERXML FIND FINDB FINV FISHER FISHERINV FIXED FLOOR FLOOR.MATH FLOOR.PRECISE FORECAST FORECAST.ETS FORECAST.ETS.CONFINT FORECAST.ETS.SEASONALITY FORECAST.ETS.STAT FORECAST.LINEAR FORMULATEXT FREQUENCY FTEST FV FVSCHEDULE GAMMA GAMMA.DIST GAMMA.INV GAMMADIST GAMMAINV GAMMALN GAMMALN.PRECISE GAUSS GCD GEOMEAN GESTEP GETPIVOTDATA GROWTH HARMEAN HEX2BIN HEX2DEC HEX2OCT HLOOKUP HOUR HYPERLINK HYPGEOM.DIST HYPGEOMDIST IFERROR IFNA IFS IMABS IMAGINARY IMARGUMENT IMCONJUGATE IMCOS IMCOSH IMCOT IMCSC IMCSCH IMDIV IMEXP IMLN IMLOG10 IMLOG2 IMPOWER IMPRODUCT IMREAL IMSEC IMSECH IMSIN IMSINH IMSQRT IMSUB IMSUM IMTAN INFO INT INTERCEPT INTRATE IPMT IRR ISBLANK ISERR ISERROR ISEVEN ISFORMULA ISLOGICAL ISNA ISNONTEXT ISNUMBER ISO.CEILING ISODD ISOWEEKNUM ISPMT ISREF ISTEXT JIS KURT LARGE LCM LEFT LEFTB LEN LENB LET LINEST LN LOG LOG10 LOGEST LOGINV LOGNORM.DIST LOGNORM.INV LOGNORMDIST LOOKUP LOWER MATCH MAX MAXA MAXIFS MDETERM MDURATION MEDIAN MID MIDB MIN MINA MINIFS MINUTE MINVERSE MIRR MMULT MOD MODE MODE.MULT MODE.SNGL MONTH MROUND MULTINOMIAL MUNIT N NA NEGBINOM.DIST NEGBINOMDIST NETWORKDAYS NETWORKDAYS.INTL NOMINAL NORM.DIST NORM.INV 
NORM.S.DIST NORM.S.INV NORMDIST NORMINV NORMSDIST NORMSINV NOT NOW NPER NPV NUMBERVALUE OCT2BIN OCT2DEC OCT2HEX ODD ODDFPRICE ODDFYIELD ODDLPRICE ODDLYIELD OR PDURATION PEARSON PERCENTILE PERCENTILE.EXC PERCENTILE.INC PERCENTRANK PERCENTRANK.EXC PERCENTRANK.INC PERMUT PERMUTATIONA PHI PHONETIC PI PMT POISSON POISSON.DIST POWER PPMT PRICE PRICEDISC PRICEMAT PROB PRODUCT PROPER PV QUARTILE QUARTILE.EXC QUARTILE.INC QUOTIENT RADIANS RAND RANDARRAY RANDBETWEEN RANK RANK.AVG RANK.EQ RATE RECEIVED REGISTER.ID REPLACE REPLACEB REPT RIGHT RIGHTB ROMAN ROUND ROUNDDOWN ROUNDUP ROW ROWS RRI RSQ RTD SEARCH SEARCHB SEC SECH SECOND SEQUENCE SERIESSUM SHEET SHEETS SIGN SIN SINH SKEW SKEW.P SLN SLOPE SMALL SORT SORTBY SQRT SQRTPI STANDARDIZE STDEV STDEV.P STDEV.S STDEVA STDEVP STDEVPA STEYX SUBSTITUTE SUBTOTAL SUM SUMIF SUMIFS SUMPRODUCT SUMSQ SUMX2MY2 SUMX2PY2 SUMXMY2 SWITCH SYD T T.DIST T.DIST.2T T.DIST.RT T.INV T.INV.2T T.TEST TAN TANH TBILLEQ TBILLPRICE TBILLYIELD TDIST TEXT TEXTJOIN TIME TIMEVALUE TINV TODAY TRANSPOSE TREND TRIM TRIMMEAN TRUE TRUNC TTEST TYPE UNICHAR UNICODE UNIQUE UPPER VALUE VALUETOTEXT VAR VAR.P VAR.S VARA VARP VARPA VDB VLOOKUP WEBSERVICE WEEKDAY WEEKNUM WEIBULL WEIBULL.DIST WORKDAY WORKDAY.INTL XIRR XLOOKUP XMATCH XNPV XOR YEAR YEARFRAC YIELD YIELDDISC YIELDMAT Z.TEST ZTEST ================================================ FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v161.txt ================================================ ABS ACCRINT ACCRINTM ACOS ACOSH ACOT ACOTH ADDRESS AGGREGATE AMORDEGRC AMORLINC AND ARABIC AREAS ARRAYTOTEXT ASC ASIN ASINH ATAN ATAN2 ATANH AVEDEV AVERAGE AVERAGEA AVERAGEIF AVERAGEIFS BAHTTEXT BASE BESSELI BESSELJ BESSELK BESSELY BETA.DIST BETA.INV BETADIST BETAINV BIN2DEC BIN2HEX BIN2OCT BINOM.DIST BINOM.DIST.RANGE BINOM.INV BINOMDIST BITAND BITLSHIFT BITOR BITRSHIFT BITXOR CALL CEILING CEILING.MATH CEILING.PRECISE CELL CHAR CHIDIST CHIINV CHISQ.DIST CHISQ.DIST.RT CHISQ.INV CHISQ.INV.RT CHISQ.TEST CHITEST CLEAN CODE 
COLUMN COLUMNS COMBIN COMBINA COMPLEX CONCAT CONCATENATE CONFIDENCE CONFIDENCE.NORM CONFIDENCE.T CONVERT CORREL COS COSH COT COTH COUNT COUNTA COUNTBLANK COUNTIF COUNTIFS COUPDAYBS COUPDAYS COUPDAYSNC COUPNCD COUPNUM COUPPCD COVAR COVARIANCE.P COVARIANCE.S CRITBINOM CSC CSCH CUBEKPIMEMBER CUBEMEMBER CUBEMEMBERPROPERTY CUBERANKEDMEMBER CUBESET CUBESETCOUNT CUBEVALUE CUMIPMT CUMPRINC DATE DATEDIF DATEVALUE DAVERAGE DAY DAYS DAYS360 DB DBCS DCOUNT DCOUNTA DDB DEC2BIN DEC2HEX DEC2OCT DECIMAL DEGREES DELTA DEVSQ DGET DISC DMAX DMIN DOLLAR DOLLARDE DOLLARFR DPRODUCT DSTDEV DSTDEVP DSUM DURATION DVAR DVARP EDATE EFFECT ENCODEURL EOMONTH ERF ERF.PRECISE ERFC ERFC.PRECISE ERROR.TYPE EUROCONVERT EVEN EXACT EXP EXPON.DIST EXPONDIST F.DIST F.DIST.RT F.INV F.INV.RT F.TEST FACT FACTDOUBLE FALSE FDIST FILTER FILTERXML FIND FINDB FINV FISHER FISHERINV FIXED FLOOR FLOOR.MATH FLOOR.PRECISE FORECAST FORECAST.ETS FORECAST.ETS.CONFINT FORECAST.ETS.SEASONALITY FORECAST.ETS.STAT FORECAST.LINEAR FORMULATEXT FREQUENCY FTEST FV FVSCHEDULE GAMMA GAMMA.DIST GAMMA.INV GAMMADIST GAMMAINV GAMMALN GAMMALN.PRECISE GAUSS GCD GEOMEAN GESTEP GETPIVOTDATA GROWTH HARMEAN HEX2BIN HEX2DEC HEX2OCT HLOOKUP HOUR HYPERLINK HYPGEOM.DIST HYPGEOMDIST IFERROR IFNA IFS IMABS IMAGINARY IMARGUMENT IMCONJUGATE IMCOS IMCOSH IMCOT IMCSC IMCSCH IMDIV IMEXP IMLN IMLOG10 IMLOG2 IMPOWER IMPRODUCT IMREAL IMSEC IMSECH IMSIN IMSINH IMSQRT IMSUB IMSUM IMTAN INFO INT INTERCEPT INTRATE IPMT IRR ISBLANK ISERR ISERROR ISEVEN ISFORMULA ISLOGICAL ISNA ISNONTEXT ISNUMBER ISO.CEILING ISODD ISOWEEKNUM ISPMT ISREF ISTEXT JIS KURT LARGE LCM LEFT LEFTB LEN LENB LET LINEST LN LOG LOG10 LOGEST LOGINV LOGNORM.DIST LOGNORM.INV LOGNORMDIST LOOKUP LOWER MATCH MAX MAXA MAXIFS MDETERM MDURATION MEDIAN MID MIDB MIN MINA MINIFS MINUTE MINVERSE MIRR MMULT MOD MODE MODE.MULT MODE.SNGL MONTH MROUND MULTINOMIAL MUNIT N NA NEGBINOM.DIST NEGBINOMDIST NETWORKDAYS NETWORKDAYS.INTL NOMINAL NORM.DIST NORM.INV NORM.S.DIST NORM.S.INV NORMDIST NORMINV 
NORMSDIST NORMSINV NOT NOW NPER NPV NUMBERVALUE OCT2BIN OCT2DEC OCT2HEX ODD ODDFPRICE ODDFYIELD ODDLPRICE ODDLYIELD OR PDURATION PEARSON PERCENTILE PERCENTILE.EXC PERCENTILE.INC PERCENTRANK PERCENTRANK.EXC PERCENTRANK.INC PERMUT PERMUTATIONA PHI PHONETIC PI PMT POISSON POISSON.DIST POWER PPMT PRICE PRICEDISC PRICEMAT PROB PRODUCT PROPER PV QUARTILE QUARTILE.EXC QUARTILE.INC QUOTIENT RADIANS RAND RANDARRAY RANDBETWEEN RANK RANK.AVG RANK.EQ RATE RECEIVED REGISTER.ID REPLACE REPLACEB REPT RIGHT RIGHTB ROMAN ROUND ROUNDDOWN ROUNDUP ROW ROWS RRI RSQ RTD SEARCH SEARCHB SEC SECH SECOND SEQUENCE SERIESSUM SHEET SHEETS SIGN SIN SINH SKEW SKEW.P SLN SLOPE SMALL SORT SORTBY SQRT SQRTPI STANDARDIZE STDEV STDEV.P STDEV.S STDEVA STDEVP STDEVPA STEYX SUBSTITUTE SUBTOTAL SUM SUMIF SUMIFS SUMPRODUCT SUMSQ SUMX2MY2 SUMX2PY2 SUMXMY2 SWITCH SYD T T.DIST T.DIST.2T T.DIST.RT T.INV T.INV.2T T.TEST TAN TANH TBILLEQ TBILLPRICE TBILLYIELD TDIST TEXT TEXTJOIN TIME TIMEVALUE TINV TODAY TRANSPOSE TREND TRIM TRIMMEAN TRUE TRUNC TTEST TYPE UNICHAR UNICODE UNIQUE UPPER VALUE VALUETOTEXT VAR VAR.P VAR.S VARA VARP VARPA VDB VLOOKUP WEBSERVICE WEEKDAY WEEKNUM WEIBULL WEIBULL.DIST WORKDAY WORKDAY.INTL XIRR XLOOKUP XMATCH XNPV XOR YEAR YEARFRAC YIELD YIELDDISC YIELDMAT Z.TEST ZTEST ================================================ FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v162.txt ================================================ ABS ACCRINT ACCRINTM ACOS ACOSH ACOT ACOTH ADDRESS AGGREGATE AMORDEGRC AMORLINC AND ARABIC AREAS ARRAYTOTEXT ASC ASIN ASINH ATAN ATAN2 ATANH AVEDEV AVERAGE AVERAGEA AVERAGEIF AVERAGEIFS BAHTTEXT BASE BESSELI BESSELJ BESSELK BESSELY BETA.DIST BETA.INV BETADIST BETAINV BIN2DEC BIN2HEX BIN2OCT BINOM.DIST BINOM.DIST.RANGE BINOM.INV BINOMDIST BITAND BITLSHIFT BITOR BITRSHIFT BITXOR CALL CEILING CEILING.MATH CEILING.PRECISE CELL CHAR CHIDIST CHIINV CHISQ.DIST CHISQ.DIST.RT CHISQ.INV CHISQ.INV.RT CHISQ.TEST CHITEST CLEAN CODE COLUMN COLUMNS COMBIN COMBINA COMPLEX 
CONCAT CONCATENATE CONFIDENCE CONFIDENCE.NORM CONFIDENCE.T CONVERT CORREL COS COSH COT COTH COUNT COUNTA COUNTBLANK COUNTIF COUNTIFS COUPDAYBS COUPDAYS COUPDAYSNC COUPNCD COUPNUM COUPPCD COVAR COVARIANCE.P COVARIANCE.S CRITBINOM CSC CSCH CUBEKPIMEMBER CUBEMEMBER CUBEMEMBERPROPERTY CUBERANKEDMEMBER CUBESET CUBESETCOUNT CUBEVALUE CUMIPMT CUMPRINC DATE DATEDIF DATEVALUE DAVERAGE DAY DAYS DAYS360 DB DBCS DCOUNT DCOUNTA DDB DEC2BIN DEC2HEX DEC2OCT DECIMAL DEGREES DELTA DEVSQ DGET DISC DMAX DMIN DOLLAR DOLLARDE DOLLARFR DPRODUCT DSTDEV DSTDEVP DSUM DURATION DVAR DVARP EDATE EFFECT ENCODEURL EOMONTH ERF ERF.PRECISE ERFC ERFC.PRECISE ERROR.TYPE EUROCONVERT EVEN EXACT EXP EXPON.DIST EXPONDIST F.DIST F.DIST.RT F.INV F.INV.RT F.TEST FACT FACTDOUBLE FALSE FDIST FILTER FILTERXML FIND FINDB FINV FISHER FISHERINV FIXED FLOOR FLOOR.MATH FLOOR.PRECISE FORECAST FORECAST.ETS FORECAST.ETS.CONFINT FORECAST.ETS.SEASONALITY FORECAST.ETS.STAT FORECAST.LINEAR FORMULATEXT FREQUENCY FTEST FV FVSCHEDULE GAMMA GAMMA.DIST GAMMA.INV GAMMADIST GAMMAINV GAMMALN GAMMALN.PRECISE GAUSS GCD GEOMEAN GESTEP GETPIVOTDATA GROWTH HARMEAN HEX2BIN HEX2DEC HEX2OCT HLOOKUP HOUR HYPERLINK HYPGEOM.DIST HYPGEOMDIST IFERROR IFNA IFS IMABS IMAGINARY IMARGUMENT IMCONJUGATE IMCOS IMCOSH IMCOT IMCSC IMCSCH IMDIV IMEXP IMLN IMLOG10 IMLOG2 IMPOWER IMPRODUCT IMREAL IMSEC IMSECH IMSIN IMSINH IMSQRT IMSUB IMSUM IMTAN INFO INT INTERCEPT INTRATE IPMT IRR ISBLANK ISERR ISERROR ISEVEN ISFORMULA ISLOGICAL ISNA ISNONTEXT ISNUMBER ISO.CEILING ISODD ISOWEEKNUM ISPMT ISREF ISTEXT JIS KURT LARGE LCM LEFT LEFTB LEN LENB LET LINEST LN LOG LOG10 LOGEST LOGINV LOGNORM.DIST LOGNORM.INV LOGNORMDIST LOOKUP LOWER MATCH MAX MAXA MAXIFS MDETERM MDURATION MEDIAN MID MIDB MIN MINA MINIFS MINUTE MINVERSE MIRR MMULT MOD MODE MODE.MULT MODE.SNGL MONTH MROUND MULTINOMIAL MUNIT N NA NEGBINOM.DIST NEGBINOMDIST NETWORKDAYS NETWORKDAYS.INTL NOMINAL NORM.DIST NORM.INV NORM.S.DIST NORM.S.INV NORMDIST NORMINV NORMSDIST NORMSINV NOT NOW NPER NPV 
NUMBERVALUE OCT2BIN OCT2DEC OCT2HEX ODD ODDFPRICE ODDFYIELD ODDLPRICE ODDLYIELD OR PDURATION PEARSON PERCENTILE PERCENTILE.EXC PERCENTILE.INC PERCENTRANK PERCENTRANK.EXC PERCENTRANK.INC PERMUT PERMUTATIONA PHI PHONETIC PI PMT POISSON POISSON.DIST POWER PPMT PRICE PRICEDISC PRICEMAT PROB PRODUCT PROPER PV QUARTILE QUARTILE.EXC QUARTILE.INC QUOTIENT RADIANS RAND RANDARRAY RANDBETWEEN RANK RANK.AVG RANK.EQ RATE RECEIVED REGISTER.ID REPLACE REPLACEB REPT RIGHT RIGHTB ROMAN ROUND ROUNDDOWN ROUNDUP ROW ROWS RRI RSQ RTD SEARCH SEARCHB SEC SECH SECOND SEQUENCE SERIESSUM SHEET SHEETS SIGN SIN SINH SKEW SKEW.P SLN SLOPE SMALL SORT SORTBY SQRT SQRTPI STANDARDIZE STDEV STDEV.P STDEV.S STDEVA STDEVP STDEVPA STEYX SUBSTITUTE SUBTOTAL SUM SUMIF SUMIFS SUMPRODUCT SUMSQ SUMX2MY2 SUMX2PY2 SUMXMY2 SWITCH SYD T T.DIST T.DIST.2T T.DIST.RT T.INV T.INV.2T T.TEST TAN TANH TBILLEQ TBILLPRICE TBILLYIELD TDIST TEXT TEXTJOIN TIME TIMEVALUE TINV TODAY TRANSPOSE TREND TRIM TRIMMEAN TRUE TRUNC TTEST TYPE UNICHAR UNICODE UNIQUE UPPER VALUE VALUETOTEXT VAR VAR.P VAR.S VARA VARP VARPA VDB VLOOKUP WEBSERVICE WEEKDAY WEEKNUM WEIBULL WEIBULL.DIST WORKDAY WORKDAY.INTL XIRR XLOOKUP XMATCH XNPV XOR YEAR YEARFRAC YIELD YIELDDISC YIELDMAT Z.TEST ZTEST ================================================ FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v163.txt ================================================ ABS ACCRINT ACCRINTM ACOS ACOSH ACOT ACOTH ADDRESS AGGREGATE AMORDEGRC AMORLINC AND ARABIC AREAS ARRAYTOTEXT ASC ASIN ASINH ATAN ATAN2 ATANH AVEDEV AVERAGE AVERAGEA AVERAGEIF AVERAGEIFS BAHTTEXT BASE BESSELI BESSELJ BESSELK BESSELY BETA.DIST BETA.INV BETADIST BETAINV BIN2DEC BIN2HEX BIN2OCT BINOM.DIST BINOM.DIST.RANGE BINOM.INV BINOMDIST BITAND BITLSHIFT BITOR BITRSHIFT BITXOR CALL CEILING CEILING.MATH CEILING.PRECISE CELL CHAR CHIDIST CHIINV CHISQ.DIST CHISQ.DIST.RT CHISQ.INV CHISQ.INV.RT CHISQ.TEST CHITEST CLEAN CODE COLUMN COLUMNS COMBIN COMBINA COMPLEX CONCAT CONCATENATE CONFIDENCE 
CONFIDENCE.NORM CONFIDENCE.T CONVERT CORREL COS COSH COT COTH COUNT COUNTA COUNTBLANK COUNTIF COUNTIFS COUPDAYBS COUPDAYS COUPDAYSNC COUPNCD COUPNUM COUPPCD COVAR COVARIANCE.P COVARIANCE.S CRITBINOM CSC CSCH CUBEKPIMEMBER CUBEMEMBER CUBEMEMBERPROPERTY CUBERANKEDMEMBER CUBESET CUBESETCOUNT CUBEVALUE CUMIPMT CUMPRINC DATE DATEDIF DATEVALUE DAVERAGE DAY DAYS DAYS360 DB DBCS DCOUNT DCOUNTA DDB DEC2BIN DEC2HEX DEC2OCT DECIMAL DEGREES DELTA DEVSQ DGET DISC DMAX DMIN DOLLAR DOLLARDE DOLLARFR DPRODUCT DSTDEV DSTDEVP DSUM DURATION DVAR DVARP EDATE EFFECT ENCODEURL EOMONTH ERF ERF.PRECISE ERFC ERFC.PRECISE ERROR.TYPE EUROCONVERT EVEN EXACT EXP EXPON.DIST EXPONDIST F.DIST F.DIST.RT F.INV F.INV.RT F.TEST FACT FACTDOUBLE FALSE FDIST FILTER FILTERXML FIND FINDB FINV FISHER FISHERINV FIXED FLOOR FLOOR.MATH FLOOR.PRECISE FORECAST FORECAST.ETS FORECAST.ETS.CONFINT FORECAST.ETS.SEASONALITY FORECAST.ETS.STAT FORECAST.LINEAR FORMULATEXT FREQUENCY FTEST FV FVSCHEDULE GAMMA GAMMA.DIST GAMMA.INV GAMMADIST GAMMAINV GAMMALN GAMMALN.PRECISE GAUSS GCD GEOMEAN GESTEP GETPIVOTDATA GROWTH HARMEAN HEX2BIN HEX2DEC HEX2OCT HLOOKUP HOUR HYPERLINK HYPGEOM.DIST HYPGEOMDIST IFERROR IFNA IFS IMABS IMAGINARY IMARGUMENT IMCONJUGATE IMCOS IMCOSH IMCOT IMCSC IMCSCH IMDIV IMEXP IMLN IMLOG10 IMLOG2 IMPOWER IMPRODUCT IMREAL IMSEC IMSECH IMSIN IMSINH IMSQRT IMSUB IMSUM IMTAN INFO INT INTERCEPT INTRATE IPMT IRR ISBLANK ISERR ISERROR ISEVEN ISFORMULA ISLOGICAL ISNA ISNONTEXT ISNUMBER ISO.CEILING ISODD ISOWEEKNUM ISPMT ISREF ISTEXT JIS KURT LARGE LCM LEFT LEFTB LEN LENB LET LINEST LN LOG LOG10 LOGEST LOGINV LOGNORM.DIST LOGNORM.INV LOGNORMDIST LOOKUP LOWER MATCH MAX MAXA MAXIFS MDETERM MDURATION MEDIAN MID MIDB MIN MINA MINIFS MINUTE MINVERSE MIRR MMULT MOD MODE MODE.MULT MODE.SNGL MONTH MROUND MULTINOMIAL MUNIT N NA NEGBINOM.DIST NEGBINOMDIST NETWORKDAYS NETWORKDAYS.INTL NOMINAL NORM.DIST NORM.INV NORM.S.DIST NORM.S.INV NORMDIST NORMINV NORMSDIST NORMSINV NOT NOW NPER NPV NUMBERVALUE OCT2BIN OCT2DEC OCT2HEX ODD 
ODDFPRICE ODDFYIELD ODDLPRICE ODDLYIELD OR PDURATION PEARSON PERCENTILE PERCENTILE.EXC PERCENTILE.INC PERCENTRANK PERCENTRANK.EXC PERCENTRANK.INC PERMUT PERMUTATIONA PHI PHONETIC PI PMT POISSON POISSON.DIST POWER PPMT PRICE PRICEDISC PRICEMAT PROB PRODUCT PROPER PV QUARTILE QUARTILE.EXC QUARTILE.INC QUOTIENT RADIANS RAND RANDARRAY RANDBETWEEN RANK RANK.AVG RANK.EQ RATE RECEIVED REGISTER.ID REPLACE REPLACEB REPT RIGHT RIGHTB ROMAN ROUND ROUNDDOWN ROUNDUP ROW ROWS RRI RSQ RTD SEARCH SEARCHB SEC SECH SECOND SEQUENCE SERIESSUM SHEET SHEETS SIGN SIN SINH SKEW SKEW.P SLN SLOPE SMALL SORT SORTBY SQRT SQRTPI STANDARDIZE STDEV STDEV.P STDEV.S STDEVA STDEVP STDEVPA STEYX SUBSTITUTE SUBTOTAL SUM SUMIF SUMIFS SUMPRODUCT SUMSQ SUMX2MY2 SUMX2PY2 SUMXMY2 SWITCH SYD T T.DIST T.DIST.2T T.DIST.RT T.INV T.INV.2T T.TEST TAN TANH TBILLEQ TBILLPRICE TBILLYIELD TDIST TEXT TEXTJOIN TIME TIMEVALUE TINV TODAY TRANSPOSE TREND TRIM TRIMMEAN TRUE TRUNC TTEST TYPE UNICHAR UNICODE UNIQUE UPPER VALUE VALUETOTEXT VAR VAR.P VAR.S VARA VARP VARPA VDB VLOOKUP WEBSERVICE WEEKDAY WEEKNUM WEIBULL WEIBULL.DIST WORKDAY WORKDAY.INTL XIRR XLOOKUP XMATCH XNPV XOR YEAR YEARFRAC YIELD YIELDDISC YIELDMAT Z.TEST ZTEST ================================================ FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v170.txt ================================================ ABS ACCRINT ACCRINTM ACOS ACOSH ACOT ACOTH ADDRESS AGGREGATE AMORDEGRC AMORLINC AND ARABIC AREAS ARRAYTOTEXT ASC ASIN ASINH ATAN ATAN2 ATANH AVEDEV AVERAGE AVERAGEA AVERAGEIF AVERAGEIFS BAHTTEXT BASE BESSELI BESSELJ BESSELK BESSELY BETA.DIST BETA.INV BETADIST BETAINV BIN2DEC BIN2HEX BIN2OCT BINOM.DIST BINOM.DIST.RANGE BINOM.INV BINOMDIST BITAND BITLSHIFT BITOR BITRSHIFT BITXOR CALL CEILING CEILING.MATH CEILING.PRECISE CELL CHAR CHIDIST CHIINV CHISQ.DIST CHISQ.DIST.RT CHISQ.INV CHISQ.INV.RT CHISQ.TEST CHITEST CLEAN CODE COLUMN COLUMNS COMBIN COMBINA COMPLEX CONCAT CONCATENATE CONFIDENCE CONFIDENCE.NORM CONFIDENCE.T CONVERT CORREL COS 
COSH COT COTH COUNT COUNTA COUNTBLANK COUNTIF COUNTIFS COUPDAYBS COUPDAYS COUPDAYSNC COUPNCD COUPNUM COUPPCD COVAR COVARIANCE.P COVARIANCE.S CRITBINOM CSC CSCH CUBEKPIMEMBER CUBEMEMBER CUBEMEMBERPROPERTY CUBERANKEDMEMBER CUBESET CUBESETCOUNT CUBEVALUE CUMIPMT CUMPRINC DATE DATEDIF DATEVALUE DAVERAGE DAY DAYS DAYS360 DB DBCS DCOUNT DCOUNTA DDB DEC2BIN DEC2HEX DEC2OCT DECIMAL DEGREES DELTA DEVSQ DGET DISC DMAX DMIN DOLLAR DOLLARDE DOLLARFR DPRODUCT DSTDEV DSTDEVP DSUM DURATION DVAR DVARP EDATE EFFECT ENCODEURL EOMONTH ERF ERF.PRECISE ERFC ERFC.PRECISE ERROR.TYPE EUROCONVERT EVEN EXACT EXP EXPON.DIST EXPONDIST F.DIST F.DIST.RT F.INV F.INV.RT F.TEST FACT FACTDOUBLE FALSE FDIST FILTER FILTERXML FIND FINDB FINV FISHER FISHERINV FIXED FLOOR FLOOR.MATH FLOOR.PRECISE FORECAST FORECAST.ETS FORECAST.ETS.CONFINT FORECAST.ETS.SEASONALITY FORECAST.ETS.STAT FORECAST.LINEAR FORMULATEXT FREQUENCY FTEST FV FVSCHEDULE GAMMA GAMMA.DIST GAMMA.INV GAMMADIST GAMMAINV GAMMALN GAMMALN.PRECISE GAUSS GCD GEOMEAN GESTEP GETPIVOTDATA GROWTH HARMEAN HEX2BIN HEX2DEC HEX2OCT HLOOKUP HOUR HYPERLINK HYPGEOM.DIST HYPGEOMDIST IFERROR IFNA IFS IMABS IMAGINARY IMARGUMENT IMCONJUGATE IMCOS IMCOSH IMCOT IMCSC IMCSCH IMDIV IMEXP IMLN IMLOG10 IMLOG2 IMPOWER IMPRODUCT IMREAL IMSEC IMSECH IMSIN IMSINH IMSQRT IMSUB IMSUM IMTAN INFO INT INTERCEPT INTRATE IPMT IRR ISBLANK ISERR ISERROR ISEVEN ISFORMULA ISLOGICAL ISNA ISNONTEXT ISNUMBER ISO.CEILING ISODD ISOWEEKNUM ISPMT ISREF ISTEXT JIS KURT LARGE LCM LEFT LEFTB LEN LENB LET LINEST LN LOG LOG10 LOGEST LOGINV LOGNORM.DIST LOGNORM.INV LOGNORMDIST LOOKUP LOWER MATCH MAX MAXA MAXIFS MDETERM MDURATION MEDIAN MID MIDB MIN MINA MINIFS MINUTE MINVERSE MIRR MMULT MOD MODE MODE.MULT MODE.SNGL MONTH MROUND MULTINOMIAL MUNIT N NA NEGBINOM.DIST NEGBINOMDIST NETWORKDAYS NETWORKDAYS.INTL NOMINAL NORM.DIST NORM.INV NORM.S.DIST NORM.S.INV NORMDIST NORMINV NORMSDIST NORMSINV NOT NOW NPER NPV NUMBERVALUE OCT2BIN OCT2DEC OCT2HEX ODD ODDFPRICE ODDFYIELD ODDLPRICE ODDLYIELD OR 
PDURATION PEARSON PERCENTILE PERCENTILE.EXC PERCENTILE.INC PERCENTRANK PERCENTRANK.EXC PERCENTRANK.INC PERMUT PERMUTATIONA PHI PHONETIC PI PMT POISSON POISSON.DIST POWER PPMT PRICE PRICEDISC PRICEMAT PROB PRODUCT PROPER PV QUARTILE QUARTILE.EXC QUARTILE.INC QUOTIENT RADIANS RAND RANDARRAY RANDBETWEEN RANK RANK.AVG RANK.EQ RATE RECEIVED REGISTER.ID REPLACE REPLACEB REPT RIGHT RIGHTB ROMAN ROUND ROUNDDOWN ROUNDUP ROW ROWS RRI RSQ RTD SEARCH SEARCHB SEC SECH SECOND SEQUENCE SERIESSUM SHEET SHEETS SIGN SIN SINH SKEW SKEW.P SLN SLOPE SMALL SORT SORTBY SQRT SQRTPI STANDARDIZE STDEV STDEV.P STDEV.S STDEVA STDEVP STDEVPA STEYX SUBSTITUTE SUBTOTAL SUM SUMIF SUMIFS SUMPRODUCT SUMSQ SUMX2MY2 SUMX2PY2 SUMXMY2 SWITCH SYD T T.DIST T.DIST.2T T.DIST.RT T.INV T.INV.2T T.TEST TAN TANH TBILLEQ TBILLPRICE TBILLYIELD TDIST TEXT TEXTJOIN TIME TIMEVALUE TINV TODAY TRANSPOSE TREND TRIM TRIMMEAN TRUE TRUNC TTEST TYPE UNICHAR UNICODE UNIQUE UPPER VALUE VALUETOTEXT VAR VAR.P VAR.S VARA VARP VARPA VDB VLOOKUP WEBSERVICE WEEKDAY WEEKNUM WEIBULL WEIBULL.DIST WORKDAY WORKDAY.INTL XIRR XLOOKUP XMATCH XNPV XOR YEAR YEARFRAC YIELD YIELDDISC YIELDMAT Z.TEST ZTEST ================================================ FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v171.txt ================================================ ABS ACCRINT ACCRINTM ACOS ACOSH ACOT ACOTH ADDRESS AGGREGATE AMORDEGRC AMORLINC AND ARABIC AREAS ARRAYTOTEXT ASC ASIN ASINH ATAN ATAN2 ATANH AVEDEV AVERAGE AVERAGEA AVERAGEIF AVERAGEIFS BAHTTEXT BASE BESSELI BESSELJ BESSELK BESSELY BETA.DIST BETA.INV BETADIST BETAINV BIN2DEC BIN2HEX BIN2OCT BINOM.DIST BINOM.DIST.RANGE BINOM.INV BINOMDIST BITAND BITLSHIFT BITOR BITRSHIFT BITXOR CALL CEILING CEILING.MATH CEILING.PRECISE CELL CHAR CHIDIST CHIINV CHISQ.DIST CHISQ.DIST.RT CHISQ.INV CHISQ.INV.RT CHISQ.TEST CHITEST CLEAN CODE COLUMN COLUMNS COMBIN COMBINA COMPLEX CONCAT CONCATENATE CONFIDENCE CONFIDENCE.NORM CONFIDENCE.T CONVERT CORREL COS COSH COT COTH COUNT COUNTA COUNTBLANK COUNTIF 
COUNTIFS COUPDAYBS COUPDAYS COUPDAYSNC COUPNCD COUPNUM COUPPCD COVAR COVARIANCE.P COVARIANCE.S CRITBINOM CSC CSCH CUBEKPIMEMBER CUBEMEMBER CUBEMEMBERPROPERTY CUBERANKEDMEMBER CUBESET CUBESETCOUNT CUBEVALUE CUMIPMT CUMPRINC DATE DATEDIF DATEVALUE DAVERAGE DAY DAYS DAYS360 DB DBCS DCOUNT DCOUNTA DDB DEC2BIN DEC2HEX DEC2OCT DECIMAL DEGREES DELTA DEVSQ DGET DISC DMAX DMIN DOLLAR DOLLARDE DOLLARFR DPRODUCT DSTDEV DSTDEVP DSUM DURATION DVAR DVARP EDATE EFFECT ENCODEURL EOMONTH ERF ERF.PRECISE ERFC ERFC.PRECISE ERROR.TYPE EUROCONVERT EVEN EXACT EXP EXPON.DIST EXPONDIST F.DIST F.DIST.RT F.INV F.INV.RT F.TEST FACT FACTDOUBLE FALSE FDIST FILTER FILTERXML FIND FINDB FINV FISHER FISHERINV FIXED FLOOR FLOOR.MATH FLOOR.PRECISE FORECAST FORECAST.ETS FORECAST.ETS.CONFINT FORECAST.ETS.SEASONALITY FORECAST.ETS.STAT FORECAST.LINEAR FORMULATEXT FREQUENCY FTEST FV FVSCHEDULE GAMMA GAMMA.DIST GAMMA.INV GAMMADIST GAMMAINV GAMMALN GAMMALN.PRECISE GAUSS GCD GEOMEAN GESTEP GETPIVOTDATA GROWTH HARMEAN HEX2BIN HEX2DEC HEX2OCT HLOOKUP HOUR HYPERLINK HYPGEOM.DIST HYPGEOMDIST IFERROR IFNA IFS IMABS IMAGINARY IMARGUMENT IMCONJUGATE IMCOS IMCOSH IMCOT IMCSC IMCSCH IMDIV IMEXP IMLN IMLOG10 IMLOG2 IMPOWER IMPRODUCT IMREAL IMSEC IMSECH IMSIN IMSINH IMSQRT IMSUB IMSUM IMTAN INFO INT INTERCEPT INTRATE IPMT IRR ISBLANK ISERR ISERROR ISEVEN ISFORMULA ISLOGICAL ISNA ISNONTEXT ISNUMBER ISO.CEILING ISODD ISOWEEKNUM ISPMT ISREF ISTEXT JIS KURT LARGE LCM LEFT LEFTB LEN LENB LET LINEST LN LOG LOG10 LOGEST LOGINV LOGNORM.DIST LOGNORM.INV LOGNORMDIST LOOKUP LOWER MATCH MAX MAXA MAXIFS MDETERM MDURATION MEDIAN MID MIDB MIN MINA MINIFS MINUTE MINVERSE MIRR MMULT MOD MODE MODE.MULT MODE.SNGL MONTH MROUND MULTINOMIAL MUNIT N NA NEGBINOM.DIST NEGBINOMDIST NETWORKDAYS NETWORKDAYS.INTL NOMINAL NORM.DIST NORM.INV NORM.S.DIST NORM.S.INV NORMDIST NORMINV NORMSDIST NORMSINV NOT NOW NPER NPV NUMBERVALUE OCT2BIN OCT2DEC OCT2HEX ODD ODDFPRICE ODDFYIELD ODDLPRICE ODDLYIELD OR PDURATION PEARSON PERCENTILE PERCENTILE.EXC 
PERCENTILE.INC PERCENTRANK PERCENTRANK.EXC PERCENTRANK.INC PERMUT PERMUTATIONA PHI PHONETIC PI PMT POISSON POISSON.DIST POWER PPMT PRICE PRICEDISC PRICEMAT PROB PRODUCT PROPER PV QUARTILE QUARTILE.EXC QUARTILE.INC QUOTIENT RADIANS RAND RANDARRAY RANDBETWEEN RANK RANK.AVG RANK.EQ RATE RECEIVED REGISTER.ID REPLACE REPLACEB REPT RIGHT RIGHTB ROMAN ROUND ROUNDDOWN ROUNDUP ROW ROWS RRI RSQ RTD SEARCH SEARCHB SEC SECH SECOND SEQUENCE SERIESSUM SHEET SHEETS SIGN SIN SINH SKEW SKEW.P SLN SLOPE SMALL SORT SORTBY SQRT SQRTPI STANDARDIZE STDEV STDEV.P STDEV.S STDEVA STDEVP STDEVPA STEYX SUBSTITUTE SUBTOTAL SUM SUMIF SUMIFS SUMPRODUCT SUMSQ SUMX2MY2 SUMX2PY2 SUMXMY2 SWITCH SYD T T.DIST T.DIST.2T T.DIST.RT T.INV T.INV.2T T.TEST TAN TANH TBILLEQ TBILLPRICE TBILLYIELD TDIST TEXT TEXTJOIN TIME TIMEVALUE TINV TODAY TRANSPOSE TREND TRIM TRIMMEAN TRUE TRUNC TTEST TYPE UNICHAR UNICODE UNIQUE UPPER VALUE VALUETOTEXT VAR VAR.P VAR.S VARA VARP VARPA VDB VLOOKUP WEBSERVICE WEEKDAY WEEKNUM WEIBULL WEIBULL.DIST WORKDAY WORKDAY.INTL XIRR XLOOKUP XMATCH XNPV XOR YEAR YEARFRAC YIELD YIELDDISC YIELDMAT Z.TEST ZTEST ================================================ FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v172.txt ================================================ ABS ACCRINT ACCRINTM ACOS ACOSH ACOT ACOTH ADDRESS AGGREGATE AMORDEGRC AMORLINC AND ARABIC AREAS ARRAYTOTEXT ASC ASIN ASINH ATAN ATAN2 ATANH AVEDEV AVERAGE AVERAGEA AVERAGEIF AVERAGEIFS BAHTTEXT BASE BESSELI BESSELJ BESSELK BESSELY BETA.DIST BETA.INV BETADIST BETAINV BIN2DEC BIN2HEX BIN2OCT BINOM.DIST BINOM.DIST.RANGE BINOM.INV BINOMDIST BITAND BITLSHIFT BITOR BITRSHIFT BITXOR CALL CEILING CEILING.MATH CEILING.PRECISE CELL CHAR CHIDIST CHIINV CHISQ.DIST CHISQ.DIST.RT CHISQ.INV CHISQ.INV.RT CHISQ.TEST CHITEST CLEAN CODE COLUMN COLUMNS COMBIN COMBINA COMPLEX CONCAT CONCATENATE CONFIDENCE CONFIDENCE.NORM CONFIDENCE.T CONVERT CORREL COS COSH COT COTH COUNT COUNTA COUNTBLANK COUNTIF COUNTIFS COUPDAYBS COUPDAYS COUPDAYSNC 
COUPNCD COUPNUM COUPPCD COVAR COVARIANCE.P COVARIANCE.S CRITBINOM CSC CSCH CUBEKPIMEMBER CUBEMEMBER CUBEMEMBERPROPERTY CUBERANKEDMEMBER CUBESET CUBESETCOUNT CUBEVALUE CUMIPMT CUMPRINC DATE DATEDIF DATEVALUE DAVERAGE DAY DAYS DAYS360 DB DBCS DCOUNT DCOUNTA DDB DEC2BIN DEC2HEX DEC2OCT DECIMAL DEGREES DELTA DEVSQ DGET DISC DMAX DMIN DOLLAR DOLLARDE DOLLARFR DPRODUCT DSTDEV DSTDEVP DSUM DURATION DVAR DVARP EDATE EFFECT ENCODEURL EOMONTH ERF ERF.PRECISE ERFC ERFC.PRECISE ERROR.TYPE EUROCONVERT EVEN EXACT EXP EXPON.DIST EXPONDIST F.DIST F.DIST.RT F.INV F.INV.RT F.TEST FACT FACTDOUBLE FALSE FDIST FILTER FILTERXML FIND FINDB FINV FISHER FISHERINV FIXED FLOOR FLOOR.MATH FLOOR.PRECISE FORECAST FORECAST.ETS FORECAST.ETS.CONFINT FORECAST.ETS.SEASONALITY FORECAST.ETS.STAT FORECAST.LINEAR FORMULATEXT FREQUENCY FTEST FV FVSCHEDULE GAMMA GAMMA.DIST GAMMA.INV GAMMADIST GAMMAINV GAMMALN GAMMALN.PRECISE GAUSS GCD GEOMEAN GESTEP GETPIVOTDATA GROWTH HARMEAN HEX2BIN HEX2DEC HEX2OCT HLOOKUP HOUR HYPERLINK HYPGEOM.DIST HYPGEOMDIST IFERROR IFNA IFS IMABS IMAGINARY IMARGUMENT IMCONJUGATE IMCOS IMCOSH IMCOT IMCSC IMCSCH IMDIV IMEXP IMLN IMLOG10 IMLOG2 IMPOWER IMPRODUCT IMREAL IMSEC IMSECH IMSIN IMSINH IMSQRT IMSUB IMSUM IMTAN INFO INT INTERCEPT INTRATE IPMT IRR ISBLANK ISERR ISERROR ISEVEN ISFORMULA ISLOGICAL ISNA ISNONTEXT ISNUMBER ISO.CEILING ISODD ISOWEEKNUM ISPMT ISREF ISTEXT JIS KURT LARGE LCM LEFT LEFTB LEN LENB LET LINEST LN LOG LOG10 LOGEST LOGINV LOGNORM.DIST LOGNORM.INV LOGNORMDIST LOOKUP LOWER MATCH MAX MAXA MAXIFS MDETERM MDURATION MEDIAN MID MIDB MIN MINA MINIFS MINUTE MINVERSE MIRR MMULT MOD MODE MODE.MULT MODE.SNGL MONTH MROUND MULTINOMIAL MUNIT N NA NEGBINOM.DIST NEGBINOMDIST NETWORKDAYS NETWORKDAYS.INTL NOMINAL NORM.DIST NORM.INV NORM.S.DIST NORM.S.INV NORMDIST NORMINV NORMSDIST NORMSINV NOT NOW NPER NPV NUMBERVALUE OCT2BIN OCT2DEC OCT2HEX ODD ODDFPRICE ODDFYIELD ODDLPRICE ODDLYIELD OR PDURATION PEARSON PERCENTILE PERCENTILE.EXC PERCENTILE.INC PERCENTRANK PERCENTRANK.EXC 
PERCENTRANK.INC PERMUT PERMUTATIONA PHI PHONETIC PI PMT POISSON POISSON.DIST POWER PPMT PRICE PRICEDISC PRICEMAT PROB PRODUCT PROPER PV QUARTILE QUARTILE.EXC QUARTILE.INC QUOTIENT RADIANS RAND RANDARRAY RANDBETWEEN RANK RANK.AVG RANK.EQ RATE RECEIVED REGISTER.ID REPLACE REPLACEB REPT RIGHT RIGHTB ROMAN ROUND ROUNDDOWN ROUNDUP ROW ROWS RRI RSQ RTD SEARCH SEARCHB SEC SECH SECOND SEQUENCE SERIESSUM SHEET SHEETS SIGN SIN SINH SKEW SKEW.P SLN SLOPE SMALL SORT SORTBY SQRT SQRTPI STANDARDIZE STDEV STDEV.P STDEV.S STDEVA STDEVP STDEVPA STEYX SUBSTITUTE SUBTOTAL SUM SUMIF SUMIFS SUMPRODUCT SUMSQ SUMX2MY2 SUMX2PY2 SUMXMY2 SWITCH SYD T T.DIST T.DIST.2T T.DIST.RT T.INV T.INV.2T T.TEST TAN TANH TBILLEQ TBILLPRICE TBILLYIELD TDIST TEXT TEXTJOIN TIME TIMEVALUE TINV TODAY TRANSPOSE TREND TRIM TRIMMEAN TRUE TRUNC TTEST TYPE UNICHAR UNICODE UNIQUE UPPER VALUE VALUETOTEXT VAR VAR.P VAR.S VARA VARP VARPA VDB VLOOKUP WEBSERVICE WEEKDAY WEEKNUM WEIBULL WEIBULL.DIST WORKDAY WORKDAY.INTL XIRR XLOOKUP XMATCH XNPV XOR YEAR YEARFRAC YIELD YIELDDISC YIELDMAT Z.TEST ZTEST ================================================ FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v173.txt ================================================ ABS ACCRINT ACCRINTM ACOS ACOSH ACOT ACOTH ADDRESS AGGREGATE AMORDEGRC AMORLINC AND ARABIC AREAS ARRAYTOTEXT ASC ASIN ASINH ATAN ATAN2 ATANH AVEDEV AVERAGE AVERAGEA AVERAGEIF AVERAGEIFS BAHTTEXT BASE BESSELI BESSELJ BESSELK BESSELY BETA.DIST BETA.INV BETADIST BETAINV BIN2DEC BIN2HEX BIN2OCT BINOM.DIST BINOM.DIST.RANGE BINOM.INV BINOMDIST BITAND BITLSHIFT BITOR BITRSHIFT BITXOR CALL CEILING CEILING.MATH CEILING.PRECISE CELL CHAR CHIDIST CHIINV CHISQ.DIST CHISQ.DIST.RT CHISQ.INV CHISQ.INV.RT CHISQ.TEST CHITEST CLEAN CODE COLUMN COLUMNS COMBIN COMBINA COMPLEX CONCAT CONCATENATE CONFIDENCE CONFIDENCE.NORM CONFIDENCE.T CONVERT CORREL COS COSH COT COTH COUNT COUNTA COUNTBLANK COUNTIF COUNTIFS COUPDAYBS COUPDAYS COUPDAYSNC COUPNCD COUPNUM COUPPCD COVAR COVARIANCE.P 
COVARIANCE.S CRITBINOM CSC CSCH CUBEKPIMEMBER CUBEMEMBER CUBEMEMBERPROPERTY CUBERANKEDMEMBER CUBESET CUBESETCOUNT CUBEVALUE CUMIPMT CUMPRINC DATE DATEDIF DATEVALUE DAVERAGE DAY DAYS DAYS360 DB DBCS DCOUNT DCOUNTA DDB DEC2BIN DEC2HEX DEC2OCT DECIMAL DEGREES DELTA DEVSQ DGET DISC DMAX DMIN DOLLAR DOLLARDE DOLLARFR DPRODUCT DSTDEV DSTDEVP DSUM DURATION DVAR DVARP EDATE EFFECT ENCODEURL EOMONTH ERF ERF.PRECISE ERFC ERFC.PRECISE ERROR.TYPE EUROCONVERT EVEN EXACT EXP EXPON.DIST EXPONDIST F.DIST F.DIST.RT F.INV F.INV.RT F.TEST FACT FACTDOUBLE FALSE FDIST FILTER FILTERXML FIND FINDB FINV FISHER FISHERINV FIXED FLOOR FLOOR.MATH FLOOR.PRECISE FORECAST FORECAST.ETS FORECAST.ETS.CONFINT FORECAST.ETS.SEASONALITY FORECAST.ETS.STAT FORECAST.LINEAR FORMULATEXT FREQUENCY FTEST FV FVSCHEDULE GAMMA GAMMA.DIST GAMMA.INV GAMMADIST GAMMAINV GAMMALN GAMMALN.PRECISE GAUSS GCD GEOMEAN GESTEP GETPIVOTDATA GROWTH HARMEAN HEX2BIN HEX2DEC HEX2OCT HLOOKUP HOUR HYPERLINK HYPGEOM.DIST HYPGEOMDIST IFERROR IFNA IFS IMABS IMAGINARY IMARGUMENT IMCONJUGATE IMCOS IMCOSH IMCOT IMCSC IMCSCH IMDIV IMEXP IMLN IMLOG10 IMLOG2 IMPOWER IMPRODUCT IMREAL IMSEC IMSECH IMSIN IMSINH IMSQRT IMSUB IMSUM IMTAN INFO INT INTERCEPT INTRATE IPMT IRR ISBLANK ISERR ISERROR ISEVEN ISFORMULA ISLOGICAL ISNA ISNONTEXT ISNUMBER ISO.CEILING ISODD ISOWEEKNUM ISPMT ISREF ISTEXT JIS KURT LARGE LCM LEFT LEFTB LEN LENB LET LINEST LN LOG LOG10 LOGEST LOGINV LOGNORM.DIST LOGNORM.INV LOGNORMDIST LOOKUP LOWER MATCH MAX MAXA MAXIFS MDETERM MDURATION MEDIAN MID MIDB MIN MINA MINIFS MINUTE MINVERSE MIRR MMULT MOD MODE MODE.MULT MODE.SNGL MONTH MROUND MULTINOMIAL MUNIT N NA NEGBINOM.DIST NEGBINOMDIST NETWORKDAYS NETWORKDAYS.INTL NOMINAL NORM.DIST NORM.INV NORM.S.DIST NORM.S.INV NORMDIST NORMINV NORMSDIST NORMSINV NOT NOW NPER NPV NUMBERVALUE OCT2BIN OCT2DEC OCT2HEX ODD ODDFPRICE ODDFYIELD ODDLPRICE ODDLYIELD OR PDURATION PEARSON PERCENTILE PERCENTILE.EXC PERCENTILE.INC PERCENTRANK PERCENTRANK.EXC PERCENTRANK.INC PERMUT PERMUTATIONA PHI 
PHONETIC PI PMT POISSON POISSON.DIST POWER PPMT PRICE PRICEDISC PRICEMAT PROB PRODUCT PROPER PV QUARTILE QUARTILE.EXC QUARTILE.INC QUOTIENT RADIANS RAND RANDARRAY RANDBETWEEN RANK RANK.AVG RANK.EQ RATE RECEIVED REGISTER.ID REPLACE REPLACEB REPT RIGHT RIGHTB ROMAN ROUND ROUNDDOWN ROUNDUP ROW ROWS RRI RSQ RTD SEARCH SEARCHB SEC SECH SECOND SEQUENCE SERIESSUM SHEET SHEETS SIGN SIN SINH SKEW SKEW.P SLN SLOPE SMALL SORT SORTBY SQRT SQRTPI STANDARDIZE STDEV STDEV.P STDEV.S STDEVA STDEVP STDEVPA STEYX SUBSTITUTE SUBTOTAL SUM SUMIF SUMIFS SUMPRODUCT SUMSQ SUMX2MY2 SUMX2PY2 SUMXMY2 SWITCH SYD T T.DIST T.DIST.2T T.DIST.RT T.INV T.INV.2T T.TEST TAN TANH TBILLEQ TBILLPRICE TBILLYIELD TDIST TEXT TEXTJOIN TIME TIMEVALUE TINV TODAY TRANSPOSE TREND TRIM TRIMMEAN TRUE TRUNC TTEST TYPE UNICHAR UNICODE UNIQUE UPPER VALUE VALUETOTEXT VAR VAR.P VAR.S VARA VARP VARPA VDB VLOOKUP WEBSERVICE WEEKDAY WEEKNUM WEIBULL WEIBULL.DIST WORKDAY WORKDAY.INTL XIRR XLOOKUP XMATCH XNPV XOR YEAR YEARFRAC YIELD YIELDDISC YIELDMAT Z.TEST ZTEST ================================================ FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v174.txt ================================================ ABS ACCRINT ACCRINTM ACOS ACOSH ACOT ACOTH ADDRESS AGGREGATE AMORDEGRC AMORLINC AND ARABIC AREAS ARRAYTOTEXT ASC ASIN ASINH ATAN ATAN2 ATANH AVEDEV AVERAGE AVERAGEA AVERAGEIF AVERAGEIFS BAHTTEXT BASE BESSELI BESSELJ BESSELK BESSELY BETA.DIST BETA.INV BETADIST BETAINV BIN2DEC BIN2HEX BIN2OCT BINOM.DIST BINOM.DIST.RANGE BINOM.INV BINOMDIST BITAND BITLSHIFT BITOR BITRSHIFT BITXOR CALL CEILING CEILING.MATH CEILING.PRECISE CELL CHAR CHIDIST CHIINV CHISQ.DIST CHISQ.DIST.RT CHISQ.INV CHISQ.INV.RT CHISQ.TEST CHITEST CLEAN CODE COLUMN COLUMNS COMBIN COMBINA COMPLEX CONCAT CONCATENATE CONFIDENCE CONFIDENCE.NORM CONFIDENCE.T CONVERT CORREL COS COSH COT COTH COUNT COUNTA COUNTBLANK COUNTIF COUNTIFS COUPDAYBS COUPDAYS COUPDAYSNC COUPNCD COUPNUM COUPPCD COVAR COVARIANCE.P COVARIANCE.S CRITBINOM CSC CSCH CUBEKPIMEMBER 
CUBEMEMBER CUBEMEMBERPROPERTY CUBERANKEDMEMBER CUBESET CUBESETCOUNT CUBEVALUE CUMIPMT CUMPRINC DATE DATEDIF DATEVALUE DAVERAGE DAY DAYS DAYS360 DB DBCS DCOUNT DCOUNTA DDB DEC2BIN DEC2HEX DEC2OCT DECIMAL DEGREES DELTA DEVSQ DGET DISC DMAX DMIN DOLLAR DOLLARDE DOLLARFR DPRODUCT DSTDEV DSTDEVP DSUM DURATION DVAR DVARP EDATE EFFECT ENCODEURL EOMONTH ERF ERF.PRECISE ERFC ERFC.PRECISE ERROR.TYPE EUROCONVERT EVEN EXACT EXP EXPON.DIST EXPONDIST F.DIST F.DIST.RT F.INV F.INV.RT F.TEST FACT FACTDOUBLE FALSE FDIST FILTER FILTERXML FIND FINDB FINV FISHER FISHERINV FIXED FLOOR FLOOR.MATH FLOOR.PRECISE FORECAST FORECAST.ETS FORECAST.ETS.CONFINT FORECAST.ETS.SEASONALITY FORECAST.ETS.STAT FORECAST.LINEAR FORMULATEXT FREQUENCY FTEST FV FVSCHEDULE GAMMA GAMMA.DIST GAMMA.INV GAMMADIST GAMMAINV GAMMALN GAMMALN.PRECISE GAUSS GCD GEOMEAN GESTEP GETPIVOTDATA GROWTH HARMEAN HEX2BIN HEX2DEC HEX2OCT HLOOKUP HOUR HYPERLINK HYPGEOM.DIST HYPGEOMDIST IFERROR IFNA IFS IMABS IMAGINARY IMARGUMENT IMCONJUGATE IMCOS IMCOSH IMCOT IMCSC IMCSCH IMDIV IMEXP IMLN IMLOG10 IMLOG2 IMPOWER IMPRODUCT IMREAL IMSEC IMSECH IMSIN IMSINH IMSQRT IMSUB IMSUM IMTAN INFO INT INTERCEPT INTRATE IPMT IRR ISBLANK ISERR ISERROR ISEVEN ISFORMULA ISLOGICAL ISNA ISNONTEXT ISNUMBER ISO.CEILING ISODD ISOWEEKNUM ISPMT ISREF ISTEXT JIS KURT LARGE LCM LEFT LEFTB LEN LENB LET LINEST LN LOG LOG10 LOGEST LOGINV LOGNORM.DIST LOGNORM.INV LOGNORMDIST LOOKUP LOWER MATCH MAX MAXA MAXIFS MDETERM MDURATION MEDIAN MID MIDB MIN MINA MINIFS MINUTE MINVERSE MIRR MMULT MOD MODE MODE.MULT MODE.SNGL MONTH MROUND MULTINOMIAL MUNIT N NA NEGBINOM.DIST NEGBINOMDIST NETWORKDAYS NETWORKDAYS.INTL NOMINAL NORM.DIST NORM.INV NORM.S.DIST NORM.S.INV NORMDIST NORMINV NORMSDIST NORMSINV NOT NOW NPER NPV NUMBERVALUE OCT2BIN OCT2DEC OCT2HEX ODD ODDFPRICE ODDFYIELD ODDLPRICE ODDLYIELD OR PDURATION PEARSON PERCENTILE PERCENTILE.EXC PERCENTILE.INC PERCENTRANK PERCENTRANK.EXC PERCENTRANK.INC PERMUT PERMUTATIONA PHI PHONETIC PI PMT POISSON POISSON.DIST POWER PPMT 
PRICE PRICEDISC PRICEMAT PROB PRODUCT PROPER PV QUARTILE QUARTILE.EXC QUARTILE.INC QUOTIENT RADIANS RAND RANDARRAY RANDBETWEEN RANK RANK.AVG RANK.EQ RATE RECEIVED REGISTER.ID REPLACE REPLACEB REPT RIGHT RIGHTB ROMAN ROUND ROUNDDOWN ROUNDUP ROW ROWS RRI RSQ RTD SEARCH SEARCHB SEC SECH SECOND SEQUENCE SERIESSUM SHEET SHEETS SIGN SIN SINH SKEW SKEW.P SLN SLOPE SMALL SORT SORTBY SQRT SQRTPI STANDARDIZE STDEV STDEV.P STDEV.S STDEVA STDEVP STDEVPA STEYX SUBSTITUTE SUBTOTAL SUM SUMIF SUMIFS SUMPRODUCT SUMSQ SUMX2MY2 SUMX2PY2 SUMXMY2 SWITCH SYD T T.DIST T.DIST.2T T.DIST.RT T.INV T.INV.2T T.TEST TAN TANH TBILLEQ TBILLPRICE TBILLYIELD TDIST TEXT TEXTJOIN TIME TIMEVALUE TINV TODAY TRANSPOSE TREND TRIM TRIMMEAN TRUE TRUNC TTEST TYPE UNICHAR UNICODE UNIQUE UPPER VALUE VALUETOTEXT VAR VAR.P VAR.S VARA VARP VARPA VDB VLOOKUP WEBSERVICE WEEKDAY WEEKNUM WEIBULL WEIBULL.DIST WORKDAY WORKDAY.INTL XIRR XLOOKUP XMATCH XNPV XOR YEAR YEARFRAC YIELD YIELDDISC YIELDMAT Z.TEST ZTEST ================================================ FILE: app/XLParser.Web/Resources/ExcelBuiltinFunctionList-v175.txt ================================================ ABS ACCRINT ACCRINTM ACOS ACOSH ACOT ACOTH ADDRESS AGGREGATE AMORDEGRC AMORLINC AND ARABIC AREAS ARRAYTOTEXT ASC ASIN ASINH ATAN ATAN2 ATANH AVEDEV AVERAGE AVERAGEA AVERAGEIF AVERAGEIFS BAHTTEXT BASE BESSELI BESSELJ BESSELK BESSELY BETA.DIST BETA.INV BETADIST BETAINV BIN2DEC BIN2HEX BIN2OCT BINOM.DIST BINOM.DIST.RANGE BINOM.INV BINOMDIST BITAND BITLSHIFT BITOR BITRSHIFT BITXOR CALL CEILING CEILING.MATH CEILING.PRECISE CELL CHAR CHIDIST CHIINV CHISQ.DIST CHISQ.DIST.RT CHISQ.INV CHISQ.INV.RT CHISQ.TEST CHITEST CLEAN CODE COLUMN COLUMNS COMBIN COMBINA COMPLEX CONCAT CONCATENATE CONFIDENCE CONFIDENCE.NORM CONFIDENCE.T CONVERT CORREL COS COSH COT COTH COUNT COUNTA COUNTBLANK COUNTIF COUNTIFS COUPDAYBS COUPDAYS COUPDAYSNC COUPNCD COUPNUM COUPPCD COVAR COVARIANCE.P COVARIANCE.S CRITBINOM CSC CSCH CUBEKPIMEMBER CUBEMEMBER CUBEMEMBERPROPERTY CUBERANKEDMEMBER 
CUBESET CUBESETCOUNT CUBEVALUE CUMIPMT CUMPRINC DATE DATEDIF DATEVALUE DAVERAGE DAY DAYS DAYS360 DB DBCS DCOUNT DCOUNTA DDB DEC2BIN DEC2HEX DEC2OCT DECIMAL DEGREES DELTA DEVSQ DGET DISC DMAX DMIN DOLLAR DOLLARDE DOLLARFR DPRODUCT DSTDEV DSTDEVP DSUM DURATION DVAR DVARP EDATE EFFECT ENCODEURL EOMONTH ERF ERF.PRECISE ERFC ERFC.PRECISE ERROR.TYPE EUROCONVERT EVEN EXACT EXP EXPON.DIST EXPONDIST F.DIST F.DIST.RT F.INV F.INV.RT F.TEST FACT FACTDOUBLE FALSE FDIST FILTER FILTERXML FIND FINDB FINV FISHER FISHERINV FIXED FLOOR FLOOR.MATH FLOOR.PRECISE FORECAST FORECAST.ETS FORECAST.ETS.CONFINT FORECAST.ETS.SEASONALITY FORECAST.ETS.STAT FORECAST.LINEAR FORMULATEXT FREQUENCY FTEST FV FVSCHEDULE GAMMA GAMMA.DIST GAMMA.INV GAMMADIST GAMMAINV GAMMALN GAMMALN.PRECISE GAUSS GCD GEOMEAN GESTEP GETPIVOTDATA GROWTH HARMEAN HEX2BIN HEX2DEC HEX2OCT HLOOKUP HOUR HYPERLINK HYPGEOM.DIST HYPGEOMDIST IFERROR IFNA IFS IMABS IMAGINARY IMARGUMENT IMCONJUGATE IMCOS IMCOSH IMCOT IMCSC IMCSCH IMDIV IMEXP IMLN IMLOG10 IMLOG2 IMPOWER IMPRODUCT IMREAL IMSEC IMSECH IMSIN IMSINH IMSQRT IMSUB IMSUM IMTAN INFO INT INTERCEPT INTRATE IPMT IRR ISBLANK ISERR ISERROR ISEVEN ISFORMULA ISLOGICAL ISNA ISNONTEXT ISNUMBER ISO.CEILING ISODD ISOWEEKNUM ISPMT ISREF ISTEXT JIS KURT LARGE LCM LEFT LEFTB LEN LENB LET LINEST LN LOG LOG10 LOGEST LOGINV LOGNORM.DIST LOGNORM.INV LOGNORMDIST LOOKUP LOWER MATCH MAX MAXA MAXIFS MDETERM MDURATION MEDIAN MID MIDB MIN MINA MINIFS MINUTE MINVERSE MIRR MMULT MOD MODE MODE.MULT MODE.SNGL MONTH MROUND MULTINOMIAL MUNIT N NA NEGBINOM.DIST NEGBINOMDIST NETWORKDAYS NETWORKDAYS.INTL NOMINAL NORM.DIST NORM.INV NORM.S.DIST NORM.S.INV NORMDIST NORMINV NORMSDIST NORMSINV NOT NOW NPER NPV NUMBERVALUE OCT2BIN OCT2DEC OCT2HEX ODD ODDFPRICE ODDFYIELD ODDLPRICE ODDLYIELD OR PDURATION PEARSON PERCENTILE PERCENTILE.EXC PERCENTILE.INC PERCENTRANK PERCENTRANK.EXC PERCENTRANK.INC PERMUT PERMUTATIONA PHI PHONETIC PI PMT POISSON POISSON.DIST POWER PPMT PRICE PRICEDISC PRICEMAT PROB PRODUCT PROPER PV 
QUARTILE QUARTILE.EXC QUARTILE.INC QUOTIENT RADIANS RAND RANDARRAY RANDBETWEEN RANK RANK.AVG RANK.EQ RATE RECEIVED REGISTER.ID REPLACE REPLACEB REPT RIGHT RIGHTB ROMAN ROUND ROUNDDOWN ROUNDUP ROW ROWS RRI RSQ RTD SEARCH SEARCHB SEC SECH SECOND SEQUENCE SERIESSUM SHEET SHEETS SIGN SIN SINH SKEW SKEW.P SLN SLOPE SMALL SORT SORTBY SQRT SQRTPI STANDARDIZE STDEV STDEV.P STDEV.S STDEVA STDEVP STDEVPA STEYX SUBSTITUTE SUBTOTAL SUM SUMIF SUMIFS SUMPRODUCT SUMSQ SUMX2MY2 SUMX2PY2 SUMXMY2 SWITCH SYD T T.DIST T.DIST.2T T.DIST.RT T.INV T.INV.2T T.TEST TAN TANH TBILLEQ TBILLPRICE TBILLYIELD TDIST TEXT TEXTJOIN TIME TIMEVALUE TINV TODAY TRANSPOSE TREND TRIM TRIMMEAN TRUE TRUNC TTEST TYPE UNICHAR UNICODE UNIQUE UPPER VALUE VALUETOTEXT VAR VAR.P VAR.S VARA VARP VARPA VDB VLOOKUP WEBSERVICE WEEKDAY WEEKNUM WEIBULL WEIBULL.DIST WORKDAY WORKDAY.INTL XIRR XLOOKUP XMATCH XNPV XOR YEAR YEARFRAC YIELD YIELDDISC YIELDMAT Z.TEST ZTEST ================================================ FILE: app/XLParser.Web/Web.Debug.config ================================================ ================================================ FILE: app/XLParser.Web/Web.Release.config ================================================ ================================================ FILE: app/XLParser.Web/Web.config ================================================  ================================================ FILE: app/XLParser.Web/XLParser Web.csproj ================================================  Debug AnyCPU 2.0 {A24BD58E-5958-4990-8836-958C52A6AA02} {349c5851-65df-11da-9384-00065b846f21};{fae04ec0-301f-11d3-bf4b-00c04f79efbc} Library Properties XLParser.Web XLParser Web v4.7.2 true true full false bin\ DEBUG;TRACE prompt 4 pdbonly true bin\ TRACE prompt 4 ..\packages\Irony.1.2.0\lib\netstandard2.0\Irony.dll ..\packages\Newtonsoft.Json.12.0.3\lib\net45\Newtonsoft.Json.dll Web.config Web.config Default.aspx ASPXCodeBehind Default.aspx True True Resources.resx ResXFileCodeGenerator Resources.Designer.cs 10.0 
$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion) True True 39901 / http://localhost:39901/ False False False ================================================ FILE: app/XLParser.Web/XLParserVersions/v100/ExcelFormulaGrammar.cs ================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;

namespace XLParser.Web.XLParserVersions.v100
{
    /// <summary>
    /// Irony grammar for Excel formulas (grammar version "1.0.0").
    /// </summary>
    /// <remarks>
    /// The constructor builds the grammar in four steps: terminals, non-terminals, rules and
    /// operator precedence. Terminal and non-terminal names come from <see cref="GrammarNames"/>.
    /// </remarks>
    [Language("Excel Formulas", "1.0.0", "Grammar for Excel Formulas")]
    public class ExcelFormulaGrammar : Grammar
    {
        /// <summary>
        /// Builds the complete grammar. <c>false</c> is passed to the Irony <c>Grammar</c> base
        /// constructor (its case-sensitivity flag).
        /// </summary>
        public ExcelFormulaGrammar() : base(false)
        {
            #region 1-Terminals

            #region Symbols and operators
            var comma = ToTerm(",");
            var colon = ToTerm(":");
            var semicolon = ToTerm(";");
            var OpenParen = ToTerm("(");
            var CloseParen = ToTerm(")");
            var CloseSquareParen = ToTerm("]");
            var OpenSquareParen = ToTerm("[");
            var exclamationMark = ToTerm("!");
            var CloseCurlyParen = ToTerm("}");
            var OpenCurlyParen = ToTerm("{");

            var mulop = ToTerm("*");
            var plusop = ToTerm("+");
            var divop = ToTerm("/");
            var minop = ToTerm("-");
            var concatop = ToTerm("&");
            var expop = ToTerm("^");
            // Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
            // Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
            var intersectop = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);

            var percentop = ToTerm("%");

            var gtop = ToTerm(">");
            var eqop = ToTerm("=");
            var ltop = ToTerm("<");
            var neqop = ToTerm("<>");
            var gteop = ToTerm(">=");
            var lteop = ToTerm("<=");
            #endregion

            #region Literals
            var BoolToken = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE");
            BoolToken.Priority = TerminalPriority.Bool;

            var NumberToken = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None);
            NumberToken.DefaultIntTypes = new TypeCode[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt };

            var TextToken = new StringLiteral(GrammarNames.TokenText, "\"", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak);

            var ErrorToken = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A");
            // #REF! is a separate terminal because it is used as a reference item, not as a constant
            var RefErrorToken = ToTerm("#REF!", GrammarNames.TokenRefError);
            #endregion

            #region Functions
            // All function tokens include the opening parenthesis
            var UDFToken = new RegexBasedTerminal(GrammarNames.TokenUDF, "(_xll\\.)?[a-zA-Z0-9_.]+\\(");
            UDFToken.Priority = TerminalPriority.UDF;

            var ExcelRefFunctionToken = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(");
            ExcelRefFunctionToken.Priority = TerminalPriority.ExcelRefFunction;

            // NOTE(review): the names are joined into the pattern unescaped, so the '.' in e.g. "ERROR.TYPE"
            // is a regex wildcard rather than a literal dot - confirm whether that is intended.
            var ExcelFunction = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + String.Join("|", excelFunctionList) +")\\(");
            ExcelFunction.Priority = TerminalPriority.ExcelFunction;

            // Using this instead of Empty allows more accurate trees
            var EmptyArgumentToken = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);
            #endregion

            #region References and names
            var VRangeToken = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?[A-Z]{1,4}:[$]?[A-Z]{1,4}");
            var HRangeToken = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*");

            const string CellTokenRegex = "[$]?[A-Z]{1,4}[$]?[1-9][0-9]*";
            var CellToken = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex);
            CellToken.Priority = TerminalPriority.CellToken;

            const string NamedRangeRegex = @"[A-Za-z\\_][\w\.]*";
            var NamedRangeToken = new RegexBasedTerminal(GrammarNames.TokenNamedRange, NamedRangeRegex);
            NamedRangeToken.Priority = TerminalPriority.NamedRange;

            // To prevent e.g. "A1A1" being parsed as 2 celltokens
            var NamedRangeCombinationToken = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, "(TRUE|FALSE|" + CellTokenRegex + ")" + NamedRangeRegex);
            NamedRangeCombinationToken.Priority = TerminalPriority.NamedRangeCombination;

            // Characters accepted inside a single-quoted sheet name (spliced into character classes below)
            const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
            // Either an unquoted name or a quoted name ('' is an escaped quote), followed by '!'
            const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";

            var SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, sheetRegEx);
            SheetToken.Priority = TerminalPriority.SheetToken;

            const string firstSheetName = "[a-zA-Z0-9]+:";
            var MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, firstSheetName + sheetRegEx);
            MultipleSheetsToken.Priority = TerminalPriority.MultipleSheetsToken;

            var FileToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, "[0-9]+");
            FileToken.Priority = TerminalPriority.FileToken;

            var QuotedFileSheetToken = new RegexBasedTerminal(GrammarNames.TokenFileSheetQuoted, @"'\[\d+\]([" + singleQuotedContent + @"]|'')+'!");
            QuotedFileSheetToken.Priority = TerminalPriority.QuotedFileToken;

            var ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+");
            ReservedNameToken.Priority = TerminalPriority.ReservedName;

            var DDEToken = new RegexBasedTerminal(GrammarNames.TokenDDE, @"'([\[\]" + singleQuotedContent + @"]|'')+'");
            #endregion

            #region Punctuation
            // Punctuation terminals are left out of the parse tree
            MarkPunctuation(exclamationMark);
            MarkPunctuation(OpenParen, CloseParen);
            MarkPunctuation(OpenSquareParen, CloseSquareParen);
            MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
            #endregion

            #endregion

            #region 2-NonTerminals
            // Most nonterminals are first defined here, so they can be used anywhere in the rules
            // Otherwise you can only use nonterminals that have been defined previously
            var Argument = new NonTerminal(GrammarNames.Argument);
            var Arguments = new NonTerminal(GrammarNames.Arguments);
            var ArrayColumns = new NonTerminal(GrammarNames.ArrayColumns);
            var ArrayConstant = new NonTerminal(GrammarNames.ArrayConstant);
            var ArrayFormula = new NonTerminal(GrammarNames.ArrayFormula);
            var ArrayRows = new NonTerminal(GrammarNames.ArrayRows);
            var Bool = new NonTerminal(GrammarNames.Bool);
            var Cell = new NonTerminal(GrammarNames.Cell);
            var Constant = new NonTerminal(GrammarNames.Constant);
            var ConstantArray = new NonTerminal(GrammarNames.ConstantArray);
            var DynamicDataExchange = new NonTerminal(GrammarNames.DynamicDataExchange);
            var EmptyArgument = new NonTerminal(GrammarNames.EmptyArgument);
            var Error = new NonTerminal(GrammarNames.Error);
            var File = new NonTerminal(GrammarNames.File);
            var Formula = new NonTerminal(GrammarNames.Formula);
            var FormulaWithEq = new NonTerminal(GrammarNames.FormulaWithEq);
            var Function = new NonTerminal(GrammarNames.Function);
            var FunctionCall = new NonTerminal(GrammarNames.FunctionCall);
            var HRange = new NonTerminal(GrammarNames.HorizontalRange);
            var InfixOp = new NonTerminal(GrammarNames.TransientInfixOp);
            var MultipleSheets = new NonTerminal(GrammarNames.MultipleSheets);
            var NamedRange = new NonTerminal(GrammarNames.NamedRange);
            var Number = new NonTerminal(GrammarNames.Number);
            var PostfixOp = new NonTerminal(GrammarNames.TransientPostfixOp);
            var Prefix = new NonTerminal(GrammarNames.Prefix);
            var PrefixOp = new NonTerminal(GrammarNames.TransientPrefixOp);
            var QuotedFileSheet = new NonTerminal(GrammarNames.QuotedFileSheet);
            var Reference = new NonTerminal(GrammarNames.Reference);
            var ReferenceFunction = new NonTerminal(GrammarNames.ReferenceFunction);
            var ReferenceItem = new NonTerminal(GrammarNames.TransientReferenceItem);
            var RefError = new NonTerminal(GrammarNames.RefError);
            var ReservedName = new NonTerminal(GrammarNames.ReservedName);
            var Sheet = new NonTerminal(GrammarNames.Sheet);
            var Start = new NonTerminal(GrammarNames.TransientStart);
            var Text = new NonTerminal(GrammarNames.Text);
            var Union = new NonTerminal(GrammarNames.Union);
            var VRange = new NonTerminal(GrammarNames.VerticalRange);
            #endregion

            #region 3-Rules

            #region Base rules
            Root = Start;

            Start.Rule = FormulaWithEq
                         | Formula
                         | ArrayFormula
                         ;
            MarkTransient(Start);

            ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;

            FormulaWithEq.Rule = eqop + Formula;

            Formula.Rule =
                Reference
                | Constant
                | FunctionCall
                | ConstantArray
                | OpenParen + Formula + CloseParen
                | ReservedName
                ;
            //MarkTransient(Formula);

            ReservedName.Rule = ReservedNameToken;

            Constant.Rule = Number
                            | Text
                            | Bool
                            | Error
                            ;

            Text.Rule = TextToken;
            Number.Rule = NumberToken;
            Bool.Rule = BoolToken;
            Error.Rule = ErrorToken;
            RefError.Rule = RefErrorToken;
            #endregion

            #region Functions
            // Operators are modelled as function calls, next to named functions
            FunctionCall.Rule =
                Function + Arguments + CloseParen
                | PrefixOp + Formula
                | Formula + PostfixOp
                | Formula + InfixOp + Formula
                ;

            Function.Rule = ExcelFunction | UDFToken;

            Arguments.Rule = MakeStarRule(Arguments, comma, Argument);
            //Arguments.Rule = Argument | Argument + comma + Arguments;

            EmptyArgument.Rule = EmptyArgumentToken;
            Argument.Rule = Formula | EmptyArgument;
            //MarkTransient(Argument);

            PrefixOp.Rule =
                ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
                | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop;
            MarkTransient(PrefixOp);

            InfixOp.Rule =
                expop
                | mulop
                | divop
                | plusop
                | minop
                | concatop
                | gtop
                | eqop
                | ltop
                | neqop
                | gteop
                | lteop;
            MarkTransient(InfixOp);

            //PostfixOp.Rule = ImplyPrecedenceHere(Precedence.UnaryPostFix) + percentop;
            // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
            PostfixOp.Rule = PreferShiftHere() + percentop;
            MarkTransient(PostfixOp);
            #endregion

            #region References
            Reference.Rule =
                ReferenceItem
                | Reference + colon + Reference
                | Reference + intersectop + Reference
                | OpenParen + Union + CloseParen
                | OpenParen + Reference + PreferShiftHere() + CloseParen
                | Prefix + ReferenceItem
                | Prefix + UDFToken + Arguments + CloseParen
                | DynamicDataExchange
                ;

            Union.Rule = MakePlusRule(Union, comma, Reference);

            ReferenceItem.Rule =
                Cell
                | NamedRange
                | ReferenceFunction
                | VRange
                | HRange
                | RefError
                ;
            MarkTransient(ReferenceItem);

            VRange.Rule = VRangeToken;
            HRange.Rule = HRangeToken;

            ReferenceFunction.Rule = ExcelRefFunctionToken + Arguments + CloseParen;

            QuotedFileSheet.Rule = QuotedFileSheetToken;
            Sheet.Rule = SheetToken;
            MultipleSheets.Rule = MultipleSheetsToken;

            Cell.Rule = CellToken;

            File.Rule = OpenSquareParen + FileToken + CloseSquareParen;

            DynamicDataExchange.Rule = File + exclamationMark + DDEToken;

            NamedRange.Rule = NamedRangeToken | NamedRangeCombinationToken;

            Prefix.Rule =
                Sheet
                | File + Sheet
                | File + exclamationMark
                | QuotedFileSheet
                | MultipleSheets
                | File + MultipleSheets;
            #endregion

            #region Arrays
            ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;

            ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
            ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);

            ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
            #endregion

            #endregion

            #region 5-Operator Precedence
            // Some of these operators are neutral associative instead of left associative,
            // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
            // structure of the parse tree, we like consistency.
            RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
            RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
            RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
            RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
            RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
            RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop);
            RegisterOperators(Precedence.Reference, Associativity.Left, intersectop, colon);
            RegisterOperators(Precedence.Reference, Associativity.Left, comma);

            //RegisterOperators(Precedence.ParameterSeparator, comma);
            #endregion
        }

        #region Precedence and Priority constants
        // Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
        // Could also be an enum, but this way you don't need int casts
        private static class Precedence
        {
            // Don't use priority 0, Irony seems to view it as no priority set
            public const int Comparison = 1;
            public const int Concatenation = 2;
            public const int Addition = 3;
            public const int Multiplication = 4;
            public const int Exponentiation = 5;
            public const int UnaryPostFix = 6;
            public const int UnaryPreFix = 7;
            public const int Reference = 8;
        }

        // Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
        // E.g. "A1" is both a CellToken and NamedRange, pick celltoken because it has a higher priority
        // E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
        private static class TerminalPriority
        {
            // Irony Low value
            //public const int Low = -1000;

            public const int NamedRange = -800;
            public const int ReservedName = -700;

            // Irony Normal value, default value
            //public const int Normal = 0;
            public const int Bool = 0;

            public const int MultipleSheetsToken = 100;

            // Irony High value
            //public const int High = 1000;

            public const int CellToken = 1000;

            public const int NamedRangeCombination = 1100;

            public const int UDF = 1150;

            public const int ExcelFunction = 1200;
            public const int ExcelRefFunction = 1200;
            public const int FileToken = 1200;
            public const int SheetToken = 1200;
            public const int QuotedFileToken = 1200;
        }
        #endregion

        #region Excel function list
        // Names that the ExcelFunction terminal recognises as built-in functions.
        // INDEX, OFFSET and INDIRECT are not listed here; they have their own terminal (ExcelRefFunctionToken).
        // NOTE(review): "IS" and "ISB" do not look like complete Excel function names - confirm against the intended list.
        private static readonly IList<string> excelFunctionList = new List<string>
        {
            "ABS", "ACCRINT", "ACCRINTM", "ACOS", "ACOSH", "ADDRESS", "AMORDEGRC", "AMORLINC", "AND", "AREAS",
            "ASC", "ASIN", "ASINH", "ATAN", "ATAN2", "ATANH", "AVEDEV", "AVERAGE", "AVERAGEA", "AVERAGEIF",
            "AVERAGEIFS",
            "BAHTTEXT", "BESSELI", "BESSELJ", "BESSELK", "BESSELY", "BETADIST", "BETAINV", "BIN2DEC", "BIN2HEX",
            "BIN2OCT", "BINOMDIST",
            "CALL", "CEILING", "CELL", "CHAR", "CHIDIST", "CHIINV", "CHITEST", "CHOOSE", "CLEAN", "CODE",
            "COLUMN", "COLUMNS", "COMBIN", "COMPLEX", "CONCATENATE", "CONFIDENCE", "CONVERT", "CORREL", "COS",
            "COSH", "COUNT", "COUNTA", "COUNTBLANK", "COUNTIF", "COUNTIFS", "COUPDAYBS", "COUPDAYS",
            "COUPDAYSNC", "COUPNCD", "COUPNUM", "COUPPCD", "COVAR", "CRITBINOM", "CUBEKPIMEMBER", "CUBEMEMBER",
            "CUBEMEMBERPROPERTY", "CUBERANKEDMEMBER", "CUBESET", "CUBESETCOUNT", "CUBEVALUE", "CUMIPMT",
            "CUMPRINC",
            "DATE", "DATEVALUE", "DAVERAGE", "DAY", "DAYS360", "DB", "DCOUNT", "DCOUNTA", "DDB", "DEC2BIN",
            "DEC2HEX", "DEC2OCT", "DEGREES", "DELTA", "DEVSQ", "DGET", "DISC", "DMAX", "DMIN", "DOLLAR",
            "DOLLARDE", "DOLLARFR", "DPRODUCT", "DSTDEV", "DSTDEVP", "DSUM", "DURATION", "DVAR", "DVARP",
            // EDATE and EFFECT used to be fused into the single, non-existent entry "EDATEEFFECT"
            "EDATE", "EFFECT", "EOMONTH", "ERF", "ERFC", "ERROR.TYPE", "EUROCONVERT", "EVEN", "EXACT", "EXP",
            "EXPONDIST",
            "FACT", "FACTDOUBLE", "FALSE", "FDIST", "FIND", "FINV", "FISHER", "FISHERINV", "FIXED", "FLOOR",
            "FORECAST", "FREQUENCY", "FTEST", "FV", "FVSCHEDULE",
            "GAMMADIST", "GAMMAINV", "GAMMALN", "GCD", "GEOMEAN", "GESTEP", "GETPIVOTDATA", "GROWTH",
            "HARMEAN", "HEX2BIN", "HEX2DEC", "HEX2OCT", "HLOOKUP", "HOUR", "HYPERLINK", "HYPGEOMDIST",
            "IF", "ISBLANK", "IFERROR", "IMABS", "IMAGINARY", "IMARGUMENT", "IMCONJUGATE", "IMCOS", "IMDIV",
            "IMEXP", "IMLN", "IMLOG10", "IMLOG2", "IMPOWER", "IMPRODUCT", "IMREAL", "IMSIN", "IMSQRT", "IMSUB",
            "IMSUM", "INFO", "INT", "INTERCEPT", "INTRATE", "IPMT", "IRR", "IS", "ISB", "ISERROR", "ISNA",
            "ISNUMBER", "ISPMT",
            "JIS",
            "KURT",
            "LARGE", "LCM", "LEFT", "LEFTB", "LEN", "LENB", "LINEST", "LN", "LOG", "LOG10", "LOGEST", "LOGINV",
            "LOGNORMDIST", "LOOKUP", "LOWER",
            "MATCH", "MAX", "MAXA", "MDETERM", "MDURATION", "MEDIAN", "MID", "MIDB", "MIN", "MINA", "MINUTE",
            "MINVERSE", "MIRR", "MMULT", "MOD", "MODE", "MONTH", "MROUND", "MULTINOMIAL",
            "N", "NA", "NEGBINOMDIST", "NETWORKDAYS", "NOMINAL", "NORMDIST", "NORMINV", "NORMSDIST", "NORMSINV",
            "NOT", "NOW", "NPER", "NPV",
            "OCT2BIN", "OCT2DEC", "OCT2HEX", "ODD", "ODDFPRICE", "ODDFYIELD", "ODDLPRICE", "ODDLYIELD", "OR",
            "PEARSON", "PERCENTILE", "PERCENTRANK", "PERMUT", "PHONETIC", "PI", "PMT", "POISSON", "POWER",
            "PPMT", "PRICE", "PRICEDISC", "PRICEMAT", "PROB", "PRODUCT", "PROPER", "PV",
            "QUARTILE", "QUOTIENT",
            "RADIANS", "RAND", "RANDBETWEEN", "RANK", "RATE", "RECEIVED", "REGISTER.ID", "REPLACE", "REPLACEB",
            "REPT", "RIGHT", "RIGHTB", "ROMAN", "ROUND", "ROUNDDOWN", "ROUNDUP", "ROW", "ROWS", "RSQ", "RTD",
            "SEARCH", "SEARCHB", "SECOND", "SERIESSUM", "SIGN", "SIN", "SINH", "SKEW", "SLN", "SLOPE", "SMALL",
            "SQL.REQUEST", "SQRT", "SQRTPI", "STANDARDIZE", "STDEV", "STDEVA", "STDEVP", "STDEVPA", "STEYX",
            "SUBSTITUTE", "SUBTOTAL", "SUM", "SUMIF", "SUMIFS", "SUMPRODUCT", "SUMSQ", "SUMX2MY2", "SUMX2PY2",
            "SUMXMY2", "SYD",
            "T", "TAN", "TANH", "TBILLEQ", "TBILLPRICE", "TBILLYIELD", "TDIST", "TEXT", "TIME", "TIMEVALUE",
            "TINV", "TODAY", "TRANSPOSE", "TREND", "TRIM", "TRIMMEAN", "TRUE", "TRUNC", "TTEST", "TYPE",
            "UPPER",
            "VALUE", "VAR", "VARA", "VARP", "VARPA", "VDB", "VLOOKUP",
            "WEEKDAY", "WEEKNUM", "WEIBULL", "WORKDAY",
            "XIRR", "XNPV",
            "YEAR", "YEARFRAC", "YIELD", "YIELDDISC", "YIELDMAT",
            "ZTEST"
        };
        #endregion
    }

    #region Names
    /// <summary>
    /// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
    /// </summary>
    /// <remarks>
    /// Using these is strongly recommended, as these will change when breaking changes occur.
    /// It also allows you to see which code works on what grammar constructs.
    /// </remarks>
    // Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
    #region Non-Terminals
    // Names handed to the grammar's NonTerminal objects. ExcelFunction is the exception:
    // the grammar passes it as the name of a RegexBasedTerminal.
    public const string Argument = "Argument";
    public const string Arguments = "Arguments";
    public const string ArrayColumns = "ArrayColumns";
    public const string ArrayConstant = "ArrayConstant";
    public const string ArrayFormula = "ArrayFormula";
    public const string ArrayRows = "ArrayRows";
    public const string Bool = "Bool";
    public const string Cell = "Cell";
    public const string Constant = "Constant";
    public const string ConstantArray = "ConstantArray";
    public const string DynamicDataExchange = "DynamicDataExchange";
    public const string EmptyArgument = "EmptyArgument";
    public const string Error = "Error";
    public const string ExcelFunction = "ExcelFunction";
    public const string File = "File";
    public const string Formula = "Formula";
    public const string FormulaWithEq = "FormulaWithEq";
    public const string Function = "Function";
    public const string FunctionCall = "FunctionCall";
    public const string HorizontalRange = "HRange";
    public const string MultipleSheets = "MultipleSheets";
    public const string NamedRange = "NamedRange";
    public const string Number = "Number";
    public const string Prefix = "Prefix";
    public const string QuotedFileSheet = "QuotedFileSheet";
    public const string Range = "Range";
    public const string Reference = "Reference";
    public const string ReferenceFunction = "ReferenceFunction";
    public const string RefError = "RefError";
    public const string ReservedName = "ReservedName";
    public const string Sheet = "Sheet";
    public const string Text = "Text";
    public const string Union = "Union";
    public const string VerticalRange = "VRange";
    #endregion

    #region Transient Non-Terminals
    // Names of the non-terminals the grammar marks as transient
    public const string TransientStart = "Start";
    public const string TransientInfixOp = "InfixOp";
    public const string TransientPostfixOp = "PostfixOp";
    public const string TransientPrefixOp = "PrefixOp";
    public const string TransientReferenceItem = "ReferenceItem";
    #endregion

    #region Terminals
    // Names of the tokens produced by the lexer
    public const string TokenBool = "BoolToken";
    public const string TokenCell = "CellToken";
    public const string TokenDDE = "DDEToken";
    public const string TokenEmptyArgument = "EmptyArgumentToken";
    public const string TokenError = "ErrorToken";
    public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
    public const string TokenFileNameNumeric = "FileNameNumericToken";
    public const string TokenFileSheetQuoted = "FileSheetQuotedToken";
    public const string TokenHRange = "HRangeToken";
    public const string TokenIntersect = "INTERSECT";
    public const string TokenMultipleSheets = "MultipleSheetsToken";
    public const string TokenNamedRange = "NamedRangeToken";
    public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
    public const string TokenNumber = "NumberToken";
    public const string TokenRefError = "RefErrorToken";
    public const string TokenReservedName = "ReservedNameToken";
    public const string TokenSheet = "SheetNameToken";
    public const string TokenText = "TextToken";
    public const string TokenUDF = "UDFToken";
    public const string TokenVRange = "VRangeToken";
    #endregion
}
#endregion
}
================================================ FILE: app/XLParser.Web/XLParserVersions/v100/ExcelFormulaParser.cs ================================================ using System; using System.Collections; using System.Collections.Generic; using System.Data; using System.IO; using System.Linq; using Irony.Parsing; namespace XLParser.Web.XLParserVersions.v100 { /// /// Excel formula parser
/// Contains the parser and utilities that operate directly on the parse tree or make working with the parse tree easier. ///
public static class ExcelFormulaParser { /// /// Singleton parser instance /// private readonly static Parser p = new Parser(new ExcelFormulaGrammar()); /// /// Parse a formula, return the the tree's root node /// /// The formula to be parsed. /// /// If formula could not be parsed /// /// Parse tree root node public static ParseTreeNode Parse(string input) { return ParseToTree(input).Root; } /// /// Parse a formula, return the the tree /// /// The formula to be parsed. /// /// If formula could not be parsed /// /// Parse tree public static ParseTree ParseToTree(string input) { var tree = p.Parse(input); if (tree.HasErrors()) { throw new ArgumentException("Failed parsing input <<" + input + ">>"); } return tree; } /// /// All non-terminal nodes in depth-first pre-order /// // inspiration taken from https://irony.codeplex.com/discussions/213938 public static IEnumerable AllNodes(this ParseTreeNode root) { var stack = new Stack(); stack.Push(root); while (stack.Count > 0) { var node = stack.Pop(); yield return node; var children = node.ChildNodes; // Push children on in reverse order so that they will // be evaluated left -> right when popped. 
for (int i = children.Count - 1; i >= 0; i--) { stack.Push(children[i]); } } } /// /// All non-terminal nodes of a certain type in depth-first pre-order /// public static IEnumerable AllNodes(this ParseTreeNode root, string type) { return AllNodes(root.AllNodes(), type); } internal static IEnumerable AllNodes(IEnumerable allNodes, string type) { return allNodes.Where(node => node.Is(type)); } /// /// Whether this tree contains any nodes of a type /// public static bool Contains(this ParseTreeNode root, string type) { return root.AllNodes(type).Any(); } /// /// The node type/name /// public static string Type(this ParseTreeNode node) { return node.Term.Name; } /// /// Check if a node is of a particular type /// public static bool Is(this ParseTreeNode pt, string type) { return pt.Type() == type; } /// /// Checks whether this node is a function /// public static Boolean IsFunction(this ParseTreeNode input) { return IsNamedFunction(input) || IsBinaryOperation(input) || IsUnaryOperation(input) ; } /// /// Whether or not this node represents parentheses "(_)" /// public static bool IsParentheses(this ParseTreeNode input) { switch (input.Type()) { case GrammarNames.Formula: return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Formula); case GrammarNames.Reference: return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Reference); default: return false; } } public static bool IsBinaryOperation(this ParseTreeNode input) { return (input.Is(GrammarNames.FunctionCall) || input.Is(GrammarNames.Reference)) && input.ChildNodes.Count() == 3 && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator); } public static bool IsUnaryOperation(this ParseTreeNode input) { return IsUnaryPrefixOperation(input) || IsUnaryPostfixOperation(input); } public static bool IsUnaryPrefixOperation(this ParseTreeNode input) { return input.Is(GrammarNames.FunctionCall) && input.ChildNodes.Count() == 2 && 
input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator); } public static bool IsUnaryPostfixOperation(this ParseTreeNode input) { return input.Is(GrammarNames.FunctionCall) && input.ChildNodes.Count() == 2 && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator); } private static string RemoveFinalSymbol(string input) { input = input.Substring(0, input.Length - 1); return input; } /// /// Get the function or operator name of this function call /// public static string GetFunction(this ParseTreeNode input) { if (IsIntersection(input)) { return GrammarNames.TokenIntersect; } if (IsBinaryOperation(input) || IsUnaryPostfixOperation(input)) { return input.ChildNodes[1].Print(); } if (IsUnaryPrefixOperation(input)) { return input.ChildNodes[0].Print(); } if (input.Is(GrammarNames.ReferenceFunction) || input.Is(GrammarNames.FunctionCall)) { return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpper(); } if (input.Is(GrammarNames.Reference)) { if (input.ChildNodes.Count == 3 && input.ChildNodes[2].Is(GrammarNames.Arguments)) { return RemoveFinalSymbol(input.ChildNodes[1].Print()).ToUpper(); } } throw new ArgumentException("Not a function call", "input"); } /// /// Check if this node is a specific function /// public static bool MatchFunction(this ParseTreeNode input, String functionName) { return IsFunction(input) && GetFunction(input) == functionName; } /// /// Checks whether this node is a built-in excel function /// public static bool IsBuiltinFunction(this ParseTreeNode node) { return node.IsFunction() && (node.Is(GrammarNames.ExcelFunction) || node.Is(GrammarNames.ReferenceFunction)); } /// /// Whether or not this node represents an intersection /// public static bool IsIntersection(this ParseTreeNode input) { try { return IsBinaryOperation(input) && input.ChildNodes[1].Token.Terminal.Name == GrammarNames.TokenIntersect; } catch (Exception) { return false; } } /// /// Check if a reference node is a union /// public static bool IsUnion(this ParseTreeNode 
input) { return input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Union); } /// /// Checks whether this node is a function call with name, and not just a unary or binary operation /// public static bool IsNamedFunction(this ParseTreeNode input) { return (input.Is(GrammarNames.FunctionCall) && input.ChildNodes.Exists(pt => pt.Term.Name == GrammarNames.Function)) || input.Is(GrammarNames.ReferenceFunction) // User defined function with prefix || (input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 3 && input.ChildNodes[2].Is(GrammarNames.Arguments)); } /// /// True if this node presents a number constant with a sign /// public static bool IsNumberWithSign(this ParseTreeNode input) { return IsUnaryPrefixOperation(input) && input.ChildNodes[1].ChildNodes[0].Is(GrammarNames.Constant) && input.ChildNodes[1].ChildNodes[0].ChildNodes[0].Is(GrammarNames.Number); } /// /// Go to the first non-formula child node /// public static ParseTreeNode SkipFormula(this ParseTreeNode input) { while (input.Is(GrammarNames.Formula)) { input = input.ChildNodes.First(); } return input; } /// /// Go to the first "relevant" child node, i.e. 
skips wrapper nodes /// /// /// Skips: /// * FormulaWithEq and ArrayFormula nodes /// * Formula nodes /// * Parentheses /// * Reference nodes which are just wrappers /// public static ParseTreeNode SkipToRelevant(this ParseTreeNode input) { switch (input.Type()) { case GrammarNames.FormulaWithEq: case GrammarNames.ArrayFormula: return SkipToRelevant(input.ChildNodes[1]); case GrammarNames.Formula: case GrammarNames.Reference: // This also catches parentheses if (input.ChildNodes.Count == 1) { return SkipToRelevant(input.ChildNodes[0]); } goto default; default: return input; } } /// /// Pretty-print a parse tree to a string /// public static string Print(this ParseTreeNode input) { // For terminals, just print the token text if (input.Term is Terminal) { return input.Token.Text; } // (Lazy) enumerable for printed childs var childs = input.ChildNodes.Select(Print); // Concrete list when needed List childsL; // Switch on nonterminals switch (input.Term.Name) { case GrammarNames.Formula: // Check if these are brackets, otherwise print first child return IsParentheses(input) ? 
String.Format("({0})", childs.First()) : childs.First(); case GrammarNames.FunctionCall: if (IsNamedFunction(input)) { return String.Join("", childs) + ")"; } childsL = childs.ToList(); if (IsBinaryOperation(input)) { return String.Format("{0} {1} {2}", childsL[0], childsL[1], childsL[2]); } // Unary function return String.Join("", childsL); case GrammarNames.Reference: if (IsParentheses(input) || IsUnion(input)) { return String.Format("({0})", childs.First()); } childsL = childs.ToList(); if (IsIntersection(input)) { return String.Format("{0} {1}", childsL[0], childsL[2]); } if (IsBinaryOperation(input)) { return String.Format("{0}{1}{2}", childsL[0], childsL[1], childsL[2]); } return String.Join("", childsL); case GrammarNames.ReferenceFunction: return String.Join("", childs) + ")"; case GrammarNames.File: return String.Format("[{0}]", childs.First()); case GrammarNames.Prefix: var ret = String.Join("", childs); // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File)) { ret += "!"; } return ret; case GrammarNames.ArrayFormula: return "{=" + childs.ElementAt(1) + "}"; // Terms for which to print all child nodes concatenated case GrammarNames.ArrayConstant: case GrammarNames.DynamicDataExchange: case GrammarNames.FormulaWithEq: return String.Join("", childs); // Terms for which we print the childs comma-separated case GrammarNames.Arguments: case GrammarNames.ArrayRows: case GrammarNames.Union: return String.Join(",", childs); case GrammarNames.ArrayColumns: return String.Join(";", childs); case GrammarNames.ConstantArray: return String.Format("{{{0}}}", childs.First()); default: // If it is not defined above and the number of childs is exactly one, we want to just print the first child if (input.ChildNodes.Count == 1) { return childs.First(); } throw new ArgumentException(String.Format("Could not print node of type '{0}'.\nThis 
/// <summary>
/// Contains the XLParser grammar
/// </summary>
[Language("Excel Formulas", "1.1.3", "Grammar for Excel Formulas")]
public class ExcelFormulaGrammar : Grammar
{
    /// <summary>
    /// Builds the terminals, non-terminals, rules and operator precedences of the Excel formula grammar.
    /// </summary>
    public ExcelFormulaGrammar() : base(false)
    {
        #region 1-Terminals

        #region Symbols and operators
        var comma = ToTerm(",");
        var colon = ToTerm(":");
        var semicolon = ToTerm(";");
        var OpenParen = ToTerm("(");
        var CloseParen = ToTerm(")");
        var CloseSquareParen = ToTerm("]");
        var OpenSquareParen = ToTerm("[");
        var exclamationMark = ToTerm("!");
        var CloseCurlyParen = ToTerm("}");
        var OpenCurlyParen = ToTerm("{");

        var mulop = ToTerm("*");
        var plusop = ToTerm("+");
        var divop = ToTerm("/");
        var minop = ToTerm("-");
        var concatop = ToTerm("&");
        var expop = ToTerm("^");
        // Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
        // Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and it's default priority is low
        var intersectop = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);

        var percentop = ToTerm("%");

        var gtop = ToTerm(">");
        var eqop = ToTerm("=");
        var ltop = ToTerm("<");
        var neqop = ToTerm("<>");
        var gteop = ToTerm(">=");
        var lteop = ToTerm("<=");
        #endregion

        #region Literals
        var BoolToken = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE");
        BoolToken.Priority = TerminalPriority.Bool;

        var NumberToken = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None);
        NumberToken.DefaultIntTypes = new TypeCode[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt };

        var TextToken = new StringLiteral(GrammarNames.TokenText, "\"", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak);

        var ErrorToken = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A");
        var RefErrorToken = ToTerm("#REF!", GrammarNames.TokenRefError);
        #endregion

        #region Functions
        var UDFToken = new RegexBasedTerminal(GrammarNames.TokenUDF, @"(_xll\.)?[\w\\.]+\(");
        UDFToken.Priority = TerminalPriority.UDF;

        var ExcelRefFunctionToken = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(");
        ExcelRefFunctionToken.Priority = TerminalPriority.ExcelRefFunction;

        var ExcelConditionalRefFunctionToken = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(");
        ExcelConditionalRefFunctionToken.Priority = TerminalPriority.ExcelRefFunction;

        // Function names such as "ERROR.TYPE" contain '.', which is a regex metacharacter.
        // Escape every name so the '.' only matches a literal dot and not an arbitrary character.
        var escapedFunctionNames = new List<string>();
        foreach (string functionName in excelFunctionList)
        {
            escapedFunctionNames.Add(System.Text.RegularExpressions.Regex.Escape(functionName));
        }
        var ExcelFunction = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + String.Join("|", escapedFunctionNames) + ")\\(");
        ExcelFunction.Priority = TerminalPriority.ExcelFunction;

        // Using this instead of Empty allows a more accurate trees
        var EmptyArgumentToken = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);
        #endregion

        #region References and names
        var VRangeToken = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?[A-Z]{1,4}:[$]?[A-Z]{1,4}");
        var HRangeToken = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*");

        const string CellTokenRegex = "[$]?[A-Z]{1,4}[$]?[1-9][0-9]*";
        var CellToken = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex);
        CellToken.Priority = TerminalPriority.CellToken;

        const string NamedRangeRegex = @"[A-Za-z\\_][\w\.]*";
        var NamedRangeToken = new RegexBasedTerminal(GrammarNames.TokenNamedRange, NamedRangeRegex);
        NamedRangeToken.Priority = TerminalPriority.NamedRange;

        // To prevent e.g. "A1A1" being parsed as 2 celltokens
        var NamedRangeCombinationToken = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, "(TRUE|FALSE|" + CellTokenRegex + ")" + NamedRangeRegex);
        NamedRangeCombinationToken.Priority = TerminalPriority.NamedRangeCombination;

        const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
        const string notSheetNameChars = @"'*\[\]\\:/?";
        const string normalSheetName = "[^" + notSheetNameChars + mustBeQuotedInSheetName + "]+";
        const string quotedSheetName = "([^" + notSheetNameChars + "]|'')+";
        const string sheetRegEx = "((" + normalSheetName + ")|('" + quotedSheetName + "'))!";

        var SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, sheetRegEx);
        SheetToken.Priority = TerminalPriority.SheetToken;

        var multiSheetRegex = String.Format("(({0}:{0})|('{1}:{1}'))!", normalSheetName, quotedSheetName);
        var MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex);
        MultipleSheetsToken.Priority = TerminalPriority.MultipleSheetsToken;

        var FileToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, "[0-9]+");
        FileToken.Priority = TerminalPriority.FileToken;

        const string quotedFileSheetRegex = @"'\[\d+\]" + quotedSheetName + "'!";
        var QuotedFileSheetToken = new RegexBasedTerminal(GrammarNames.TokenFileSheetQuoted, quotedFileSheetRegex);
        QuotedFileSheetToken.Priority = TerminalPriority.QuotedFileToken;

        var ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+");
        ReservedNameToken.Priority = TerminalPriority.ReservedName;

        var DDEToken = new RegexBasedTerminal(GrammarNames.TokenDDE, @"'([^']|'')+'");
        #endregion

        #region Punctuation
        MarkPunctuation(exclamationMark);
        MarkPunctuation(OpenParen, CloseParen);
        MarkPunctuation(OpenSquareParen, CloseSquareParen);
        MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
        #endregion

        #endregion

        #region 2-NonTerminals
        // Most nonterminals are first defined here, so they can be used anywhere in the rules
        // Otherwise you can only use nonterminals that have been defined previously
        var Argument = new NonTerminal(GrammarNames.Argument);
        var Arguments = new NonTerminal(GrammarNames.Arguments);
        var ArrayColumns = new NonTerminal(GrammarNames.ArrayColumns);
        var ArrayConstant = new NonTerminal(GrammarNames.ArrayConstant);
        var ArrayFormula = new NonTerminal(GrammarNames.ArrayFormula);
        var ArrayRows = new NonTerminal(GrammarNames.ArrayRows);
        var Bool = new NonTerminal(GrammarNames.Bool);
        var Cell = new NonTerminal(GrammarNames.Cell);
        var Constant = new NonTerminal(GrammarNames.Constant);
        var ConstantArray = new NonTerminal(GrammarNames.ConstantArray);
        var DynamicDataExchange = new NonTerminal(GrammarNames.DynamicDataExchange);
        var EmptyArgument = new NonTerminal(GrammarNames.EmptyArgument);
        var Error = new NonTerminal(GrammarNames.Error);
        var File = new NonTerminal(GrammarNames.File);
        var Formula = new NonTerminal(GrammarNames.Formula);
        var FormulaWithEq = new NonTerminal(GrammarNames.FormulaWithEq);
        var FunctionCall = new NonTerminal(GrammarNames.FunctionCall);
        var FunctionName = new NonTerminal(GrammarNames.FunctionName);
        var HRange = new NonTerminal(GrammarNames.HorizontalRange);
        var InfixOp = new NonTerminal(GrammarNames.TransientInfixOp);
        var MultipleSheets = new NonTerminal(GrammarNames.MultipleSheets);
        var NamedRange = new NonTerminal(GrammarNames.NamedRange);
        var Number = new NonTerminal(GrammarNames.Number);
        var PostfixOp = new NonTerminal(GrammarNames.TransientPostfixOp);
        var Prefix = new NonTerminal(GrammarNames.Prefix);
        var PrefixOp = new NonTerminal(GrammarNames.TransientPrefixOp);
        var QuotedFileSheet = new NonTerminal(GrammarNames.QuotedFileSheet);
        var Reference = new NonTerminal(GrammarNames.Reference);
        var ReferenceItem = new NonTerminal(GrammarNames.TransientReferenceItem);
        var ReferenceFunctionCall = new NonTerminal(GrammarNames.ReferenceFunctionCall);
        var RefError = new NonTerminal(GrammarNames.RefError);
        var RefFunctionName = new NonTerminal(GrammarNames.RefFunctionName);
        var ReservedName = new NonTerminal(GrammarNames.ReservedName);
        var Sheet = new NonTerminal(GrammarNames.Sheet);
        var Start = new NonTerminal(GrammarNames.TransientStart);
        var Text = new NonTerminal(GrammarNames.Text);
        var UDFName = new NonTerminal(GrammarNames.UDFName);
        var UDFunctionCall = new NonTerminal(GrammarNames.UDFunctionCall);
        var Union = new NonTerminal(GrammarNames.Union);
        var VRange = new NonTerminal(GrammarNames.VerticalRange);
        #endregion

        #region 3-Rules

        #region Base rules
        Root = Start;

        Start.Rule = FormulaWithEq
                     | Formula
                     | ArrayFormula
            ;
        MarkTransient(Start);

        ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;

        FormulaWithEq.Rule = eqop + Formula;

        Formula.Rule = Reference
                       | Constant
                       | FunctionCall
                       | ConstantArray
                       | OpenParen + Formula + CloseParen
                       | ReservedName
            ;

        ReservedName.Rule = ReservedNameToken;

        Constant.Rule = Number
                        | Text
                        | Bool
                        | Error
            ;

        Text.Rule = TextToken;
        Number.Rule = NumberToken;
        Bool.Rule = BoolToken;
        Error.Rule = ErrorToken;
        RefError.Rule = RefErrorToken;
        #endregion

        #region Functions
        FunctionCall.Rule = FunctionName + Arguments + CloseParen
                            | PrefixOp + Formula
                            | Formula + PostfixOp
                            | Formula + InfixOp + Formula
            ;

        FunctionName.Rule = ExcelFunction;

        Arguments.Rule = MakeStarRule(Arguments, comma, Argument);

        EmptyArgument.Rule = EmptyArgumentToken;
        Argument.Rule = Formula | EmptyArgument;

        PrefixOp.Rule = ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
                        | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop;
        MarkTransient(PrefixOp);

        InfixOp.Rule = expop
                       | mulop
                       | divop
                       | plusop
                       | minop
                       | concatop
                       | gtop
                       | eqop
                       | ltop
                       | neqop
                       | gteop
                       | lteop;
        MarkTransient(InfixOp);

        // ImplyPrecedenceHere(Precedence.UnaryPostFix) doesn't seem to work for this rule,
        // but postfix has such a high priority shift will nearly always be the correct action
        PostfixOp.Rule = PreferShiftHere() + percentop;
        MarkTransient(PostfixOp);
        #endregion

        #region References
        Reference.Rule = ReferenceItem
                         | ReferenceFunctionCall
                         | OpenParen + Reference + PreferShiftHere() + CloseParen
                         | Prefix + ReferenceItem
                         | DynamicDataExchange
            ;

        ReferenceFunctionCall.Rule = Reference + colon + Reference
                                     | Reference + intersectop + Reference
                                     | OpenParen + Union + CloseParen
                                     | RefFunctionName + Arguments + CloseParen
            ;

        RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;

        Union.Rule = MakePlusRule(Union, comma, Reference);

        ReferenceItem.Rule = Cell
                             | NamedRange
                             | VRange
                             | HRange
                             | RefError
                             | UDFunctionCall
            ;
        MarkTransient(ReferenceItem);

        UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
        UDFName.Rule = UDFToken;

        VRange.Rule = VRangeToken;
        HRange.Rule = HRangeToken;

        QuotedFileSheet.Rule = QuotedFileSheetToken;
        Sheet.Rule = SheetToken;
        MultipleSheets.Rule = MultipleSheetsToken;

        Cell.Rule = CellToken;

        File.Rule = OpenSquareParen + FileToken + CloseSquareParen;

        DynamicDataExchange.Rule = File + exclamationMark + DDEToken;

        NamedRange.Rule = NamedRangeToken | NamedRangeCombinationToken;

        Prefix.Rule = Sheet
                      | File + Sheet
                      | File + exclamationMark
                      | QuotedFileSheet
                      | MultipleSheets
                      | File + MultipleSheets;
        #endregion

        #region Arrays
        ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;

        ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
        ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);

        ArrayConstant.Rule = Constant
                             | PrefixOp + Number
                             | RefError;
        #endregion

        #endregion

        #region 5-Operator Precedence
        // Some of these operators are neutral associative instead of left associative,
        // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
        // structure of the parse tree, we like consistency.
        RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
        RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
        RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
        RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
        RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
        RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop);
        RegisterOperators(Precedence.Union, Associativity.Left, comma);
        RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
        RegisterOperators(Precedence.Range, Associativity.Left, colon);
        #endregion
    }

    #region Precedence and Priority constants
    // Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
    // Could also be an enum, but this way you don't need int casts
    private static class Precedence
    {
        // Don't use priority 0, Irony seems to view it as no priority set
        public const int Comparison = 1;
        public const int Concatenation = 2;
        public const int Addition = 3;
        public const int Multiplication = 4;
        public const int Exponentiation = 5;
        public const int UnaryPostFix = 6;
        public const int UnaryPreFix = 7;
        //public const int Reference = 8;
        public const int Union = 9;
        public const int Intersection = 10;
        public const int Range = 11;
    }

    // Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
    // E.g. "A1" is both a CellToken and NamedRange, pick celltoken because it has a higher priority
    // E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
    private static class TerminalPriority
    {
        // Irony Low value
        //public const int Low = -1000;

        public const int NamedRange = -800;
        public const int ReservedName = -700;

        // Irony Normal value, default value
        //public const int Normal = 0;
        public const int Bool = 0;

        public const int MultipleSheetsToken = 100;

        // Irony High value
        //public const int High = 1000;

        public const int CellToken = 1000;

        public const int NamedRangeCombination = 1100;

        public const int UDF = 1150;

        public const int ExcelFunction = 1200;
        public const int ExcelRefFunction = 1200;
        public const int FileToken = 1200;
        public const int SheetToken = 1200;
        public const int QuotedFileToken = 1200;
    }
    #endregion

    #region Excel function list
    /// <summary>
    /// Names that are lexed as built-in Excel functions (the ExcelFunction terminal).
    /// IF, CHOOSE, INDEX, OFFSET and INDIRECT are deliberately absent: they are matched by the
    /// dedicated reference-function terminals defined in the constructor.
    /// </summary>
    private static readonly IList<string> excelFunctionList = new List<string>
    {
        "ABS", "ACCRINT", "ACCRINTM", "ACOS", "ACOSH", "ADDRESS", "AMORDEGRC", "AMORLINC", "AND", "AREAS",
        "ASC", "ASIN", "ASINH", "ATAN", "ATAN2", "ATANH", "AVEDEV", "AVERAGE", "AVERAGEA", "AVERAGEIF",
        "AVERAGEIFS", "BAHTTEXT", "BESSELI", "BESSELJ", "BESSELK", "BESSELY", "BETADIST", "BETAINV",
        "BIN2DEC", "BIN2HEX", "BIN2OCT", "BINOMDIST", "CALL", "CEILING", "CELL", "CHAR", "CHIDIST",
        "CHIINV", "CHITEST",
        //"CHOOSE",
        "CLEAN", "CODE", "COLUMN", "COLUMNS", "COMBIN", "COMPLEX", "CONCATENATE", "CONFIDENCE", "CONVERT",
        "CORREL", "COS", "COSH", "COUNT", "COUNTA", "COUNTBLANK", "COUNTIF", "COUNTIFS", "COUPDAYBS",
        "COUPDAYS", "COUPDAYSNC", "COUPNCD", "COUPNUM", "COUPPCD", "COVAR", "CRITBINOM", "CUBEKPIMEMBER",
        "CUBEMEMBER", "CUBEMEMBERPROPERTY", "CUBERANKEDMEMBER", "CUBESET", "CUBESETCOUNT", "CUBEVALUE",
        "CUMIPMT", "CUMPRINC", "DATE", "DATEVALUE", "DAVERAGE", "DAY", "DAYS360", "DB", "DCOUNT", "DCOUNTA",
        "DDB", "DEC2BIN", "DEC2HEX", "DEC2OCT", "DEGREES", "DELTA", "DEVSQ", "DGET", "DISC", "DMAX", "DMIN",
        "DOLLAR", "DOLLARDE", "DOLLARFR", "DPRODUCT", "DSTDEV", "DSTDEVP", "DSUM", "DURATION", "DVAR", "DVARP",
        // Previously a single bogus entry "EDATEEFFECT"; EDATE and EFFECT are two separate functions
        "EDATE", "EFFECT",
        "EOMONTH", "ERF", "ERFC", "ERROR.TYPE", "EUROCONVERT", "EVEN", "EXACT", "EXP", "EXPONDIST", "FACT",
        "FACTDOUBLE", "FALSE", "FDIST", "FIND", "FINV", "FISHER", "FISHERINV", "FIXED", "FLOOR", "FORECAST",
        "FREQUENCY", "FTEST", "FV", "FVSCHEDULE", "GAMMADIST", "GAMMAINV", "GAMMALN", "GCD", "GEOMEAN",
        "GESTEP", "GETPIVOTDATA", "GROWTH", "HARMEAN", "HEX2BIN", "HEX2DEC", "HEX2OCT", "HLOOKUP", "HOUR",
        "HYPERLINK", "HYPGEOMDIST",
        //"IF",
        "ISBLANK", "IFERROR", "IMABS", "IMAGINARY", "IMARGUMENT", "IMCONJUGATE", "IMCOS", "IMDIV", "IMEXP",
        "IMLN", "IMLOG10", "IMLOG2", "IMPOWER", "IMPRODUCT", "IMREAL", "IMSIN", "IMSQRT", "IMSUB", "IMSUM",
        "INFO", "INT", "INTERCEPT", "INTRATE", "IPMT", "IRR", "IS", "ISB", "ISERROR", "ISNA", "ISNUMBER",
        "ISPMT", "JIS", "KURT", "LARGE", "LCM", "LEFT", "LEFTB", "LEN", "LENB", "LINEST", "LN", "LOG",
        "LOG10", "LOGEST", "LOGINV", "LOGNORMDIST", "LOOKUP", "LOWER", "MATCH", "MAX", "MAXA", "MDETERM",
        "MDURATION", "MEDIAN", "MID", "MIDB", "MIN", "MINA", "MINUTE", "MINVERSE", "MIRR", "MMULT", "MOD",
        "MODE", "MONTH", "MROUND", "MULTINOMIAL", "N", "NA", "NEGBINOMDIST", "NETWORKDAYS", "NOMINAL",
        "NORMDIST", "NORMINV", "NORMSDIST", "NORMSINV", "NOT", "NOW", "NPER", "NPV", "OCT2BIN", "OCT2DEC",
        "OCT2HEX", "ODD", "ODDFPRICE", "ODDFYIELD", "ODDLPRICE", "ODDLYIELD", "OR", "PEARSON", "PERCENTILE",
        "PERCENTRANK", "PERMUT", "PHONETIC", "PI", "PMT", "POISSON", "POWER", "PPMT", "PRICE", "PRICEDISC",
        "PRICEMAT", "PROB", "PRODUCT", "PROPER", "PV", "QUARTILE", "QUOTIENT", "RADIANS", "RAND",
        "RANDBETWEEN", "RANK", "RATE", "RECEIVED", "REGISTER.ID", "REPLACE", "REPLACEB", "REPT", "RIGHT",
        "RIGHTB", "ROMAN", "ROUND", "ROUNDDOWN", "ROUNDUP", "ROW", "ROWS", "RSQ", "RTD", "SEARCH", "SEARCHB",
        "SECOND", "SERIESSUM", "SIGN", "SIN", "SINH", "SKEW", "SLN", "SLOPE", "SMALL", "SQL.REQUEST", "SQRT",
        "SQRTPI", "STANDARDIZE", "STDEV", "STDEVA", "STDEVP", "STDEVPA", "STEYX", "SUBSTITUTE", "SUBTOTAL",
        "SUM", "SUMIF", "SUMIFS", "SUMPRODUCT", "SUMSQ", "SUMX2MY2", "SUMX2PY2", "SUMXMY2", "SYD",
        "T", "TAN", "TANH", "TBILLEQ", "TBILLPRICE", "TBILLYIELD", "TDIST", "TEXT", "TIME", "TIMEVALUE",
        "TINV", "TODAY", "TRANSPOSE", "TREND", "TRIM", "TRIMMEAN", "TRUE", "TRUNC", "TTEST", "TYPE", "UPPER",
        "VALUE", "VAR", "VARA", "VARP", "VARPA", "VDB", "VLOOKUP", "WEEKDAY", "WEEKNUM", "WEIBULL", "WORKDAY",
        "XIRR", "XNPV", "YEAR", "YEARFRAC", "YIELD", "YIELDDISC", "YIELDMAT", "ZTEST"
    };
    #endregion
}
#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
    #region Non-Terminals
    public const string Argument = "Argument";
    public const string Arguments = "Arguments";
    public const string ArrayColumns = "ArrayColumns";
    public const string ArrayConstant = "ArrayConstant";
    public const string ArrayFormula = "ArrayFormula";
    public const string ArrayRows = "ArrayRows";
    public const string Bool = "Bool";
    public const string Cell = "Cell";
    public const string Constant = "Constant";
    public const string ConstantArray = "ConstantArray";
    public const string DynamicDataExchange = "DynamicDataExchange";
    public const string EmptyArgument = "EmptyArgument";
    public const string Error = "Error";
    // NOTE(review): listed with the non-terminals, but the grammar passes this name to a RegexBasedTerminal
    public const string ExcelFunction = "ExcelFunction";
    public const string File = "File";
    public const string Formula = "Formula";
    public const string FormulaWithEq = "FormulaWithEq";
    public const string FunctionCall = "FunctionCall";
    public const string FunctionName = "FunctionName";
    // The constant is called HorizontalRange, the node name in the tree is "HRange"
    public const string HorizontalRange = "HRange";
    public const string MultipleSheets = "MultipleSheets";
    public const string NamedRange = "NamedRange";
    public const string Number = "Number";
    public const string Prefix = "Prefix";
    public const string QuotedFileSheet = "QuotedFileSheet";
    public const string Range = "Range";
    public const string Reference = "Reference";
    //public const string ReferenceFunction = "ReferenceFunction";
    public const string ReferenceFunctionCall = "ReferenceFunctionCall";
    public const string RefError = "RefError";
    public const string RefFunctionName = "RefFunctionName";
    public const string ReservedName = "ReservedName";
    public const string Sheet = "Sheet";
    public const string Text = "Text";
    public const string UDFName = "UDFName";
    public const string UDFunctionCall = "UDFunctionCall";
    public const string Union = "Union";
    // The constant is called VerticalRange, the node name in the tree is "VRange"
    public const string VerticalRange = "VRange";
    #endregion

    #region Transient Non-Terminals
    // Names of the non-terminals the grammar passes to MarkTransient
    public const string TransientStart = "Start";
    public const string TransientInfixOp = "InfixOp";
    public const string TransientPostfixOp = "PostfixOp";
    public const string TransientPrefixOp = "PrefixOp";
    public const string TransientReferenceItem = "ReferenceItem";
    #endregion

    #region Terminals
    public const string TokenBool = "BoolToken";
    public const string TokenCell = "CellToken";
    public const string TokenDDE = "DDEToken";
    public const string TokenEmptyArgument = "EmptyArgumentToken";
    public const string TokenError = "ErrorToken";
    public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
    public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
    public const string TokenFileNameNumeric = "FileNameNumericToken";
    public const string TokenFileSheetQuoted = "FileSheetQuotedToken";
    public const string TokenHRange = "HRangeToken";
    public const string TokenIntersect = "INTERSECT";
    public const string TokenMultipleSheets = "MultipleSheetsToken";
    public const string TokenNamedRange = "NamedRangeToken";
    public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
    public const string TokenNumber = "NumberToken";
    public const string TokenRefError = "RefErrorToken";
    public const string TokenReservedName = "ReservedNameToken";
    // The constant is called TokenSheet, the terminal name is "SheetNameToken"
    public const string TokenSheet = "SheetNameToken";
    public const string TokenText = "TextToken";
    public const string TokenUDF = "UDFToken";
    // The union operator is named after its literal text, a comma
    public const string TokenUnionOperator = ",";
    public const string TokenVRange = "VRangeToken";
    #endregion
}
#endregion
/// Contains the parser and utilities that operate directly on the parse tree or make working with the parse tree easier. ///
/// <summary>
/// Parser instance backing <see cref="p"/>; marked [ThreadStatic], so each thread has its own field value
/// </summary>
[ThreadStatic]
private static Parser _p;

/// <summary>
/// The calling thread's parser, created on first use
/// </summary>
private static Parser p
{
    get { return _p ?? (_p = new Parser(new ExcelFormulaGrammar())); }
}

/// <summary>
/// Parse a formula, return the tree's root node
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed (an <see cref="ArgumentNullException"/> when it is null)
/// </exception>
/// <returns>Parse tree root node</returns>
public static ParseTreeNode Parse(string input)
{
    return ParseToTree(input).Root;
}

/// <summary>
/// Parse a formula, return the tree
/// </summary>
/// <param name="input">The formula to be parsed.</param>
/// <exception cref="ArgumentException">
/// If formula could not be parsed (an <see cref="ArgumentNullException"/> when it is null)
/// </exception>
/// <returns>Parse tree</returns>
public static ParseTree ParseToTree(string input)
{
    // Fail with a descriptive exception instead of whatever the underlying parser does with null.
    // ArgumentNullException derives from ArgumentException, so existing catch blocks keep working.
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    var tree = p.Parse(input);

    if (tree.HasErrors())
    {
        throw new ArgumentException("Failed parsing input <<" + input + ">>");
    }

    return tree;
}
/// <summary>
/// All nodes of the tree (terminals included) in depth-first pre-order, with a conditional stop
/// </summary>
/// <param name="root">Node at which the traversal starts; it is the first node returned</param>
/// <param name="stopAt">Don't process the children of a node matching this predicate (the node itself is still returned)</param>
// inspiration taken from https://irony.codeplex.com/discussions/213938
public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
{
    var stack = new Stack<ParseTreeNode>();
    stack.Push(root);
    while (stack.Count > 0)
    {
        var node = stack.Pop();
        yield return node;

        // Check if we don't want to process the children of this node
        if (stopAt != null && stopAt(node)) continue;

        var children = node.ChildNodes;
        // Push children on in reverse order so that they will
        // be evaluated left -> right when popped.
        for (int i = children.Count - 1; i >= 0; i--)
        {
            stack.Push(children[i]);
        }
    }
}

/// <summary>
/// All nodes of the tree (terminals included) in depth-first pre-order
/// </summary>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
{
    return AllNodesConditional(root);
}

/// <summary>
/// All nodes of a certain type in depth-first pre-order
/// </summary>
/// <param name="root">Node at which the traversal starts</param>
/// <param name="type">Term name to keep, compared with <see cref="Is"/></param>
public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
{
    return AllNodes(root.AllNodes(), type);
}

/// <summary>
/// Filter an already enumerated node sequence down to the nodes of a certain type
/// </summary>
internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
{
    return allNodes.Where(node => node.Is(type));
}

/// <summary>
/// Whether this tree contains any nodes of a type
/// </summary>
public static bool Contains(this ParseTreeNode root, string type)
{
    return root.AllNodes(type).Any();
}

/// <summary>
/// The node type/name (the name of the node's grammar term)
/// </summary>
public static string Type(this ParseTreeNode node)
{
    return node.Term.Name;
}

/// <summary>
/// Check if a node is of a particular type
/// </summary>
public static bool Is(this ParseTreeNode pt, string type)
{
    return pt.Type() == type;
}

/// <summary>
/// Checks whether this node is a function
/// </summary>
/// <remarks>
/// True for FunctionCall, ReferenceFunctionCall and UDFunctionCall nodes, and for a two-child
/// Reference whose second child is itself a function.
/// </remarks>
public static Boolean IsFunction(this ParseTreeNode input)
{
    return input.Is(GrammarNames.FunctionCall)
        || input.Is(GrammarNames.ReferenceFunctionCall)
        || input.Is(GrammarNames.UDFunctionCall)
        // This gives potential problems/duplication on external UDF's, but they are so rare that I think this is acceptable
        || (input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction())
        ;
}

/// <summary>
/// Whether or not this node represents parentheses "(_)"
/// </summary>
/// <remarks>
/// Parentheses show up as a Formula whose only child is a Formula, or a Reference whose only child is a Reference.
/// </remarks>
public static bool IsParentheses(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.Formula:
            return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Formula);
        case GrammarNames.Reference:
            return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Reference);
        default:
            return false;
    }
}
/// <summary>
/// Whether this node is a function node with three children whose middle child is an operator
/// </summary>
public static bool IsBinaryOperation(this ParseTreeNode input)
{
    return input.IsFunction()
        && input.ChildNodes.Count == 3
        && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
}

/// <summary>
/// Whether this node is a binary operation on a FunctionCall node
/// </summary>
public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
{
    return input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);
}

/// <summary>
/// Whether this node is a binary operation on a ReferenceFunctionCall node
/// </summary>
public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
{
    return input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
}

/// <summary>
/// Whether this node is a unary prefix or unary postfix operation
/// </summary>
public static bool IsUnaryOperation(this ParseTreeNode input)
{
    return IsUnaryPrefixOperation(input) || IsUnaryPostfixOperation(input);
}

/// <summary>
/// Whether this node is a function node with two children whose first child is an operator
/// </summary>
public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
{
    return input.IsFunction()
        && input.ChildNodes.Count == 2
        && input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator);
}

/// <summary>
/// Whether this node is a function node with two children whose second child is an operator
/// </summary>
public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
{
    return input.IsFunction()
        && input.ChildNodes.Count == 2
        && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
}

/// <summary>
/// Drop the last character of a string; used to strip the "(" that function-name tokens end with
/// </summary>
private static string RemoveFinalSymbol(string input)
{
    input = input.Substring(0, input.Length - 1);
    return input;
}

/// <summary>
/// Get the function or operator name of this function call
/// </summary>
/// <returns>
/// The intersect or union token name for intersections and unions, the printed operator for other
/// operations, the upper-cased name without "(" for named functions, and the printed prefix
/// followed by the function name for external user defined functions.
/// </returns>
/// <exception cref="ArgumentException">If the node is not a function call</exception>
public static string GetFunction(this ParseTreeNode input)
{
    if (input.IsIntersection())
    {
        return GrammarNames.TokenIntersect;
    }
    if (input.IsUnion())
    {
        return GrammarNames.TokenUnionOperator;
    }
    if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
    {
        return input.ChildNodes[1].Print();
    }
    if (input.IsUnaryPrefixOperation())
    {
        return input.ChildNodes[0].Print();
    }
    if (input.IsNamedFunction())
    {
        // Invariant casing: the result must not depend on the current thread culture
        // (e.g. a Turkish culture upper-cases "i" to a dotted capital I, turning "if" into something other than "IF")
        return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
    }
    if (input.IsExternalUDFunction())
    {
        return String.Format("{0}{1}", input.ChildNodes[0].Print(), GetFunction(input.ChildNodes[1]));
    }

    throw new ArgumentException("Not a function call", "input");
}

/// <summary>
/// Check if this node is a specific function
/// </summary>
/// <param name="input">Node to inspect</param>
/// <param name="functionName">Name to compare against the result of <see cref="GetFunction"/> (case-sensitive)</param>
public static bool MatchFunction(this ParseTreeNode input, String functionName)
{
    return IsFunction(input) && GetFunction(input) == functionName;
}
/// <summary>
/// Checks whether this node is a built-in excel function
/// </summary>
public static bool IsBuiltinFunction(this ParseTreeNode node)
{
    if (!node.IsFunction())
    {
        return false;
    }
    var head = node.ChildNodes[0];
    return head.Is(GrammarNames.FunctionName) || head.Is(GrammarNames.RefFunctionName);
}

/// <summary>
/// Whether or not this node represents an intersection
/// </summary>
public static bool IsIntersection(this ParseTreeNode input)
{
    if (!IsBinaryOperation(input))
    {
        return false;
    }
    var operatorName = input.ChildNodes[1].Token.Terminal.Name;
    return operatorName == GrammarNames.TokenIntersect;
}

/// <summary>
/// Whether or not this node represents an union
/// </summary>
public static bool IsUnion(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.ReferenceFunctionCall))
    {
        return false;
    }
    return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Union);
}

/// <summary>
/// Checks whether this node is a function call with name, and not just a unary or binary operation
/// </summary>
public static bool IsNamedFunction(this ParseTreeNode input)
{
    switch (input.Type())
    {
        case GrammarNames.FunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.FunctionName);
        case GrammarNames.ReferenceFunctionCall:
            return input.ChildNodes[0].Is(GrammarNames.RefFunctionName);
        case GrammarNames.UDFunctionCall:
            return true;
        default:
            return false;
    }
}

/// <summary>
/// Whether this node is a unary or binary operation
/// </summary>
public static bool IsOperation(this ParseTreeNode input)
{
    if (input.IsBinaryOperation())
    {
        return true;
    }
    return input.IsUnaryOperation();
}

/// <summary>
/// Whether this node is a two-child Reference whose second child is a named function
/// </summary>
public static bool IsExternalUDFunction(this ParseTreeNode input)
{
    if (!input.Is(GrammarNames.Reference) || input.ChildNodes.Count != 2)
    {
        return false;
    }
    return input.ChildNodes[1].IsNamedFunction();
}

/// <summary>
/// True if this node presents a number constant with a sign
/// </summary>
public static bool IsNumberWithSign(this ParseTreeNode input)
{
    if (!IsUnaryPrefixOperation(input))
    {
        return false;
    }
    // First child of the operand, i.e. of the node following the prefix operator
    var operand = input.ChildNodes[1].ChildNodes[0];
    return operand.Is(GrammarNames.Constant) && operand.ChildNodes[0].Is(GrammarNames.Number);
}

/// <summary>
/// Go to the first non-formula child node
/// </summary>
public static ParseTreeNode SkipFormula(this ParseTreeNode input)
{
    var node = input;
    while (node.Is(GrammarNames.Formula))
    {
        node = node.ChildNodes.First();
    }
    return node;
}
/// <summary>
/// Go to the first "relevant" child node, i.e. skips wrapper nodes
/// </summary>
/// <remarks>
/// Skips:
/// * FormulaWithEq and ArrayFormula nodes
/// * Formula nodes
/// * Parentheses
/// * Reference nodes which are just wrappers
/// </remarks>
public static ParseTreeNode SkipToRelevant(this ParseTreeNode input)
{
    var current = input;
    while (true)
    {
        var kind = current.Type();
        if (kind == GrammarNames.FormulaWithEq || kind == GrammarNames.ArrayFormula)
        {
            // Continue with the second child (index 1)
            current = current.ChildNodes[1];
        }
        else if ((kind == GrammarNames.Formula || kind == GrammarNames.Reference)
                 && current.ChildNodes.Count == 1)
        {
            // Single-child wrapper; this also catches parentheses
            current = current.ChildNodes[0];
        }
        else
        {
            return current;
        }
    }
}
/// <summary>
/// Pretty-print a parse tree to a string
/// </summary>
/// <exception cref="ArgumentException">
/// Thrown for a function node of unknown shape, or for a non-terminal with more than one child
/// that has no dedicated printing rule.
/// </exception>
public static string Print(this ParseTreeNode input)
{
    // Terminals print as their token text
    if (input.Term is Terminal)
    {
        return input.Token.Text;
    }

    // Lazily printed children; only materialised where individual items are indexed
    var printed = input.ChildNodes.Select(Print);
    List<string> parts;

    switch (input.Term.Name)
    {
        case GrammarNames.Formula:
            // A Formula wrapping a Formula represents brackets
            if (IsParentheses(input))
            {
                return String.Format("({0})", printed.First());
            }
            return printed.First();

        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            parts = printed.ToList();

            if (input.IsNamedFunction())
            {
                // The function-name token already ends with "(", so only ")" is appended
                return String.Join("", parts) + ")";
            }

            if (input.IsBinaryOperation())
            {
                // Intersections print as a space, other reference operators without padding,
                // all remaining binary operators with a space on either side
                string layout = input.IsIntersection()
                    ? "{0} {2}"
                    : input.IsBinaryReferenceOperation()
                        ? "{0}{1}{2}"
                        : "{0} {1} {2}";
                return String.Format(layout, parts[0], parts[1], parts[2]);
            }

            if (input.IsUnion())
            {
                return String.Format("({0})", String.Join(",", parts));
            }

            if (input.IsUnaryOperation())
            {
                return String.Join("", parts);
            }

            throw new ArgumentException("Unknown function type.");

        case GrammarNames.Reference:
            if (IsParentheses(input))
            {
                return String.Format("({0})", printed.First());
            }
            return String.Join("", printed);

        case GrammarNames.File:
            return String.Format("[{0}]", printed.First());

        case GrammarNames.Prefix:
            var prefixText = String.Join("", printed);
            // The exclamation mark token is not part of the parse tree, so re-add it for a lone file prefix
            if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
            {
                prefixText += "!";
            }
            return prefixText;

        case GrammarNames.ArrayFormula:
            return "{=" + printed.ElementAt(1) + "}";

        case GrammarNames.DynamicDataExchange:
            parts = printed.ToList();
            return String.Format("{0}!{1}", parts[0], parts[1]);

        // Children concatenated as-is
        case GrammarNames.ArrayConstant:
        case GrammarNames.FormulaWithEq:
            return String.Join("", printed);

        // Children separated by commas
        case GrammarNames.Arguments:
        case GrammarNames.ArrayRows:
        case GrammarNames.Union:
            return String.Join(",", printed);

        case GrammarNames.ArrayColumns:
            return String.Join(";", printed);

        case GrammarNames.ConstantArray:
            return String.Format("{{{0}}}", printed.First());

        default:
            // Any other node with exactly one child prints as that child
            if (input.ChildNodes.Count == 1)
            {
                return printed.First();
            }
            throw new ArgumentException(String.Format("Could not print node of type '{0}'.\nThis probably means the excel grammar was modified without the print function being modified", input.Term.Name));
    }
}
you assign it a priority, and it's default priority is low public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect); public Terminal percentop => ToTerm("%"); public Terminal gtop => ToTerm(">"); public Terminal eqop => ToTerm("="); public Terminal ltop => ToTerm("<"); public Terminal neqop => ToTerm("<>"); public Terminal gteop => ToTerm(">="); public Terminal lteop => ToTerm("<="); #endregion #region Literals public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE") { Priority = TerminalPriority.Bool }; public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None) { DefaultIntTypes = new[] {TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt} }; public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak); public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak) { Priority = TerminalPriority.SingleQuotedString }; public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A"); public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError); #endregion #region Functions public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, @"(_xll\.)?[\w\\.]+\(") { Priority = TerminalPriority.UDF }; public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelFunction { get; } = new 
RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + string.Join("|", excelFunctionList) + ")\\(") { Priority = TerminalPriority.ExcelFunction }; // Using this instead of Empty allows a more accurate trees public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument); #endregion #region References and names public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?[A-Z]{1,4}:[$]?[A-Z]{1,4}"); public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*"); private const string CellTokenRegex = "[$]?[A-Z]{1,4}[$]?[1-9][0-9]*"; public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex) { Priority = TerminalPriority.CellToken }; private const string NamedRangeRegex = @"[A-Za-z\\_][\w\.]*"; public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NamedRangeRegex) { Priority = TerminalPriority.Name }; // To prevent e.g. 
"A1A1" being parsed as 2 celltokens public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, "(TRUE|FALSE|" + CellTokenRegex + ")" + NamedRangeRegex) { Priority = TerminalPriority.NamedRangeCombination }; public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+") { Priority = TerminalPriority.ReservedName }; #region Structured References //public Terminal SRTableNameToken = new RegexBasedTerminal(GrammarNames.TokenSRTableName, @"[\w\\.]+\[") //{Priority = 0}; public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, @"[\w\\.]+") { Priority = TerminalPriority.SRColumn }; //public Terminal SREnclosedColumnToken = new RegexBasedTerminal(GrammarNames.TokenSREnclosedColumn, @"\[( )*[\w+\\.,:#'""{}$^&*+=->&+\-*/\^%, "; private const string notSheetNameChars = @"'*\[\]\\:/?"; //const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\""; //const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!"; private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+"; private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')+"; //private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!"; public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!") { Priority = TerminalPriority.SheetToken }; public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!") { Priority = TerminalPriority.SheetQuotedToken }; private static readonly string multiSheetRegex = $"(({normalSheetName}:{normalSheetName})|('{quotedSheetName}:{quotedSheetName}'))!"; public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex) { Priority = TerminalPriority.MultipleSheetsToken }; private const string 
fileNameNumericRegex = @"\[[0-9]+\]"; public Terminal FileToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex) { Priority = TerminalPriority.FileNameNumericToken }; private const string fileNameRegex = @"\[[^<>:""/\|?*\[\]]+\]"; public Terminal EnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenEnclosedInBrackets, fileNameRegex) { Priority = TerminalPriority.FileName }; // Source: http://stackoverflow.com/a/6416209/572635 private const string filePathRegex = @"(?:[a-zA-Z]\:|\\\\[\w\.]+\\[\w.$]+)\\(?:[\w]+\\)*"; public Terminal FilePathWindowsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePathWindows, filePathRegex); #endregion #endregion #endregion #region 2-NonTerminals // Most nonterminals are first defined here, so they can be used anywhere in the rules // Otherwise you can only use nonterminals that have been defined previously public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument); public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments); public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns); public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant); public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula); public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows); public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool); public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell); public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant); public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray); public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange); public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument); public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error); public 
NonTerminal File { get; } = new NonTerminal(GrammarNames.File); public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula); public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq); public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall); public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName); public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange); public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp); public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange); public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number); public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp); public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix); public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp); public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet); public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference); //public NonTerminal ReferenceFunction{ get; } = new NonTerminal(GrammarNames.ReferenceFunction); public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem); public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall); public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError); public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName); public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName); public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet); public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart); public NonTerminal StructureReference { get; } = new NonTerminal(GrammarNames.StructureReference); public NonTerminal 
StructureReferenceColumnOrKeyword { get; } = new NonTerminal(GrammarNames.StructureReferenceColumnOrKeyword); public NonTerminal StructureReferenceExpression { get; } = new NonTerminal(GrammarNames.StructureReferenceExpression); //public NonTerminal StructureReferenceKeyword { get; } = new NonTerminal(GrammarNames.StructureReferenceKeyword); public NonTerminal StructureReferenceTable { get; } = new NonTerminal(GrammarNames.StructureReferenceTable); public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text); public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName); public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall); public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union); public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange); #endregion public ExcelFormulaGrammar() : base(false) { #region Punctuation MarkPunctuation(OpenParen, CloseParen); MarkPunctuation(OpenSquareParen, CloseSquareParen); MarkPunctuation(OpenCurlyParen, CloseCurlyParen); //exclamationMark.SetFlag(TermFlags.IsDelimiter); #endregion #region Rules #region Base rules Root = Start; Start.Rule = FormulaWithEq | Formula | ArrayFormula ; MarkTransient(Start); ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen; FormulaWithEq.Rule = eqop + Formula; Formula.Rule = Reference | Constant | FunctionCall | ConstantArray | OpenParen + Formula + CloseParen | ReservedName ; //MarkTransient(Formula); ReservedName.Rule = ReservedNameToken; Constant.Rule = Number | Text | Bool | Error ; Text.Rule = TextToken; Number.Rule = NumberToken; Bool.Rule = BoolToken; Error.Rule = ErrorToken; RefError.Rule = RefErrorToken; #endregion #region Functions FunctionCall.Rule = FunctionName + Arguments + CloseParen | PrefixOp + Formula | Formula + PostfixOp | Formula + InfixOp + Formula ; FunctionName.Rule = ExcelFunction; Arguments.Rule = MakeStarRule(Arguments, comma, Argument); //Arguments.Rule = Argument | 
Argument + comma + Arguments; EmptyArgument.Rule = EmptyArgumentToken; Argument.Rule = Formula | EmptyArgument; //MarkTransient(Argument); PrefixOp.Rule = ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop; MarkTransient(PrefixOp); InfixOp.Rule = expop | mulop | divop | plusop | minop | concatop | gtop | eqop | ltop | neqop | gteop | lteop; MarkTransient(InfixOp); //PostfixOp.Rule = ImplyPrecedenceHere(Precedence.UnaryPostFix) + percentop; // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action PostfixOp.Rule = PreferShiftHere() + percentop; MarkTransient(PostfixOp); #endregion #region References Reference.Rule = ReferenceItem | ReferenceFunctionCall | OpenParen + Reference + PreferShiftHere() + CloseParen | Prefix + ReferenceItem | DynamicDataExchange ; ReferenceFunctionCall.Rule = Reference + colon + Reference | Reference + intersectop + Reference | OpenParen + Union + CloseParen | RefFunctionName + Arguments + CloseParen //| ConditionalRefFunctionName + Arguments + CloseParen ; RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken; Union.Rule = MakePlusRule(Union, comma, Reference); ReferenceItem.Rule = Cell | NamedRange | VRange | HRange | RefError | UDFunctionCall | StructureReference ; MarkTransient(ReferenceItem); UDFunctionCall.Rule = UDFName + Arguments + CloseParen; UDFName.Rule = UDFToken; VRange.Rule = VRangeToken; HRange.Rule = HRangeToken; Cell.Rule = CellToken; File.Rule = FileToken | EnclosedInBracketsToken | FilePathWindowsToken + EnclosedInBracketsToken ; DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken; NamedRange.Rule = NameToken | NamedRangeCombinationToken; Prefix.Rule = SheetToken | QuoteS + SheetQuotedToken | File + SheetToken | QuoteS + File + SheetQuotedToken | File + exclamationMark | MultipleSheetsToken | File + MultipleSheetsToken ; 
StructureReferenceColumnOrKeyword.Rule = OpenSquareParen + SRColumnToken + CloseSquareParen | OpenSquareParen + NameToken + CloseSquareParen | EnclosedInBracketsToken; //StructureReferenceKeyword.Rule = EnclosedInBracketsToken; StructureReferenceTable.Rule = NameToken; StructureReferenceExpression.Rule = StructureReferenceColumnOrKeyword | StructureReferenceColumnOrKeyword + colon + StructureReferenceColumnOrKeyword | StructureReferenceColumnOrKeyword + comma + StructureReferenceColumnOrKeyword | StructureReferenceColumnOrKeyword + comma + StructureReferenceColumnOrKeyword + colon + StructureReferenceColumnOrKeyword | StructureReferenceColumnOrKeyword + comma + StructureReferenceColumnOrKeyword + comma + StructureReferenceColumnOrKeyword | StructureReferenceColumnOrKeyword + comma + StructureReferenceColumnOrKeyword + comma + StructureReferenceColumnOrKeyword + colon + StructureReferenceColumnOrKeyword ; StructureReference.Rule = StructureReferenceColumnOrKeyword | OpenSquareParen + StructureReferenceExpression + CloseSquareParen | StructureReferenceTable + StructureReferenceColumnOrKeyword | StructureReferenceTable + OpenSquareParen + StructureReferenceExpression + CloseSquareParen ; #endregion #region Arrays ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen; ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows); ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant); ArrayConstant.Rule = Constant | PrefixOp + Number | RefError; #endregion #endregion #region 5-Operator Precedence // Some of these operators are neutral associative instead of left associative, // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific // structure of the parse tree, we like consistency. 
RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop); RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop); RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop); RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop); RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop); RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop); RegisterOperators(Precedence.Union, Associativity.Left, comma); RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop); RegisterOperators(Precedence.Range, Associativity.Left, colon); //RegisterOperators(Precedence.ParameterSeparator, comma); #endregion } #region Precedence and Priority constants // Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a // Could also be an enum, but this way you don't need int casts private static class Precedence { // Don't use priority 0, Irony seems to view it as no priority set public const int Comparison = 1; public const int Concatenation = 2; public const int Addition = 3; public const int Multiplication = 4; public const int Exponentiation = 5; public const int UnaryPostFix = 6; public const int UnaryPreFix = 7; //public const int Reference = 8; public const int Union = 9; public const int Intersection = 10; public const int Range = 11; } // Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match // E.g. "A1" is both a CellToken and NamedRange, pick celltoken because it has a higher priority // E.g. 
"A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination private static class TerminalPriority { // Irony Low value //public const int Low = -1000; public const int SRColumn = -900; public const int Name = -800; public const int ReservedName = -700; public const int FileName = -500; public const int SingleQuotedString = -100; // Irony Normal value, default value //public const int Normal = 0; public const int Bool = 0; public const int MultipleSheetsToken = 100; // Irony High value //public const int High = 1000; public const int CellToken = 1000; public const int NamedRangeCombination = 1100; public const int UDF = 1150; public const int ExcelFunction = 1200; public const int ExcelRefFunction = 1200; public const int FileNameNumericToken = 1200; public const int SheetToken = 1200; public const int SheetQuotedToken = 1200; } #endregion private static string[] excelFunctionList => Properties.Resources.ExcelBuiltinFunctionList_v120.Split(new [] {'\n', '\r'}, StringSplitOptions.RemoveEmptyEntries); } #region Names /// /// Collection of names used for terminals and non-terminals in the Excel Formula Grammar. /// /// /// Using these is strongly recommended, as these will change when breaking changes occur. /// It also allows you to see which code works on what grammar constructs. /// // Keep these constants instead of methods/properties, since that allows them to be used in switch statements. 
public static class GrammarNames { #region Non-Terminals public const string Argument = "Argument"; public const string Arguments = "Arguments"; public const string ArrayColumns = "ArrayColumns"; public const string ArrayConstant = "ArrayConstant"; public const string ArrayFormula = "ArrayFormula"; public const string ArrayRows = "ArrayRows"; public const string Bool = "Bool"; public const string Cell = "Cell"; public const string Constant = "Constant"; public const string ConstantArray = "ConstantArray"; public const string DynamicDataExchange = "DynamicDataExchange"; public const string EmptyArgument = "EmptyArgument"; public const string Error = "Error"; public const string ExcelFunction = "ExcelFunction"; public const string File = "File"; public const string Formula = "Formula"; public const string FormulaWithEq = "FormulaWithEq"; public const string FunctionCall = "FunctionCall"; public const string FunctionName = "FunctionName"; public const string HorizontalRange = "HRange"; public const string NamedRange = "NamedRange"; public const string Number = "Number"; public const string Prefix = "Prefix"; public const string QuotedFileSheet = "QuotedFileSheet"; public const string Range = "Range"; public const string Reference = "Reference"; public const string ReferenceFunctionCall = "ReferenceFunctionCall"; public const string RefError = "RefError"; public const string RefFunctionName = "RefFunctionName"; public const string ReservedName = "ReservedName"; public const string Sheet = "Sheet"; public const string StructureReference = "StructureReference"; public const string StructureReferenceColumnOrKeyword = "StructureReferenceColumnOrKeyword"; public const string StructureReferenceExpression = "StructureReferenceExpression"; public const string StructureReferenceTable = "StructureReferenceTable"; public const string Text = "Text"; public const string UDFName = "UDFName"; public const string UDFunctionCall = "UDFunctionCall"; public const string Union = "Union"; 
public const string VerticalRange = "VRange"; #endregion #region Transient Non-Terminals public const string TransientStart = "Start"; public const string TransientInfixOp = "InfixOp"; public const string TransientPostfixOp = "PostfixOp"; public const string TransientPrefixOp = "PrefixOp"; public const string TransientReferenceItem = "ReferenceItem"; #endregion #region Terminals public const string TokenBool = "BoolToken"; public const string TokenCell = "CellToken"; public const string TokenEmptyArgument = "EmptyArgumentToken"; public const string TokenError = "ErrorToken"; public const string TokenExcelRefFunction = "ExcelRefFunctionToken"; public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken"; public const string TokenFilePathWindows = "FilePathWindowsToken"; public const string TokenEnclosedInBrackets = "EnclosedInBracketsToken"; public const string TokenFileNameNumeric = "FileNameNumericToken"; public const string TokenHRange = "HRangeToken"; public const string TokenIntersect = "INTERSECT"; public const string TokenMultipleSheets = "MultipleSheetsToken"; public const string TokenName = "NameToken"; public const string TokenNamedRangeCombination = "NamedRangeCombinationToken"; public const string TokenNumber = "NumberToken"; public const string TokenRefError = "RefErrorToken"; public const string TokenReservedName = "ReservedNameToken"; public const string TokenSingleQuotedString = "SingleQuotedString"; public const string TokenSheet = "SheetNameToken"; public const string TokenSheetQuoted = "SheetNameQuotedToken"; public const string TokenSRTableName = "SRTableName"; public const string TokenSRKeyword = "SRKeyword"; public const string TokenSRColumn = "SRColumn"; public const string TokenSREnclosedColumn = "SREnclosedColumn"; public const string TokenText = "TextToken"; public const string TokenUDF = "UDFToken"; public const string TokenUnionOperator = ","; public const string TokenVRange = "VRangeToken"; #endregion } 
#endregion } ================================================ FILE: app/XLParser.Web/XLParserVersions/v120/ExcelFormulaParser.cs ================================================ using System; using System.Collections.Generic; using System.Linq; using Irony.Parsing; namespace XLParser.Web.XLParserVersions.v120 { /// /// Excel formula parser
/// Contains parser and utilities that operate directly on the parse tree, or makes working with the parse tree easier. ///
public static class ExcelFormulaParser { /// /// Thread-local singleton parser instance /// [ThreadStatic] private static Parser _p; /// /// Thread-safe parser /// private static Parser p => _p ?? (_p = new Parser(new ExcelFormulaGrammar())); /// /// Parse a formula, return the the tree's root node /// /// The formula to be parsed. /// /// If formula could not be parsed /// /// Parse tree root node public static ParseTreeNode Parse(string input) { return ParseToTree(input).Root; } /// /// Parse a formula, return the the tree /// /// The formula to be parsed. /// /// If formula could not be parsed /// /// Parse tree public static ParseTree ParseToTree(string input) { var tree = p.Parse(input); if (tree.HasErrors()) { throw new ArgumentException("Failed parsing input <<" + input + ">>"); } return tree; } /// /// Non-terminal nodes in depth-first pre-order, with a conditional stop /// /// Don't process the children of a node matching this predicate // inspiration taken from https://irony.codeplex.com/discussions/213938 public static IEnumerable AllNodesConditional(this ParseTreeNode root, Predicate stopAt = null) { var stack = new Stack(); stack.Push(root); while (stack.Count > 0) { var node = stack.Pop(); yield return node; // Check if we don't want to process the children of this node if (stopAt != null && stopAt(node)) continue; var children = node.ChildNodes; // Push children on in reverse order so that they will // be evaluated left -> right when popped. 
for (int i = children.Count - 1; i >= 0; i--) { stack.Push(children[i]); } } } /// /// All non-terminal nodes in depth-first pre-order /// public static IEnumerable AllNodes(this ParseTreeNode root) { return AllNodesConditional(root); } /// /// All non-terminal nodes of a certain type in depth-first pre-order /// public static IEnumerable AllNodes(this ParseTreeNode root, string type) { return AllNodes(root.AllNodes(), type); } internal static IEnumerable AllNodes(IEnumerable allNodes, string type) { return allNodes.Where(node => node.Is(type)); } /// /// Get the parent node of a node /// /// /// This is an expensive operation, as the whole tree will be searched through /// public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot) { var parent = treeRoot.AllNodes() .FirstOrDefault(node => node.ChildNodes.Any(c => c == child)); if(parent == null) throw new ArgumentException("Child is not part of the tree", nameof(child)); return parent; } /// /// The node type/name /// public static string Type(this ParseTreeNode node) { return node.Term.Name; } /// /// Check if a node is of a particular type /// public static bool Is(this ParseTreeNode pt, string type) { return pt.Type() == type; } /// /// Checks whether this node is a function /// public static Boolean IsFunction(this ParseTreeNode input) { return input.Is(GrammarNames.FunctionCall) || input.Is(GrammarNames.ReferenceFunctionCall) || input.Is(GrammarNames.UDFunctionCall) // This gives potential problems/duplication on external UDF's, but they are so rare that I think this is acceptable || (input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction()) ; } /// /// Whether or not this node represents parentheses "(_)" /// public static bool IsParentheses(this ParseTreeNode input) { switch (input.Type()) { case GrammarNames.Formula: return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Formula); case GrammarNames.Reference: return 
input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Reference); default: return false; } } public static bool IsBinaryOperation(this ParseTreeNode input) { return input.IsFunction() && input.ChildNodes.Count() == 3 && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator); } public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input) { return input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall); } public static bool IsBinaryReferenceOperation(this ParseTreeNode input) { return input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall); } public static bool IsUnaryOperation(this ParseTreeNode input) { return IsUnaryPrefixOperation(input) || IsUnaryPostfixOperation(input); } public static bool IsUnaryPrefixOperation(this ParseTreeNode input) { return input.IsFunction() && input.ChildNodes.Count() == 2 && input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator); } public static bool IsUnaryPostfixOperation(this ParseTreeNode input) { return input.IsFunction() && input.ChildNodes.Count == 2 && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator); } private static string RemoveFinalSymbol(string input) { input = input.Substring(0, input.Length - 1); return input; } /// /// Get the function or operator name of this function call /// public static string GetFunction(this ParseTreeNode input) { if (input.IsIntersection()) { return GrammarNames.TokenIntersect; } if (input.IsUnion()) { return GrammarNames.TokenUnionOperator; } if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation()) { return input.ChildNodes[1].Print(); } if (input.IsUnaryPrefixOperation()) { return input.ChildNodes[0].Print(); } if (input.IsNamedFunction()) { return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpper(); } if (input.IsExternalUDFunction()) { return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}"; } throw new ArgumentException("Not a function call", nameof(input)); } /// /// Check if this 
node is a specific function ///
        public static bool MatchFunction(this ParseTreeNode input, string functionName)
        {
            // Plain string equality: the comparison with functionName is case-sensitive.
            return IsFunction(input) && GetFunction(input) == functionName;
        }

        /// <summary>
        /// Get all the arguments of a function or operation
        /// </summary>
        /// <param name="input">
        /// A named function call, a binary or unary operation, a union, or a reference that wraps an external UDF call
        /// </param>
        /// <returns>
        /// The argument/operand nodes: the first child of every "Argument" for named functions, both operands for
        /// binary operations, the single operand for unary operations, and the child nodes of the Union for unions
        /// </returns>
        /// <exception cref="ArgumentException">The node is none of the supported kinds</exception>
        // NOTE(review): the return type reads as non-generic IEnumerable here; a generic type argument
        // (presumably ParseTreeNode) may have been lost when this text was extracted - confirm against upstream.
        public static IEnumerable GetFunctionArguments(this ParseTreeNode input)
        {
            if (input.IsNamedFunction())
            {
                return input
                    .ChildNodes[1] // "Arguments" nonterminal
                    .ChildNodes    // "Argument" nonterminals
                    .Select(node => node.ChildNodes[0])
                    ;
            }
            if (input.IsBinaryOperation())
            {
                // Child 1 is the operator itself, so the operands are children 0 and 2
                return new[] {input.ChildNodes[0], input.ChildNodes[2]};
            }
            if (input.IsUnaryPrefixOperation())
            {
                return new[] {input.ChildNodes[1]};
            }
            if (input.IsUnaryPostfixOperation())
            {
                return new[] {input.ChildNodes[0]};
            }
            if (input.IsUnion())
            {
                return input.ChildNodes[0].ChildNodes;
            }
            if (input.IsExternalUDFunction())
            {
                return input // Reference
                    .ChildNodes[1] // UDFunctionCall
                    .ChildNodes[1] // Arguments
                    .ChildNodes    // Argument nonterminals
                    .Select(node => node.ChildNodes[0])
                    ;
            }
            throw new ArgumentException("Not a function call", nameof(input));
        }

        /// <summary>
        /// Checks whether this node is a built-in excel function
        /// </summary>
        /// <remarks>
        /// True when the node is a function whose first child is a FunctionName or RefFunctionName node.
        /// </remarks>
        public static bool IsBuiltinFunction(this ParseTreeNode node)
        {
            return node.IsFunction() &&
                   (node.ChildNodes[0].Is(GrammarNames.FunctionName) || node.ChildNodes[0].Is(GrammarNames.RefFunctionName));
        }

        /// <summary>
        /// Whether or not this node represents an intersection
        /// </summary>
        /// <remarks>
        /// True for a binary operation whose operator token (child 1) is the intersect terminal.
        /// </remarks>
        public static bool IsIntersection(this ParseTreeNode input)
        {
            return IsBinaryOperation(input) &&
                   input.ChildNodes[1].Token.Terminal.Name == GrammarNames.TokenIntersect;
        }

        /// <summary>
        /// Whether or not this node represents an union
        /// </summary>
        /// <remarks>
        /// True for a ReferenceFunctionCall with exactly one child, that child being a Union node.
        /// </remarks>
        public static bool IsUnion(this ParseTreeNode input)
        {
            return input.Is(GrammarNames.ReferenceFunctionCall)
                   && input.ChildNodes.Count == 1
                   && input.ChildNodes[0].Is(GrammarNames.Union);
        }

        /// <summary>
        /// Checks whether this node is a function call with name, and not just a unary or binary operation
        /// </summary>
        public static bool IsNamedFunction(this ParseTreeNode input)
        {
            return (input.Is(GrammarNames.FunctionCall) && input.ChildNodes[0].Is(GrammarNames.FunctionName))
                   || (input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes[0].Is(GrammarNames.RefFunctionName))
                   || input.Is(GrammarNames.UDFunctionCall);
        }

        /// <summary>
        /// Whether this node is a binary or a unary operation
        /// </summary>
        public static bool IsOperation(this ParseTreeNode input)
        {
            return input.IsBinaryOperation() || input.IsUnaryOperation();
        }

        /// <summary>
        /// Whether this node is a Reference with exactly two children of which the second is a named function
        /// </summary>
        public static bool IsExternalUDFunction(this ParseTreeNode input)
        {
            return input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsNamedFunction();
        }

        /// <summary>
        /// True if this node presents a number constant with a sign
        /// </summary>
        /// <remarks>
        /// Requires a unary prefix operation whose operand's first child is a Constant that in turn holds a Number.
        /// </remarks>
        public static bool IsNumberWithSign(this ParseTreeNode input)
        {
            return IsUnaryPrefixOperation(input)
                   && input.ChildNodes[1].ChildNodes[0].Is(GrammarNames.Constant)
                   && input.ChildNodes[1].ChildNodes[0].ChildNodes[0].Is(GrammarNames.Number);
        }

        /// <summary>
        /// Extract all of the information from a Prefix nonterminal
        /// </summary>
        /// <param name="prefix">A node of type Prefix</param>
        /// <returns>
        /// A PrefixInfo holding whichever of file path, file number, file name, sheet name and multiple-sheets
        /// text are present, plus whether the prefix started with a quote
        /// </returns>
        /// <exception cref="ArgumentException">The node is not a Prefix</exception>
        public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix)
        {
            if (prefix.Type() != GrammarNames.Prefix) throw new ArgumentException("Not a prefix", nameof(prefix));

            string filePath = null;
            int? fileNumber = null;
            string fileName = null;
            string sheetName = null;
            string multipleSheets = null;

            // Token number we're processing
            int cur = 0;

            // Check for quotes
            bool quoted = prefix.ChildNodes[cur].Is("'");
            if (quoted) cur++;

            // Check and process file
            if (prefix.ChildNodes[cur].Is(GrammarNames.File))
            {
                var file = prefix.ChildNodes[cur];

                if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
                {
                    // Numeric filename: strip the surrounding brackets before parsing
                    int n;
                    int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out n);
                    fileNumber = n;
                    // TryParse leaves n at 0 on failure, so 0 is treated as "no file number"
                    if (fileNumber == 0) fileNumber = null;
                }
                else
                {
                    // String filename
                    var icur = 0;
                    // Check if it includes a path
                    if (file.ChildNodes[icur].Is(GrammarNames.TokenFilePathWindows))
                    {
                        filePath = file.ChildNodes[icur].Print();
                        icur++;
                    }
                    // Strip the surrounding brackets
                    fileName = Substr(file.ChildNodes[icur].Print(), 1, 1);
                }

                cur++;
            }

            // A prefix can end right after the file part: Print() handles a Prefix whose only child is a
            // File node. Indexing ChildNodes[cur] unconditionally threw ArgumentOutOfRangeException for
            // such a prefix, so only look for a sheet when there is a node left.
            if (cur < prefix.ChildNodes.Count)
            {
                var sheetNode = prefix.ChildNodes[cur];

                // Check for a non-quoted sheet
                if (sheetNode.Is(GrammarNames.TokenSheet))
                {
                    // remove !
                    sheetName = Substr(sheetNode.Print(), 1);
                }
                // Check for a quoted sheet
                else if (sheetNode.Is(GrammarNames.TokenSheetQuoted))
                {
                    // remove quote and !
                    sheetName = Substr(sheetNode.Print(), 2);
                }
                // Check if multiple sheets
                else if (sheetNode.Is(GrammarNames.TokenMultipleSheets))
                {
                    // remove !
                    multipleSheets = Substr(sheetNode.Print(), 1);
                }
            }

            // Put it all into the convenience class
            return new PrefixInfo(
                sheetName,
                fileNumber,
                fileName,
                filePath,
                multipleSheets,
                quoted
                );
        }

        /// <summary>
        /// Returns <paramref name="s"/> without its first <paramref name="removeFirst"/> and last
        /// <paramref name="removeLast"/> characters
        /// </summary>
        private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
        {
            return s.Substring(removeFirst, s.Length-removeLast-removeFirst);
        }

        /// <summary>
        /// Go to the first non-formula child node
        /// </summary>
        /// <remarks>
        /// Follows the first child for as long as the current node is a Formula node.
        /// </remarks>
        public static ParseTreeNode SkipFormula(this ParseTreeNode input)
        {
            while (input.Is(GrammarNames.Formula))
            {
                input = input.ChildNodes.First();
            }
            return input;
        }

        /// /// Go to the first "relevant" child node, i.e.
skips wrapper nodes ///
        /// <remarks>
        /// Skips:
        /// * FormulaWithEq and ArrayFormula nodes
        /// * Formula nodes
        /// * Parentheses
        /// * Reference nodes which are just wrappers
        /// * Argument nodes with a single child
        /// </remarks>
        /// <param name="input">Node to start from</param>
        /// <returns>The first node reached that is not one of the wrappers listed above</returns>
        public static ParseTreeNode SkipToRelevant(this ParseTreeNode input)
        {
            while (true)
            {
                switch (input.Type())
                {
                    // For these two wrappers, continue with the child at index 1
                    case GrammarNames.FormulaWithEq:
                    case GrammarNames.ArrayFormula:
                        input = input.ChildNodes[1];
                        break;
                    // Only unwrap these when they have exactly one child; otherwise the node itself is relevant
                    case GrammarNames.Argument:
                    case GrammarNames.Formula:
                    case GrammarNames.Reference:
                        // This also catches parentheses
                        if (input.ChildNodes.Count == 1)
                        {
                            input = input.ChildNodes[0];
                        }
                        else
                        {
                            return input;
                        }
                        break;
                    default:
                        return input;
                }
            }
        }

        /// <summary>
        /// Pretty-print a parse tree to a string
        /// </summary>
        /// <param name="input">Root of the (sub)tree to print</param>
        /// <returns>The token text for terminals, otherwise the printed children combined per node type</returns>
        /// <exception cref="ArgumentException">
        /// A function-call node is of no known kind, or a node type that is not handled has other than one child
        /// </exception>
        public static string Print(this ParseTreeNode input)
        {
            // For terminals, just print the token text
            if (input.Term is Terminal)
            {
                return input.Token.Text;
            }

            // (Lazy) enumerable for printed childs
            var childs = input.ChildNodes.Select(Print);
            // Concrete list when needed
            // NOTE(review): the type argument of List appears to have been lost in extraction - confirm against upstream
            List childsL;
            string ret;
            // Switch on nonterminals
            switch (input.Term.Name)
            {
                case GrammarNames.Formula:
                    // Check if these are brackets, otherwise print first child
                    return IsParentheses(input) ? $"({childs.First()})" : childs.First();

                case GrammarNames.FunctionCall:
                case GrammarNames.ReferenceFunctionCall:
                case GrammarNames.UDFunctionCall:
                    childsL = childs.ToList();

                    if (input.IsNamedFunction())
                    {
                        // Only the closing parenthesis is appended here; the children are joined as-is
                        return string.Join("", childsL) + ")";
                    }

                    if (input.IsBinaryOperation())
                    {
                        // format string for "normal" binary operation
                        string format = "{0} {1} {2}";
                        if (input.IsIntersection())
                        {
                            // The operator child ({1}) is left out; the operands are separated by one space
                            format = "{0} {2}";
                        }
                        else if (input.IsBinaryReferenceOperation())
                        {
                            // No spaces around the operator
                            format = "{0}{1}{2}";
                        }

                        return string.Format(format, childsL[0], childsL[1], childsL[2]);
                    }

                    if (input.IsUnion())
                    {
                        return $"({string.Join(",", childsL)})";
                    }

                    if (input.IsUnaryOperation())
                    {
                        return string.Join("", childsL);
                    }

                    throw new ArgumentException("Unknown function type.");

                case GrammarNames.Reference:
                    if (IsParentheses(input))
                    {
                        return $"({childs.First()})";
                    }
                    return string.Join("", childs);

                case GrammarNames.Prefix:
                    ret = string.Join("", childs);
                    // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
                    if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
                    {
                        ret += "!";
                    }
                    return ret;

                case GrammarNames.ArrayFormula:
                    // Prints the child at index 1 between "{=" and "}"
                    return "{=" + childs.ElementAt(1) + "}";

                case GrammarNames.StructureReference:
                    ret = "";
                    // With two children the first is printed in front of the contents (the "table" part)
                    var hastable = input.ChildNodes.Count == 2;
                    var contentsNode = hastable ? 1 : 0;
                    childsL = childs.ToList();
                    if (hastable) ret += childsL[0];

                    // Contents that are not a StructureReferenceColumnOrKeyword get wrapped in square brackets
                    if (input.ChildNodes[contentsNode].Is(GrammarNames.StructureReferenceColumnOrKeyword))
                    {
                        ret += childsL[contentsNode];
                    }
                    else
                    {
                        ret += $"[{childsL[contentsNode]}]";
                    }

                    return ret;

                // Terms for which to print all child nodes concatenated
                case GrammarNames.ArrayConstant:
                case GrammarNames.DynamicDataExchange:
                case GrammarNames.FormulaWithEq:
                case GrammarNames.File:
                case GrammarNames.StructureReferenceExpression:
                    return string.Join("", childs);

                // Terms for which we print the childs comma-separated
                case GrammarNames.Arguments:
                case GrammarNames.ArrayRows:
                case GrammarNames.Union:
                    return string.Join(",", childs);

                // Printed semicolon-separated
                case GrammarNames.ArrayColumns:
                    return string.Join(";", childs);

                // The first child wrapped in curly braces ({{ and }} are escaped braces)
                case GrammarNames.ConstantArray:
                    return $"{{{childs.First()}}}";

                default:
                    // If it is not defined above and the number of childs is exactly one, we want to just print the first child
                    if (input.ChildNodes.Count == 1)
                    {
                        return childs.First();
                    }
                    throw new ArgumentException($"Could not print node of type '{input.Term.Name}'.\nThis probably means the excel grammar was modified without the print function being modified");
            }
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v120/PrefixInfo.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace XLParser.Web.XLParserVersions.v120
{
    /// <summary>
    /// Immutable holder for the parts of a reference prefix: optional file path, file number or file name,
    /// sheet name or multiple-sheets text, and whether the prefix is quoted.
    /// </summary>
    public class PrefixInfo
    {
        /// <summary>File path, or null when absent</summary>
        public string FilePath { get; }
        public bool HasFilePath => FilePath != null;

        // Backing field; null means "no file number"
        private readonly int? fileNumber;
        /// <summary>
        /// The file number. Reads <c>fileNumber.Value</c>, so it throws when <see cref="HasFileNumber"/> is false.
        /// </summary>
        public int FileNumber => fileNumber.Value;
        public bool HasFileNumber => fileNumber.HasValue;

        /// <summary>File name, or null when absent</summary>
        public string FileName { get; }
        public bool HasFileName => FileName != null;

        /// <summary>True when either a file name or a file number is present</summary>
        public bool HasFile => HasFileName || HasFileNumber;

        /// <summary>Sheet name, or null when absent</summary>
        public string Sheet { get; }
        public bool HasSheet => Sheet != null;

        /// <summary>Multiple-sheets text, or null when absent</summary>
        public string MultipleSheets { get; }
        public bool HasMultipleSheets => MultipleSheets != null;

        /// <summary>Whether the prefix is surrounded by single quotes when printed</summary>
        public bool IsQuoted { get; }

        /// <summary>
        /// Creates a prefix description; every part is optional and defaults to "absent".
        /// </summary>
        public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
        {
            Sheet = sheet;
            this.fileNumber = fileNumber;
            FileName = fileName;
            FilePath = filePath;
            MultipleSheets = multipleSheets;
            IsQuoted = isQuoted;
        }

        /// <summary>
        /// Renders the prefix as: optional quote, file path, bracketed file number, bracketed file name,
        /// sheet, multiple sheets, optional closing quote, and always a trailing "!".
        /// </summary>
        public override string ToString()
        {
            string res = "";
            if (IsQuoted) res += "'";
            if (HasFilePath) res += FilePath;
            if (HasFileNumber) res += $"[{FileNumber}]";
            if (HasFileName) res += $"[{FileName}]";
            if (HasSheet) res += Sheet;
            if (HasMultipleSheets) res += MultipleSheets;
            if (IsQuoted) res += "'";
            res += "!";
            return res;
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v139/ExcelFormulaGrammar.cs
================================================
using System;
using System.IO;
using System.Reflection;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v139
{
    /// <summary>
    /// Contains the XLParser grammar
    /// </summary>
    [Language("Excel Formulas", "1.3.9", "Grammar for Excel Formulas")]
    public class ExcelFormulaGrammar : Grammar
    {
        #region 1-Terminals

        #region Symbols and operators
        // Each of these is an expression-bodied property, so ToTerm(...) is called on every access
        public Terminal at => ToTerm("@");
        public Terminal comma => ToTerm(",");
        public Terminal colon => ToTerm(":");
        public Terminal semicolon => ToTerm(";");
        public Terminal OpenParen => ToTerm("(");
        public Terminal CloseParen => ToTerm(")");
        public Terminal CloseSquareParen => ToTerm("]");
        public Terminal OpenSquareParen => ToTerm("[");
        public Terminal exclamationMark => ToTerm("!");
        public Terminal CloseCurlyParen => ToTerm("}");
        public
Terminal OpenCurlyParen => ToTerm("{"); public Terminal QuoteS => ToTerm("'"); public Terminal mulop => ToTerm("*"); public Terminal plusop => ToTerm("+"); public Terminal divop => ToTerm("/"); public Terminal minop => ToTerm("-"); public Terminal concatop => ToTerm("&"); public Terminal expop => ToTerm("^"); // Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal // Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect); public Terminal percentop => ToTerm("%"); public Terminal gtop => ToTerm(">"); public Terminal eqop => ToTerm("="); public Terminal ltop => ToTerm("<"); public Terminal neqop => ToTerm("<>"); public Terminal gteop => ToTerm(">="); public Terminal lteop => ToTerm("<="); #endregion #region Literals public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE") { Priority = TerminalPriority.Bool }; public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None) { DefaultIntTypes = new[] {TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt} }; public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes); public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes) { Priority = TerminalPriority.SingleQuotedString }; public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A"); public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError); #endregion #region Functions private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×÷"; // 
Non-word characters from ISO 8859-1 that are allowed in VBA identifiers public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, $@"('[^<>""/\|?*]+\.xla'!|_xll\.)?[\w{SpecialUdfChars}\\.]+\(") { Priority = TerminalPriority.UDF }; public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelFunction { get; } = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + string.Join("|", excelFunctionList) + ")\\(") { Priority = TerminalPriority.ExcelFunction }; // Using this instead of Empty allows a more accurate tree public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument); #endregion #region References and names private const string ColumnPattern = @"(?:[A-Z]{1,2}|[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D])"; public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern); public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*"); private const string CellTokenRegex = "[$]?" 
+ ColumnPattern + "[$]?[1-9][0-9]*"; public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex) { Priority = TerminalPriority.CellToken }; // Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark private const string NameStartCharRegex = @"[\p{L}\\_]"; private const string NameValidCharacterRegex = @"[\w\\_\.\?]"; public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*") { Priority = TerminalPriority.Name }; // Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference. // If we ever parse R1C1 references, make sure to include them here // TODO: Add all function names here private const string NameInvalidWordsRegex = "((TRUE|FALSE)" + NameValidCharacterRegex + "+)" // \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token + "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)" ; // To prevent e.g. 
"A1A1" being parsed as 2 cell tokens public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NameInvalidWordsRegex + NameValidCharacterRegex + "+") { Priority = TerminalPriority.NamedRangeCombination }; public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+") { Priority = TerminalPriority.ReservedName }; #region Structured References //public Terminal SRTableNameToken = new RegexBasedTerminal(GrammarNames.TokenSRTableName, @"[\w\\.]+\[") //{Priority = 0}; public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, @"[\w\\.]+") { Priority = TerminalPriority.SRColumn }; //public Terminal SREnclosedColumnToken = new RegexBasedTerminal(GrammarNames.TokenSREnclosedColumn, @"\[( )*[\w+\\.,:#'""{}$^&*+=->&+\-*/\^%, "; private const string notSheetNameChars = @"'*\[\]\\:/?"; //const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\""; //const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!"; private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+"; private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*"; //private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!"; public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!") { Priority = TerminalPriority.SheetToken }; public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!") { Priority = TerminalPriority.SheetQuotedToken }; private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!"; private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!"; public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex) { Priority = 
TerminalPriority.MultipleSheetsToken }; public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex) { Priority = TerminalPriority.MultipleSheetsToken }; private const string fileNameNumericRegex = @"\[[0-9]+\]"; public Terminal FileToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex) { Priority = TerminalPriority.FileNameNumericToken }; private const string fileNameForbiddenCharacter = @"<>:""/\|?*"; private const string fileNameRegex = @"\[[^\[\]]+\]"; public Terminal EnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenEnclosedInBrackets, fileNameRegex) { Priority = TerminalPriority.FileName }; // Source: http://stackoverflow.com/a/6416209/572635 private const string filePathRegex = @"(?:[a-zA-Z]\:|\\\\[\w\.]+\\[\w.$]+)\\(([^" + fileNameForbiddenCharacter + @"\\]| )+\\)*"; public Terminal FilePathWindowsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePathWindows, filePathRegex); #endregion #endregion #endregion #region 2-NonTerminals // Most non-terminals are first defined here, so they can be used anywhere in the rules // Otherwise you can only use non-terminals that have been defined previously public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument); public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments); public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns); public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant); public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula); public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows); public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool); public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell); public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant); public NonTerminal 
ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray); public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange); public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument); public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error); public NonTerminal File { get; } = new NonTerminal(GrammarNames.File); public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula); public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq); public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall); public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName); public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange); public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp); public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange); public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number); public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp); public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix); public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp); public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet); public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference); //public NonTerminal ReferenceFunction{ get; } = new NonTerminal(GrammarNames.ReferenceFunction); public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem); public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall); public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError); public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName); public NonTerminal ReservedName{ get; } = new 
NonTerminal(GrammarNames.ReservedName); public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet); public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart); public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference); public NonTerminal StructuredReferenceElement { get; } = new NonTerminal(GrammarNames.StructuredReferenceElement); public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression); //public NonTerminal StructuredReferenceKeyword { get; } = new NonTerminal(GrammarNames.StructuredReferenceKeyword); public NonTerminal StructuredReferenceTable { get; } = new NonTerminal(GrammarNames.StructuredReferenceTable); public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text); public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName); public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall); public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union); public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange); #endregion public ExcelFormulaGrammar() : base(false) { #region Punctuation MarkPunctuation(OpenParen, CloseParen); MarkPunctuation(OpenSquareParen, CloseSquareParen); MarkPunctuation(OpenCurlyParen, CloseCurlyParen); //exclamationMark.SetFlag(TermFlags.IsDelimiter); #endregion #region Rules #region Base rules Root = Start; Start.Rule = FormulaWithEq | Formula | ArrayFormula ; MarkTransient(Start); ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen; FormulaWithEq.Rule = eqop + Formula; Formula.Rule = Reference | Constant | FunctionCall | ConstantArray | OpenParen + Formula + CloseParen | ReservedName ; //MarkTransient(Formula); ReservedName.Rule = ReservedNameToken; Constant.Rule = Number | Text | Bool | Error ; Text.Rule = TextToken; Number.Rule = NumberToken; Bool.Rule = BoolToken; Error.Rule = ErrorToken; RefError.Rule 
= RefErrorToken; #endregion #region Functions FunctionCall.Rule = FunctionName + Arguments + CloseParen | PrefixOp + Formula | Formula + PostfixOp | Formula + InfixOp + Formula ; FunctionName.Rule = ExcelFunction; Arguments.Rule = MakeStarRule(Arguments, comma, Argument); //Arguments.Rule = Argument | Argument + comma + Arguments; EmptyArgument.Rule = EmptyArgumentToken; Argument.Rule = Formula | EmptyArgument; //MarkTransient(Argument); PrefixOp.Rule = ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop; MarkTransient(PrefixOp); InfixOp.Rule = expop | mulop | divop | plusop | minop | concatop | gtop | eqop | ltop | neqop | gteop | lteop; MarkTransient(InfixOp); //PostfixOp.Rule = ImplyPrecedenceHere(Precedence.UnaryPostFix) + percentop; // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action PostfixOp.Rule = PreferShiftHere() + percentop; MarkTransient(PostfixOp); #endregion #region References Reference.Rule = ReferenceItem | ReferenceFunctionCall | OpenParen + Reference + PreferShiftHere() + CloseParen | Prefix + ReferenceItem | DynamicDataExchange ; ReferenceFunctionCall.Rule = Reference + colon + Reference | Reference + intersectop + Reference | OpenParen + Union + CloseParen | RefFunctionName + Arguments + CloseParen ; RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken; Union.Rule = MakePlusRule(Union, comma, Reference); ReferenceItem.Rule = Cell | NamedRange | VRange | HRange | RefError | UDFunctionCall | StructuredReference ; MarkTransient(ReferenceItem); UDFunctionCall.Rule = UDFName + Arguments + CloseParen; UDFName.Rule = UDFToken; VRange.Rule = VRangeToken; HRange.Rule = HRangeToken; Cell.Rule = CellToken; File.Rule = FileToken | EnclosedInBracketsToken | FilePathWindowsToken + EnclosedInBracketsToken ; DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken; 
NamedRange.Rule = NameToken | NamedRangeCombinationToken; Prefix.Rule = SheetToken | QuoteS + SheetQuotedToken | File + SheetToken | QuoteS + File + SheetQuotedToken | File + exclamationMark | MultipleSheetsToken | QuoteS + MultipleSheetsQuotedToken | File + MultipleSheetsToken | QuoteS + File + MultipleSheetsQuotedToken ; StructuredReferenceElement.Rule = OpenSquareParen + SRColumnToken + CloseSquareParen | OpenSquareParen + NameToken + CloseSquareParen | EnclosedInBracketsToken; //StructuredReferenceKeyword.Rule = EnclosedInBracketsToken; StructuredReferenceTable.Rule = NameToken; StructuredReferenceExpression.Rule = StructuredReferenceElement | at + StructuredReferenceElement | StructuredReferenceElement + colon + StructuredReferenceElement | at + StructuredReferenceElement + colon + StructuredReferenceElement | StructuredReferenceElement + comma + StructuredReferenceElement | StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement | StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement | StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement ; StructuredReference.Rule = StructuredReferenceElement | OpenSquareParen + StructuredReferenceExpression + CloseSquareParen | StructuredReferenceTable + StructuredReferenceElement | StructuredReferenceTable + OpenSquareParen + CloseSquareParen | StructuredReferenceTable + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen ; #endregion #region Arrays ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen; ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows); ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant); ArrayConstant.Rule = Constant | PrefixOp + Number | RefError; #endregion #endregion #region 5-Operator Precedence // Some of these operators are neutral associative instead of left 
associative, // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific // structure of the parse tree, we like consistency. RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop); RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop); RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop); RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop); RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop); RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop); RegisterOperators(Precedence.Union, Associativity.Left, comma); RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop); RegisterOperators(Precedence.Range, Associativity.Left, colon); //RegisterOperators(Precedence.ParameterSeparator, comma); #endregion } #region Precedence and Priority constants // Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a // Could also be an enum, but this way you don't need int casts private static class Precedence { // Don't use priority 0, Irony seems to view it as no priority set public const int Comparison = 1; public const int Concatenation = 2; public const int Addition = 3; public const int Multiplication = 4; public const int Exponentiation = 5; public const int UnaryPostFix = 6; public const int UnaryPreFix = 7; //public const int Reference = 8; public const int Union = 9; public const int Intersection = 10; public const int Range = 11; } // Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match // E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority // E.g. 
"A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination private static class TerminalPriority { // Irony Low value //public const int Low = -1000; public const int SRColumn = -900; public const int Name = -800; public const int ReservedName = -700; public const int FileName = -500; public const int SingleQuotedString = -100; // Irony Normal value, default value //public const int Normal = 0; public const int Bool = 0; public const int MultipleSheetsToken = 100; // Irony High value //public const int High = 1000; public const int CellToken = 1000; public const int NamedRangeCombination = 1100; public const int UDF = 1150; public const int ExcelFunction = 1200; public const int ExcelRefFunction = 1200; public const int FileNameNumericToken = 1200; public const int SheetToken = 1200; public const int SheetQuotedToken = 1200; } #endregion private static string[] excelFunctionList => GetExcelFunctionList(); private static string[] GetExcelFunctionList() { var resource = Properties.Resources.ExcelBuiltinFunctionList_v139; using (var sr = new StringReader(resource)) return sr.ReadToEnd().Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries); } } #region Names /// /// Collection of names used for terminals and non-terminals in the Excel Formula Grammar. /// /// /// Using these is strongly recommended, as these will change when breaking changes occur. /// It also allows you to see which code works on what grammar constructs. /// // Keep these constants instead of methods/properties, since that allows them to be used in switch statements. 
public static class GrammarNames
{
    // The values below are the term names that parse tree nodes are matched against
    // (a node's type is its term name). They are declared as constants so they can be
    // used as switch case labels.

    #region Non-Terminals
    // Names of the non-terminal grammar constructs.
    public const string Argument = "Argument";
    public const string Arguments = "Arguments";
    public const string ArrayColumns = "ArrayColumns";
    public const string ArrayConstant = "ArrayConstant";
    public const string ArrayFormula = "ArrayFormula";
    public const string ArrayRows = "ArrayRows";
    public const string Bool = "Bool";
    public const string Cell = "Cell";
    public const string Constant = "Constant";
    public const string ConstantArray = "ConstantArray";
    public const string DynamicDataExchange = "DynamicDataExchange";
    public const string EmptyArgument = "EmptyArgument";
    public const string Error = "Error";
    public const string ExcelFunction = "ExcelFunction";
    public const string File = "File";
    public const string Formula = "Formula";
    public const string FormulaWithEq = "FormulaWithEq";
    public const string FunctionCall = "FunctionCall";
    public const string FunctionName = "FunctionName";
    // Note: the constant name and its value differ ("HRange").
    public const string HorizontalRange = "HRange";
    public const string NamedRange = "NamedRange";
    public const string Number = "Number";
    public const string Prefix = "Prefix";
    public const string QuotedFileSheet = "QuotedFileSheet";
    public const string Range = "Range";
    public const string Reference = "Reference";
    public const string ReferenceFunctionCall = "ReferenceFunctionCall";
    public const string RefError = "RefError";
    public const string RefFunctionName = "RefFunctionName";
    public const string ReservedName = "ReservedName";
    public const string Sheet = "Sheet";
    public const string StructuredReference = "StructuredReference";
    public const string StructuredReferenceElement = "StructuredReferenceElement";
    public const string StructuredReferenceExpression = "StructuredReferenceExpression";
    public const string StructuredReferenceTable = "StructuredReferenceTable";
    public const string Text = "Text";
    public const string UDFName = "UDFName";
    public const string UDFunctionCall = "UDFunctionCall";
    public const string Union = "Union";
    // Note: the constant name and its value differ ("VRange").
    public const string VerticalRange = "VRange";
    #endregion

    #region Transient Non-Terminals
    // Names of the transient non-terminals.
    public const string TransientStart = "Start";
    public const string TransientInfixOp = "InfixOp";
    public const string TransientPostfixOp = "PostfixOp";
    public const string TransientPrefixOp = "PrefixOp";
    public const string TransientReferenceItem = "ReferenceItem";
    #endregion

    #region Terminals
    // Names of the terminals (tokens).
    public const string TokenBool = "BoolToken";
    public const string TokenCell = "CellToken";
    public const string TokenEmptyArgument = "EmptyArgumentToken";
    public const string TokenError = "ErrorToken";
    public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
    public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
    public const string TokenFilePathWindows = "FilePathWindowsToken";
    public const string TokenEnclosedInBrackets = "EnclosedInBracketsToken";
    public const string TokenFileNameNumeric = "FileNameNumericToken";
    public const string TokenHRange = "HRangeToken";
    // Name of the intersect operator terminal.
    public const string TokenIntersect = "INTERSECT";
    public const string TokenMultipleSheets = "MultipleSheetsToken";
    public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
    public const string TokenName = "NameToken";
    public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
    public const string TokenNumber = "NumberToken";
    public const string TokenRefError = "RefErrorToken";
    public const string TokenReservedName = "ReservedNameToken";
    public const string TokenSingleQuotedString = "SingleQuotedString";
    public const string TokenSheet = "SheetNameToken";
    public const string TokenSheetQuoted = "SheetNameQuotedToken";
    public const string TokenSRTableName = "SRTableName";
    public const string TokenSRKeyword = "SRKeyword";
    public const string TokenSRColumn = "SRColumn";
    public const string TokenSREnclosedColumn = "SREnclosedColumn";
    public const string TokenText = "TextToken";
    public const string TokenUDF = "UDFToken";
    // The union operator is named by its literal symbol.
    public const string TokenUnionOperator = ",";
    public const string TokenVRange = "VRangeToken";
    #endregion
}
/// Contains the parser and utilities that operate directly on the parse tree, or make working with the parse tree easier. ///
public static class ExcelFormulaParser
{
    /// <summary>
    /// Thread-local singleton parser instance
    /// </summary>
    [ThreadStatic] private static Parser _p;

    /// <summary>
    /// Parser for the current thread, created lazily on first use
    /// </summary>
    private static Parser P => _p ?? (_p = new Parser(new ExcelFormulaGrammar()));

    /// <summary>
    /// Parse a formula, return the tree's root node
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">
    /// If formula could not be parsed
    /// </exception>
    /// <returns>Parse tree root node</returns>
    public static ParseTreeNode Parse(string input)
    {
        return ParseToTree(input).Root;
    }

    /// <summary>
    /// Parse a formula, return the tree
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">
    /// If formula could not be parsed
    /// </exception>
    /// <returns>Parse tree</returns>
    public static ParseTree ParseToTree(string input)
    {
        var tree = P.Parse(input);

        if (tree.HasErrors())
        {
            throw new ArgumentException("Failed parsing input <<" + input + ">>");
        }

        // Re-anchor every intersect token one character to the left and give it a length of one.
        var intersects = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenIntersect));
        foreach (ParseTreeNode intersect in intersects)
        {
            var newLocation = new SourceLocation(
                intersect.Span.Location.Position - 1,
                intersect.Span.Location.Line,
                intersect.Span.Location.Column - 1);
            intersect.Span = new SourceSpan(newLocation, 1);
        }

        // Extend quoted sheet tokens over any whitespace that directly precedes them in the input.
        var quotedSheetNodes = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenSheetQuoted));
        foreach (ParseTreeNode quotedSheetNode in quotedSheetNodes)
        {
            PrefixInfo.FixQuotedSheetNodeForWhitespace(quotedSheetNode, input);
        }

        return tree;
    }

    /// <summary>
    /// Nodes (terminals and non-terminals) in depth-first pre-order, with a conditional stop
    /// </summary>
    /// <param name="root">The root node</param>
    /// <param name="stopAt">Don't process the children of a node matching this predicate</param>
    // inspiration taken from https://irony.codeplex.com/discussions/213938
    public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
    {
        var stack = new Stack<ParseTreeNode>();
        stack.Push(root);
        while (stack.Count > 0)
        {
            var node = stack.Pop();
            yield return node;

            // Check if we don't want to process the children of this node
            if (stopAt != null && stopAt(node)) continue;

            var children = node.ChildNodes;
            // Push children on in reverse order so that they will
            // be evaluated left -> right when popped.
            for (int i = children.Count - 1; i >= 0; i--)
            {
                stack.Push(children[i]);
            }
        }
    }

    /// <summary>
    /// All nodes (terminals and non-terminals) in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
    {
        return AllNodesConditional(root);
    }

    /// <summary>
    /// All nodes of a certain type in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
    {
        return AllNodes(root.AllNodes(), type);
    }

    /// <summary>
    /// Filter a node sequence down to the nodes of a certain type
    /// </summary>
    internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
    {
        return allNodes.Where(node => node.Is(type));
    }

    /// <summary>
    /// Get the parent node of a node
    /// </summary>
    /// <remarks>
    /// This is an expensive operation, as the whole tree will be searched through
    /// </remarks>
    /// <exception cref="ArgumentException">If no node in the tree has <paramref name="child"/> as a direct child</exception>
    public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
    {
        var parent = treeRoot.AllNodes()
            .FirstOrDefault(node => node.ChildNodes.Any(c => c == child));
        if (parent == null) throw new ArgumentException("Child is not part of the tree", nameof(child));
        return parent;
    }

    /// <summary>
    /// The node type/name
    /// </summary>
    public static string Type(this ParseTreeNode node)
    {
        return node.Term.Name;
    }

    /// <summary>
    /// Check if a node is of a particular type
    /// </summary>
    public static bool Is(this ParseTreeNode pt, string type)
    {
        return pt.Type() == type;
    }

    /// <summary>
    /// Checks whether this node is a function
    /// </summary>
    public static Boolean IsFunction(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.FunctionCall)
               || input.Is(GrammarNames.ReferenceFunctionCall)
               || input.Is(GrammarNames.UDFunctionCall)
               // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
               || (input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction())
            ;
    }

    /// <summary>
    /// Whether or not this node represents parentheses "(_)"
    /// </summary>
    public static bool IsParentheses(this ParseTreeNode input)
    {
        switch (input.Type())
        {
            case GrammarNames.Formula:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Formula);
            case GrammarNames.Reference:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Reference);
            default:
                return false;
        }
    }

    /// <summary>
    /// Whether this node is a function node with three children whose middle child is an operator
    /// </summary>
    public static bool IsBinaryOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 3
               && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// Whether this node is a binary operation of type FunctionCall
    /// </summary>
    public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);
    }

    /// <summary>
    /// Whether this node is a binary operation of type ReferenceFunctionCall
    /// </summary>
    public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
    }

    /// <summary>
    /// Whether this node is a unary prefix or unary postfix operation
    /// </summary>
    public static bool IsUnaryOperation(this ParseTreeNode input)
    {
        return IsUnaryPrefixOperation(input) || IsUnaryPostfixOperation(input);
    }

    /// <summary>
    /// Whether this node is a function node with two children whose first child is an operator
    /// </summary>
    public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 2
               && input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// Whether this node is a function node with two children whose second child is an operator
    /// </summary>
    public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 2
               && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// Drop the last character of a string
    /// </summary>
    private static string RemoveFinalSymbol(string input)
    {
        input = input.Substring(0, input.Length - 1);
        return input;
    }

    /// <summary>
    /// Get the function or operator name of this function call
    /// </summary>
    /// <exception cref="ArgumentException">If the node is not a function call</exception>
    public static string GetFunction(this ParseTreeNode input)
    {
        if (input.IsIntersection())
        {
            return GrammarNames.TokenIntersect;
        }
        if (input.IsUnion())
        {
            return GrammarNames.TokenUnionOperator;
        }
        if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
        {
            return input.ChildNodes[1].Print();
        }
        if (input.IsUnaryPrefixOperation())
        {
            return input.ChildNodes[0].Print();
        }
        if (input.IsNamedFunction())
        {
            // Strip the last character of the printed name child, then normalise the case.
            // Function names are identifiers, not display text, so the casing must not depend on
            // the current culture (e.g. under tr-TR "if" would otherwise become "İF").
            return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
        }
        if (input.IsExternalUDFunction())
        {
            return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
        }

        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Check if this node is a specific function
    /// </summary>
    public static bool MatchFunction(this ParseTreeNode input, string functionName)
    {
        return IsFunction(input) && GetFunction(input) == functionName;
    }

    /// <summary>
    /// Get all the arguments of a function or operation
    /// </summary>
    /// <exception cref="ArgumentException">If the node is not a function call</exception>
    public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
    {
        if (input.IsNamedFunction())
        {
            return input
                .ChildNodes[1] // "Arguments" non-terminal
                .ChildNodes    // "Argument" non-terminals
                .Select(node => node.ChildNodes[0])
                ;
        }
        if (input.IsBinaryOperation())
        {
            return new[] {input.ChildNodes[0], input.ChildNodes[2]};
        }
        if (input.IsUnaryPrefixOperation())
        {
            return new[] {input.ChildNodes[1]};
        }
        if (input.IsUnaryPostfixOperation())
        {
            return new[] {input.ChildNodes[0]};
        }
        if (input.IsUnion())
        {
            return input.ChildNodes[0].ChildNodes;
        }
        if (input.IsExternalUDFunction())
        {
            return input // Reference
                .ChildNodes[1] // UDFunctionCall
                .ChildNodes[1] // Arguments
                .ChildNodes // Argument non-terminals
                .Select(node => node.ChildNodes[0])
                ;
        }
        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Checks whether this node is a built-in excel function
    /// </summary>
    public static bool IsBuiltinFunction(this ParseTreeNode node)
    {
        return node.IsFunction()
               && (node.ChildNodes[0].Is(GrammarNames.FunctionName) || node.ChildNodes[0].Is(GrammarNames.RefFunctionName));
    }

    /// <summary>
    /// Whether or not this node represents an intersection
    /// </summary>
    public static bool IsIntersection(this ParseTreeNode input)
    {
        return IsBinaryOperation(input)
               && input.ChildNodes[1].Token.Terminal.Name == GrammarNames.TokenIntersect;
    }

    /// <summary>
    /// Whether or not this node represents a union
    /// </summary>
    public static bool IsUnion(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.ReferenceFunctionCall)
               && input.ChildNodes.Count == 1
               && input.ChildNodes[0].Is(GrammarNames.Union);
    }

    /// <summary>
    /// Checks whether this node is a function call with name, and not just a unary or binary operation
    /// </summary>
    public static bool IsNamedFunction(this ParseTreeNode input)
    {
        return (input.Is(GrammarNames.FunctionCall) && input.ChildNodes[0].Is(GrammarNames.FunctionName))
               || (input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes[0].Is(GrammarNames.RefFunctionName))
               || input.Is(GrammarNames.UDFunctionCall);
    }

    /// <summary>
    /// Whether this node is a binary or unary operation
    /// </summary>
    public static bool IsOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() || input.IsUnaryOperation();
    }

    /// <summary>
    /// Whether this node is a Reference with two children whose second child is a named function
    /// </summary>
    public static bool IsExternalUDFunction(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.Reference)
               && input.ChildNodes.Count == 2
               && input.ChildNodes[1].IsNamedFunction();
    }

    /// <summary>
    /// True if this node presents a number constant with a sign
    /// </summary>
    public static bool IsNumberWithSign(this ParseTreeNode input)
    {
        return IsUnaryPrefixOperation(input)
               && input.ChildNodes[1].ChildNodes[0].Is(GrammarNames.Constant)
               && input.ChildNodes[1].ChildNodes[0].ChildNodes[0].Is(GrammarNames.Number);
    }

    /// <summary>
    /// Extract all of the information from a Prefix non-terminal
    /// </summary>
    public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix) => PrefixInfo.From(prefix);

    /// <summary>
    /// Go to the first non-formula child node
    /// </summary>
    public static ParseTreeNode SkipFormula(this ParseTreeNode input)
    {
        while (input.Is(GrammarNames.Formula))
        {
            input = input.ChildNodes.First();
        }
        return input;
    }

    /// <summary>
    /// Get all child nodes that are references and aren't part of another reference expression
    /// </summary>
    public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
    {
        return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
            .Where(node => node.Is(GrammarNames.Reference))
            .Select(node => node.SkipToRelevant())
            ;
    }

    /// <summary>
    /// Gets the ParserReferences from the input parse tree node and its children
    /// </summary>
    /// <remarks>
    /// 5 cases:
    /// 1. ReferenceItem node: convert to ParserReference
    /// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
    ///    (to include the references in the arguments of external UDFs)
    /// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
    /// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
    /// 5. Other cases (RefFunctionCall, Union, Arguments): recursive call on the nodes returned from GetReferenceNodes(node)
    /// </remarks>
    public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
    {
        // Unwrap a Reference that merely wraps a single child
        if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
            node = node.ChildNodes[0];

        var list = new List<ParserReference>();
        switch (node.Type())
        {
            case GrammarNames.Cell:
            case GrammarNames.NamedRange:
            case GrammarNames.HorizontalRange:
            case GrammarNames.VerticalRange:
                list.Add(new ParserReference(node));
                break;
            case GrammarNames.Reference:
                list.Add(new ParserReference(node));
                list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
                break;
            default:
                if (node.IsRange())
                {
                    var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray();
                    var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray();
                    if (IsCellReference(rangeStart) && IsCellReference(rangeEnd))
                    {
                        // Both limits are single cells: merge them into one CellRange reference
                        ParserReference range = rangeStart.First();
                        range.MaxLocation = rangeEnd.First().MinLocation;
                        range.ReferenceType = ReferenceType.CellRange;
                        range.LocationString = node.Print();
                        list.Add(range);
                    }
                    else
                    {
                        list.AddRange(rangeStart);
                        list.AddRange(rangeEnd);
                    }
                }
                else
                {
                    list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
                }
                break;
        }
        return list;
    }

    /// <summary>
    /// Whether the list consists of exactly one reference of type Cell
    /// </summary>
    private static bool IsCellReference(IList<ParserReference> references)
    {
        return references.Count == 1 && references.First().ReferenceType == ReferenceType.Cell;
    }

    /// <summary>
    /// Whether or not this node represents a range
    /// </summary>
    public static bool IsRange(this ParseTreeNode input)
    {
        return input.IsBinaryReferenceOperation() &&
               input.ChildNodes[1].Is(":");
    }

    /// <summary>
    /// Go to the first "relevant" child node, i.e. skips wrapper nodes
    /// </summary>
    /// <param name="input">The input parse tree node</param>
    /// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
    /// <remarks>
    /// Skips:
    /// * FormulaWithEq and ArrayFormula nodes
    /// * Formula nodes
    /// * Parentheses
    /// * Reference nodes which are just wrappers
    /// </remarks>
    public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
    {
        while (true)
        {
            switch (input.Type())
            {
                case GrammarNames.FormulaWithEq:
                case GrammarNames.ArrayFormula:
                    input = input.ChildNodes[1];
                    break;
                case GrammarNames.Argument:
                case GrammarNames.Formula:
                    if (input.ChildNodes.Count == 1)
                    {
                        input = input.ChildNodes[0];
                    }
                    else
                    {
                        return input;
                    }
                    break;
                case GrammarNames.Reference:
                    // Skip references which are parentheses
                    // Skip references without a prefix (=> they only have one child node) if the option is set
                    if ((skipReferencesWithoutPrefix && input.ChildNodes.Count == 1) || input.IsParentheses())
                    {
                        input = input.ChildNodes[0];
                    }
                    else
                    {
                        return input;
                    }
                    break;
                default:
                    return input;
            }
        }
    }

    /// <summary>
    /// Pretty-print a parse tree to a string
    /// </summary>
    /// <exception cref="ArgumentException">If the node type, or the kind of function node, is not handled</exception>
    public static string Print(this ParseTreeNode input)
    {
        // For terminals, just print the token text
        if (input.Term is Terminal)
        {
            return input.Token.Text;
        }

        // (Lazy) enumerable for printed children
        var children = input.ChildNodes.Select(Print);
        // Concrete list when needed
        List<string> childrenList;

        // Switch on non-terminals
        switch (input.Term.Name)
        {
            case GrammarNames.Formula:
                // Check if these are brackets, otherwise print first child
                return IsParentheses(input) ? $"({children.First()})" : children.First();

            case GrammarNames.FunctionCall:
            case GrammarNames.ReferenceFunctionCall:
            case GrammarNames.UDFunctionCall:
                childrenList = children.ToList();

                if (input.IsNamedFunction())
                {
                    return string.Join("", childrenList) + ")";
                }

                if (input.IsBinaryOperation())
                {
                    // format string for "normal" binary operation
                    string format = "{0} {1} {2}";
                    if (input.IsIntersection())
                    {
                        format = "{0} {2}";
                    }
                    else if (input.IsBinaryReferenceOperation())
                    {
                        format = "{0}{1}{2}";
                    }

                    return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
                }

                if (input.IsUnion())
                {
                    return $"({string.Join(",", childrenList)})";
                }

                if (input.IsUnaryOperation())
                {
                    return string.Join("", childrenList);
                }

                throw new ArgumentException("Unknown function type.");

            case GrammarNames.Reference:
                return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);

            case GrammarNames.Prefix:
                var ret = string.Join("", children);
                // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
                if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
                {
                    ret += "!";
                }
                return ret;

            case GrammarNames.ArrayFormula:
                return "{=" + children.ElementAt(1) + "}";

            case GrammarNames.StructuredReference:
                var sb = new StringBuilder();
                var hasTable = input.ChildNodes.Count >= 1 && input.ChildNodes[0].Is(GrammarNames.StructuredReferenceTable);
                // Index of the child holding the bracketed contents: it follows the table name when one is present
                var contentsNode = hasTable ? 1 : 0;
                childrenList = children.ToList();
                if (hasTable)
                {
                    sb.Append(childrenList[0]);
                }

                if (hasTable && input.ChildNodes.Count == 1)
                {
                    // Full table reference
                    sb.Append("[]");
                }
                else if (input.ChildNodes[contentsNode].Is(GrammarNames.StructuredReferenceElement))
                {
                    sb.Append(childrenList[contentsNode]);
                }
                else
                {
                    sb.Append($"[{childrenList[contentsNode]}]");
                }

                return sb.ToString();

            // Terms for which to print all child nodes concatenated
            case GrammarNames.ArrayConstant:
            case GrammarNames.DynamicDataExchange:
            case GrammarNames.FormulaWithEq:
            case GrammarNames.File:
            case GrammarNames.StructuredReferenceExpression:
                return string.Join("", children);

            // Terms for which we print the children comma-separated
            case GrammarNames.Arguments:
            case GrammarNames.ArrayRows:
            case GrammarNames.Union:
                return string.Join(",", children);

            case GrammarNames.ArrayColumns:
                return string.Join(";", children);

            case GrammarNames.ConstantArray:
                return $"{{{children.First()}}}";

            default:
                // If it is not defined above and the number of children is exactly one, we want to just print the first child
                if (input.ChildNodes.Count == 1)
                {
                    return children.First();
                }
                throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine +
                                            "This probably means the Excel grammar was modified without the print function being modified");
        }
    }
}
/// <summary>
/// Converts the column number to an Excel column string representation
/// (0 gives "A", 25 gives "Z", 26 gives "AA").
/// </summary>
/// <param name="columnNumber">The zero-based column number.</param>
/// <returns>The column letters; an empty string for a negative number.</returns>
private string ConvertColumnToStr(int columnNumber)
{
    const int alphabetSize = 26;
    var letters = new System.Text.StringBuilder();

    // Letters are produced from least to most significant, so each one is put in front.
    // The "- 1" is needed because the numbering has no zero digit ("A" follows "Z" as "AA").
    for (var remaining = columnNumber; remaining >= 0; remaining = remaining / alphabetSize - 1)
    {
        var letter = (char)('A' + remaining % alphabetSize);
        letters.Insert(0, letter);
    }

    return letters.ToString();
}
public class PrefixInfo : IEquatable<PrefixInfo>
{
    /// <summary>Path part of the file, or null when the prefix has none.</summary>
    public string FilePath { get; }
    public bool HasFilePath => FilePath != null;

    private readonly int? _fileNumber;
    /// <summary>Numeric file reference; 0 when <see cref="HasFileNumber"/> is false.</summary>
    public int FileNumber => _fileNumber.GetValueOrDefault();
    public bool HasFileNumber => _fileNumber.HasValue;

    /// <summary>File name without the enclosing brackets, or null when the prefix has none.</summary>
    public string FileName { get; }
    public bool HasFileName => FileName != null;

    public bool HasFile => HasFileName || HasFileNumber;

    /// <summary>Sheet name, or null when the prefix has none.</summary>
    public string Sheet { get; }
    public bool HasSheet => Sheet != null;

    /// <summary>Multiple-sheets text without the trailing "!", or null when the prefix has none.</summary>
    public string MultipleSheets { get; }
    public bool HasMultipleSheets => MultipleSheets != null;

    /// <summary>Whether the prefix starts with a single quote.</summary>
    public bool IsQuoted { get; }

    public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
    {
        Sheet = sheet;
        _fileNumber = fileNumber;
        FileName = fileName;
        FilePath = filePath;
        MultipleSheets = multipleSheets;
        IsQuoted = isQuoted;
    }

    /// <summary>
    /// Create a PrefixInfo class from a parse tree node
    /// </summary>
    /// <param name="prefix">A node of type Prefix</param>
    /// <exception cref="ArgumentException">If the node is not a Prefix</exception>
    internal static PrefixInfo From(ParseTreeNode prefix)
    {
        if (prefix.Type() != GrammarNames.Prefix)
        {
            throw new ArgumentException("Not a prefix", nameof(prefix));
        }

        string filePath = null;
        int? fileNumber = null;
        string fileName = null;
        string sheetName = null;
        string multipleSheets = null;

        // Token number we're processing
        var cur = 0;

        // Check for quotes
        var isQuoted = prefix.ChildNodes[cur].Is("'");
        if (isQuoted)
        {
            cur++;
        }

        // Check and process file
        if (prefix.ChildNodes[cur].Is(GrammarNames.File))
        {
            ParseTreeNode file = prefix.ChildNodes[cur];

            if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
            {
                // Numeric filename: drop the first and last character, leave unset if what remains is not an integer
                fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
            }
            else
            {
                // String filename
                var iCur = 0;
                // Check if it includes a path
                if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePathWindows))
                {
                    filePath = file.ChildNodes[iCur].Print();
                    iCur++;
                }
                fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
            }

            cur++;
        }

        // A prefix can consist of a file only (the "!" is not part of the parse tree),
        // in which case no child is left to inspect for sheet information.
        if (cur < prefix.ChildNodes.Count)
        {
            ParseTreeNode sheetNode = prefix.ChildNodes[cur];

            // Check for a non-quoted sheet
            if (sheetNode.Is(GrammarNames.TokenSheet))
            {
                // remove !
                sheetName = Substr(sheetNode.Print(), 1);
            }
            // Check for a quoted sheet
            else if (sheetNode.Is(GrammarNames.TokenSheetQuoted))
            {
                // remove quote and !
                sheetName = Substr(sheetNode.Print(), 2);

                if (sheetName == "")
                {
                    // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                    // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                    sheetName = " ";
                }
            }
            // Check if multiple sheets
            else if (sheetNode.Is(GrammarNames.TokenMultipleSheets))
            {
                multipleSheets = Substr(sheetNode.Print(), 1);
            }
        }

        return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
    }

    /// <summary>
    /// Extend a quoted sheet token backwards over whitespace that directly precedes it in the source text,
    /// updating both the node's span and its token text.
    /// </summary>
    /// <param name="quotedSheetNode">The quoted sheet token node</param>
    /// <param name="sourceText">The text that was parsed</param>
    internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
    {
        var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
        SourceLocation currentLocation = quotedSheetNode.Span.Location;
        if (newPosition == currentLocation.Position)
        {
            return;
        }

        // NOTE(review): the position moves backwards here, yet this expression makes the column larger
        // (Column + Position - newPosition). Looks like the sign may be inverted - confirm before relying on Column.
        var newLocation = new SourceLocation(newPosition, currentLocation.Line, currentLocation.Column + currentLocation.Position - newPosition);
        quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

        // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
        typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
            ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
    }

    /// <summary>
    /// Walk backwards from the node's start position for as long as the preceding character is whitespace
    /// </summary>
    private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
    {
        var startIndex = nodeSheetQuoted.Span.Location.Position;
        while (startIndex > 0)
        {
            if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
            {
                break;
            }

            startIndex--;
        }

        return startIndex;
    }

    /// <summary>
    /// Return the string without its first <paramref name="removeFirst"/> and last <paramref name="removeLast"/> characters
    /// </summary>
    private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
    {
        return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
    }

    public override bool Equals(object other) => Equals(other as PrefixInfo);

    /// <summary>
    /// Case-insensitive comparison of file path, file name, sheet and multiple sheets, plus the file number.
    /// <see cref="IsQuoted"/> does not take part in the comparison.
    /// </summary>
    public bool Equals(PrefixInfo other)
    {
        if (ReferenceEquals(null, other)) return false;
        if (ReferenceEquals(this, other)) return true;
        return _fileNumber == other._fileNumber
               && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase)
               && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase)
               && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase)
               && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
    }

    public override int GetHashCode()
    {
        unchecked
        {
            var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
            hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
            hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
            hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
            hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
            return hashCode;
        }
    }

    public static bool operator ==(PrefixInfo left, PrefixInfo right)
    {
        return Equals(left, right);
    }

    public static bool operator !=(PrefixInfo left, PrefixInfo right)
    {
        return !Equals(left, right);
    }

    /// <summary>
    /// Rebuild the prefix text from the stored parts, always ending in "!"
    /// </summary>
    public override string ToString()
    {
        var res = new StringBuilder();
        if (IsQuoted) res.Append("'");
        if (HasFilePath) res.Append(FilePath);
        if (HasFileNumber) res.Append($"[{FileNumber}]");
        if (HasFileName) res.Append($"[{FileName}]");
        if (HasSheet) res.Append(Sheet);
        if (HasMultipleSheets) res.Append(MultipleSheets);
        if (IsQuoted) res.Append("'");
        res.Append("!");
        return res.ToString();
    }
}
public Terminal CloseSquareParen => ToTerm("]"); public Terminal OpenSquareParen => ToTerm("["); public Terminal exclamationMark => ToTerm("!"); public Terminal CloseCurlyParen => ToTerm("}"); public Terminal OpenCurlyParen => ToTerm("{"); public Terminal QuoteS => ToTerm("'"); public Terminal mulop => ToTerm("*"); public Terminal plusop => ToTerm("+"); public Terminal divop => ToTerm("/"); public Terminal minop => ToTerm("-"); public Terminal concatop => ToTerm("&"); public Terminal expop => ToTerm("^"); // Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal // Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect); public Terminal percentop => ToTerm("%"); public Terminal gtop => ToTerm(">"); public Terminal eqop => ToTerm("="); public Terminal ltop => ToTerm("<"); public Terminal neqop => ToTerm("<>"); public Terminal gteop => ToTerm(">="); public Terminal lteop => ToTerm("<="); #endregion #region Literals public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE") { Priority = TerminalPriority.Bool }; public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None) { DefaultIntTypes = new[] {TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt} }; public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes); public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes) { Priority = TerminalPriority.SingleQuotedString }; public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, 
"#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A"); public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError); #endregion #region Functions private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers private const string AllUdfChars = SpecialUdfChars + @"\\.\w"; private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)"; // The following regex uses the rather exotic feature Character Class Subtraction // https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\("; public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) {Priority = TerminalPriority.UDF}; public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelFunction { get; } = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + string.Join("|", excelFunctionList) + ")\\(") { Priority = TerminalPriority.ExcelFunction }; // Using this instead of Empty allows a more accurate tree public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument); #endregion #region References and names private const string ColumnPattern = @"(?:[A-Z]{1,2}|[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D])"; public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" 
+ ColumnPattern); public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*"); private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?[1-9][0-9]*"; public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex) { Priority = TerminalPriority.CellToken }; // Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark private const string NameStartCharRegex = @"[\p{L}\\_]"; private const string NameValidCharacterRegex = @"[\w\\_\.\?]"; public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*") { Priority = TerminalPriority.Name }; // Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference. // If we ever parse R1C1 references, make sure to include them here // TODO: Add all function names here private const string NameInvalidWordsRegex = "((TRUE|FALSE)" + NameValidCharacterRegex + "+)" // \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token + "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)" ; // To prevent e.g. 
"A1A1" being parsed as 2 cell tokens public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NameInvalidWordsRegex + NameValidCharacterRegex + "+") { Priority = TerminalPriority.NamedRangeCombination }; public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+") { Priority = TerminalPriority.ReservedName }; #region Structured References //public Terminal SRTableNameToken = new RegexBasedTerminal(GrammarNames.TokenSRTableName, @"[\w\\.]+\[") //{Priority = 0}; public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, @"[\w\\.]+") { Priority = TerminalPriority.SRColumn }; //public Terminal SREnclosedColumnToken = new RegexBasedTerminal(GrammarNames.TokenSREnclosedColumn, @"\[( )*[\w+\\.,:#'""{}$^&*+=->&+\-*/\^%, "; private const string notSheetNameChars = @"'*\[\]\\:/?"; //const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\""; //const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!"; private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+"; private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*"; //private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!"; public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!") { Priority = TerminalPriority.SheetToken }; public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!") { Priority = TerminalPriority.SheetQuotedToken }; private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!"; private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!"; public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex) { Priority = 
TerminalPriority.MultipleSheetsToken }; public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex) { Priority = TerminalPriority.MultipleSheetsToken }; private const string fileNameNumericRegex = @"\[[0-9]+\]"; public Terminal FileToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex) { Priority = TerminalPriority.FileNameNumericToken }; private const string fileNameForbiddenCharacter = @"<>:""/\|?*"; private const string fileNameInBracketsRegex = @"\[[^\[\]]+\]"; public Terminal EnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenEnclosedInBrackets, fileNameInBracketsRegex) { Priority = TerminalPriority.FileName }; // Source: https://stackoverflow.com/a/14632579 private const string fileNameRegex = @"[^\.]+\..{1,4}"; public Terminal FileNameWindowsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameWindows, fileNameRegex) { Priority = TerminalPriority.FileName }; // Source: http://stackoverflow.com/a/6416209/572635 private const string filePathRegex = @"(?:[a-zA-Z]\:|\\\\[\w\.]+\\[\w.$]+)\\(([^" + fileNameForbiddenCharacter + @"\\]| )+\\)*"; public Terminal FilePathWindowsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePathWindows, filePathRegex); #endregion #endregion #endregion #region 2-NonTerminals // Most non-terminals are first defined here, so they can be used anywhere in the rules // Otherwise you can only use non-terminals that have been defined previously public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument); public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments); public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns); public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant); public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula); public NonTerminal ArrayRows{ get; } 
= new NonTerminal(GrammarNames.ArrayRows); public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool); public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell); public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant); public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray); public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange); public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument); public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error); public NonTerminal File { get; } = new NonTerminal(GrammarNames.File); public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula); public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq); public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall); public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName); public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange); public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp); public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange); public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number); public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp); public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix); public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp); public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet); public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference); //public NonTerminal ReferenceFunction{ get; } = new NonTerminal(GrammarNames.ReferenceFunction); public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem); public NonTerminal ReferenceFunctionCall{ 
get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall); public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError); public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName); public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName); public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet); public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart); public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference); public NonTerminal StructuredReferenceElement { get; } = new NonTerminal(GrammarNames.StructuredReferenceElement); public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression); //public NonTerminal StructuredReferenceKeyword { get; } = new NonTerminal(GrammarNames.StructuredReferenceKeyword); public NonTerminal StructuredReferenceTable { get; } = new NonTerminal(GrammarNames.StructuredReferenceTable); public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text); public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName); public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall); public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union); public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange); #endregion public ExcelFormulaGrammar() : base(false) { #region Punctuation MarkPunctuation(OpenParen, CloseParen); MarkPunctuation(OpenSquareParen, CloseSquareParen); MarkPunctuation(OpenCurlyParen, CloseCurlyParen); //exclamationMark.SetFlag(TermFlags.IsDelimiter); #endregion #region Rules #region Base rules Root = Start; Start.Rule = FormulaWithEq | Formula | ArrayFormula ; MarkTransient(Start); ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen; FormulaWithEq.Rule = eqop + Formula; Formula.Rule = Reference | Constant | FunctionCall | 
ConstantArray | OpenParen + Formula + CloseParen | ReservedName ; //MarkTransient(Formula); ReservedName.Rule = ReservedNameToken; Constant.Rule = Number | Text | Bool | Error ; Text.Rule = TextToken; Number.Rule = NumberToken; Bool.Rule = BoolToken; Error.Rule = ErrorToken; RefError.Rule = RefErrorToken; #endregion #region Functions FunctionCall.Rule = FunctionName + Arguments + CloseParen | PrefixOp + Formula | Formula + PostfixOp | Formula + InfixOp + Formula ; FunctionName.Rule = ExcelFunction; Arguments.Rule = MakeStarRule(Arguments, comma, Argument); //Arguments.Rule = Argument | Argument + comma + Arguments; EmptyArgument.Rule = EmptyArgumentToken; Argument.Rule = Formula | EmptyArgument; //MarkTransient(Argument); PrefixOp.Rule = ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop; MarkTransient(PrefixOp); InfixOp.Rule = expop | mulop | divop | plusop | minop | concatop | gtop | eqop | ltop | neqop | gteop | lteop; MarkTransient(InfixOp); //PostfixOp.Rule = ImplyPrecedenceHere(Precedence.UnaryPostFix) + percentop; // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action PostfixOp.Rule = PreferShiftHere() + percentop; MarkTransient(PostfixOp); #endregion #region References Reference.Rule = ReferenceItem | ReferenceFunctionCall | OpenParen + Reference + PreferShiftHere() + CloseParen | Prefix + ReferenceItem | DynamicDataExchange ; ReferenceFunctionCall.Rule = Reference + colon + Reference | Reference + intersectop + Reference | OpenParen + Union + CloseParen | RefFunctionName + Arguments + CloseParen ; RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken; Union.Rule = MakePlusRule(Union, comma, Reference); ReferenceItem.Rule = Cell | NamedRange | VRange | HRange | RefError | UDFunctionCall | StructuredReference ; MarkTransient(ReferenceItem); UDFunctionCall.Rule = UDFName + Arguments + 
CloseParen; UDFName.Rule = UDFToken; VRange.Rule = VRangeToken; HRange.Rule = HRangeToken; Cell.Rule = CellToken; File.Rule = FileToken | EnclosedInBracketsToken | FilePathWindowsToken + EnclosedInBracketsToken | FilePathWindowsToken + FileNameWindowsToken ; DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken; NamedRange.Rule = NameToken | NamedRangeCombinationToken; Prefix.Rule = SheetToken | QuoteS + SheetQuotedToken | File + SheetToken | QuoteS + File + SheetQuotedToken | File + exclamationMark | MultipleSheetsToken | QuoteS + MultipleSheetsQuotedToken | File + MultipleSheetsToken | QuoteS + File + MultipleSheetsQuotedToken | RefErrorToken ; StructuredReferenceElement.Rule = OpenSquareParen + SRColumnToken + CloseSquareParen | OpenSquareParen + NameToken + CloseSquareParen | EnclosedInBracketsToken; //StructuredReferenceKeyword.Rule = EnclosedInBracketsToken; StructuredReferenceTable.Rule = NameToken; StructuredReferenceExpression.Rule = StructuredReferenceElement | at + StructuredReferenceElement | StructuredReferenceElement + colon + StructuredReferenceElement | at + StructuredReferenceElement + colon + StructuredReferenceElement | StructuredReferenceElement + comma + StructuredReferenceElement | StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement | StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement | StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement ; StructuredReference.Rule = StructuredReferenceElement | OpenSquareParen + StructuredReferenceExpression + CloseSquareParen | StructuredReferenceTable + StructuredReferenceElement | StructuredReferenceTable + OpenSquareParen + CloseSquareParen | StructuredReferenceTable + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen ; #endregion #region Arrays ConstantArray.Rule = OpenCurlyParen + 
ArrayColumns + CloseCurlyParen; ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows); ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant); ArrayConstant.Rule = Constant | PrefixOp + Number | RefError; #endregion #endregion #region 5-Operator Precedence // Some of these operators are neutral associative instead of left associative, // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific // structure of the parse tree, we like consistency. RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop); RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop); RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop); RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop); RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop); RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop); RegisterOperators(Precedence.Union, Associativity.Left, comma); RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop); RegisterOperators(Precedence.Range, Associativity.Left, colon); //RegisterOperators(Precedence.ParameterSeparator, comma); #endregion } #region Precedence and Priority constants // Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a // Could also be an enum, but this way you don't need int casts private static class Precedence { // Don't use priority 0, Irony seems to view it as no priority set public const int Comparison = 1; public const int Concatenation = 2; public const int Addition = 3; public const int Multiplication = 4; public const int Exponentiation = 5; public const int UnaryPostFix = 6; public const int UnaryPreFix = 7; //public const int Reference = 8; public const int Union = 9; public const int Intersection = 10; public const int Range = 11; } // Terminal priorities, 
indicates to lexer which token it should pick when multiple tokens can match // E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority // E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination private static class TerminalPriority { // Irony Low value //public const int Low = -1000; public const int SRColumn = -900; public const int Name = -800; public const int ReservedName = -700; public const int FileName = -500; public const int SingleQuotedString = -100; // Irony Normal value, default value //public const int Normal = 0; public const int Bool = 0; public const int MultipleSheetsToken = 100; // Irony High value //public const int High = 1000; public const int CellToken = 1000; public const int NamedRangeCombination = 1100; public const int UDF = 1150; public const int ExcelFunction = 1200; public const int ExcelRefFunction = 1200; public const int FileNameNumericToken = 1200; public const int SheetToken = 1200; public const int SheetQuotedToken = 1200; } #endregion private static string[] excelFunctionList => GetExcelFunctionList(); private static string[] GetExcelFunctionList() { var resource = Properties.Resources.ExcelBuiltinFunctionList_v141; using (var sr = new StringReader(resource)) return sr.ReadToEnd().Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries); } } #region Names /// /// Collection of names used for terminals and non-terminals in the Excel Formula Grammar. /// /// /// Using these is strongly recommended, as these will change when breaking changes occur. /// It also allows you to see which code works on what grammar constructs. /// // Keep these constants instead of methods/properties, since that allows them to be used in switch statements. 
public static class GrammarNames
    {
        #region Non-Terminals
        // Names given to the grammar's non-terminals
        public const string Argument = "Argument";
        public const string Arguments = "Arguments";
        public const string ArrayColumns = "ArrayColumns";
        public const string ArrayConstant = "ArrayConstant";
        public const string ArrayFormula = "ArrayFormula";
        public const string ArrayRows = "ArrayRows";
        public const string Bool = "Bool";
        public const string Cell = "Cell";
        public const string Constant = "Constant";
        public const string ConstantArray = "ConstantArray";
        public const string DynamicDataExchange = "DynamicDataExchange";
        public const string EmptyArgument = "EmptyArgument";
        public const string Error = "Error";
        public const string ExcelFunction = "ExcelFunction";
        public const string File = "File";
        public const string Formula = "Formula";
        public const string FormulaWithEq = "FormulaWithEq";
        public const string FunctionCall = "FunctionCall";
        public const string FunctionName = "FunctionName";
        public const string HorizontalRange = "HRange";
        public const string NamedRange = "NamedRange";
        public const string Number = "Number";
        public const string Prefix = "Prefix";
        public const string QuotedFileSheet = "QuotedFileSheet";
        public const string Range = "Range";
        public const string Reference = "Reference";
        public const string ReferenceFunctionCall = "ReferenceFunctionCall";
        public const string RefError = "RefError";
        public const string RefFunctionName = "RefFunctionName";
        public const string ReservedName = "ReservedName";
        public const string Sheet = "Sheet";
        public const string StructuredReference = "StructuredReference";
        public const string StructuredReferenceElement = "StructuredReferenceElement";
        public const string StructuredReferenceExpression = "StructuredReferenceExpression";
        public const string StructuredReferenceTable = "StructuredReferenceTable";
        public const string Text = "Text";
        public const string UDFName = "UDFName";
        public const string UDFunctionCall = "UDFunctionCall";
        public const string Union = "Union";
        public const string VerticalRange = "VRange";
        #endregion

        #region Transient Non-Terminals
        // Names of the non-terminals the grammar marks as transient
        public const string TransientStart = "Start";
        public const string TransientInfixOp = "InfixOp";
        public const string TransientPostfixOp = "PostfixOp";
        public const string TransientPrefixOp = "PrefixOp";
        public const string TransientReferenceItem = "ReferenceItem";
        #endregion

        #region Terminals
        // Names given to the grammar's terminals (tokens)
        public const string TokenBool = "BoolToken";
        public const string TokenCell = "CellToken";
        public const string TokenEmptyArgument = "EmptyArgumentToken";
        public const string TokenError = "ErrorToken";
        public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
        public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
        public const string TokenFilePathWindows = "FilePathWindowsToken";
        public const string TokenFileNameWindows = "FileNameWindowsToken";
        public const string TokenEnclosedInBrackets = "EnclosedInBracketsToken";
        public const string TokenFileNameNumeric = "FileNameNumericToken";
        public const string TokenHRange = "HRangeToken";
        public const string TokenIntersect = "INTERSECT";
        public const string TokenMultipleSheets = "MultipleSheetsToken";
        public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
        public const string TokenName = "NameToken";
        public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
        public const string TokenNumber = "NumberToken";
        public const string TokenRefError = "RefErrorToken";
        public const string TokenReservedName = "ReservedNameToken";
        public const string TokenSingleQuotedString = "SingleQuotedString";
        public const string TokenSheet = "SheetNameToken";
        public const string TokenSheetQuoted = "SheetNameQuotedToken";
        public const string TokenSRTableName = "SRTableName";
        public const string TokenSRKeyword = "SRKeyword";
        public const string TokenSRColumn = "SRColumn";
        public const string TokenSREnclosedColumn = "SREnclosedColumn";
        public const string TokenText = "TextToken";
        public const string TokenUDF = "UDFToken";
        public const string TokenUnionOperator = ",";
        public const string TokenVRange = "VRangeToken";
        #endregion
    }
    #endregion
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v141/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v141
{
    ///
    /// Excel formula parser
/// Contains the parser and utilities that operate directly on the parse tree or make working with the parse tree easier. ///
public static class ExcelFormulaParser { /// /// Thread-local singleton parser instance /// [ThreadStatic] private static Parser _p; /// /// Thread-safe parser /// private static Parser P => _p ?? (_p = new Parser(new ExcelFormulaGrammar())); /// /// Parse a formula, return the the tree's root node /// /// The formula to be parsed. /// /// If formula could not be parsed /// /// Parse tree root node public static ParseTreeNode Parse(string input) { return ParseToTree(input).Root; } /// /// Parse a formula, return the the tree /// /// The formula to be parsed. /// /// If formula could not be parsed /// /// Parse tree public static ParseTree ParseToTree(string input) { var tree = P.Parse(input); if (tree.HasErrors()) { throw new ArgumentException("Failed parsing input <<" + input + ">>"); } var intersects = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenIntersect)); foreach (ParseTreeNode intersect in intersects) { var newLocation = new SourceLocation(intersect.Span.Location.Position - 1, intersect.Span.Location.Line, intersect.Span.Location.Column - 1); intersect.Span = new SourceSpan(newLocation, 1); } var quotedSheetNodes = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenSheetQuoted)); foreach (ParseTreeNode quotedSheetNode in quotedSheetNodes) { PrefixInfo.FixQuotedSheetNodeForWhitespace(quotedSheetNode, input); } return tree; } /// /// Non-terminal nodes in depth-first pre-order, with a conditional stop /// /// The root node /// Don't process the children of a node matching this predicate // inspiration taken from https://irony.codeplex.com/discussions/213938 public static IEnumerable AllNodesConditional(this ParseTreeNode root, Predicate stopAt = null) { var stack = new Stack(); stack.Push(root); while (stack.Count > 0) { var node = stack.Pop(); yield return node; // Check if we don't want to process the children of this node if (stopAt != null && stopAt(node)) continue; var children = node.ChildNodes; // Push children on in reverse 
order so that they will // be evaluated left -> right when popped. for (int i = children.Count - 1; i >= 0; i--) { stack.Push(children[i]); } } } /// /// All non-terminal nodes in depth-first pre-order /// public static IEnumerable AllNodes(this ParseTreeNode root) { return AllNodesConditional(root); } /// /// All non-terminal nodes of a certain type in depth-first pre-order /// public static IEnumerable AllNodes(this ParseTreeNode root, string type) { return AllNodes(root.AllNodes(), type); } internal static IEnumerable AllNodes(IEnumerable allNodes, string type) { return allNodes.Where(node => node.Is(type)); } /// /// Get the parent node of a node /// /// /// This is an expensive operation, as the whole tree will be searched through /// public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot) { var parent = treeRoot.AllNodes() .FirstOrDefault(node => node.ChildNodes.Any(c => c == child)); if(parent == null) throw new ArgumentException("Child is not part of the tree", nameof(child)); return parent; } /// /// The node type/name /// public static string Type(this ParseTreeNode node) { return node.Term.Name; } /// /// Check if a node is of a particular type /// public static bool Is(this ParseTreeNode pt, string type) { return pt.Type() == type; } /// /// Checks whether this node is a function /// public static Boolean IsFunction(this ParseTreeNode input) { return input.Is(GrammarNames.FunctionCall) || input.Is(GrammarNames.ReferenceFunctionCall) || input.Is(GrammarNames.UDFunctionCall) // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable || (input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction()) ; } /// /// Whether or not this node represents parentheses "(_)" /// public static bool IsParentheses(this ParseTreeNode input) { switch (input.Type()) { case GrammarNames.Formula: return input.ChildNodes.Count == 1 && 
input.ChildNodes[0].Is(GrammarNames.Formula); case GrammarNames.Reference: return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Reference); default: return false; } } public static bool IsBinaryOperation(this ParseTreeNode input) { return input.IsFunction() && input.ChildNodes.Count == 3 && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator); } public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input) { return input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall); } public static bool IsBinaryReferenceOperation(this ParseTreeNode input) { return input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall); } public static bool IsUnaryOperation(this ParseTreeNode input) { return IsUnaryPrefixOperation(input) || IsUnaryPostfixOperation(input); } public static bool IsUnaryPrefixOperation(this ParseTreeNode input) { return input.IsFunction() && input.ChildNodes.Count == 2 && input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator); } public static bool IsUnaryPostfixOperation(this ParseTreeNode input) { return input.IsFunction() && input.ChildNodes.Count == 2 && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator); } private static string RemoveFinalSymbol(string input) { input = input.Substring(0, input.Length - 1); return input; } /// /// Get the function or operator name of this function call /// public static string GetFunction(this ParseTreeNode input) { if (input.IsIntersection()) { return GrammarNames.TokenIntersect; } if (input.IsUnion()) { return GrammarNames.TokenUnionOperator; } if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation()) { return input.ChildNodes[1].Print(); } if (input.IsUnaryPrefixOperation()) { return input.ChildNodes[0].Print(); } if (input.IsNamedFunction()) { return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpper(); } if (input.IsExternalUDFunction()) { return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}"; } throw new 
ArgumentException("Not a function call", nameof(input)); } /// /// Check if this node is a specific function /// public static bool MatchFunction(this ParseTreeNode input, string functionName) { return IsFunction(input) && GetFunction(input) == functionName; } /// /// Get all the arguments of a function or operation /// public static IEnumerable GetFunctionArguments(this ParseTreeNode input) { if (input.IsNamedFunction()) { return input .ChildNodes[1] // "Arguments" non-terminal .ChildNodes // "Argument" non-terminals .Select(node => node.ChildNodes[0]) ; } if (input.IsBinaryOperation()) { return new[] {input.ChildNodes[0], input.ChildNodes[2]}; } if (input.IsUnaryPrefixOperation()) { return new[] {input.ChildNodes[1]}; } if (input.IsUnaryPostfixOperation()) { return new[] {input.ChildNodes[0]}; } if (input.IsUnion()) { return input.ChildNodes[0].ChildNodes; } if (input.IsExternalUDFunction()) { return input // Reference .ChildNodes[1] // UDFunctionCall .ChildNodes[1] // Arguments .ChildNodes // Argument non-terminals .Select(node => node.ChildNodes[0]) ; } throw new ArgumentException("Not a function call", nameof(input)); } /// /// Checks whether this node is a built-in excel function /// public static bool IsBuiltinFunction(this ParseTreeNode node) { return node.IsFunction() && (node.ChildNodes[0].Is(GrammarNames.FunctionName) || node.ChildNodes[0].Is(GrammarNames.RefFunctionName)); } /// /// Whether or not this node represents an intersection /// public static bool IsIntersection(this ParseTreeNode input) { return IsBinaryOperation(input) && input.ChildNodes[1].Token.Terminal.Name == GrammarNames.TokenIntersect; } /// /// Whether or not this node represents an union /// public static bool IsUnion(this ParseTreeNode input) { return input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Union); } /// /// Checks whether this node is a function call with name, and not just a unary or binary operation /// 
public static bool IsNamedFunction(this ParseTreeNode input)
        {
            return (input.Is(GrammarNames.FunctionCall) && input.ChildNodes[0].Is(GrammarNames.FunctionName))
                   || (input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes[0].Is(GrammarNames.RefFunctionName))
                   || input.Is(GrammarNames.UDFunctionCall);
        }

        /// <summary>
        /// True if this node is a binary or a unary operation
        /// </summary>
        public static bool IsOperation(this ParseTreeNode input)
        {
            return input.IsBinaryOperation() || input.IsUnaryOperation();
        }

        /// <summary>
        /// True if this node is a Reference with exactly two children whose second child is a named function
        /// </summary>
        public static bool IsExternalUDFunction(this ParseTreeNode input)
        {
            return input.Is(GrammarNames.Reference)
                   && input.ChildNodes.Count == 2
                   && input.ChildNodes[1].IsNamedFunction();
        }

        /// <summary>
        /// True if this node presents a number constant with a sign
        /// </summary>
        public static bool IsNumberWithSign(this ParseTreeNode input)
        {
            return IsUnaryPrefixOperation(input)
                   && input.ChildNodes[1].ChildNodes[0].Is(GrammarNames.Constant)
                   && input.ChildNodes[1].ChildNodes[0].ChildNodes[0].Is(GrammarNames.Number);
        }

        /// <summary>
        /// Extract all of the information from a Prefix non-terminal
        /// </summary>
        public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix) => PrefixInfo.From(prefix);

        /// <summary>
        /// Go to the first non-formula child node
        /// </summary>
        public static ParseTreeNode SkipFormula(this ParseTreeNode input)
        {
            while (input.Is(GrammarNames.Formula))
            {
                input = input.ChildNodes.First();
            }

            return input;
        }

        /// <summary>
        /// Get all child nodes that are references and aren't part of another reference expression
        /// </summary>
        public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
        {
            return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
                .Where(node => node.Is(GrammarNames.Reference))
                .Select(node => node.SkipToRelevant())
                ;
        }

        /// <summary>
        /// Gets the ParserReferences from the input parse tree node and its children
        /// </summary>
        /// <remarks>
        /// 5 cases:
        /// 1. ReferenceItem node: convert to ParserReference
        /// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
        ///    (to include the references in the arguments of external UDFs)
        /// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
        /// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
        /// 5. Other cases (RefFunctionCall, Union, Arguments):recursive call on the nodes returned from GetReferenceNodes(node)
        /// </remarks>
        public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
        {
            // A Reference with a single child is only a wrapper; look at what it wraps
            if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
            {
                node = node.ChildNodes[0];
            }

            var list = new List<ParserReference>();
            switch (node.Type())
            {
                case GrammarNames.Cell:
                case GrammarNames.NamedRange:
                case GrammarNames.HorizontalRange:
                case GrammarNames.VerticalRange:
                case GrammarNames.StructuredReference:
                    list.Add(new ParserReference(node));
                    break;
                case GrammarNames.Reference:
                    list.Add(new ParserReference(node));
                    list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
                    break;
                default:
                    if (node.IsRange())
                    {
                        var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray();
                        var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray();
                        if (IsCellReference(rangeStart) && IsCellReference(rangeEnd))
                        {
                            // Both limits are single cells: reuse the start reference and widen it into a CellRange
                            ParserReference range = rangeStart.First();
                            range.MaxLocation = rangeEnd.First().MinLocation;
                            range.ReferenceType = ReferenceType.CellRange;
                            range.LocationString = node.Print();
                            list.Add(range);
                        }
                        else
                        {
                            list.AddRange(rangeStart);
                            list.AddRange(rangeEnd);
                        }
                    }
                    else
                    {
                        list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
                    }

                    break;
            }

            return list;
        }

        /// <summary>
        /// True if the list holds exactly one reference and that reference is of type Cell
        /// </summary>
        private static bool IsCellReference(IList<ParserReference> references)
        {
            return references.Count == 1 && references.First().ReferenceType == ReferenceType.Cell;
        }

        /// <summary>
        /// Whether or not this node represents a range
        /// </summary>
        public static bool IsRange(this
ParseTreeNode input)
        {
            return input.IsBinaryReferenceOperation() && input.ChildNodes[1].Is(":");
        }

        /// <summary>
        /// Go to the first "relevant" child node, i.e. skips wrapper nodes
        /// </summary>
        /// <param name="input">The input parse tree node</param>
        /// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
        /// <remarks>
        /// Skips:
        /// * FormulaWithEq and ArrayFormula nodes
        /// * Formula nodes
        /// * Parentheses
        /// * Reference nodes which are just wrappers
        /// </remarks>
        public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
        {
            while (true)
            {
                switch (input.Type())
                {
                    case GrammarNames.FormulaWithEq:
                    case GrammarNames.ArrayFormula:
                        input = input.ChildNodes[1];
                        break;
                    case GrammarNames.Argument:
                    case GrammarNames.Formula:
                        if (input.ChildNodes.Count == 1)
                        {
                            input = input.ChildNodes[0];
                        }
                        else
                        {
                            return input;
                        }

                        break;
                    case GrammarNames.Reference:
                        // Skip references which are parentheses
                        // Skip references without a prefix (=> they only have one child node) if the option is set
                        if ((skipReferencesWithoutPrefix && input.ChildNodes.Count == 1) || input.IsParentheses())
                        {
                            input = input.ChildNodes[0];
                        }
                        else
                        {
                            return input;
                        }

                        break;
                    default:
                        return input;
                }
            }
        }

        /// <summary>
        /// Pretty-print a parse tree to a string
        /// </summary>
        /// <exception cref="ArgumentException">The node type is not handled and the node does not have exactly one child</exception>
        public static string Print(this ParseTreeNode input)
        {
            // For terminals, just print the token text
            if (input.Term is Terminal)
            {
                return input.Token.Text;
            }

            // (Lazy) enumerable for printed children
            var children = input.ChildNodes.Select(Print);
            // Concrete list when needed
            List<string> childrenList;

            // Switch on non-terminals
            switch (input.Term.Name)
            {
                case GrammarNames.Formula:
                    // Check if these are brackets, otherwise print first child
                    return IsParentheses(input) ? $"({children.First()})" : children.First();

                case GrammarNames.FunctionCall:
                case GrammarNames.ReferenceFunctionCall:
                case GrammarNames.UDFunctionCall:
                    childrenList = children.ToList();

                    if (input.IsNamedFunction())
                    {
                        return string.Join("", childrenList) + ")";
                    }

                    if (input.IsBinaryOperation())
                    {
                        // format string for "normal" binary operation
                        string format = "{0} {1} {2}";
                        if (input.IsIntersection())
                        {
                            format = "{0} {2}";
                        }
                        else if (input.IsBinaryReferenceOperation())
                        {
                            format = "{0}{1}{2}";
                        }

                        return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
                    }

                    if (input.IsUnion())
                    {
                        return $"({string.Join(",", childrenList)})";
                    }

                    if (input.IsUnaryOperation())
                    {
                        return string.Join("", childrenList);
                    }

                    throw new ArgumentException("Unknown function type.");

                case GrammarNames.Reference:
                    return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);

                case GrammarNames.Prefix:
                    var ret = string.Join("", children);
                    // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
                    if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
                    {
                        ret += "!";
                    }

                    return ret;

                case GrammarNames.ArrayFormula:
                    // Print only the second child; going through the lazy enumerable would also print
                    // the first child just to throw the result away
                    return "{=" + input.ChildNodes[1].Print() + "}";

                case GrammarNames.StructuredReference:
                    var sb = new StringBuilder();
                    var hashtable = input.ChildNodes.Count >= 1 && input.ChildNodes[0].Is(GrammarNames.StructuredReferenceTable);
                    var contentsNode = hashtable ? 1 : 0;
                    childrenList = children.ToList();
                    if (hashtable)
                    {
                        sb.Append(childrenList[0]);
                    }

                    if (hashtable && input.ChildNodes.Count == 1)
                    {
                        // Full table reference
                        sb.Append("[]");
                    }
                    else if (input.ChildNodes[contentsNode].Is(GrammarNames.StructuredReferenceElement))
                    {
                        sb.Append(childrenList[contentsNode]);
                    }
                    else
                    {
                        sb.Append($"[{childrenList[contentsNode]}]");
                    }

                    return sb.ToString();

                // Terms for which to print all child nodes concatenated
                case GrammarNames.ArrayConstant:
                case GrammarNames.DynamicDataExchange:
                case GrammarNames.FormulaWithEq:
                case GrammarNames.File:
                case GrammarNames.StructuredReferenceExpression:
                    return string.Join("", children);

                // Terms for which we print the children comma-separated
                case GrammarNames.Arguments:
                case GrammarNames.ArrayRows:
                case GrammarNames.Union:
                    return string.Join(",", children);

                case GrammarNames.ArrayColumns:
                    return string.Join(";", children);

                case GrammarNames.ConstantArray:
                    return $"{{{children.First()}}}";

                default:
                    // If it is not defined above and the number of children is exactly one, we want to just print the first child
                    if (input.ChildNodes.Count == 1)
                    {
                        return children.First();
                    }

                    throw new ArgumentException($"Could not print node of type '{input.Term.Name}'."
                                                + Environment.NewLine
                                                + "This probably means the Excel grammar was modified without the print function being modified");
            }
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v141/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v141
{
    public enum ReferenceType
    {
        Cell,
        CellRange,
        UserDefinedName,
        HorizontalRange,
        VerticalRange,
        RefError,
        Table
    }

    public class ParserReference
    {
        public const int MaxRangeHeight = 100;
        public const int MaxRangeWidth = 100;

        public ReferenceType ReferenceType { get; set; }
        public string LocationString { get; set; }
        public string Worksheet { get; set; }
        public string LastWorksheet { get; set; }
        public string FileName { get; set; }
        public string Name { get; private set; }
        public string MinLocation { get; set; } //Location as appearing in the formula, eg $A$1
        public string MaxLocation { get; set; }

        /// <summary>
        /// Creates a reference from explicit values; MaxLocation falls back to <paramref name="minLocation"/> when
        /// <paramref name="maxLocation"/> is null
        /// </summary>
        public ParserReference(ReferenceType referenceType, string locationString = null, string worksheet = null, string lastWorksheet = null,
            string fileName = null, string name = null, string minLocation = null, string maxLocation = null)
        {
            ReferenceType = referenceType;
            LocationString = locationString;
            Worksheet = worksheet;
            LastWorksheet = lastWorksheet;
            FileName = fileName;
            Name = name;
            MinLocation = minLocation;
            MaxLocation = maxLocation ?? minLocation;
        }

        /// <summary>
        /// Creates a reference by initializing it from a parse tree node
        /// </summary>
        public ParserReference(ParseTreeNode node)
        {
            InitializeReference(node);
        }

        /// <summary>
        /// Initializes the current object based on the input ParseTreeNode
        /// </summary>
        /// <remarks>
        /// For Reference nodes (Prefix ReferenceItem), it initialize the values derived from the Prefix node and
        /// is re-invoked for the ReferenceItem node.
        /// </remarks>
        public void InitializeReference(ParseTreeNode node)
        {
            switch (node.Type())
            {
                case GrammarNames.Reference:
                    PrefixInfo prefix = node.ChildNodes[0].GetPrefixInfo();
                    Worksheet = prefix.HasSheet ?
prefix.Sheet.Replace("''", "'") : "(Undefined sheet)";

                    if (prefix.HasMultipleSheets)
                    {
                        string[] sheets = prefix.MultipleSheets.Split(':');
                        Worksheet = sheets[0];
                        LastWorksheet = sheets[1];
                    }

                    if (prefix.HasFileNumber)
                    {
                        FileName = prefix.FileNumber.ToString();
                    }
                    else if (prefix.HasFileName)
                    {
                        FileName = prefix.FileName;
                    }
                    else
                    {
                        FileName = null;
                    }

                    // The second child is the referenced item itself; it fills in type and location
                    InitializeReference(node.ChildNodes[1]);
                    break;
                case GrammarNames.Cell:
                    ReferenceType = ReferenceType.Cell;
                    MinLocation = node.ChildNodes[0].Token.ValueString;
                    MaxLocation = MinLocation;
                    break;
                case GrammarNames.NamedRange:
                    ReferenceType = ReferenceType.UserDefinedName;
                    Name = node.ChildNodes[0].Token.ValueString;
                    break;
                case GrammarNames.StructuredReference:
                    ReferenceType = ReferenceType.Table;
                    Name = node.ChildNodes.FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceTable)?.ChildNodes[0].Token.ValueString;
                    break;
                case GrammarNames.HorizontalRange:
                    string[] horizontalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                    ReferenceType = ReferenceType.HorizontalRange;
                    MinLocation = "A" + horizontalLimits[0];
                    MaxLocation = ConvertColumnToStr(MaxRangeWidth - 1) + horizontalLimits[1];
                    break;
                case GrammarNames.VerticalRange:
                    string[] verticalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                    ReferenceType = ReferenceType.VerticalRange;
                    MinLocation = verticalLimits[0] + "1";
                    MaxLocation = verticalLimits[1] + MaxRangeHeight;
                    break;
                case GrammarNames.RefError:
                    ReferenceType = ReferenceType.RefError;
                    break;
            }

            // Runs after any recursive call above, so the outermost node's text wins
            LocationString = node.Print();
        }

        /// <summary>
        /// Converts the column number to an Excel column string representation.
        /// </summary>
        /// <param name="columnNumber">The zero-based column number.</param>
        private string ConvertColumnToStr(int columnNumber)
        {
            var sb = new System.Text.StringBuilder();
            while (columnNumber >= 0)
            {
                sb.Insert(0, (char)(65 + columnNumber % 26));
                columnNumber = columnNumber / 26 - 1;
            }

            return sb.ToString();
        }

        /// <summary>
        /// Returns MinLocation for a Cell reference and "MinLocation:MaxLocation" for every other reference type
        /// </summary>
        public override string ToString()
        {
            // MinLocation is already a string; calling ToString() on it only added a NullReferenceException
            // for Cell references created without a location
            return ReferenceType == ReferenceType.Cell ?
                (MinLocation ?? string.Empty) : string.Format("{0}:{1}", MinLocation, MaxLocation);
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v141/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v141
{
    // TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23

    /// <summary>
    /// Simple data class that holds information about a Prefix.
    /// </summary>
    public class PrefixInfo : IEquatable<PrefixInfo>
    {
        public string FilePath { get; }
        public bool HasFilePath => FilePath != null;

        private readonly int? _fileNumber;
        public int FileNumber => _fileNumber.GetValueOrDefault();
        public bool HasFileNumber => _fileNumber.HasValue;

        public string FileName { get; }
        public bool HasFileName => FileName != null;

        public bool HasFile => HasFileName || HasFileNumber;

        public string Sheet { get; }
        public bool HasSheet => Sheet != null;

        public string MultipleSheets { get; }
        public bool HasMultipleSheets => MultipleSheets != null;

        public bool IsQuoted { get; }

        public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
        {
            Sheet = sheet;
            _fileNumber = fileNumber;
            FileName = fileName;
            FilePath = filePath;
            MultipleSheets = multipleSheets;
            IsQuoted = isQuoted;
        }

        /// <summary>
        /// Create a PrefixInfo class from a parse tree node
        /// </summary>
        /// <exception cref="ArgumentException">The node is not a Prefix node</exception>
        internal static PrefixInfo From(ParseTreeNode prefix)
        {
            if (prefix.Type() != GrammarNames.Prefix)
            {
                throw new ArgumentException("Not a prefix", nameof(prefix));
            }

            string filePath = null;
            int?
fileNumber = null;
            string fileName = null;
            string sheetName = null;
            string multipleSheets = null;

            // Token number we're processing
            var cur = 0;

            // Check for quotes
            var isQuoted = prefix.ChildNodes[cur].Is("'");
            if (isQuoted)
            {
                cur++;
            }

            // Check and process file
            if (prefix.ChildNodes[cur].Is(GrammarNames.File))
            {
                ParseTreeNode file = prefix.ChildNodes[cur];

                if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
                {
                    // Numeric filename
                    fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
                }
                else
                {
                    // String filename
                    var iCur = 0;
                    // Check if it includes a path
                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePathWindows))
                    {
                        filePath = file.ChildNodes[iCur].Print();
                        iCur++;
                    }

                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenEnclosedInBrackets))
                    {
                        fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                    }
                    else
                    {
                        fileName = file.ChildNodes[iCur].Print();
                    }
                }

                cur++;
            }

            // The file node may have been the last child (a prefix that consists of a file only);
            // only look for a sheet when there is a node left to inspect
            if (cur < prefix.ChildNodes.Count)
            {
                ParseTreeNode sheetNode = prefix.ChildNodes[cur];

                // Check for a non-quoted sheet
                if (sheetNode.Is(GrammarNames.TokenSheet))
                {
                    sheetName = Substr(sheetNode.Print(), 1);
                }
                // Check for a quoted sheet
                else if (sheetNode.Is(GrammarNames.TokenSheetQuoted))
                {
                    // remove quote and !
                    sheetName = Substr(sheetNode.Print(), 2);

                    if (sheetName == "")
                    {
                        // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                        // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                        sheetName = " ";
                    }
                }
                // Check if multiple sheets
                else if (sheetNode.Is(GrammarNames.TokenMultipleSheets))
                {
                    multipleSheets = Substr(sheetNode.Print(), 1);
                }
            }

            return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
        }

        /// <summary>
        /// Moves the start of a quoted sheet node back over the whitespace directly in front of it in
        /// <paramref name="sourceText"/>, and updates the node's span and token text to match
        /// </summary>
        internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
        {
            var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
            SourceLocation currentLocation = quotedSheetNode.Span.Location;
            if (newPosition == currentLocation.Position)
            {
                return;
            }

            // The start moves backwards by `shift` characters, so the column has to move backwards by the
            // same amount (the previous code added the shift instead). Clamped at 0 for the case where the
            // skipped whitespace contains a line break, which this adjustment cannot represent.
            var shift = currentLocation.Position - newPosition;
            var newLocation = new SourceLocation(newPosition, currentLocation.Line, Math.Max(0, currentLocation.Column - shift));
            quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

            // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
            typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
                ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
        }

        /// <summary>
        /// Walks backwards from the node's start position while the preceding character is whitespace
        /// and returns the resulting index
        /// </summary>
        private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
        {
            var startIndex = nodeSheetQuoted.Span.Location.Position;
            while (startIndex > 0)
            {
                if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
                {
                    break;
                }

                startIndex--;
            }

            return startIndex;
        }

        /// <summary>
        /// Returns <paramref name="s"/> without its first <paramref name="removeFirst"/> and last
        /// <paramref name="removeLast"/> characters
        /// </summary>
        private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
        {
            return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
        }

        public override bool Equals(object other) => Equals(other as PrefixInfo);

        /// <summary>
        /// Compares file number, file path, file name, sheet and multiple sheets; strings are compared
        /// case-insensitively (ordinal). IsQuoted does not take part in the comparison.
        /// </summary>
        public bool Equals(PrefixInfo other)
        {
            if (ReferenceEquals(null, other)) return false;
            if (ReferenceEquals(this, other)) return true;
            return _fileNumber == other._fileNumber
                   && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(Sheet, other.Sheet,
StringComparison.OrdinalIgnoreCase)
                   && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
        }

        public override int GetHashCode()
        {
            unchecked
            {
                var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
                hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
                hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
                hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
                hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
                return hashCode;
            }
        }

        public static bool operator ==(PrefixInfo left, PrefixInfo right)
        {
            return Equals(left, right);
        }

        public static bool operator !=(PrefixInfo left, PrefixInfo right)
        {
            return !Equals(left, right);
        }

        /// <summary>
        /// Builds the prefix text from the parts that are present; the result always ends with "!"
        /// </summary>
        public override string ToString()
        {
            var res = new StringBuilder();
            if (IsQuoted) res.Append("'");
            if (HasFilePath) res.Append(FilePath);
            if (HasFileNumber) res.Append($"[{FileNumber}]");
            if (HasFileName) res.Append($"[{FileName}]");
            if (HasSheet) res.Append(Sheet);
            if (HasMultipleSheets) res.Append(MultipleSheets);
            if (IsQuoted) res.Append("'");
            res.Append("!");
            return res.ToString();
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v142/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.IO;
using System.Reflection;

namespace XLParser.Web.XLParserVersions.v142
{
    /// <summary>
    /// Contains the XLParser grammar
    /// </summary>
    [Language("Excel Formulas", "1.4.2", "Grammar for Excel Formulas")]
    public class ExcelFormulaGrammar : Grammar
    {
        #region 1-Terminals

        #region Symbols and operators

        public Terminal at => ToTerm("@");
        public Terminal comma => ToTerm(",");
        public Terminal colon => ToTerm(":");
        public Terminal semicolon => ToTerm(";");
        public Terminal OpenParen => ToTerm("(");
        public Terminal CloseParen => ToTerm(")");
public Terminal CloseSquareParen => ToTerm("]"); public Terminal OpenSquareParen => ToTerm("["); public Terminal exclamationMark => ToTerm("!"); public Terminal CloseCurlyParen => ToTerm("}"); public Terminal OpenCurlyParen => ToTerm("{"); public Terminal QuoteS => ToTerm("'"); public Terminal mulop => ToTerm("*"); public Terminal plusop => ToTerm("+"); public Terminal divop => ToTerm("/"); public Terminal minop => ToTerm("-"); public Terminal concatop => ToTerm("&"); public Terminal expop => ToTerm("^"); // Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal // Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect); public Terminal percentop => ToTerm("%"); public Terminal gtop => ToTerm(">"); public Terminal eqop => ToTerm("="); public Terminal ltop => ToTerm("<"); public Terminal neqop => ToTerm("<>"); public Terminal gteop => ToTerm(">="); public Terminal lteop => ToTerm("<="); #endregion #region Literals public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE") { Priority = TerminalPriority.Bool }; public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None) { DefaultIntTypes = new[] {TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt} }; public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes); public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes) { Priority = TerminalPriority.SingleQuotedString }; public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, 
"#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A"); public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError); #endregion #region Functions private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers private const string AllUdfChars = SpecialUdfChars + @"\\.\w"; private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)"; // The following regex uses the rather exotic feature Character Class Subtraction // https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\("; public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) {Priority = TerminalPriority.UDF}; public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelFunction { get; } = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + string.Join("|", excelFunctionList) + ")\\(") { Priority = TerminalPriority.ExcelFunction }; // Using this instead of Empty allows a more accurate tree public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument); #endregion #region References and names private const string ColumnPattern = @"(?:[A-Z]{1,2}|[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D])"; public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" 
+ ColumnPattern); public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*"); private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?[1-9][0-9]*"; public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex) { Priority = TerminalPriority.CellToken }; // Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark private const string NameStartCharRegex = @"[\p{L}\\_]"; private const string NameValidCharacterRegex = @"[\w\\_\.\?€]"; public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*") { Priority = TerminalPriority.Name }; // Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference. // If we ever parse R1C1 references, make sure to include them here // TODO: Add all function names here private const string NameInvalidWordsRegex = "((TRUE|FALSE)" + NameValidCharacterRegex + "+)" // \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token + "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)" ; // To prevent e.g. 
"A1A1" being parsed as 2 cell tokens public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NameInvalidWordsRegex + NameValidCharacterRegex + "+") { Priority = TerminalPriority.NamedRangeCombination }; public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+") { Priority = TerminalPriority.ReservedName }; #region Structured References //public Terminal SRTableNameToken = new RegexBasedTerminal(GrammarNames.TokenSRTableName, @"[\w\\.]+\[") //{Priority = 0}; public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, @"[\w\\.]+") { Priority = TerminalPriority.SRColumn }; //public Terminal SREnclosedColumnToken = new RegexBasedTerminal(GrammarNames.TokenSREnclosedColumn, @"\[( )*[\w+\\.,:#'""{}$^&*+=->&+\-*/\^%, "; private const string notSheetNameChars = @"'*\[\]\\:/?"; //const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\""; //const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!"; private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+"; private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*"; //private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!"; public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!") { Priority = TerminalPriority.SheetToken }; public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!") { Priority = TerminalPriority.SheetQuotedToken }; private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!"; private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!"; public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex) { Priority = 
TerminalPriority.MultipleSheetsToken }; public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex) { Priority = TerminalPriority.MultipleSheetsToken }; private const string fileNameNumericRegex = @"\[[0-9]+\]"; public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex) { Priority = TerminalPriority.FileNameNumericToken }; private const string fileNameInBracketsRegex = @"\[[^\[\]]+\]"; public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex) { Priority = TerminalPriority.FileName }; // Source: https://stackoverflow.com/a/14632579 private const string fileNameRegex = @"[^\.]+\..{1,4}"; public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex) { Priority = TerminalPriority.FileName }; // Source: http://stackoverflow.com/a/6416209/572635 private const string fileNameForbiddenCharacter = @"<>:""/\|?*"; private const string filePathRegex = @"(?:[a-zA-Z]:|https?:\\|\\?\\?[\w\.-]+\\[\w.$]+)\\(([^" + fileNameForbiddenCharacter + @"\\]| )+\\)*"; public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex); #endregion #endregion #endregion #region 2-NonTerminals // Most non-terminals are first defined here, so they can be used anywhere in the rules // Otherwise you can only use non-terminals that have been defined previously public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument); public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments); public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns); public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant); public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula); public NonTerminal ArrayRows{ 
get; } = new NonTerminal(GrammarNames.ArrayRows);
    public NonTerminal Bool { get; } = new NonTerminal(GrammarNames.Bool);
    public NonTerminal Cell { get; } = new NonTerminal(GrammarNames.Cell);
    public NonTerminal Constant { get; } = new NonTerminal(GrammarNames.Constant);
    public NonTerminal ConstantArray { get; } = new NonTerminal(GrammarNames.ConstantArray);
    public NonTerminal DynamicDataExchange { get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
    public NonTerminal EmptyArgument { get; } = new NonTerminal(GrammarNames.EmptyArgument);
    public NonTerminal Error { get; } = new NonTerminal(GrammarNames.Error);
    public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
    public NonTerminal Formula { get; } = new NonTerminal(GrammarNames.Formula);
    public NonTerminal FormulaWithEq { get; } = new NonTerminal(GrammarNames.FormulaWithEq);
    public NonTerminal FunctionCall { get; } = new NonTerminal(GrammarNames.FunctionCall);
    public NonTerminal FunctionName { get; } = new NonTerminal(GrammarNames.FunctionName);
    public NonTerminal HRange { get; } = new NonTerminal(GrammarNames.HorizontalRange);
    public NonTerminal InfixOp { get; } = new NonTerminal(GrammarNames.TransientInfixOp);
    public NonTerminal NamedRange { get; } = new NonTerminal(GrammarNames.NamedRange);
    public NonTerminal Number { get; } = new NonTerminal(GrammarNames.Number);
    public NonTerminal PostfixOp { get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
    public NonTerminal Prefix { get; } = new NonTerminal(GrammarNames.Prefix);
    public NonTerminal PrefixOp { get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
    public NonTerminal QuotedFileSheet { get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
    public NonTerminal Reference { get; } = new NonTerminal(GrammarNames.Reference);
    //public NonTerminal ReferenceFunction{ get; } = new NonTerminal(GrammarNames.ReferenceFunction);
    public NonTerminal ReferenceItem { get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
    public NonTerminal ReferenceFunctionCall { get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
    public NonTerminal RefError { get; } = new NonTerminal(GrammarNames.RefError);
    public NonTerminal RefFunctionName { get; } = new NonTerminal(GrammarNames.RefFunctionName);
    public NonTerminal ReservedName { get; } = new NonTerminal(GrammarNames.ReservedName);
    public NonTerminal Sheet { get; } = new NonTerminal(GrammarNames.Sheet);
    public NonTerminal Start { get; } = new NonTerminal(GrammarNames.TransientStart);
    public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
    public NonTerminal StructuredReferenceElement { get; } = new NonTerminal(GrammarNames.StructuredReferenceElement);
    public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
    //public NonTerminal StructuredReferenceKeyword { get; } = new NonTerminal(GrammarNames.StructuredReferenceKeyword);
    public NonTerminal StructuredReferenceTable { get; } = new NonTerminal(GrammarNames.StructuredReferenceTable);
    public NonTerminal Text { get; } = new NonTerminal(GrammarNames.Text);
    public NonTerminal UDFName { get; } = new NonTerminal(GrammarNames.UDFName);
    public NonTerminal UDFunctionCall { get; } = new NonTerminal(GrammarNames.UDFunctionCall);
    public NonTerminal Union { get; } = new NonTerminal(GrammarNames.Union);
    public NonTerminal VRange { get; } = new NonTerminal(GrammarNames.VerticalRange);
    #endregion

    /// <summary>
    /// Builds the grammar: marks punctuation, assigns the production rule of every
    /// non-terminal, and registers operator precedence and associativity.
    /// </summary>
    /// <remarks>
    /// The base constructor is called with <c>false</c>.
    /// The order of the alternatives inside each rule is kept exactly as authored.
    /// </remarks>
    public ExcelFormulaGrammar() : base(false)
    {
        #region Punctuation
        MarkPunctuation(OpenParen, CloseParen);
        MarkPunctuation(OpenSquareParen, CloseSquareParen);
        MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
        //exclamationMark.SetFlag(TermFlags.IsDelimiter);
        #endregion

        #region Rules

        #region Base rules
        Root = Start;

        Start.Rule = FormulaWithEq
                     | Formula
                     | ArrayFormula
            ;
        MarkTransient(Start);

        ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;

        FormulaWithEq.Rule = eqop + Formula;

        Formula.Rule = Reference
                       | Constant
                       | FunctionCall
                       | ConstantArray
                       | OpenParen + Formula + CloseParen
                       | ReservedName
            ;
        //MarkTransient(Formula);

        ReservedName.Rule = ReservedNameToken;

        Constant.Rule = Number
                        | Text
                        | Bool
                        | Error
            ;

        Text.Rule = TextToken;
        Number.Rule = NumberToken;
        Bool.Rule = BoolToken;
        Error.Rule = ErrorToken;
        RefError.Rule = RefErrorToken;
        #endregion

        #region Functions
        FunctionCall.Rule = FunctionName + Arguments + CloseParen
                            | PrefixOp + Formula
                            | Formula + PostfixOp
                            | Formula + InfixOp + Formula
            ;

        FunctionName.Rule = ExcelFunction;

        Arguments.Rule = MakeStarRule(Arguments, comma, Argument);
        //Arguments.Rule = Argument | Argument + comma + Arguments;

        EmptyArgument.Rule = EmptyArgumentToken;
        Argument.Rule = Formula | EmptyArgument;
        //MarkTransient(Argument);

        PrefixOp.Rule = ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
                        | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop;
        MarkTransient(PrefixOp);

        InfixOp.Rule = expop
                       | mulop
                       | divop
                       | plusop
                       | minop
                       | concatop
                       | gtop
                       | eqop
                       | ltop
                       | neqop
                       | gteop
                       | lteop;
        MarkTransient(InfixOp);

        //PostfixOp.Rule = ImplyPrecedenceHere(Precedence.UnaryPostFix) + percentop;
        // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
        PostfixOp.Rule = PreferShiftHere() + percentop;
        MarkTransient(PostfixOp);
        #endregion

        #region References
        Reference.Rule = ReferenceItem
                         | ReferenceFunctionCall
                         | OpenParen + Reference + PreferShiftHere() + CloseParen
                         | Prefix + ReferenceItem
                         | DynamicDataExchange
            ;

        ReferenceFunctionCall.Rule = Reference + colon + Reference
                                     | Reference + intersectop + Reference
                                     | OpenParen + Union + CloseParen
                                     | RefFunctionName + Arguments + CloseParen
            ;

        RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;

        Union.Rule = MakePlusRule(Union, comma, Reference);

        ReferenceItem.Rule = Cell
                             | NamedRange
                             | VRange
                             | HRange
                             | RefError
                             | UDFunctionCall
                             | StructuredReference
            ;
        MarkTransient(ReferenceItem);

        UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
        UDFName.Rule = UDFToken;

        VRange.Rule = VRangeToken;
        HRange.Rule = HRangeToken;

        Cell.Rule = CellToken;

        File.Rule = FileNameNumericToken
                    | FileNameEnclosedInBracketsToken
                    | FilePathToken + FileNameEnclosedInBracketsToken
                    | FilePathToken + FileName
            ;

        DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;

        NamedRange.Rule = NameToken | NamedRangeCombinationToken;

        Prefix.Rule = SheetToken
                      | QuoteS + SheetQuotedToken
                      | File + SheetToken
                      | QuoteS + File + SheetQuotedToken
                      | File + exclamationMark
                      | MultipleSheetsToken
                      | QuoteS + MultipleSheetsQuotedToken
                      | File + MultipleSheetsToken
                      | QuoteS + File + MultipleSheetsQuotedToken
                      | RefErrorToken
            ;

        StructuredReferenceElement.Rule = OpenSquareParen + SRColumnToken + CloseSquareParen
                                          | OpenSquareParen + NameToken + CloseSquareParen
                                          | FileNameEnclosedInBracketsToken;
        //StructuredReferenceKeyword.Rule = EnclosedInBracketsToken;

        StructuredReferenceTable.Rule = NameToken;

        StructuredReferenceExpression.Rule = StructuredReferenceElement
                                             | at + StructuredReferenceElement
                                             | StructuredReferenceElement + colon + StructuredReferenceElement
                                             | at + StructuredReferenceElement + colon + StructuredReferenceElement
                                             | StructuredReferenceElement + comma + StructuredReferenceElement
                                             | StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement
                                             | StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement
                                             | StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement
            ;

        StructuredReference.Rule = StructuredReferenceElement
                                   | OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
                                   | StructuredReferenceTable + StructuredReferenceElement
                                   | StructuredReferenceTable + OpenSquareParen + CloseSquareParen
                                   | StructuredReferenceTable + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
            ;
        #endregion

        #region Arrays
        ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;

        ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
        ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);

        ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
        #endregion

        #endregion

        #region 5-Operator Precedence
        // Some of these operators are neutral associative instead of left associative,
        // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
        // structure of the parse tree, we like consistency.
        RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
        RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
        RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
        RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
        RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
        RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop);
        RegisterOperators(Precedence.Union, Associativity.Left, comma);
        RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
        RegisterOperators(Precedence.Range, Associativity.Left, colon);
        //RegisterOperators(Precedence.ParameterSeparator, comma);
        #endregion
    }

    #region Precedence and Priority constants
    // Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
    // Could also be an enum, but this way you don't need int casts
    private static class Precedence
    {
        // Don't use priority 0, Irony seems to view it as no priority set
        public const int Comparison = 1;
        public const int Concatenation = 2;
        public const int Addition = 3;
        public const int Multiplication = 4;
        public const int Exponentiation = 5;
        public const int UnaryPostFix = 6;
        public const int UnaryPreFix = 7;
        //public const int Reference = 8;
        public const int Union = 9;
        public const int Intersection = 10;
        public const int Range = 11;
    }

    // Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
    // E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
    // E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
    private static class TerminalPriority
    {
        // Irony Low value
        //public const int Low = -1000;

        public const int SRColumn = -900;

        public const int Name = -800;
        public const int ReservedName = -700;

        public const int FileName = -500;

        public const int SingleQuotedString = -100;

        // Irony Normal value, default value
        //public const int Normal = 0;
        public const int Bool = 0;

        public const int MultipleSheetsToken = 100;

        // Irony High value
        //public const int High = 1000;

        public const int CellToken = 1000;

        public const int NamedRangeCombination = 1100;

        public const int UDF = 1150;

        public const int ExcelFunction = 1200;
        public const int ExcelRefFunction = 1200;
        public const int FileNameNumericToken = 1200;
        public const int SheetToken = 1200;
        public const int SheetQuotedToken = 1200;
    }
    #endregion

    // Evaluated on every access: each read re-splits the embedded resource.
    private static string[] excelFunctionList => GetExcelFunctionList();

    /// <summary>
    /// Reads the embedded built-in function list (v142) and returns its non-empty lines.
    /// </summary>
    /// <returns>One entry per non-empty line of the resource, split on '\n' and '\r'.</returns>
    private static string[] GetExcelFunctionList()
    {
        var resource = Properties.Resources.ExcelBuiltinFunctionList_v142;
        // The resource is already a string, so it can be split directly.
        return resource.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
    }
}

#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
    #region Non-Terminals
    public const string Argument = "Argument";
    public const string Arguments = "Arguments";
    public const string ArrayColumns = "ArrayColumns";
    public const string ArrayConstant = "ArrayConstant";
    public const string ArrayFormula = "ArrayFormula";
    public const string ArrayRows = "ArrayRows";
    public const string Bool = "Bool";
    public const string Cell = "Cell";
    public const string Constant = "Constant";
    public const string ConstantArray = "ConstantArray";
    public const string DynamicDataExchange = "DynamicDataExchange";
    public const string EmptyArgument = "EmptyArgument";
    public const string Error = "Error";
    public const string ExcelFunction = "ExcelFunction";
    public const string File = "File";
    public const string Formula = "Formula";
    public const string FormulaWithEq = "FormulaWithEq";
    public const string FunctionCall = "FunctionCall";
    public const string FunctionName = "FunctionName";
    public const string HorizontalRange = "HRange";
    public const string NamedRange = "NamedRange";
    public const string Number = "Number";
    public const string Prefix = "Prefix";
    public const string QuotedFileSheet = "QuotedFileSheet";
    public const string Range = "Range";
    public const string Reference = "Reference";
    public const string ReferenceFunctionCall = "ReferenceFunctionCall";
    public const string RefError = "RefError";
    public const string RefFunctionName = "RefFunctionName";
    public const string ReservedName = "ReservedName";
    public const string Sheet = "Sheet";
    public const string StructuredReference = "StructuredReference";
    public const string StructuredReferenceElement = "StructuredReferenceElement";
    public const string StructuredReferenceExpression = "StructuredReferenceExpression";
    public const string StructuredReferenceTable = "StructuredReferenceTable";
    public const string Text = "Text";
    public const string UDFName = "UDFName";
    public const string UDFunctionCall = "UDFunctionCall";
    public const string Union = "Union";
    public const string VerticalRange = "VRange";
    #endregion

    #region Transient Non-Terminals
    public const string TransientStart = "Start";
    public const string TransientInfixOp = "InfixOp";
    public const string TransientPostfixOp = "PostfixOp";
    public const string TransientPrefixOp = "PrefixOp";
    public const string TransientReferenceItem = "ReferenceItem";
    #endregion

    #region Terminals
    public const string TokenBool = "BoolToken";
    public const string TokenCell = "CellToken";
    public const string TokenEmptyArgument = "EmptyArgumentToken";
    public const string TokenError = "ErrorToken";
    public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
    public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
    public const string TokenFilePath = "FilePathToken";
    public const string TokenFileName = "FileNameToken";
    public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
    public const string TokenFileNameNumeric = "FileNameNumericToken";
    public const string TokenHRange = "HRangeToken";
    public const string TokenIntersect = "INTERSECT";
    public const string TokenMultipleSheets = "MultipleSheetsToken";
    public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
    public const string TokenName = "NameToken";
    public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
    public const string TokenNumber = "NumberToken";
    public const string TokenRefError = "RefErrorToken";
    public const string TokenReservedName = "ReservedNameToken";
    public const string TokenSingleQuotedString = "SingleQuotedString";
    public const string TokenSheet = "SheetNameToken";
    public const string TokenSheetQuoted = "SheetNameQuotedToken";
    public const string TokenSRTableName = "SRTableName";
    public const string TokenSRKeyword = "SRKeyword";
    public const string TokenSRColumn = "SRColumn";
    public const string TokenSREnclosedColumn = "SREnclosedColumn";
    public const string TokenText = "TextToken";
    public const string TokenUDF = "UDFToken";
    public const string TokenUnionOperator = ",";
    public const string TokenVRange = "VRangeToken";
    #endregion
}
#endregion
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v142/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v142
{
/// <summary>
/// Excel formula parser
/// Contains parser and utilities that operate directly on the parse tree, or makes working with the parse tree easier.
/// </summary>
public static class ExcelFormulaParser
{
    /// <summary>
    /// Thread-local singleton parser instance
    /// </summary>
    [ThreadStatic] private static Parser _p;

    /// <summary>
    /// Thread-safe parser
    /// </summary>
    private static Parser P => _p ?? (_p = new Parser(new ExcelFormulaGrammar()));

    /// <summary>
    /// Parse a formula, return the tree's root node
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">
    /// If formula could not be parsed
    /// </exception>
    /// <returns>Parse tree root node</returns>
    public static ParseTreeNode Parse(string input)
    {
        return ParseToTree(input).Root;
    }

    /// <summary>
    /// Parse a formula, return the tree
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">
    /// If formula could not be parsed
    /// </exception>
    /// <returns>Parse tree</returns>
    public static ParseTree ParseToTree(string input)
    {
        var tree = P.Parse(input);

        if (tree.HasErrors())
        {
            throw new ArgumentException("Failed parsing input <<" + input + ">>");
        }

        // Move every intersect token one position back and give it a span of length 1.
        var intersects = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenIntersect));
        foreach (ParseTreeNode intersect in intersects)
        {
            var newLocation = new SourceLocation(intersect.Span.Location.Position - 1, intersect.Span.Location.Line, intersect.Span.Location.Column - 1);
            intersect.Span = new SourceSpan(newLocation, 1);
        }

        var quotedSheetNodes = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenSheetQuoted));
        foreach (ParseTreeNode quotedSheetNode in quotedSheetNodes)
        {
            PrefixInfo.FixQuotedSheetNodeForWhitespace(quotedSheetNode, input);
        }

        return tree;
    }

    /// <summary>
    /// Nodes in depth-first pre-order, with a conditional stop
    /// </summary>
    /// <param name="root">The root node</param>
    /// <param name="stopAt">Don't process the children of a node matching this predicate</param>
    // inspiration taken from https://irony.codeplex.com/discussions/213938
    public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
    {
        var stack = new Stack<ParseTreeNode>();
        stack.Push(root);
        while (stack.Count > 0)
        {
            var node = stack.Pop();
            yield return node;

            // Check if we don't want to process the children of this node
            if (stopAt != null && stopAt(node)) continue;

            var children = node.ChildNodes;
            // Push children on in reverse order so that they will
            // be evaluated left -> right when popped.
            for (int i = children.Count - 1; i >= 0; i--)
            {
                stack.Push(children[i]);
            }
        }
    }

    /// <summary>
    /// All nodes in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
    {
        return AllNodesConditional(root);
    }

    /// <summary>
    /// All nodes of a certain type in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
    {
        return AllNodes(root.AllNodes(), type);
    }

    internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
    {
        return allNodes.Where(node => node.Is(type));
    }

    /// <summary>
    /// Get the parent node of a node
    /// </summary>
    /// <remarks>
    /// This is an expensive operation, as the whole tree will be searched through
    /// </remarks>
    /// <exception cref="ArgumentException">If no node under <paramref name="treeRoot"/> has the child</exception>
    public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
    {
        var parent = treeRoot.AllNodes()
            .FirstOrDefault(node => node.ChildNodes.Any(c => c == child));
        if (parent == null) throw new ArgumentException("Child is not part of the tree", nameof(child));
        return parent;
    }

    /// <summary>
    /// The node type/name
    /// </summary>
    public static string Type(this ParseTreeNode node)
    {
        return node.Term.Name;
    }

    /// <summary>
    /// Check if a node is of a particular type
    /// </summary>
    public static bool Is(this ParseTreeNode pt, string type)
    {
        return pt.Type() == type;
    }

    /// <summary>
    /// Checks whether this node is a function
    /// </summary>
    public static Boolean IsFunction(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.FunctionCall)
               || input.Is(GrammarNames.ReferenceFunctionCall)
               || input.Is(GrammarNames.UDFunctionCall)
               // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
               || (input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction())
            ;
    }

    /// <summary>
    /// Whether or not this node represents parentheses "(_)"
    /// </summary>
    public static bool IsParentheses(this ParseTreeNode input)
    {
        switch (input.Type())
        {
            case GrammarNames.Formula:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Formula);
            case GrammarNames.Reference:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Reference);
            default:
                return false;
        }
    }

    public static bool IsBinaryOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 3
               && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);
    }

    public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
    }

    public static bool IsUnaryOperation(this ParseTreeNode input)
    {
        return IsUnaryPrefixOperation(input) || IsUnaryPostfixOperation(input);
    }

    public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 2
               && input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 2
               && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    // Drops the last character of the input.
    private static string RemoveFinalSymbol(string input)
    {
        input = input.Substring(0, input.Length - 1);
        return input;
    }

    /// <summary>
    /// Get the function or operator name of this function call
    /// </summary>
    /// <exception cref="ArgumentException">If the node is not a function call</exception>
    public static string GetFunction(this ParseTreeNode input)
    {
        if (input.IsIntersection())
        {
            return GrammarNames.TokenIntersect;
        }
        if (input.IsUnion())
        {
            return GrammarNames.TokenUnionOperator;
        }
        if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
        {
            return input.ChildNodes[1].Print();
        }
        if (input.IsUnaryPrefixOperation())
        {
            return input.ChildNodes[0].Print();
        }
        if (input.IsNamedFunction())
        {
            // The printed name has its final character stripped before upper-casing.
            return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpper();
        }
        if (input.IsExternalUDFunction())
        {
            return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
        }

        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Check if this node is a specific function
    /// </summary>
    public static bool MatchFunction(this ParseTreeNode input, string functionName)
    {
        return IsFunction(input) && GetFunction(input) == functionName;
    }

    /// <summary>
    /// Get all the arguments of a function or operation
    /// </summary>
    /// <exception cref="ArgumentException">If the node is not a function call</exception>
    public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
    {
        if (input.IsNamedFunction())
        {
            return input
                .ChildNodes[1] // "Arguments" non-terminal
                .ChildNodes    // "Argument" non-terminals
                .Select(node => node.ChildNodes[0])
                ;
        }
        if (input.IsBinaryOperation())
        {
            return new[] {input.ChildNodes[0], input.ChildNodes[2]};
        }
        if (input.IsUnaryPrefixOperation())
        {
            return new[] {input.ChildNodes[1]};
        }
        if (input.IsUnaryPostfixOperation())
        {
            return new[] {input.ChildNodes[0]};
        }
        if (input.IsUnion())
        {
            return input.ChildNodes[0].ChildNodes;
        }
        if (input.IsExternalUDFunction())
        {
            return input // Reference
                .ChildNodes[1] // UDFunctionCall
                .ChildNodes[1] // Arguments
                .ChildNodes // Argument non-terminals
                .Select(node => node.ChildNodes[0])
                ;
        }

        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Checks whether this node is a built-in excel function
    /// </summary>
    public static bool IsBuiltinFunction(this ParseTreeNode node)
    {
        return node.IsFunction()
               && (node.ChildNodes[0].Is(GrammarNames.FunctionName) || node.ChildNodes[0].Is(GrammarNames.RefFunctionName));
    }

    /// <summary>
    /// Whether or not this node represents an intersection
    /// </summary>
    public static bool IsIntersection(this ParseTreeNode input)
    {
        return IsBinaryOperation(input)
               && input.ChildNodes[1].Token.Terminal.Name == GrammarNames.TokenIntersect;
    }

    /// <summary>
    /// Whether or not this node represents a union
    /// </summary>
    public static bool IsUnion(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.ReferenceFunctionCall)
               && input.ChildNodes.Count == 1
               && input.ChildNodes[0].Is(GrammarNames.Union);
    }

    /// <summary>
    /// Checks whether this node is a function call with name, and not just a unary or binary operation
    /// </summary>
    public static bool IsNamedFunction(this ParseTreeNode input)
    {
        return (input.Is(GrammarNames.FunctionCall) && input.ChildNodes[0].Is(GrammarNames.FunctionName))
               || (input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes[0].Is(GrammarNames.RefFunctionName))
               || input.Is(GrammarNames.UDFunctionCall);
    }

    public static bool IsOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() || input.IsUnaryOperation();
    }

    public static bool IsExternalUDFunction(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.Reference)
               && input.ChildNodes.Count == 2
               && input.ChildNodes[1].IsNamedFunction();
    }

    /// <summary>
    /// True if this node represents a number constant with a sign
    /// </summary>
    public static bool IsNumberWithSign(this ParseTreeNode input)
    {
        return IsUnaryPrefixOperation(input)
               && input.ChildNodes[1].ChildNodes[0].Is(GrammarNames.Constant)
               && input.ChildNodes[1].ChildNodes[0].ChildNodes[0].Is(GrammarNames.Number);
    }

    /// <summary>
    /// Extract all of the information from a Prefix non-terminal
    /// </summary>
    public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix) => PrefixInfo.From(prefix);

    /// <summary>
    /// Go to the first non-formula child node
    /// </summary>
    public static ParseTreeNode SkipFormula(this ParseTreeNode input)
    {
        while (input.Is(GrammarNames.Formula))
        {
            input = input.ChildNodes.First();
        }
        return input;
    }

    /// <summary>
    /// Get all child nodes that are references and aren't part of another reference expression
    /// </summary>
    public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
    {
        return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
            .Where(node => node.Is(GrammarNames.Reference))
            .Select(node => node.SkipToRelevant())
            ;
    }

    /// <summary>
    /// Gets the ParserReferences from the input parse tree node and its children
    /// </summary>
    /// <remarks>
    /// 5 cases:
    /// 1. ReferenceItem node: convert to ParserReference
    /// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
    ///    (to include the references in the arguments of external UDFs)
    /// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
    /// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
    /// 5. Other cases (RefFunctionCall, Union, Arguments):recursive call on the nodes returned from GetReferenceNodes(node)
    /// </remarks>
    public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
    {
        // Unwrap a Reference node that has a single child.
        if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
            node = node.ChildNodes[0];

        var list = new List<ParserReference>();

        switch (node.Type())
        {
            case GrammarNames.Cell:
            case GrammarNames.NamedRange:
            case GrammarNames.HorizontalRange:
            case GrammarNames.VerticalRange:
            case GrammarNames.StructuredReference:
                list.Add(new ParserReference(node));
                break;
            case GrammarNames.Reference:
                list.Add(new ParserReference(node));
                list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
                break;
            default:
                if (node.IsRange())
                {
                    var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray();
                    var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray();
                    if (IsCellReference(rangeStart) && IsCellReference(rangeEnd))
                    {
                        // Both limits are single cells: merge them into one CellRange reference.
                        ParserReference range = rangeStart.First();
                        range.MaxLocation = rangeEnd.First().MinLocation;
                        range.ReferenceType = ReferenceType.CellRange;
                        range.LocationString = node.Print();
                        list.Add(range);
                    }
                    else
                    {
                        list.AddRange(rangeStart);
                        list.AddRange(rangeEnd);
                    }
                }
                else
                {
                    list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
                }
                break;
        }

        return list;
    }

    // True when the list holds exactly one reference and it is of type Cell.
    private static bool IsCellReference(IList<ParserReference> references)
    {
        return references.Count == 1 && references.First().ReferenceType == ReferenceType.Cell;
    }

    /// <summary>
    /// Whether or not this node represents a range
    /// </summary>
    public static bool IsRange(this ParseTreeNode input)
    {
        return input.IsBinaryReferenceOperation() && input.ChildNodes[1].Is(":");
    }

    /// <summary>
    /// Go to the first "relevant" child node, i.e. skips wrapper nodes
    /// </summary>
    /// <param name="input">The input parse tree node</param>
    /// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
    /// <remarks>
    /// Skips:
    /// * FormulaWithEq and ArrayFormula nodes
    /// * Formula nodes
    /// * Parentheses
    /// * Reference nodes which are just wrappers
    /// </remarks>
    public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
    {
        while (true)
        {
            switch (input.Type())
            {
                case GrammarNames.FormulaWithEq:
                case GrammarNames.ArrayFormula:
                    input = input.ChildNodes[1];
                    break;
                case GrammarNames.Argument:
                case GrammarNames.Formula:
                    if (input.ChildNodes.Count == 1)
                    {
                        input = input.ChildNodes[0];
                    }
                    else
                    {
                        return input;
                    }
                    break;
                case GrammarNames.Reference:
                    // Skip references which are parentheses
                    // Skip references without a prefix (=> they only have one child node) if the option is set
                    if ((skipReferencesWithoutPrefix && input.ChildNodes.Count == 1) || input.IsParentheses())
                    {
                        input = input.ChildNodes[0];
                    }
                    else
                    {
                        return input;
                    }
                    break;
                default:
                    return input;
            }
        }
    }

    /// <summary>
    /// Pretty-print a parse tree to a string
    /// </summary>
    /// <exception cref="ArgumentException">If a node type has no print rule and does not have exactly one child</exception>
    public static string Print(this ParseTreeNode input)
    {
        // For terminals, just print the token text
        if (input.Term is Terminal)
        {
            return input.Token.Text;
        }

        // (Lazy) enumerable for printed children
        var children = input.ChildNodes.Select(Print);
        // Concrete list when needed
        List<string> childrenList;

        // Switch on non-terminals
        switch (input.Term.Name)
        {
            case GrammarNames.Formula:
                // Check if these are brackets, otherwise print first child
                return IsParentheses(input) ? $"({children.First()})" : children.First();

            case GrammarNames.FunctionCall:
            case GrammarNames.ReferenceFunctionCall:
            case GrammarNames.UDFunctionCall:
                childrenList = children.ToList();

                if (input.IsNamedFunction())
                {
                    return string.Join("", childrenList) + ")";
                }

                if (input.IsBinaryOperation())
                {
                    // format string for "normal" binary operation
                    string format = "{0} {1} {2}";
                    if (input.IsIntersection())
                    {
                        format = "{0} {2}";
                    }
                    else if (input.IsBinaryReferenceOperation())
                    {
                        format = "{0}{1}{2}";
                    }

                    return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
                }

                if (input.IsUnion())
                {
                    return $"({string.Join(",", childrenList)})";
                }

                if (input.IsUnaryOperation())
                {
                    return string.Join("", childrenList);
                }

                throw new ArgumentException("Unknown function type.");

            case GrammarNames.Reference:
                return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);

            case GrammarNames.Prefix:
                var ret = string.Join("", children);
                // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
                if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
                {
                    ret += "!";
                }
                return ret;

            case GrammarNames.ArrayFormula:
                return "{=" + children.ElementAt(1) + "}";

            case GrammarNames.StructuredReference:
                var sb = new StringBuilder();
                var hasTable = input.ChildNodes.Count >= 1 && input.ChildNodes[0].Is(GrammarNames.StructuredReferenceTable);
                var contentsNode = hasTable ? 1 : 0;
                childrenList = children.ToList();
                if (hasTable)
                {
                    sb.Append(childrenList[0]);
                }

                if (hasTable && input.ChildNodes.Count == 1)
                {
                    // Full table reference
                    sb.Append("[]");
                }
                else if (input.ChildNodes[contentsNode].Is(GrammarNames.StructuredReferenceElement))
                {
                    sb.Append(childrenList[contentsNode]);
                }
                else
                {
                    sb.Append($"[{childrenList[contentsNode]}]");
                }

                return sb.ToString();

            // Terms for which to print all child nodes concatenated
            case GrammarNames.ArrayConstant:
            case GrammarNames.DynamicDataExchange:
            case GrammarNames.FormulaWithEq:
            case GrammarNames.File:
            case GrammarNames.StructuredReferenceExpression:
                return string.Join("", children);

            // Terms for which we print the children comma-separated
            case GrammarNames.Arguments:
            case GrammarNames.ArrayRows:
            case GrammarNames.Union:
                return string.Join(",", children);

            case GrammarNames.ArrayColumns:
                return string.Join(";", children);

            case GrammarNames.ConstantArray:
                return $"{{{children.First()}}}";

            default:
                // If it is not defined above and the number of children is exactly one, we want to just print the first child
                if (input.ChildNodes.Count == 1)
                {
                    return children.First();
                }
                throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine +
                                            "This probably means the Excel grammar was modified without the print function being modified");
        }
    }
}
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v142/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v142
{
public enum ReferenceType
{
    Cell,
    CellRange,
    UserDefinedName,
    HorizontalRange,
    VerticalRange,
    RefError,
    Table
}

public class ParserReference
{
    public const int MaxRangeHeight = 100;
    public const int MaxRangeWidth = 100;

    public ReferenceType ReferenceType { get; set; }
    public string LocationString { get; set; }
    public string Worksheet { get; set; }
    public string LastWorksheet { get; set; }
    public string FilePath { get; set; }
    public string FileName { get; set; }
    public string Name { get; private set; }
    public string MinLocation { get; set; } //Location as appearing in the formula, eg $A$1
    public string MaxLocation { get; set; }

    // Creates a reference from explicit values; MaxLocation falls back to minLocation when maxLocation is null.
    public ParserReference(ReferenceType referenceType, string locationString = null, string worksheet = null, string lastWorksheet = null,
        string filePath = null, string fileName = null, string name = null, string minLocation = null, string maxLocation = null)
    {
        ReferenceType = referenceType;
        LocationString = locationString;
        Worksheet = worksheet;
        LastWorksheet = lastWorksheet;
        FilePath = filePath;
        FileName = fileName;
        Name = name;
        MinLocation = minLocation;
        MaxLocation = maxLocation ?? minLocation;
    }

    // Creates a reference by inspecting a parse tree node (see InitializeReference).
    public ParserReference(ParseTreeNode node)
    {
        InitializeReference(node);
    }

    ///
    /// Initializes the current object based on the input ParseTreeNode
    ///
    ///
    /// For Reference nodes (Prefix ReferenceItem), it initializes the values derived from the Prefix node and
    /// is re-invoked for the ReferenceItem node.
/// public void InitializeReference(ParseTreeNode node) { switch (node.Type()) { case GrammarNames.Reference: PrefixInfo prefix = node.ChildNodes[0].GetPrefixInfo(); Worksheet = prefix.HasSheet ? prefix.Sheet.Replace("''", "'") : "(Undefined sheet)"; if (prefix.HasMultipleSheets) { string[] sheets = prefix.MultipleSheets.Split(':'); Worksheet = sheets[0]; LastWorksheet = sheets[1]; } if (prefix.HasFilePath) { FilePath = prefix.FilePath; } if (prefix.HasFileNumber) { FileName = prefix.FileNumber.ToString(); } else if (prefix.HasFileName) { FileName = prefix.FileName; } InitializeReference(node.ChildNodes[1]); break; case GrammarNames.Cell: ReferenceType = ReferenceType.Cell; MinLocation = node.ChildNodes[0].Token.ValueString; MaxLocation = MinLocation; break; case GrammarNames.NamedRange: ReferenceType = ReferenceType.UserDefinedName; Name = node.ChildNodes[0].Token.ValueString; break; case GrammarNames.StructuredReference: ReferenceType = ReferenceType.Table; Name = node.ChildNodes.FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceTable)?.ChildNodes[0].Token.ValueString; break; case GrammarNames.HorizontalRange: string[] horizontalLimits = node.ChildNodes[0].Token.ValueString.Split(':'); ReferenceType = ReferenceType.HorizontalRange; MinLocation = "A" + horizontalLimits[0]; MaxLocation = ConvertColumnToStr(MaxRangeWidth - 1) + horizontalLimits[1]; break; case GrammarNames.VerticalRange: string[] verticalLimits = node.ChildNodes[0].Token.ValueString.Split(':'); ReferenceType = ReferenceType.VerticalRange; MinLocation = verticalLimits[0] + "1"; MaxLocation = verticalLimits[1] + MaxRangeHeight; break; case GrammarNames.RefError: ReferenceType = ReferenceType.RefError; break; } LocationString = node.Print(); } /// /// Converts the column number to an Excel column string representation. /// /// The zero-based column number. 
private string ConvertColumnToStr(int columnNumber) { var sb = new System.Text.StringBuilder(); while (columnNumber >= 0) { sb.Insert(0, (char)(65 + columnNumber % 26)); columnNumber = columnNumber / 26 - 1; } return sb.ToString(); } public override string ToString() { return ReferenceType == ReferenceType.Cell ? MinLocation.ToString() : string.Format("{0}:{1}", MinLocation, MaxLocation); } } } ================================================ FILE: app/XLParser.Web/XLParserVersions/v142/PrefixInfo.cs ================================================ using System; using System.Reflection; using System.Text; using Irony.Parsing; namespace XLParser.Web.XLParserVersions.v142 { // TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23 /// /// Simple data class that holds information about a Prefix. /// /// public class PrefixInfo : IEquatable { public string FilePath { get; } public bool HasFilePath => FilePath != null; private readonly int? _fileNumber; public int FileNumber => _fileNumber.GetValueOrDefault(); public bool HasFileNumber => _fileNumber.HasValue; public string FileName { get; } public bool HasFileName => FileName != null; public bool HasFile => HasFileName || HasFileNumber; public string Sheet { get; } public bool HasSheet => Sheet != null; public string MultipleSheets { get; } public bool HasMultipleSheets => MultipleSheets != null; public bool IsQuoted { get; } public PrefixInfo(string sheet = null, int? 
fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
        {
            Sheet = sheet;
            _fileNumber = fileNumber;
            FileName = fileName;
            FilePath = filePath;
            MultipleSheets = multipleSheets;
            IsQuoted = isQuoted;
        }

        /// <summary>
        /// Create a PrefixInfo class from a parse tree node
        /// </summary>
        /// <param name="prefix">A parse tree node of type <see cref="GrammarNames.Prefix"/>.</param>
        /// <exception cref="ArgumentException">If <paramref name="prefix"/> is not a Prefix node.</exception>
        internal static PrefixInfo From(ParseTreeNode prefix)
        {
            if (prefix.Type() != GrammarNames.Prefix)
            {
                throw new ArgumentException("Not a prefix", nameof(prefix));
            }

            string filePath = null;
            int? fileNumber = null;
            string fileName = null;
            string sheetName = null;
            string multipleSheets = null;

            // Token number we're processing
            var cur = 0;

            // Check for quotes
            var isQuoted = prefix.ChildNodes[cur].Is("'");
            if (isQuoted)
            {
                cur++;
            }

            // Check and process file
            if (prefix.ChildNodes[cur].Is(GrammarNames.File))
            {
                ParseTreeNode file = prefix.ChildNodes[cur];

                if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
                {
                    // Numeric filename: strip the enclosing brackets, stays null when it does not fit an int
                    fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
                }
                else
                {
                    // String filename
                    var iCur = 0;

                    // Check if it includes a path
                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                    {
                        filePath = file.ChildNodes[iCur].Print();
                        iCur++;
                    }

                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                    {
                        // Strip the enclosing brackets
                        fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                    }
                    else
                    {
                        fileName = file.ChildNodes[iCur].Print();
                    }
                }

                cur++;
            }

            // Check for a non-quoted sheet
            if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheet))
            {
                // remove !
                sheetName = Substr(prefix.ChildNodes[cur].Print(), 1);
            }
            // Check for a quoted sheet
            else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheetQuoted))
            {
                // remove quote and !
                sheetName = Substr(prefix.ChildNodes[cur].Print(), 2);

                if (sheetName == "")
                {
                    // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                    // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                    sheetName = " ";
                }
            }
            // Check if multiple sheets
            else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenMultipleSheets))
            {
                // remove !
                multipleSheets = Substr(prefix.ChildNodes[cur].Print(), 1);
            }

            return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
        }

        /// <summary>
        /// Extends a quoted sheet node to the left so that it also covers the whitespace that directly precedes it in
        /// <paramref name="sourceText"/>, updating both the node's span and its token text.
        /// </summary>
        /// <param name="quotedSheetNode">The quoted sheet name node to adjust in place.</param>
        /// <param name="sourceText">The formula text the node was parsed from.</param>
        internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
        {
            var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
            SourceLocation currentLocation = quotedSheetNode.Span.Location;
            if (newPosition == currentLocation.Position)
            {
                return;
            }

            // The start moves LEFT by this many characters, so the column has to decrease by the same amount
            // (the original code added the shift, moving the column to the right instead).
            // Clamped at 0 in case the skipped whitespace contains a line break.
            var shift = currentLocation.Position - newPosition;
            var newLocation = new SourceLocation(newPosition, currentLocation.Line, Math.Max(0, currentLocation.Column - shift));
            quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

            // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
            typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
                ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
        }

        /// <summary>
        /// Walks left from the node's start position over any whitespace and returns the index of the first
        /// whitespace character of that run (or the node's own position when none precedes it).
        /// </summary>
        private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
        {
            var startIndex = nodeSheetQuoted.Span.Location.Position;
            while (startIndex > 0)
            {
                if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
                {
                    break;
                }

                startIndex--;
            }

            return startIndex;
        }

        /// <summary>
        /// Returns <paramref name="s"/> without its first <paramref name="removeFirst"/> and last
        /// <paramref name="removeLast"/> characters.
        /// </summary>
        private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
        {
            return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
        }

        public override bool Equals(object other) => Equals(other as PrefixInfo);

        /// <summary>
        /// Two prefixes are equal when file number, file path, file name, sheet and multiple sheets match
        /// (strings compared case-insensitively). <see cref="IsQuoted"/> is not part of the comparison.
        /// </summary>
        public bool Equals(PrefixInfo other)
        {
            if (ReferenceEquals(null, other)) return false;
            if (ReferenceEquals(this, other)) return true;
            return _fileNumber == other._fileNumber
                   && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
        }

        // Uses the same case-insensitive comparer as Equals so that equal prefixes hash equally.
        public override int GetHashCode()
        {
            unchecked
            {
                var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
                hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
                hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
                hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
                hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
                return hashCode;
            }
        }

        public static bool operator ==(PrefixInfo left, PrefixInfo right)
        {
            return Equals(left, right);
        }

        public static bool operator !=(PrefixInfo left, PrefixInfo right)
        {
            return !Equals(left, right);
        }

        /// <summary>
        /// Rebuilds the prefix text: optional quote, file path, bracketed file number/name, sheet(s),
        /// optional closing quote, and a trailing "!".
        /// </summary>
        public override string ToString()
        {
            var res = new StringBuilder();
            if (IsQuoted) res.Append("'");
            if (HasFilePath) res.Append(FilePath);
            if (HasFileNumber) res.Append($"[{FileNumber}]");
            if (HasFileName) res.Append($"[{FileName}]");
            if (HasSheet) res.Append(Sheet);
            if (HasMultipleSheets) res.Append(MultipleSheets);
            if (IsQuoted) res.Append("'");
            res.Append("!");
            return res.ToString();
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v150/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.IO;
using System.Reflection;

namespace XLParser.Web.XLParserVersions.v150
{
    /// <summary>
    /// Contains the XLParser grammar
    /// </summary>
    [Language("Excel Formulas", "1.5.0", "Grammar for Excel Formulas")]
    public class ExcelFormulaGrammar : Grammar
    {
        #region 1-Terminals

        #region Symbols and operators

        public Terminal at => ToTerm("@");
        public Terminal comma => ToTerm(",");
        public Terminal colon => ToTerm(":");
        public Terminal hash => ToTerm("#");
        public Terminal semicolon => ToTerm(";");
        public Terminal OpenParen => ToTerm("(");
        public Terminal CloseParen => ToTerm(")");
        public Terminal CloseSquareParen => ToTerm("]");
        public Terminal OpenSquareParen => ToTerm("[");
        public Terminal exclamationMark => ToTerm("!");
        public Terminal CloseCurlyParen => ToTerm("}");
        public Terminal OpenCurlyParen => ToTerm("{");
        public Terminal QuoteS => ToTerm("'");

        public Terminal mulop => ToTerm("*");
        public Terminal plusop => ToTerm("+");
        public Terminal divop => ToTerm("/");
        public Terminal minop => ToTerm("-");
        public Terminal concatop => ToTerm("&");
        public Terminal expop => ToTerm("^");

        // Intersect op is a single space, 
which cannot be parsed normally so we need an ImpliedSymbolTerminal // Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect); public Terminal percentop => ToTerm("%"); public Terminal gtop => ToTerm(">"); public Terminal eqop => ToTerm("="); public Terminal ltop => ToTerm("<"); public Terminal neqop => ToTerm("<>"); public Terminal gteop => ToTerm(">="); public Terminal lteop => ToTerm("<="); #endregion #region Literals public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE") { Priority = TerminalPriority.Bool }; public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None) { DefaultIntTypes = new[] {TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt} }; public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes); public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes) { Priority = TerminalPriority.SingleQuotedString }; public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!"); public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError); #endregion #region Functions private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers private const string AllUdfChars = SpecialUdfChars + @"\\.\w"; private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)"; // The following regex uses the rather exotic feature Character Class Subtraction // 
https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\("; public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) {Priority = TerminalPriority.UDF}; public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelFunction { get; } = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + string.Join("|", excelFunctionList) + ")\\(") { Priority = TerminalPriority.ExcelFunction }; // Using this instead of Empty allows a more accurate tree public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument); #endregion #region References and names private const string ColumnPattern = @"(?:[A-Z]{1,2}|[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D])"; public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern); public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*"); private const string CellTokenRegex = "[$]?" 
+ ColumnPattern + "[$]?[1-9][0-9]*"; public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex) { Priority = TerminalPriority.CellToken }; // Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark private const string NameStartCharRegex = @"[\p{L}\\_]"; private const string NameValidCharacterRegex = @"[\w\\_\.\?€]"; public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*") { Priority = TerminalPriority.Name }; // Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference. // If we ever parse R1C1 references, make sure to include them here // TODO: Add all function names here private const string NameInvalidWordsRegex = "((TRUE|FALSE)" + NameValidCharacterRegex + "+)" // \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token + "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)" ; // To prevent e.g. 
"A1A1" being parsed as 2 cell tokens public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NameInvalidWordsRegex + NameValidCharacterRegex + "+") { Priority = TerminalPriority.NamedRangeCombination }; public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+") { Priority = TerminalPriority.ReservedName }; #region Structured References //public Terminal SRTableNameToken = new RegexBasedTerminal(GrammarNames.TokenSRTableName, @"[\w\\.]+\[") //{Priority = 0}; public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, @"[\w\\.]+") { Priority = TerminalPriority.SRColumn }; //public Terminal SREnclosedColumnToken = new RegexBasedTerminal(GrammarNames.TokenSREnclosedColumn, @"\[( )*[\w+\\.,:#'""{}$^&*+=->&+\-*/\^%, "; private const string notSheetNameChars = @"'*\[\]\\:/?"; //const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\""; //const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!"; private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+"; private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*"; //private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!"; public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!") { Priority = TerminalPriority.SheetToken }; public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!") { Priority = TerminalPriority.SheetQuotedToken }; private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!"; private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!"; public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex) { Priority = 
TerminalPriority.MultipleSheetsToken }; public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex) { Priority = TerminalPriority.MultipleSheetsToken }; private const string fileNameNumericRegex = @"\[[0-9]+\]"; public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex) { Priority = TerminalPriority.FileNameNumericToken }; private const string fileNameInBracketsRegex = @"\[[^\[\]]+\]"; public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex) { Priority = TerminalPriority.FileName }; // Source: https://stackoverflow.com/a/14632579 private const string fileNameRegex = @"[^\.]+\..{1,4}"; public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex) { Priority = TerminalPriority.FileName }; // Source: http://stackoverflow.com/a/6416209/572635 private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\.-]+\\[\w.$]+)\\(([^<>:""/\|?*\\]| )+\\)*"; private const string urlPathRegex = @"http(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:(0-9)*)*(\/?)([a-zA-Z0-9\-\.\?\,\'\/\\\+&%\$#_]*)?"; private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")"; public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex); #endregion #endregion #endregion #region 2-NonTerminals // Most non-terminals are first defined here, so they can be used anywhere in the rules // Otherwise you can only use non-terminals that have been defined previously public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument); public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments); public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns); public NonTerminal ArrayConstant{ get; } = new 
NonTerminal(GrammarNames.ArrayConstant); public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula); public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows); public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool); public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell); public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant); public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray); public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange); public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument); public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error); public NonTerminal File { get; } = new NonTerminal(GrammarNames.File); public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula); public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq); public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall); public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName); public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange); public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp); public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange); public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number); public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp); public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix); public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp); public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet); public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference); //public NonTerminal ReferenceFunction{ get; } = new 
NonTerminal(GrammarNames.ReferenceFunction); public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem); public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall); public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError); public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName); public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName); public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet); public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart); public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference); public NonTerminal StructuredReferenceElement { get; } = new NonTerminal(GrammarNames.StructuredReferenceElement); public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression); //public NonTerminal StructuredReferenceKeyword { get; } = new NonTerminal(GrammarNames.StructuredReferenceKeyword); public NonTerminal StructuredReferenceTable { get; } = new NonTerminal(GrammarNames.StructuredReferenceTable); public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text); public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName); public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall); public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union); public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange); #endregion public ExcelFormulaGrammar() : base(false) { #region Punctuation MarkPunctuation(OpenParen, CloseParen); MarkPunctuation(OpenSquareParen, CloseSquareParen); MarkPunctuation(OpenCurlyParen, CloseCurlyParen); //exclamationMark.SetFlag(TermFlags.IsDelimiter); #endregion #region Rules #region Base rules Root = Start; Start.Rule = FormulaWithEq | Formula | ArrayFormula ; 
MarkTransient(Start); ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen; FormulaWithEq.Rule = eqop + Formula; Formula.Rule = Reference | Constant | FunctionCall | ConstantArray | OpenParen + Formula + CloseParen | ReservedName ; //MarkTransient(Formula); ReservedName.Rule = ReservedNameToken; Constant.Rule = Number | Text | Bool | Error ; Text.Rule = TextToken; Number.Rule = NumberToken; Bool.Rule = BoolToken; Error.Rule = ErrorToken; RefError.Rule = RefErrorToken; #endregion #region Functions FunctionCall.Rule = FunctionName + Arguments + CloseParen | PrefixOp + Formula | Formula + PostfixOp | Formula + InfixOp + Formula ; FunctionName.Rule = ExcelFunction; Arguments.Rule = MakeStarRule(Arguments, comma, Argument); //Arguments.Rule = Argument | Argument + comma + Arguments; EmptyArgument.Rule = EmptyArgumentToken; Argument.Rule = Formula | EmptyArgument; //MarkTransient(Argument); PrefixOp.Rule = ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop | ImplyPrecedenceHere(Precedence.UnaryPreFix) + at; MarkTransient(PrefixOp); InfixOp.Rule = expop | mulop | divop | plusop | minop | concatop | gtop | eqop | ltop | neqop | gteop | lteop; MarkTransient(InfixOp); //PostfixOp.Rule = ImplyPrecedenceHere(Precedence.UnaryPostFix) + percentop; // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action PostfixOp.Rule = PreferShiftHere() + percentop; MarkTransient(PostfixOp); #endregion #region References Reference.Rule = ReferenceItem | ReferenceFunctionCall | OpenParen + Reference + PreferShiftHere() + CloseParen | Prefix + ReferenceItem | DynamicDataExchange ; ReferenceFunctionCall.Rule = Reference + colon + Reference | Reference + intersectop + Reference | OpenParen + Union + CloseParen | RefFunctionName + Arguments + CloseParen | Reference + hash; ; RefFunctionName.Rule = ExcelRefFunctionToken | 
ExcelConditionalRefFunctionToken; Union.Rule = MakePlusRule(Union, comma, Reference); ReferenceItem.Rule = Cell | NamedRange | VRange | HRange | RefError | UDFunctionCall | StructuredReference ; MarkTransient(ReferenceItem); UDFunctionCall.Rule = UDFName + Arguments + CloseParen; UDFName.Rule = UDFToken; VRange.Rule = VRangeToken; HRange.Rule = HRangeToken; Cell.Rule = CellToken; File.Rule = FileNameNumericToken | FileNameEnclosedInBracketsToken | FilePathToken + FileNameEnclosedInBracketsToken | FilePathToken + FileName ; DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken; NamedRange.Rule = NameToken | NamedRangeCombinationToken; Prefix.Rule = SheetToken | QuoteS + SheetQuotedToken | File + SheetToken | QuoteS + File + SheetQuotedToken | File + exclamationMark | MultipleSheetsToken | QuoteS + MultipleSheetsQuotedToken | File + MultipleSheetsToken | QuoteS + File + MultipleSheetsQuotedToken | RefErrorToken ; StructuredReferenceElement.Rule = OpenSquareParen + SRColumnToken + CloseSquareParen | OpenSquareParen + NameToken + CloseSquareParen | FileNameEnclosedInBracketsToken; //StructuredReferenceKeyword.Rule = EnclosedInBracketsToken; StructuredReferenceTable.Rule = NameToken; StructuredReferenceExpression.Rule = StructuredReferenceElement | at + StructuredReferenceElement | StructuredReferenceElement + colon + StructuredReferenceElement | at + StructuredReferenceElement + colon + StructuredReferenceElement | StructuredReferenceElement + comma + StructuredReferenceElement | StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement | StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement | StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement ; StructuredReference.Rule = StructuredReferenceElement | OpenSquareParen + StructuredReferenceExpression + CloseSquareParen | 
StructuredReferenceTable + StructuredReferenceElement | StructuredReferenceTable + OpenSquareParen + CloseSquareParen | StructuredReferenceTable + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen ; #endregion #region Arrays ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen; ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows); ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant); ArrayConstant.Rule = Constant | PrefixOp + Number | RefError; #endregion #endregion #region 5-Operator Precedence // Some of these operators are neutral associative instead of left associative, // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific // structure of the parse tree, we like consistency. RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop); RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop); RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop); RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop); RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop); RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash); RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, plusop, minop, at); RegisterOperators(Precedence.Union, Associativity.Left, comma); RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop); RegisterOperators(Precedence.Range, Associativity.Left, colon); #endregion } #region Precedence and Priority constants // Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a // Could also be an enum, but this way you don't need int casts private static class Precedence { // Don't use priority 0, Irony seems to view it as no priority set public const int Comparison = 1; public const int Concatenation = 2; public const int 
Addition = 3; public const int Multiplication = 4; public const int Exponentiation = 5; public const int UnaryPostFix = 6; public const int UnaryPreFix = 7; //public const int Reference = 8; public const int Union = 9; public const int Intersection = 10; public const int Range = 11; } // Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match // E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority // E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination private static class TerminalPriority { // Irony Low value //public const int Low = -1000; public const int SRColumn = -900; public const int Name = -800; public const int ReservedName = -700; public const int FileName = -500; public const int SingleQuotedString = -100; // Irony Normal value, default value //public const int Normal = 0; public const int Bool = 0; public const int MultipleSheetsToken = 100; // Irony High value //public const int High = 1000; public const int CellToken = 1000; public const int NamedRangeCombination = 1100; public const int UDF = 1150; public const int ExcelFunction = 1200; public const int ExcelRefFunction = 1200; public const int FileNameNumericToken = 1200; public const int SheetToken = 1200; public const int SheetQuotedToken = 1200; } #endregion private static string[] excelFunctionList => GetExcelFunctionList(); private static string[] GetExcelFunctionList() { var resource = Properties.Resources.ExcelBuiltinFunctionList_v150; using (var sr = new StringReader(resource)) return sr.ReadToEnd().Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries); } } #region Names /// /// Collection of names used for terminals and non-terminals in the Excel Formula Grammar. /// /// /// Using these is strongly recommended, as these will change when breaking changes occur. /// It also allows you to see which code works on what grammar constructs. 
/// // Keep these constants instead of methods/properties, since that allows them to be used in switch statements. public static class GrammarNames { #region Non-Terminals public const string Argument = "Argument"; public const string Arguments = "Arguments"; public const string ArrayColumns = "ArrayColumns"; public const string ArrayConstant = "ArrayConstant"; public const string ArrayFormula = "ArrayFormula"; public const string ArrayRows = "ArrayRows"; public const string Bool = "Bool"; public const string Cell = "Cell"; public const string Constant = "Constant"; public const string ConstantArray = "ConstantArray"; public const string DynamicDataExchange = "DynamicDataExchange"; public const string EmptyArgument = "EmptyArgument"; public const string Error = "Error"; public const string ExcelFunction = "ExcelFunction"; public const string File = "File"; public const string Formula = "Formula"; public const string FormulaWithEq = "FormulaWithEq"; public const string FunctionCall = "FunctionCall"; public const string FunctionName = "FunctionName"; public const string HorizontalRange = "HRange"; public const string NamedRange = "NamedRange"; public const string Number = "Number"; public const string Prefix = "Prefix"; public const string QuotedFileSheet = "QuotedFileSheet"; public const string Range = "Range"; public const string Reference = "Reference"; public const string ReferenceFunctionCall = "ReferenceFunctionCall"; public const string RefError = "RefError"; public const string RefFunctionName = "RefFunctionName"; public const string ReservedName = "ReservedName"; public const string Sheet = "Sheet"; public const string StructuredReference = "StructuredReference"; public const string StructuredReferenceElement = "StructuredReferenceElement"; public const string StructuredReferenceExpression = "StructuredReferenceExpression"; public const string StructuredReferenceTable = "StructuredReferenceTable"; public const string Text = "Text"; public const string 
UDFName = "UDFName"; public const string UDFunctionCall = "UDFunctionCall"; public const string Union = "Union"; public const string VerticalRange = "VRange"; #endregion #region Transient Non-Terminals public const string TransientStart = "Start"; public const string TransientInfixOp = "InfixOp"; public const string TransientPostfixOp = "PostfixOp"; public const string TransientPrefixOp = "PrefixOp"; public const string TransientReferenceItem = "ReferenceItem"; #endregion #region Terminals public const string TokenBool = "BoolToken"; public const string TokenCell = "CellToken"; public const string TokenEmptyArgument = "EmptyArgumentToken"; public const string TokenError = "ErrorToken"; public const string TokenExcelRefFunction = "ExcelRefFunctionToken"; public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken"; public const string TokenFilePath = "FilePathToken"; public const string TokenFileName = "FileNameToken"; public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken"; public const string TokenFileNameNumeric = "FileNameNumericToken"; public const string TokenHRange = "HRangeToken"; public const string TokenIntersect = "INTERSECT"; public const string TokenMultipleSheets = "MultipleSheetsToken"; public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken"; public const string TokenName = "NameToken"; public const string TokenNamedRangeCombination = "NamedRangeCombinationToken"; public const string TokenNumber = "NumberToken"; public const string TokenRefError = "RefErrorToken"; public const string TokenReservedName = "ReservedNameToken"; public const string TokenSingleQuotedString = "SingleQuotedString"; public const string TokenSheet = "SheetNameToken"; public const string TokenSheetQuoted = "SheetNameQuotedToken"; public const string TokenSRTableName = "SRTableName"; public const string TokenSRKeyword = "SRKeyword"; public const string TokenSRColumn = "SRColumn"; public const string 
TokenSREnclosedColumn = "SREnclosedColumn"; public const string TokenText = "TextToken"; public const string TokenUDF = "UDFToken"; public const string TokenUnionOperator = ","; public const string TokenVRange = "VRangeToken"; #endregion } #endregion } ================================================ FILE: app/XLParser.Web/XLParserVersions/v150/ExcelFormulaParser.cs ================================================ using System; using System.Collections.Generic; using System.Linq; using System.Text; using Irony.Parsing; namespace XLParser.Web.XLParserVersions.v150 { /// /// Excel formula parser
/// Contains parser and utilities that operate directly on the parse tree, or make working with the parse tree easier.
///
public static class ExcelFormulaParser
{
    /// <summary>
    /// Thread-local singleton parser instance
    /// </summary>
    [ThreadStatic] private static Parser _p;

    /// <summary>
    /// Parser for the calling thread; created on first use
    /// </summary>
    private static Parser P => _p ?? (_p = new Parser(new ExcelFormulaGrammar()));

    /// <summary>
    /// Parse a formula, return the tree's root node
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">If formula could not be parsed</exception>
    /// <returns>Parse tree root node</returns>
    public static ParseTreeNode Parse(string input)
    {
        return ParseToTree(input).Root;
    }

    /// <summary>
    /// Parse a formula, return the tree
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">If formula could not be parsed</exception>
    /// <returns>Parse tree</returns>
    public static ParseTree ParseToTree(string input)
    {
        var tree = P.Parse(input);

        if (tree.HasErrors())
        {
            throw new ArgumentException("Failed parsing input <<" + input + ">>");
        }

        // Intersect tokens are implied symbols: move their span one character back and give it length 1
        var intersects = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenIntersect));
        foreach (ParseTreeNode intersect in intersects)
        {
            var newLocation = new SourceLocation(intersect.Span.Location.Position - 1, intersect.Span.Location.Line, intersect.Span.Location.Column - 1);
            intersect.Span = new SourceSpan(newLocation, 1);
        }

        // Quoted sheet tokens get their span/text corrected for whitespace preceding the token
        var quotedSheetNodes = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenSheetQuoted));
        foreach (ParseTreeNode quotedSheetNode in quotedSheetNodes)
        {
            PrefixInfo.FixQuotedSheetNodeForWhitespace(quotedSheetNode, input);
        }

        return tree;
    }

    /// <summary>
    /// All nodes (the root itself and its descendants) in depth-first pre-order, with a conditional stop
    /// </summary>
    /// <param name="root">The root node</param>
    /// <param name="stopAt">Don't process the children of a node matching this predicate</param>
    // inspiration taken from https://irony.codeplex.com/discussions/213938
    public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
    {
        var stack = new Stack<ParseTreeNode>();
        stack.Push(root);
        while (stack.Count > 0)
        {
            var node = stack.Pop();
            yield return node;

            // Check if we don't want to process the children of this node
            if (stopAt != null && stopAt(node)) continue;

            var children = node.ChildNodes;
            // Push children on in reverse order so that they will
            // be evaluated left -> right when popped.
            for (int i = children.Count - 1; i >= 0; i--)
            {
                stack.Push(children[i]);
            }
        }
    }

    /// <summary>
    /// All nodes in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
    {
        return AllNodesConditional(root);
    }

    /// <summary>
    /// All nodes of a certain type in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
    {
        return AllNodes(root.AllNodes(), type);
    }

    /// <summary>
    /// Filter an already enumerated node sequence on node type
    /// </summary>
    internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
    {
        return allNodes.Where(node => node.Is(type));
    }

    /// <summary>
    /// Get the parent node of a node
    /// </summary>
    /// <remarks>
    /// This is an expensive operation, as the whole tree will be searched through
    /// </remarks>
    /// <exception cref="ArgumentException">If no node below <paramref name="treeRoot"/> has the child as a direct child</exception>
    public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
    {
        var parent = treeRoot.AllNodes()
            .FirstOrDefault(node => node.ChildNodes.Any(c => c == child));
        if (parent == null) throw new ArgumentException("Child is not part of the tree", nameof(child));
        return parent;
    }

    /// <summary>
    /// The node type/name
    /// </summary>
    public static string Type(this ParseTreeNode node)
    {
        return node.Term.Name;
    }

    /// <summary>
    /// Check if a node is of a particular type
    /// </summary>
    public static bool Is(this ParseTreeNode pt, string type)
    {
        return pt.Type() == type;
    }

    /// <summary>
    /// Checks whether this node is a function
    /// </summary>
    public static bool IsFunction(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.FunctionCall)
               || input.Is(GrammarNames.ReferenceFunctionCall)
               || input.Is(GrammarNames.UDFunctionCall)
               // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
               || (input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction())
            ;
    }

    /// <summary>
    /// Whether or not this node represents parentheses "(_)"
    /// </summary>
    public static bool IsParentheses(this ParseTreeNode input)
    {
        switch (input.Type())
        {
            case GrammarNames.Formula:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Formula);
            case GrammarNames.Reference:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Reference);
            default:
                return false;
        }
    }

    /// <summary>
    /// Function node with three children of which the middle one is an operator
    /// </summary>
    public static bool IsBinaryOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 3
               && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// Binary operation that is a FunctionCall node
    /// </summary>
    public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);
    }

    /// <summary>
    /// Binary operation that is a ReferenceFunctionCall node
    /// </summary>
    public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
    }

    /// <summary>
    /// Unary prefix or unary postfix operation
    /// </summary>
    public static bool IsUnaryOperation(this ParseTreeNode input)
    {
        return IsUnaryPrefixOperation(input) || IsUnaryPostfixOperation(input);
    }

    /// <summary>
    /// Function node with two children of which the first one is an operator
    /// </summary>
    public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 2
               && input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// Function node with two children of which the second one is an operator
    /// </summary>
    public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 2
               && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// Drop the last character of a string
    /// </summary>
    private static string RemoveFinalSymbol(string input)
    {
        input = input.Substring(0, input.Length - 1);
        return input;
    }

    /// <summary>
    /// Get the function or operator name of this function call
    /// </summary>
    /// <remarks>
    /// Named functions are returned without their final character and in upper case.
    /// Upper-casing is culture-invariant so the result does not depend on the thread culture
    /// (e.g. "if" must become "IF", not the Turkish dotted "İF").
    /// </remarks>
    /// <exception cref="ArgumentException">If the node is not a function call</exception>
    public static string GetFunction(this ParseTreeNode input)
    {
        if (input.IsIntersection())
        {
            return GrammarNames.TokenIntersect;
        }
        if (input.IsUnion())
        {
            return GrammarNames.TokenUnionOperator;
        }
        if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
        {
            return input.ChildNodes[1].Print();
        }
        if (input.IsUnaryPrefixOperation())
        {
            return input.ChildNodes[0].Print();
        }
        if (input.IsNamedFunction())
        {
            return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
        }
        if (input.IsExternalUDFunction())
        {
            return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
        }

        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Check if this node is a specific function
    /// </summary>
    public static bool MatchFunction(this ParseTreeNode input, string functionName)
    {
        return IsFunction(input) && GetFunction(input) == functionName;
    }

    /// <summary>
    /// Get all the arguments of a function or operation
    /// </summary>
    /// <exception cref="ArgumentException">If the node is not a function call</exception>
    public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
    {
        if (input.IsNamedFunction())
        {
            return input
                .ChildNodes[1] // "Arguments" non-terminal
                .ChildNodes    // "Argument" non-terminals
                .Select(node => node.ChildNodes[0])
                ;
        }
        if (input.IsBinaryOperation())
        {
            return new[] {input.ChildNodes[0], input.ChildNodes[2]};
        }
        if (input.IsUnaryPrefixOperation())
        {
            return new[] {input.ChildNodes[1]};
        }
        if (input.IsUnaryPostfixOperation())
        {
            return new[] {input.ChildNodes[0]};
        }
        if (input.IsUnion())
        {
            return input.ChildNodes[0].ChildNodes;
        }
        if (input.IsExternalUDFunction())
        {
            return input       // Reference
                .ChildNodes[1] // UDFunctionCall
                .ChildNodes[1] // Arguments
                .ChildNodes    // Argument non-terminals
                .Select(node => node.ChildNodes[0])
                ;
        }

        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Checks whether this node is a built-in excel function
    /// </summary>
    public static bool IsBuiltinFunction(this ParseTreeNode node)
    {
        return node.IsFunction()
               && (node.ChildNodes[0].Is(GrammarNames.FunctionName) || node.ChildNodes[0].Is(GrammarNames.RefFunctionName));
    }

    /// <summary>
    /// Whether or not this node represents an intersection
    /// </summary>
    public static bool IsIntersection(this ParseTreeNode input)
    {
        return IsBinaryOperation(input)
               && input.ChildNodes[1].Token.Terminal.Name == GrammarNames.TokenIntersect;
    }

    /// <summary>
    /// Whether or not this node represents an union
    /// </summary>
    public static bool IsUnion(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.ReferenceFunctionCall)
               && input.ChildNodes.Count == 1
               && input.ChildNodes[0].Is(GrammarNames.Union);
    }

    /// <summary>
    /// Checks whether this node is a function call with name, and not just a unary or binary operation
    /// </summary>
    public static bool IsNamedFunction(this ParseTreeNode input)
    {
        return (input.Is(GrammarNames.FunctionCall) && input.ChildNodes[0].Is(GrammarNames.FunctionName))
               || (input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes[0].Is(GrammarNames.RefFunctionName))
               || input.Is(GrammarNames.UDFunctionCall);
    }

    /// <summary>
    /// Binary or unary operation
    /// </summary>
    public static bool IsOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() || input.IsUnaryOperation();
    }

    /// <summary>
    /// Reference node with two children of which the second is a named function
    /// </summary>
    public static bool IsExternalUDFunction(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.Reference)
               && input.ChildNodes.Count == 2
               && input.ChildNodes[1].IsNamedFunction();
    }

    /// <summary>
    /// True if this node presents a number constant with a sign
    /// </summary>
    public static bool IsNumberWithSign(this ParseTreeNode input)
    {
        return IsUnaryPrefixOperation(input)
               && input.ChildNodes[1].ChildNodes[0].Is(GrammarNames.Constant)
               && input.ChildNodes[1].ChildNodes[0].ChildNodes[0].Is(GrammarNames.Number);
    }

    /// <summary>
    /// Extract all of the information from a Prefix non-terminal
    /// </summary>
    public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix) => PrefixInfo.From(prefix);

    /// <summary>
    /// Go to the first non-formula child node
    /// </summary>
    public static ParseTreeNode SkipFormula(this ParseTreeNode input)
    {
        while (input.Is(GrammarNames.Formula))
        {
            input = input.ChildNodes.First();
        }
        return input;
    }

    /// <summary>
    /// Get all child nodes that are references and aren't part of another reference expression
    /// </summary>
    public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
    {
        return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
            .Where(node => node.Is(GrammarNames.Reference))
            .Select(node => node.SkipToRelevant())
            ;
    }

    /// <summary>
    /// Gets the ParserReferences from the input parse tree node and its children
    /// </summary>
    /// <remarks>
    /// 5 cases:
    /// 1. ReferenceItem node: convert to ParserReference
    /// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
    ///    (to include the references in the arguments of external UDFs)
    /// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
    /// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
    /// 5. Other cases (RefFunctionCall, Union, Arguments):recursive call on the nodes returned from GetReferenceNodes(node)
    /// </remarks>
    public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
    {
        // Unwrap a Reference that only wraps a single child
        if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
            node = node.ChildNodes[0];

        var list = new List<ParserReference>();

        switch (node.Type())
        {
            case GrammarNames.Cell:
            case GrammarNames.NamedRange:
            case GrammarNames.HorizontalRange:
            case GrammarNames.VerticalRange:
            case GrammarNames.StructuredReference:
                list.Add(new ParserReference(node));
                break;
            case GrammarNames.Reference:
                list.Add(new ParserReference(node));
                list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
                break;
            default:
                if (node.IsRange())
                {
                    var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray();
                    var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray();
                    if (IsCellReference(rangeStart) && IsCellReference(rangeEnd))
                    {
                        // Cell:Cell => merge both limits into one CellRange reference
                        ParserReference range = rangeStart.First();
                        range.MaxLocation = rangeEnd.First().MinLocation;
                        range.ReferenceType = ReferenceType.CellRange;
                        range.LocationString = node.Print();
                        list.Add(range);
                    }
                    else
                    {
                        list.AddRange(rangeStart);
                        list.AddRange(rangeEnd);
                    }
                }
                else
                {
                    list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
                }
                break;
        }

        return list;
    }

    /// <summary>
    /// True if the list holds exactly one reference and that reference is of type Cell
    /// </summary>
    private static bool IsCellReference(IList<ParserReference> references)
    {
        return references.Count == 1 && references.First().ReferenceType == ReferenceType.Cell;
    }

    /// <summary>
    /// Whether or not this node represents a range
    /// </summary>
    public static bool IsRange(this ParseTreeNode input)
    {
        return input.IsBinaryReferenceOperation() && input.ChildNodes[1].Is(":");
    }

    /// <summary>
    /// Go to the first "relevant" child node, i.e. skips wrapper nodes
    /// </summary>
    /// <param name="input">The input parse tree node</param>
    /// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
    /// <remarks>
    /// Skips:
    /// * FormulaWithEq and ArrayFormula nodes
    /// * Formula nodes
    /// * Parentheses
    /// * Reference nodes which are just wrappers
    /// </remarks>
    public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
    {
        while (true)
        {
            switch (input.Type())
            {
                case GrammarNames.FormulaWithEq:
                case GrammarNames.ArrayFormula:
                    input = input.ChildNodes[1];
                    break;
                case GrammarNames.Argument:
                case GrammarNames.Formula:
                    if (input.ChildNodes.Count == 1)
                    {
                        input = input.ChildNodes[0];
                    }
                    else
                    {
                        return input;
                    }
                    break;
                case GrammarNames.Reference:
                    // Skip references which are parentheses
                    // Skip references without a prefix (=> they only have one child node) if the option is set
                    if ((skipReferencesWithoutPrefix && input.ChildNodes.Count == 1) || input.IsParentheses())
                    {
                        input = input.ChildNodes[0];
                    }
                    else
                    {
                        return input;
                    }
                    break;
                default:
                    return input;
            }
        }
    }

    /// <summary>
    /// Pretty-print a parse tree to a string
    /// </summary>
    /// <exception cref="ArgumentException">If a node type is encountered for which no print rule exists</exception>
    public static string Print(this ParseTreeNode input)
    {
        // For terminals, just print the token text
        if (input.Term is Terminal)
        {
            return input.Token.Text;
        }

        // (Lazy) enumerable for printed children
        var children = input.ChildNodes.Select(Print);
        // Concrete list when needed
        List<string> childrenList;

        // Switch on non-terminals
        switch (input.Term.Name)
        {
            case GrammarNames.Formula:
                // Check if these are brackets, otherwise print first child
                return IsParentheses(input) ? $"({children.First()})" : children.First();

            case GrammarNames.FunctionCall:
            case GrammarNames.ReferenceFunctionCall:
            case GrammarNames.UDFunctionCall:
                childrenList = children.ToList();

                if (input.IsNamedFunction())
                {
                    return string.Join("", childrenList) + ")";
                }

                if (input.IsBinaryOperation())
                {
                    // format string for "normal" binary operation
                    string format = "{0}{1}{2}";
                    if (input.IsIntersection())
                    {
                        format = "{0} {2}";
                    }

                    return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
                }

                if (input.IsUnion())
                {
                    return $"({string.Join(",", childrenList)})";
                }

                if (input.IsUnaryOperation())
                {
                    return string.Join("", childrenList);
                }

                throw new ArgumentException("Unknown function type.");

            case GrammarNames.Reference:
                return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);

            case GrammarNames.Prefix:
                var ret = string.Join("", children);
                // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
                if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
                {
                    ret += "!";
                }
                return ret;

            case GrammarNames.ArrayFormula:
                return "{=" + children.ElementAt(1) + "}";

            case GrammarNames.StructuredReference:
                var sb = new StringBuilder();
                var hashtable = input.ChildNodes.Count >= 1 && input.ChildNodes[0].Is(GrammarNames.StructuredReferenceTable);
                var contentsNode = hashtable ? 1 : 0;
                childrenList = children.ToList();
                if (hashtable)
                {
                    sb.Append(childrenList[0]);
                }

                if (hashtable && input.ChildNodes.Count == 1)
                {
                    // Full table reference
                    sb.Append("[]");
                }
                else if (input.ChildNodes[contentsNode].Is(GrammarNames.StructuredReferenceElement))
                {
                    sb.Append(childrenList[contentsNode]);
                }
                else
                {
                    sb.Append($"[{childrenList[contentsNode]}]");
                }

                return sb.ToString();

            // Terms for which to print all child nodes concatenated
            case GrammarNames.ArrayConstant:
            case GrammarNames.DynamicDataExchange:
            case GrammarNames.FormulaWithEq:
            case GrammarNames.File:
            case GrammarNames.StructuredReferenceExpression:
                return string.Join("", children);

            // Terms for which we print the children comma-separated
            case GrammarNames.Arguments:
            case GrammarNames.ArrayRows:
            case GrammarNames.Union:
                return string.Join(",", children);

            case GrammarNames.ArrayColumns:
                return string.Join(";", children);

            case GrammarNames.ConstantArray:
                return $"{{{children.First()}}}";

            default:
                // If it is not defined above and the number of children is exactly one, we want to just print the first child
                if (input.ChildNodes.Count == 1)
                {
                    return children.First();
                }

                throw new ArgumentException($"Could not print node of type '{input.Term.Name}'."
+ Environment.NewLine + "This probably means the Excel grammar was modified without the print function being modified");
        }
    }
}
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v150/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v150
{
    /// <summary>
    /// Kinds of reference a <see cref="ParserReference"/> can describe
    /// </summary>
    public enum ReferenceType
    {
        Cell,
        CellRange,
        UserDefinedName,
        HorizontalRange,
        VerticalRange,
        RefError,
        Table
    }

    /// <summary>
    /// A reference found in a formula, with its (optional) file, worksheet and location parts
    /// </summary>
    public class ParserReference
    {
        public const int MaxRangeHeight = 1048576;
        public const int MaxRangeWidth = 16384;

        public ReferenceType ReferenceType { get; set; }
        public string LocationString { get; set; }
        public string Worksheet { get; set; }
        public string LastWorksheet { get; set; }
        public string FilePath { get; set; }
        public string FileName { get; set; }
        public string Name { get; private set; }
        public string MinLocation { get; set; } //Location as appearing in the formula, eg $A$1
        public string MaxLocation { get; set; }

        /// <summary>
        /// Create a reference from explicit values
        /// </summary>
        /// <remarks>
        /// When <paramref name="maxLocation"/> is omitted it defaults to <paramref name="minLocation"/>.
        /// </remarks>
        public ParserReference(ReferenceType referenceType, string locationString = null, string worksheet = null, string lastWorksheet = null,
            string filePath = null, string fileName = null, string name = null, string minLocation = null, string maxLocation = null)
        {
            ReferenceType = referenceType;
            LocationString = locationString;
            Worksheet = worksheet;
            LastWorksheet = lastWorksheet;
            FilePath = filePath;
            FileName = fileName;
            Name = name;
            MinLocation = minLocation;
            MaxLocation = maxLocation ?? minLocation;
        }

        /// <summary>
        /// Create a reference from a parse tree node, see <see cref="InitializeReference"/>
        /// </summary>
        public ParserReference(ParseTreeNode node)
        {
            InitializeReference(node);
        }

        /// <summary>
        /// Initializes the current object based on the input ParseTreeNode
        /// </summary>
        /// <remarks>
        /// For Reference nodes (Prefix ReferenceItem), it initializes the values derived from the Prefix node and
        /// is re-invoked for the ReferenceItem node.
        /// Node types without a case below leave ReferenceType and the location properties untouched.
        /// </remarks>
        public void InitializeReference(ParseTreeNode node)
        {
            switch (node.Type())
            {
                case GrammarNames.Reference:
                    PrefixInfo prefix = node.ChildNodes[0].GetPrefixInfo();
                    // Doubled single quotes in a sheet name are un-escaped
                    Worksheet = prefix.HasSheet ? prefix.Sheet.Replace("''", "'") : "(Undefined sheet)";

                    if (prefix.HasMultipleSheets)
                    {
                        string[] sheets = prefix.MultipleSheets.Split(':');
                        Worksheet = sheets[0];
                        LastWorksheet = sheets[1];
                    }

                    if (prefix.HasFilePath)
                    {
                        FilePath = prefix.FilePath;
                    }

                    if (prefix.HasFileNumber)
                    {
                        FileName = prefix.FileNumber.ToString();
                    }
                    else if (prefix.HasFileName)
                    {
                        FileName = prefix.FileName;
                    }

                    InitializeReference(node.ChildNodes[1]);
                    break;
                case GrammarNames.Cell:
                    ReferenceType = ReferenceType.Cell;
                    MinLocation = node.ChildNodes[0].Token.ValueString;
                    MaxLocation = MinLocation;
                    break;
                case GrammarNames.NamedRange:
                    ReferenceType = ReferenceType.UserDefinedName;
                    Name = node.ChildNodes[0].Token.ValueString;
                    break;
                case GrammarNames.StructuredReference:
                    ReferenceType = ReferenceType.Table;
                    Name = node.ChildNodes.FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceTable)?.ChildNodes[0].Token.ValueString;
                    break;
                case GrammarNames.HorizontalRange:
                    // Row-only range (e.g. "1:3"): extend it from column A to the last column
                    string[] horizontalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                    ReferenceType = ReferenceType.HorizontalRange;
                    MinLocation = (horizontalLimits[0].StartsWith("$") ? "$" : "") + "A" + horizontalLimits[0];
                    MaxLocation = (horizontalLimits[1].StartsWith("$") ? "$" : "") + ConvertColumnToStr(MaxRangeWidth - 1) + horizontalLimits[1];
                    break;
                case GrammarNames.VerticalRange:
                    // Column-only range (e.g. "A:C"): extend it from row 1 to the last row
                    string[] verticalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                    ReferenceType = ReferenceType.VerticalRange;
                    MinLocation = verticalLimits[0] + (verticalLimits[0].StartsWith("$") ? "$" : "") + "1";
                    MaxLocation = verticalLimits[1] + (verticalLimits[1].StartsWith("$") ? "$" : "") + MaxRangeHeight;
                    break;
                case GrammarNames.RefError:
                    ReferenceType = ReferenceType.RefError;
                    break;
            }

            LocationString = node.Print();
        }

        /// <summary>
        /// Converts the column number to an Excel column string representation.
        /// </summary>
        /// <param name="columnNumber">The zero-based column number.</param>
        private static string ConvertColumnToStr(int columnNumber)
        {
            var sb = new System.Text.StringBuilder();
            while (columnNumber >= 0)
            {
                sb.Insert(0, (char)(65 + columnNumber % 26));
                columnNumber = columnNumber / 26 - 1;
            }

            return sb.ToString();
        }

        /// <summary>
        /// MinLocation for cell references, "MinLocation:MaxLocation" otherwise
        /// </summary>
        /// <remarks>
        /// ReferenceType defaults to Cell, so a reference initialized from a node type that sets no location
        /// has a null MinLocation; fall back to LocationString instead of throwing.
        /// </remarks>
        public override string ToString()
        {
            if (ReferenceType == ReferenceType.Cell)
            {
                return MinLocation ?? LocationString ?? string.Empty;
            }

            return string.Format("{0}:{1}", MinLocation, MaxLocation);
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v150/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v150
{
    // TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23

    /// <summary>
    /// Simple data class that holds information about a Prefix.
    /// </summary>
    public class PrefixInfo : IEquatable<PrefixInfo>
    {
        public string FilePath { get; }
        public bool HasFilePath => FilePath != null;

        private readonly int? _fileNumber;
        public int FileNumber => _fileNumber.GetValueOrDefault();
        public bool HasFileNumber => _fileNumber.HasValue;

        public string FileName { get; }
        public bool HasFileName => FileName != null;

        public bool HasFile => HasFileName || HasFileNumber;

        public string Sheet { get; }
        public bool HasSheet => Sheet != null;

        public string MultipleSheets { get; }
        public bool HasMultipleSheets => MultipleSheets != null;

        public bool IsQuoted { get; }

        public PrefixInfo(string sheet = null, int?
fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
        {
            Sheet = sheet;
            _fileNumber = fileNumber;
            FileName = fileName;
            FilePath = filePath;
            MultipleSheets = multipleSheets;
            IsQuoted = isQuoted;
        }

        /// <summary>
        /// Create a PrefixInfo class from a parse tree node
        /// </summary>
        /// <param name="prefix">A node of type Prefix</param>
        /// <exception cref="ArgumentException">If the node is not a Prefix node</exception>
        internal static PrefixInfo From(ParseTreeNode prefix)
        {
            if (prefix.Type() != GrammarNames.Prefix)
            {
                throw new ArgumentException("Not a prefix", nameof(prefix));
            }

            string filePath = null;
            int? fileNumber = null;
            string fileName = null;
            string sheetName = null;
            string multipleSheets = null;

            // Token number we're processing
            var cur = 0;

            // Check for quotes
            var isQuoted = prefix.ChildNodes[cur].Is("'");
            if (isQuoted)
            {
                cur++;
            }

            // Check and process file
            if (prefix.ChildNodes[cur].Is(GrammarNames.File))
            {
                ParseTreeNode file = prefix.ChildNodes[cur];

                if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
                {
                    // Numeric filename: strip the first and last character before parsing the number
                    fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
                }
                else
                {
                    // String filename
                    var iCur = 0;
                    // Check if it includes a path
                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                    {
                        filePath = file.ChildNodes[iCur].Print();
                        iCur++;
                    }

                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                    {
                        fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                    }
                    else
                    {
                        fileName = file.ChildNodes[iCur].Print();
                    }
                }

                cur++;
            }

            // A prefix can consist of a file only (the parse tree printer special-cases a Prefix with a
            // single File child), in which case there is no sheet node left to inspect.
            if (cur < prefix.ChildNodes.Count)
            {
                ParseTreeNode sheetNode = prefix.ChildNodes[cur];

                // Check for a non-quoted sheet
                if (sheetNode.Is(GrammarNames.TokenSheet))
                {
                    // remove !
                    sheetName = Substr(sheetNode.Print(), 1);
                }
                // Check for a quoted sheet
                else if (sheetNode.Is(GrammarNames.TokenSheetQuoted))
                {
                    // remove quote and !
                    sheetName = Substr(sheetNode.Print(), 2);

                    if (sheetName == "")
                    {
                        // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                        // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                        sheetName = " ";
                    }
                }
                // Check if multiple sheets
                else if (sheetNode.Is(GrammarNames.TokenMultipleSheets))
                {
                    // remove !
                    multipleSheets = Substr(sheetNode.Print(), 1);
                }
                // Check if quoted multiple sheets
                else if (sheetNode.Is(GrammarNames.TokenMultipleSheetsQuoted))
                {
                    // remove quote and ! (assumes the token ends with "'!" like the quoted single-sheet token)
                    multipleSheets = Substr(sheetNode.Print(), 2);
                }
            }

            return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
        }

        /// <summary>
        /// Extend the span and text of a quoted sheet token so it includes the whitespace directly preceding it
        /// </summary>
        /// <param name="quotedSheetNode">The quoted sheet token node to correct</param>
        /// <param name="sourceText">The complete text that was parsed</param>
        internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
        {
            var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
            SourceLocation currentLocation = quotedSheetNode.Span.Location;
            if (newPosition == currentLocation.Position)
            {
                return;
            }

            // The start moves backwards, so the column has to move backwards by the same amount.
            // NOTE(review): assumes the skipped whitespace is on the same line; clamped so it never goes negative.
            var shift = currentLocation.Position - newPosition;
            var newLocation = new SourceLocation(newPosition, currentLocation.Line, Math.Max(0, currentLocation.Column - shift));
            quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

            // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
            typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
                ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
        }

        /// <summary>
        /// Walk back from the start of the token over all directly preceding whitespace characters
        /// </summary>
        private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
        {
            var startIndex = nodeSheetQuoted.Span.Location.Position;
            while (startIndex > 0)
            {
                if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
                {
                    break;
                }

                startIndex--;
            }

            return startIndex;
        }

        /// <summary>
        /// Substring with a number of characters removed from the end and from the start
        /// </summary>
        private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
        {
            return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
        }

        public override bool Equals(object other) => Equals(other as PrefixInfo);

        /// <summary>
        /// Case-insensitive comparison on file number, file path, file name, sheet and multiple sheets
        /// </summary>
        public bool Equals(PrefixInfo other)
        {
            if (ReferenceEquals(null, other)) return false;
            if (ReferenceEquals(this, other)) return true;
            return _fileNumber == other._fileNumber
                   && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Hash over the same fields, with the same case-insensitivity, as <see cref="Equals(PrefixInfo)"/>
        /// </summary>
        public override int GetHashCode()
        {
            unchecked
            {
                var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
                hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
                hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
                hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
                hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
                return hashCode;
            }
        }

        public static bool operator ==(PrefixInfo left, PrefixInfo right)
        {
            return Equals(left, right);
        }

        public static bool operator !=(PrefixInfo left, PrefixInfo right)
        {
            return !Equals(left, right);
        }

        /// <summary>
        /// Rebuild the prefix text from the stored parts, ending in an exclamation mark
        /// </summary>
        public override string ToString()
        {
            var res = new StringBuilder();
            if (IsQuoted) res.Append("'");
            if (HasFilePath) res.Append(FilePath);
            if (HasFileNumber) res.Append($"[{FileNumber}]");
            if (HasFileName) res.Append($"[{FileName}]");
            if (HasSheet) res.Append(Sheet);
            if (HasMultipleSheets) res.Append(MultipleSheets);
            if (IsQuoted) res.Append("'");
            res.Append("!");
            return res.ToString();
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v151/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.IO;
using System.Reflection;

namespace XLParser.Web.XLParserVersions.v151
{
    /// <summary>
    /// Contains the XLParser grammar
    /// </summary>
    [Language("Excel Formulas", "1.5.1", "Grammar for Excel Formulas")]
    public class ExcelFormulaGrammar : Grammar
    {
        #region 1-Terminals

        #region Symbols and operators

        public Terminal at => ToTerm("@");
        public Terminal comma => ToTerm(",");
        public Terminal colon => ToTerm(":");
        public Terminal hash => ToTerm("#");
        public Terminal semicolon => ToTerm(";");
        public Terminal OpenParen => ToTerm("(");
        public Terminal CloseParen => ToTerm(")");
        public Terminal CloseSquareParen => ToTerm("]");
        public Terminal OpenSquareParen => ToTerm("[");
        public Terminal exclamationMark => ToTerm("!");
        public Terminal CloseCurlyParen => ToTerm("}");
        public Terminal OpenCurlyParen => ToTerm("{");
        public Terminal QuoteS => ToTerm("'");

        public Terminal mulop => ToTerm("*");
        public Terminal plusop => ToTerm("+");
        public Terminal divop => ToTerm("/");
        public Terminal minop => ToTerm("-");
        public Terminal concatop => ToTerm("&");
        public Terminal expop => ToTerm("^");

        // Intersect op is a single space,
which cannot be parsed normally so we need an ImpliedSymbolTerminal // Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect); public Terminal percentop => ToTerm("%"); public Terminal gtop => ToTerm(">"); public Terminal eqop => ToTerm("="); public Terminal ltop => ToTerm("<"); public Terminal neqop => ToTerm("<>"); public Terminal gteop => ToTerm(">="); public Terminal lteop => ToTerm("<="); #endregion #region Literals public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE") { Priority = TerminalPriority.Bool }; public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None) { DefaultIntTypes = new[] {TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt} }; public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes); public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes) { Priority = TerminalPriority.SingleQuotedString }; public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!"); public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError); #endregion #region Functions private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers private const string AllUdfChars = SpecialUdfChars + @"\\.\w"; private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)"; // The following regex uses the rather exotic feature Character Class Subtraction // 
// https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction
// User defined function name followed by "(". Alternatives, in order:
//  1. a single character from AllUdfChars except C, c, R and r (character class subtraction),
//  2. UdfPrefixRegex followed by a single character from AllUdfChars,
//  3. an optional UdfPrefixRegex followed by 2 to 1023 characters from AllUdfChars.
private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\(";

public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) {Priority = TerminalPriority.UDF};

// Matches "INDEX(", "OFFSET(" or "INDIRECT("
public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(")
{
    Priority = TerminalPriority.ExcelRefFunction
};

// Matches "IF(" or "CHOOSE("
public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(")
{
    Priority = TerminalPriority.ExcelRefFunction
};

// Matches any name from the built-in function list, followed by "("
public Terminal ExcelFunction { get; } = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + string.Join("|", excelFunctionList) + ")\\(")
{
    Priority = TerminalPriority.ExcelFunction
};

// Using this instead of Empty allows a more accurate tree
public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);

#endregion

#region References and names

// Column letters: one to three letters, with the three-letter alternatives ending at XFD
private const string ColumnPattern = @"(?:[A-Z]{1,2}|[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D])";

// Column range such as A:B, each side optionally prefixed with "$"
public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern);
// Row range such as 1:2, each side optionally prefixed with "$"
public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*");

// Optional "$", column letters, optional "$", row number without leading zero
private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?[1-9][0-9]*";
public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex)
{
    Priority = TerminalPriority.CellToken
};

// Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark
private const string NameStartCharRegex = @"[\p{L}\\_]";
private const string NameValidCharacterRegex = @"[\w\\_\.\?€]";
public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*")
{
    Priority = TerminalPriority.Name
};

// Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference.
// If we ever parse R1C1 references, make sure to include them here
// TODO: Add all function names here
private const string NameInvalidWordsRegex =
    "((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
    // \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
    + "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
    ;

// To prevent e.g. "A1A1" being parsed as 2 cell tokens
public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NameInvalidWordsRegex + NameValidCharacterRegex + "+")
{
    Priority = TerminalPriority.NamedRangeCombination
};

// Reserved names: "_xlnm." followed by letters and underscores
public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+")
{
    Priority = TerminalPriority.ReservedName
};

#region Structured References

//public Terminal SRTableNameToken = new RegexBasedTerminal(GrammarNames.TokenSRTableName, @"[\w\\.]+\[")
//{Priority = 0};

public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, @"[\w\\.]+")
{
    Priority = TerminalPriority.SRColumn
};

// NOTE(review): the commented-out line below looks truncated, and no declaration of mustBeQuotedInSheetName
// (referenced by normalSheetName further down) is visible in this part of the file - confirm against the original source.
//public Terminal SREnclosedColumnToken = new RegexBasedTerminal(GrammarNames.TokenSREnclosedColumn, @"\[( )*[\w+\\.,:#'""{}$^&*+=->&+\-*/\^%, ";
private const string notSheetNameChars = @"'*\[\]\\:/?";
//const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
//const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
// Unquoted sheet name: one or more characters outside notSheetNameChars and mustBeQuotedInSheetName
private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
// Quoted sheet name content: characters outside notSheetNameChars, or a doubled single quote
private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*";
//private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";

public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!")
{
    Priority = TerminalPriority.SheetToken
};

// The opening quote is not part of this token; the Prefix rule puts QuoteS in front of it
public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!")
{
    Priority = TerminalPriority.SheetQuotedToken
};

private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!";
private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!";
public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex)
{
    Priority = TerminalPriority.MultipleSheetsToken
};
public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex)
{
    Priority = TerminalPriority.MultipleSheetsToken
};

// File reference given as a number between square brackets, e.g. [1]
private const string fileNameNumericRegex = @"\[[0-9]+\]";
public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex)
{
    Priority = TerminalPriority.FileNameNumericToken
};

// Any non-empty text without square brackets, enclosed in square brackets
private const string fileNameInBracketsRegex = @"\[[^\[\]]+\]";
public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex)
{
    Priority = TerminalPriority.FileName
};

// Source: https://stackoverflow.com/a/14632579
private const string fileNameRegex = @"[^\.]+\..{1,4}";
public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex)
{
    Priority = TerminalPriority.FileName
};

// Source: http://stackoverflow.com/a/6416209/572635
private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\.-]+\\[\w.$]+)\\(([^<>:""/\|?*\\]| )+\\)*";
// NOTE(review): "(0-9)" is a group matching the literal text "0-9", not a digit class; "[0-9]" may have been
// intended for the port number - confirm before changing, since it alters what the lexer accepts.
private const string urlPathRegex = @"http(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:(0-9)*)*(\/?)([a-zA-Z0-9\-\.\?\,\'\/\\\+&%\$#_]*)?";
private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")";
public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex);

#endregion

#endregion

#endregion

#region 2-NonTerminals
// Most non-terminals are first defined here, so they can be used anywhere in the rules
// Otherwise you can only use non-terminals that have been defined previously

public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument);
public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments);
public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns);
public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant);
public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula);
public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows);
public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool);
public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell);
public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant);
public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray);
public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument);
public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error);
public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula);
public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq);
public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall);
public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName);
public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange);
public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp);
public NonTerminal MultiRangeFormula{ get; } = new NonTerminal(GrammarNames.MultiRangeFormula);
public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange);
public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number);
public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix);
public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference);
public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError);
public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName);
public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName);
public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet);
public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart);
public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
public NonTerminal StructuredReferenceElement { get; } = new NonTerminal(GrammarNames.StructuredReferenceElement);
public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
public NonTerminal StructuredReferenceTable { get; } = new NonTerminal(GrammarNames.StructuredReferenceTable);
public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);

#endregion

// Builds the grammar: marks punctuation, assigns the production rule of every non-terminal
// and registers operator precedence. The order of alternatives within the rules is kept as written.
public ExcelFormulaGrammar() : base(false)
{
    #region Punctuation
    MarkPunctuation(OpenParen, CloseParen);
    MarkPunctuation(OpenSquareParen, CloseSquareParen);
    MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
    #endregion

    #region Rules

    #region Base rules
    Root = Start;

    Start.Rule =
        FormulaWithEq
        | Formula
        | ArrayFormula
        | MultiRangeFormula
        ;
    MarkTransient(Start);

    ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;

    MultiRangeFormula.Rule = eqop + Union;

    FormulaWithEq.Rule = eqop + Formula;

    Formula.Rule =
        Reference + ReduceHere()
        | Constant
        | FunctionCall
        | ConstantArray
        | OpenParen + Formula + CloseParen
        | ReservedName
        ;

    ReservedName.Rule = ReservedNameToken;

    Constant.Rule =
        Number
        | Text
        | Bool
        | Error
        ;

    Text.Rule = TextToken;
    Number.Rule = NumberToken;
    Bool.Rule = BoolToken;
    Error.Rule = ErrorToken;
    RefError.Rule = RefErrorToken;
    #endregion

    #region Functions

    // Named built-in calls as well as prefix, postfix and infix operations are all FunctionCall nodes
    FunctionCall.Rule =
        FunctionName + Arguments + CloseParen
        | PrefixOp + Formula
        | Formula + PostfixOp
        | Formula + InfixOp + Formula
        ;

    FunctionName.Rule = ExcelFunction;

    Arguments.Rule = MakeStarRule(Arguments, comma, Argument);

    EmptyArgument.Rule = EmptyArgumentToken;
    Argument.Rule = Formula | EmptyArgument;

    PrefixOp.Rule =
        ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
        | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
        | ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
    MarkTransient(PrefixOp);

    InfixOp.Rule =
        expop
        | mulop
        | divop
        | plusop
        | minop
        | concatop
        | gtop
        | eqop
        | ltop
        | neqop
        | gteop
        | lteop;
    MarkTransient(InfixOp);

    // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
    PostfixOp.Rule = PreferShiftHere() + percentop;
    MarkTransient(PostfixOp);
    #endregion

    #region References

    Reference.Rule =
        ReferenceItem
        | ReferenceFunctionCall
        | OpenParen + Reference + PreferShiftHere() + CloseParen
        | Prefix + ReferenceItem
        | DynamicDataExchange
        ;

    // Range (colon), intersection, parenthesized union, reference-returning function call and "#" postfix
    ReferenceFunctionCall.Rule =
        Reference + colon + Reference
        | Reference + intersectop + Reference
        | OpenParen + Union + CloseParen
        | RefFunctionName + Arguments + CloseParen
        | Reference + hash
        ;

    RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;

    Union.Rule = MakePlusRule(Union, comma, Reference);

    ReferenceItem.Rule =
        Cell
        | NamedRange
        | VRange
        | HRange
        | RefError
        | UDFunctionCall
        | StructuredReference
        ;
    MarkTransient(ReferenceItem);

    UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
    UDFName.Rule = UDFToken;

    VRange.Rule = VRangeToken;
    HRange.Rule = HRangeToken;

    Cell.Rule = CellToken;

    File.Rule =
        FileNameNumericToken
        | FileNameEnclosedInBracketsToken
        | FilePathToken + FileNameEnclosedInBracketsToken
        | FilePathToken + FileName
        ;

    DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;

    NamedRange.Rule = NameToken | NamedRangeCombinationToken;

    Prefix.Rule =
        SheetToken
        | QuoteS + SheetQuotedToken
        | File + SheetToken
        | QuoteS + File + SheetQuotedToken
        | File + exclamationMark
        | MultipleSheetsToken
        | QuoteS + MultipleSheetsQuotedToken
        | File + MultipleSheetsToken
        | QuoteS + File + MultipleSheetsQuotedToken
        | RefErrorToken
        ;

    StructuredReferenceElement.Rule =
        OpenSquareParen + SRColumnToken + CloseSquareParen
        | OpenSquareParen + NameToken + CloseSquareParen
        | FileNameEnclosedInBracketsToken;

    StructuredReferenceTable.Rule = NameToken;

    StructuredReferenceExpression.Rule =
        StructuredReferenceElement
        | at + StructuredReferenceElement
        | StructuredReferenceElement + colon + StructuredReferenceElement
        | at + StructuredReferenceElement + colon + StructuredReferenceElement
        | StructuredReferenceElement + comma + StructuredReferenceElement
        | StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement
        | StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement
        | StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement
        ;

    StructuredReference.Rule =
        StructuredReferenceElement
        | OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
        | StructuredReferenceTable + StructuredReferenceElement
        | StructuredReferenceTable + OpenSquareParen + CloseSquareParen
        | StructuredReferenceTable + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
        ;
    #endregion

    #region Arrays
    ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;

    // Rows are separated by semicolons, the constants within a row by commas
    ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
    ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);

    ArrayConstant.Rule =
        Constant
        | PrefixOp + Number
        | RefError;
    #endregion

    #endregion

    #region 5-Operator Precedence
    // Some of these operators are neutral associative instead of left associative,
    // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
    // structure of the parse tree, we like consistency.
    RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
    RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
    RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
    RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
    RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
    RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
    RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at);
    RegisterOperators(Precedence.Union, Associativity.Left, comma);
    RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
    RegisterOperators(Precedence.Range, Associativity.Left, colon);
    #endregion
}

#region Precedence and Priority constants
// Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
// Could also be an enum, but this way you don't need int casts
private static class Precedence
{
    // Don't use priority 0, Irony seems to view it as no priority set
    public const int Comparison = 1;
    public const int Concatenation = 2;
    public const int Addition = 3;
    public const int Multiplication = 4;
    public const int Exponentiation = 5;
    public const int UnaryPostFix = 6;
    public const int UnaryPreFix = 7;
    //public const int Reference = 8;
    public const int Union = 9;
    public const int Intersection = 10;
    public const int Range = 11;
}

// Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
// E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
// E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
private static class TerminalPriority
{
    // Irony Low value
    //public const int Low = -1000;

    public const int SRColumn = -900;

    public const int Name = -800;
    public const int ReservedName = -700;

    public const int FileName = -500;

    public const int SingleQuotedString = -100;

    // Irony Normal value, default value
    //public const int Normal = 0;
    public const int Bool = 0;

    public const int MultipleSheetsToken = 100;

    // Irony High value
    //public const int High = 1000;

    public const int CellToken = 1000;

    public const int NamedRangeCombination = 1100;

    public const int UDF = 1150;

    public const int ExcelFunction = 1200;
    public const int ExcelRefFunction = 1200;
    public const int FileNameNumericToken = 1200;
    public const int SheetToken = 1200;
    public const int SheetQuotedToken = 1200;
}
#endregion

// Evaluated on every access: each read loads and splits the resource again
private static string[] excelFunctionList => GetExcelFunctionList();

// Reads the embedded v151 built-in function list and returns its non-empty lines
// (split on both '\n' and '\r').
private static string[] GetExcelFunctionList()
{
    var resource = Properties.Resources.ExcelBuiltinFunctionList_v151;
    using (var sr = new StringReader(resource))
        return sr.ReadToEnd().Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
}
}

#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
    #region Non-Terminals
    // Names of the non-terminals; each is passed to a NonTerminal constructor in the grammar
    // or is available for matching against a parse tree node's term name.
    public const string Argument = "Argument";
    public const string Arguments = "Arguments";
    public const string ArrayColumns = "ArrayColumns";
    public const string ArrayConstant = "ArrayConstant";
    public const string ArrayFormula = "ArrayFormula";
    public const string ArrayRows = "ArrayRows";
    public const string Bool = "Bool";
    public const string Cell = "Cell";
    public const string Constant = "Constant";
    public const string ConstantArray = "ConstantArray";
    public const string DynamicDataExchange = "DynamicDataExchange";
    public const string EmptyArgument = "EmptyArgument";
    public const string Error = "Error";
    public const string ExcelFunction = "ExcelFunction";
    public const string File = "File";
    public const string Formula = "Formula";
    public const string FormulaWithEq = "FormulaWithEq";
    public const string FunctionCall = "FunctionCall";
    public const string FunctionName = "FunctionName";
    public const string HorizontalRange = "HRange";
    public const string MultiRangeFormula = "MultiRangeFormula";
    public const string NamedRange = "NamedRange";
    public const string Number = "Number";
    public const string Prefix = "Prefix";
    public const string QuotedFileSheet = "QuotedFileSheet";
    public const string Range = "Range";
    public const string Reference = "Reference";
    public const string ReferenceFunctionCall = "ReferenceFunctionCall";
    public const string RefError = "RefError";
    public const string RefFunctionName = "RefFunctionName";
    public const string ReservedName = "ReservedName";
    public const string Sheet = "Sheet";
    public const string StructuredReference = "StructuredReference";
    public const string StructuredReferenceElement = "StructuredReferenceElement";
    public const string StructuredReferenceExpression = "StructuredReferenceExpression";
    public const string StructuredReferenceTable = "StructuredReferenceTable";
    public const string Text = "Text";
    public const string UDFName = "UDFName";
    public const string UDFunctionCall = "UDFunctionCall";
    public const string Union = "Union";
    public const string VerticalRange = "VRange";
    #endregion

    #region Transient Non-Terminals
    // Names of the non-terminals the grammar marks as transient
    public const string TransientStart = "Start";
    public const string TransientInfixOp = "InfixOp";
    public const string TransientPostfixOp = "PostfixOp";
    public const string TransientPrefixOp = "PrefixOp";
    public const string TransientReferenceItem = "ReferenceItem";
    #endregion

    #region Terminals
    // Names of the terminals (tokens)
    public const string TokenBool = "BoolToken";
    public const string TokenCell = "CellToken";
    public const string TokenEmptyArgument = "EmptyArgumentToken";
    public const string TokenError = "ErrorToken";
    public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
    public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
    public const string TokenFilePath = "FilePathToken";
    public const string TokenFileName = "FileNameToken";
    public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
    public const string TokenFileNameNumeric = "FileNameNumericToken";
    public const string TokenHRange = "HRangeToken";
    public const string TokenIntersect = "INTERSECT";
    public const string TokenMultipleSheets = "MultipleSheetsToken";
    public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
    public const string TokenName = "NameToken";
    public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
    public const string TokenNumber = "NumberToken";
    public const string TokenRefError = "RefErrorToken";
    public const string TokenReservedName = "ReservedNameToken";
    public const string TokenSingleQuotedString = "SingleQuotedString";
    public const string TokenSheet = "SheetNameToken";
    public const string TokenSheetQuoted = "SheetNameQuotedToken";
    public const string TokenSRTableName = "SRTableName";
    public const string TokenSRKeyword = "SRKeyword";
    public const string TokenSRColumn = "SRColumn";
    public const string TokenSREnclosedColumn = "SREnclosedColumn";
    public const string TokenText = "TextToken";
    public const string TokenUDF = "UDFToken";
    public const string TokenUnionOperator = ",";
    public const string TokenVRange = "VRangeToken";
    #endregion
}
#endregion
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v151/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v151
{
    ///
    /// Excel formula parser
/// Contains the parser and utilities that operate directly on the parse tree, or make working with the parse tree easier.
///
public static class ExcelFormulaParser
{
    /// <summary>
    /// Thread-local singleton parser instance
    /// </summary>
    [ThreadStatic] private static Parser _p;

    /// <summary>
    /// Parser of the calling thread; created on first use and then cached in the thread-static field
    /// </summary>
    private static Parser P => _p ?? (_p = new Parser(new ExcelFormulaGrammar()));

    /// <summary>
    /// Parse a formula, return the tree's root node
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">
    /// If formula could not be parsed
    /// </exception>
    /// <returns>Parse tree root node</returns>
    public static ParseTreeNode Parse(string input)
    {
        return ParseToTree(input).Root;
    }

    /// <summary>
    /// Parse a formula, return the tree
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">
    /// If formula could not be parsed
    /// </exception>
    /// <returns>Parse tree</returns>
    public static ParseTree ParseToTree(string input)
    {
        var tree = P.Parse(input);

        if (tree.HasErrors())
        {
            throw new ArgumentException("Failed parsing input <<" + input + ">>");
        }

        // Move the span of every intersect token one position/column to the left and give it length 1
        var intersects = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenIntersect));
        foreach (ParseTreeNode intersect in intersects)
        {
            var location = intersect.Span.Location;
            var newLocation = new SourceLocation(location.Position - 1, location.Line, location.Column - 1);
            intersect.Span = new SourceSpan(newLocation, 1);
        }

        // Let PrefixInfo correct quoted sheet name tokens for whitespace, using the original input
        var quotedSheetNodes = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenSheetQuoted));
        foreach (ParseTreeNode quotedSheetNode in quotedSheetNodes)
        {
            PrefixInfo.FixQuotedSheetNodeForWhitespace(quotedSheetNode, input);
        }

        return tree;
    }

    /// <summary>
    /// All nodes (the root included) in depth-first pre-order, with a conditional stop
    /// </summary>
    /// <param name="root">The root node</param>
    /// <param name="stopAt">Don't process the children of a node matching this predicate</param>
    /// <returns>Lazily evaluated sequence of nodes; a node matching <paramref name="stopAt"/> is still returned itself</returns>
    // inspiration taken from https://irony.codeplex.com/discussions/213938
    public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
    {
        var stack = new Stack<ParseTreeNode>();
        stack.Push(root);
        while (stack.Count > 0)
        {
            var node = stack.Pop();
            yield return node;

            // Check if we don't want to process the children of this node
            if (stopAt != null && stopAt(node)) continue;

            var children = node.ChildNodes;
            // Push children on in reverse order so that they will
            // be evaluated left -> right when popped.
            for (int i = children.Count - 1; i >= 0; i--)
            {
                stack.Push(children[i]);
            }
        }
    }

    /// <summary>
    /// All nodes in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
    {
        return AllNodesConditional(root);
    }

    /// <summary>
    /// All nodes of a certain type in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
    {
        return AllNodes(root.AllNodes(), type);
    }

    /// <summary>
    /// Filter an already enumerated node sequence on node type
    /// </summary>
    internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
    {
        return allNodes.Where(node => node.Is(type));
    }

    /// <summary>
    /// Get the parent node of a node
    /// </summary>
    /// <remarks>
    /// This is an expensive operation, as the whole tree will be searched through
    /// </remarks>
    /// <exception cref="ArgumentException">If no node below <paramref name="treeRoot"/> has the child as direct child</exception>
    public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
    {
        var parent = treeRoot.AllNodes()
            .FirstOrDefault(node => node.ChildNodes.Any(c => c == child));
        if (parent == null) throw new ArgumentException("Child is not part of the tree", nameof(child));
        return parent;
    }

    /// <summary>
    /// The node type/name
    /// </summary>
    public static string Type(this ParseTreeNode node)
    {
        return node.Term.Name;
    }

    /// <summary>
    /// Check if a node is of a particular type
    /// </summary>
    public static bool Is(this ParseTreeNode pt, string type)
    {
        return pt.Type() == type;
    }

    /// <summary>
    /// Checks whether this node is a function
    /// </summary>
    public static Boolean IsFunction(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.FunctionCall)
               || input.Is(GrammarNames.ReferenceFunctionCall)
               || input.Is(GrammarNames.UDFunctionCall)
               // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
               || (input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction())
            ;
    }

    /// <summary>
    /// Whether or not this node represents parentheses "(_)"
    /// </summary>
    public static bool IsParentheses(this ParseTreeNode input)
    {
        switch (input.Type())
        {
            case GrammarNames.Formula:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Formula);
            case GrammarNames.Reference:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Reference);
            default:
                return false;
        }
    }

    /// <summary>
    /// Whether this node is a function with three children of which the middle one is an operator
    /// </summary>
    public static bool IsBinaryOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 3
               && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// Whether this node is a binary operation in a FunctionCall node
    /// </summary>
    public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);
    }

    /// <summary>
    /// Whether this node is a binary operation in a ReferenceFunctionCall node
    /// </summary>
    public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
    }

    /// <summary>
    /// Whether this node is a unary prefix or unary postfix operation
    /// </summary>
    public static bool IsUnaryOperation(this ParseTreeNode input)
    {
        return IsUnaryPrefixOperation(input) || IsUnaryPostfixOperation(input);
    }

    /// <summary>
    /// Whether this node is a function with two children of which the first one is an operator
    /// </summary>
    public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 2
               && input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// Whether this node is a function with two children of which the second one is an operator
    /// </summary>
    public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 2
               && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// Drop the last character; used to remove the "(" that is part of function name tokens
    /// </summary>
    private static string RemoveFinalSymbol(string input)
    {
        return input.Substring(0, input.Length - 1);
    }

    /// <summary>
    /// Get the function or operator name of this function call
    /// </summary>
    /// <exception cref="ArgumentException">If the node is not a function call</exception>
    public static string GetFunction(this ParseTreeNode input)
    {
        if (input.IsIntersection())
        {
            return GrammarNames.TokenIntersect;
        }
        if (input.IsUnion())
        {
            return GrammarNames.TokenUnionOperator;
        }
        if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
        {
            return input.ChildNodes[1].Print();
        }
        if (input.IsUnaryPrefixOperation())
        {
            return input.ChildNodes[0].Print();
        }
        if (input.IsNamedFunction())
        {
            // Invariant casing: function names must not depend on the current culture
            // (a culture-sensitive ToUpper turns "if" into "İF" under Turkish casing rules)
            return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
        }
        if (input.IsExternalUDFunction())
        {
            return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
        }

        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Check if this node is a specific function
    /// </summary>
    public static bool MatchFunction(this ParseTreeNode input, string functionName)
    {
        return IsFunction(input) && GetFunction(input) == functionName;
    }

    /// <summary>
    /// Get all the arguments of a function or operation
    /// </summary>
    /// <exception cref="ArgumentException">If the node is not a function call</exception>
    public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
    {
        if (input.IsNamedFunction())
        {
            return input
                .ChildNodes[1] // "Arguments" non-terminal
                .ChildNodes    // "Argument" non-terminals
                .Select(node => node.ChildNodes[0])
                ;
        }
        if (input.IsBinaryOperation())
        {
            return new[] {input.ChildNodes[0], input.ChildNodes[2]};
        }
        if (input.IsUnaryPrefixOperation())
        {
            return new[] {input.ChildNodes[1]};
        }
        if (input.IsUnaryPostfixOperation())
        {
            return new[] {input.ChildNodes[0]};
        }
        if (input.IsUnion())
        {
            return input.ChildNodes[0].ChildNodes;
        }
        if (input.IsExternalUDFunction())
        {
            return input       // Reference
                .ChildNodes[1] // UDFunctionCall
                .ChildNodes[1] // Arguments
                .ChildNodes    // Argument non-terminals
                .Select(node => node.ChildNodes[0])
                ;
        }

        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Checks whether this node is a built-in excel function
    /// </summary>
    public static bool IsBuiltinFunction(this ParseTreeNode node)
    {
        return node.IsFunction()
               && (node.ChildNodes[0].Is(GrammarNames.FunctionName) || node.ChildNodes[0].Is(GrammarNames.RefFunctionName));
    }

    /// <summary>
    /// Whether or not this node represents an intersection
    /// </summary>
    public static bool IsIntersection(this ParseTreeNode input)
    {
        return IsBinaryOperation(input)
               && input.ChildNodes[1].Token.Terminal.Name == GrammarNames.TokenIntersect;
    }

    /// <summary>
    /// Whether or not this node represents a union
    /// </summary>
    public static bool IsUnion(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.ReferenceFunctionCall)
               && input.ChildNodes.Count == 1
               && input.ChildNodes[0].Is(GrammarNames.Union);
    }

    /// <summary>
    /// Checks whether this node is a function call with name, and not just a unary or binary operation
    /// </summary>
public static bool IsNamedFunction(this ParseTreeNode input)
{
    // FunctionCall whose first child is a FunctionName
    if (input.Is(GrammarNames.FunctionCall) && input.ChildNodes[0].Is(GrammarNames.FunctionName))
    {
        return true;
    }

    // ReferenceFunctionCall whose first child is a RefFunctionName
    if (input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes[0].Is(GrammarNames.RefFunctionName))
    {
        return true;
    }

    // Any UDFunctionCall counts as a named function
    return input.Is(GrammarNames.UDFunctionCall);
}

/// <summary>
/// Whether this node is a binary or unary operation
/// </summary>
public static bool IsOperation(this ParseTreeNode input)
{
    return input.IsBinaryOperation() || input.IsUnaryOperation();
}

/// <summary>
/// Whether this node is a Reference with exactly two children, the second being a named function
/// </summary>
public static bool IsExternalUDFunction(this ParseTreeNode input)
{
    return input.Is(GrammarNames.Reference)
           && input.ChildNodes.Count == 2
           && input.ChildNodes[1].IsNamedFunction();
}

/// <summary>
/// True if this node presents a number constant with a sign
/// </summary>
public static bool IsNumberWithSign(this ParseTreeNode input)
{
    if (!IsUnaryPrefixOperation(input))
    {
        return false;
    }

    // First child of the operand: must be a Constant that wraps a Number
    ParseTreeNode operandContent = input.ChildNodes[1].ChildNodes[0];
    return operandContent.Is(GrammarNames.Constant)
           && operandContent.ChildNodes[0].Is(GrammarNames.Number);
}

/// <summary>
/// Extract all of the information from a Prefix non-terminal
/// </summary>
public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix)
{
    return PrefixInfo.From(prefix);
}

/// <summary>
/// Go to the first non-formula child node
/// </summary>
public static ParseTreeNode SkipFormula(this ParseTreeNode input)
{
    ParseTreeNode current = input;
    while (current.Is(GrammarNames.Formula))
    {
        current = current.ChildNodes.First();
    }

    return current;
}

/// <summary>
/// Get all child nodes that are references and aren't part of another reference expression
/// </summary>
public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
{
    // The generic element type is required: callers chain SelectMany on this result
    return input
        .AllNodesConditional(node => node.Is(GrammarNames.Reference))
        .Where(node => node.Is(GrammarNames.Reference))
        .Select(node => node.SkipToRelevant());
}

/// <summary>
/// Gets the ParserReferences from the input parse tree node and its children
/// </summary>
///
/// 5 cases:
/// 1. ReferenceItem node: convert to ParserReference
/// 2.
/// Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
/// (to include the references in the arguments of external UDFs)
/// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
/// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
/// 5. Other cases (RefFunctionCall, Union, Arguments): recursive call on the nodes returned from GetReferenceNodes(node)
///
public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
{
    // A Reference with a single child is looked through: work on the child instead
    if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
    {
        node = node.ChildNodes[0];
    }

    var list = new List<ParserReference>();

    switch (node.Type())
    {
        case GrammarNames.Cell:
        case GrammarNames.NamedRange:
        case GrammarNames.HorizontalRange:
        case GrammarNames.VerticalRange:
        case GrammarNames.StructuredReference:
            list.Add(new ParserReference(node));
            break;

        case GrammarNames.Reference:
            // The reference itself, plus everything referenced below its second child
            list.Add(new ParserReference(node));
            list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            break;

        default:
            if (node.IsRange())
            {
                var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray();
                var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray();

                if (IsCellReference(rangeStart) && IsCellReference(rangeEnd))
                {
                    // Cell:Cell — reuse the start reference and widen it into a CellRange
                    ParserReference range = rangeStart.First();
                    range.MaxLocation = rangeEnd.First().MinLocation;
                    range.ReferenceType = ReferenceType.CellRange;
                    range.LocationString = node.Print();
                    list.Add(range);
                }
                else
                {
                    // Complex limits: report both sides separately
                    list.AddRange(rangeStart);
                    list.AddRange(rangeEnd);
                }
            }
            else
            {
                list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            }

            break;
    }

    return list;
}

/// <summary>
/// Whether the list holds exactly one reference and that reference is of type Cell
/// </summary>
private static bool IsCellReference(IList<ParserReference> references)
{
    return references.Count == 1 && references.First().ReferenceType == ReferenceType.Cell;
}

/// <summary>
/// Whether or not this node represents a range
/// </summary>
public static bool IsRange(this ParseTreeNode input)
{
    return input.IsBinaryReferenceOperation() && input.ChildNodes[1].Is(":");
}

/// <summary>
/// Go to the first "relevant" child node, i.e. skips wrapper nodes
/// </summary>
/// <param name="input">The input parse tree node</param>
/// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
/// <remarks>
/// Skips:
/// * FormulaWithEq and ArrayFormula nodes
/// * Formula nodes
/// * Parentheses
/// * Reference nodes which are just wrappers
/// </remarks>
public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
{
    while (true)
    {
        switch (input.Type())
        {
            case GrammarNames.FormulaWithEq:
            case GrammarNames.ArrayFormula:
                // Descend into the second child
                input = input.ChildNodes[1];
                break;

            case GrammarNames.Argument:
            case GrammarNames.Formula:
                if (input.ChildNodes.Count == 1)
                {
                    input = input.ChildNodes[0];
                }
                else
                {
                    return input;
                }

                break;

            case GrammarNames.Reference:
                // Skip references which are parentheses
                // Skip references without a prefix (=> they only have one child node) if the option is set
                if ((skipReferencesWithoutPrefix && input.ChildNodes.Count == 1) || input.IsParentheses())
                {
                    input = input.ChildNodes[0];
                }
                else
                {
                    return input;
                }

                break;

            default:
                return input;
        }
    }
}

/// <summary>
/// Pretty-print a parse tree to a string
/// </summary>
/// <exception cref="ArgumentException">The node type is not handled and the node does not have exactly one child</exception>
public static string Print(this ParseTreeNode input)
{
    // For terminals, just print the token text
    if (input.Term is Terminal)
    {
        return input.Token.Text;
    }

    // (Lazy) enumerable for printed children
    var children = input.ChildNodes.Select(Print);
    // Concrete list when needed
    List<string> childrenList;

    // Switch on non-terminals
    switch (input.Term.Name)
    {
        case GrammarNames.Formula:
            // Check if these are brackets, otherwise print first child
            return IsParentheses(input) ? $"({children.First()})" : children.First();

        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            childrenList = children.ToList();

            if (input.IsNamedFunction())
            {
                return string.Join("", childrenList) + ")";
            }

            if (input.IsBinaryOperation())
            {
                // format string for "normal" binary operation
                string format = "{0}{1}{2}";
                if (input.IsIntersection())
                {
                    // An intersection prints as a single space between the operands
                    format = "{0} {2}";
                }

                return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
            }

            if (input.IsUnion())
            {
                return $"({string.Join(",", childrenList)})";
            }

            if (input.IsUnaryOperation())
            {
                return string.Join("", childrenList);
            }

            throw new ArgumentException("Unknown function type.");

        case GrammarNames.Reference:
            return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);

        case GrammarNames.Prefix:
            var ret = string.Join("", children);
            // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
            if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
            {
                ret += "!";
            }

            return ret;

        case GrammarNames.ArrayFormula:
            return "{=" + children.ElementAt(1) + "}";

        case GrammarNames.StructuredReference:
            var sb = new StringBuilder();
            // True when the first child is the table name
            var hasTable = input.ChildNodes.Count >= 1 && input.ChildNodes[0].Is(GrammarNames.StructuredReferenceTable);
            // Index of the child holding the reference contents
            var contentsIndex = hasTable ? 1 : 0;
            childrenList = children.ToList();

            if (hasTable)
            {
                sb.Append(childrenList[0]);
            }

            if (hasTable && input.ChildNodes.Count == 1)
            {
                // Full table reference
                sb.Append("[]");
            }
            else if (input.ChildNodes[contentsIndex].Is(GrammarNames.StructuredReferenceElement))
            {
                sb.Append(childrenList[contentsIndex]);
            }
            else
            {
                sb.Append($"[{childrenList[contentsIndex]}]");
            }

            return sb.ToString();

        // Terms for which to print all child nodes concatenated
        case GrammarNames.ArrayConstant:
        case GrammarNames.DynamicDataExchange:
        case GrammarNames.FormulaWithEq:
        case GrammarNames.File:
        case GrammarNames.MultiRangeFormula:
        case GrammarNames.StructuredReferenceExpression:
            return string.Join("", children);

        // Terms for which we print the children comma-separated
        case GrammarNames.Arguments:
        case GrammarNames.ArrayRows:
        case GrammarNames.Union:
            return string.Join(",", children);

        case GrammarNames.ArrayColumns:
            return string.Join(";", children);

        case GrammarNames.ConstantArray:
            return $"{{{children.First()}}}";

        default:
            // If it is not defined above and the number of children is exactly one, we want to just print the first child
            if (input.ChildNodes.Count == 1)
            {
                return children.First();
            }

            throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine +
                                        "This probably means the Excel grammar was modified without the print function being modified");
    }
}
} // closes the enclosing type opened before this chunk
} // closes the enclosing scope opened before this chunk

================================================
FILE: app/XLParser.Web/XLParserVersions/v151/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v151
{
    public enum ReferenceType
    {
        Cell,
        CellRange,
        UserDefinedName,
        HorizontalRange,
        VerticalRange,
        RefError,
        Table
    }

    public class ParserReference
    {
        public const int MaxRangeHeight = 1048576;
        public const int MaxRangeWidth = 16384;

        public ReferenceType ReferenceType { get; set; }
        public string LocationString { get; set; }
        public string Worksheet { get; set; }
        public string LastWorksheet { get; set; }
        public string FilePath { get; set; }
        public string FileName { get; set; }
        public string Name { get; private set; }
        public string MinLocation { get; set; } //Location as appearing in the formula, eg $A$1
        public string MaxLocation { get; set; }

        /// <summary>
        /// Create a reference from explicit values; MaxLocation falls back to minLocation when maxLocation is null
        /// </summary>
        public ParserReference(ReferenceType referenceType, string locationString = null, string worksheet = null, string lastWorksheet = null,
            string filePath = null, string fileName = null, string name = null, string minLocation = null, string maxLocation = null)
        {
            ReferenceType = referenceType;
            LocationString = locationString;
            Worksheet = worksheet;
            LastWorksheet = lastWorksheet;
            FilePath = filePath;
            FileName = fileName;
            Name = name;
            MinLocation = minLocation;
            MaxLocation = maxLocation ?? minLocation;
        }

        /// <summary>
        /// Create a reference by initializing it from a parse tree node
        /// </summary>
        public ParserReference(ParseTreeNode node)
        {
            InitializeReference(node);
        }

        /// <summary>
        /// Initializes the current object based on the input ParseTreeNode
        /// </summary>
        ///
        /// For Reference nodes (Prefix ReferenceItem), it initialize the values derived from the Prefix node and
        /// is re-invoked for the ReferenceItem node.
///
public void InitializeReference(ParseTreeNode node)
{
    switch (node.Type())
    {
        case GrammarNames.Reference:
        {
            // First child carries the prefix; the second child is handled by the recursive call below
            PrefixInfo prefixInfo = node.ChildNodes[0].GetPrefixInfo();

            if (prefixInfo.HasSheet)
            {
                // A doubled single quote in the sheet name stands for one quote
                Worksheet = prefixInfo.Sheet.Replace("''", "'");
            }
            else
            {
                Worksheet = "(Undefined sheet)";
            }

            if (prefixInfo.HasMultipleSheets)
            {
                // "First:Last" overrides the worksheet set above
                string[] sheetNames = prefixInfo.MultipleSheets.Split(':');
                Worksheet = sheetNames[0];
                LastWorksheet = sheetNames[1];
            }

            if (prefixInfo.HasFilePath)
            {
                FilePath = prefixInfo.FilePath;
            }

            // A numeric file reference takes precedence over a file name
            if (prefixInfo.HasFileNumber)
            {
                FileName = prefixInfo.FileNumber.ToString();
            }
            else if (prefixInfo.HasFileName)
            {
                FileName = prefixInfo.FileName;
            }

            InitializeReference(node.ChildNodes[1]);
            break;
        }

        case GrammarNames.Cell:
        {
            ReferenceType = ReferenceType.Cell;
            MinLocation = node.ChildNodes[0].Token.ValueString;
            // A single cell starts and ends at the same location
            MaxLocation = MinLocation;
            break;
        }

        case GrammarNames.NamedRange:
        {
            ReferenceType = ReferenceType.UserDefinedName;
            Name = node.ChildNodes[0].Token.ValueString;
            break;
        }

        case GrammarNames.StructuredReference:
        {
            ReferenceType = ReferenceType.Table;
            // Name stays null when the reference has no table child
            Name = node.ChildNodes.FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceTable)?.ChildNodes[0].Token.ValueString;
            break;
        }

        case GrammarNames.HorizontalRange:
        {
            // Token has the form "min:max"
            string[] rowLimits = node.ChildNodes[0].Token.ValueString.Split(':');
            ReferenceType = ReferenceType.HorizontalRange;
            MinLocation = rowLimits[0];
            MaxLocation = rowLimits[1];
            break;
        }

        case GrammarNames.VerticalRange:
        {
            // Token has the form "min:max"
            string[] columnLimits = node.ChildNodes[0].Token.ValueString.Split(':');
            ReferenceType = ReferenceType.VerticalRange;
            MinLocation = columnLimits[0];
            MaxLocation = columnLimits[1];
            break;
        }

        case GrammarNames.RefError:
        {
            ReferenceType = ReferenceType.RefError;
            break;
        }
    }

    // Runs after any recursive call, so the outermost node's text is what remains
    LocationString = node.Print();
}

/// <summary>
/// Converts the column number to an Excel column string representation.
/// </summary>
/// <param name="columnNumber">The zero-based column number.</param>
private string ConvertColumnToStr(int columnNumber)
{
    var sb = new System.Text.StringBuilder();

    // Bijective base-26: emit the least significant letter first, prepending as we go
    while (columnNumber >= 0)
    {
        sb.Insert(0, (char)('A' + columnNumber % 26));
        columnNumber = columnNumber / 26 - 1;
    }

    return sb.ToString();
}

/// <summary>
/// MinLocation for single cells (empty string when unset), "MinLocation:MaxLocation" for every other reference type
/// </summary>
public override string ToString()
{
    if (ReferenceType == ReferenceType.Cell)
    {
        // MinLocation is already a string; never throw or return null from ToString
        return MinLocation ?? string.Empty;
    }

    return string.Format("{0}:{1}", MinLocation, MaxLocation);
}
} // closes class ParserReference
} // closes namespace XLParser.Web.XLParserVersions.v151

================================================
FILE: app/XLParser.Web/XLParserVersions/v151/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v151
{
    // TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23

    /// <summary>
    /// Simple data class that holds information about a Prefix.
    /// </summary>
    public class PrefixInfo : IEquatable<PrefixInfo>
    {
        public string FilePath { get; }
        public bool HasFilePath => FilePath != null;

        private readonly int? _fileNumber;
        public int FileNumber => _fileNumber.GetValueOrDefault();
        public bool HasFileNumber => _fileNumber.HasValue;

        public string FileName { get; }
        public bool HasFileName => FileName != null;

        public bool HasFile => HasFileName || HasFileNumber;

        public string Sheet { get; }
        public bool HasSheet => Sheet != null;

        public string MultipleSheets { get; }
        public bool HasMultipleSheets => MultipleSheets != null;

        public bool IsQuoted { get; }

        /// <summary>
        /// Create a PrefixInfo from explicit values; every part is optional
        /// </summary>
        public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
        {
            Sheet = sheet;
            _fileNumber = fileNumber;
            FileName = fileName;
            FilePath = filePath;
            MultipleSheets = multipleSheets;
            IsQuoted = isQuoted;
        }

        /// <summary>
        /// Create a PrefixInfo class from a parse tree node
        /// </summary>
        /// <exception cref="ArgumentException">The node is not a Prefix node</exception>
        internal static PrefixInfo From(ParseTreeNode prefix)
        {
            if (prefix.Type() != GrammarNames.Prefix)
            {
                throw new ArgumentException("Not a prefix", nameof(prefix));
            }

            string filePath = null;
            int? fileNumber = null;
            string fileName = null;
            string sheetName = null;
            string multipleSheets = null;

            // Token number we're processing
            var cur = 0;

            // Check for quotes
            var isQuoted = prefix.ChildNodes[cur].Is("'");
            if (isQuoted)
            {
                cur++;
            }

            // Check and process file
            if (prefix.ChildNodes[cur].Is(GrammarNames.File))
            {
                ParseTreeNode file = prefix.ChildNodes[cur];

                if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
                {
                    // Numeric filename: strip the first and last character before parsing
                    fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
                }
                else
                {
                    // String filename
                    var iCur = 0;

                    // Check if it includes a path
                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                    {
                        filePath = file.ChildNodes[iCur].Print();
                        iCur++;
                    }

                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                    {
                        // Strip the first and last character
                        fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                    }
                    else
                    {
                        fileName = file.ChildNodes[iCur].Print();
                    }
                }

                cur++;
            }

            // Check for a non-quoted sheet
            if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheet))
            {
                // remove the trailing !
                sheetName = Substr(prefix.ChildNodes[cur].Print(), 1);
            }
            // Check for a quoted sheet
            else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheetQuoted))
            {
                // remove quote and !
                sheetName = Substr(prefix.ChildNodes[cur].Print(), 2);

                if (sheetName == "")
                {
                    // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                    // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                    sheetName = " ";
                }
            }
            // Check if multiple sheets
            else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenMultipleSheets))
            {
                multipleSheets = Substr(prefix.ChildNodes[cur].Print(), 1);
            }

            return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
        }

        /// <summary>
        /// Extend a quoted sheet node backwards over the whitespace that directly precedes it in the
        /// source text, updating both the node span and the token text
        /// </summary>
        internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
        {
            var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
            SourceLocation currentLocation = quotedSheetNode.Span.Location;
            if (newPosition == currentLocation.Position)
            {
                return;
            }

            // The start moves LEFT by this many characters, so the column must shrink by the same amount.
            // Clamped at zero. NOTE(review): if the skipped whitespace contains a line break the Line value
            // is not adjusted either — confirm whether that can occur.
            var shift = currentLocation.Position - newPosition;
            var newColumn = Math.Max(0, currentLocation.Column - shift);

            var newLocation = new SourceLocation(newPosition, currentLocation.Line, newColumn);
            quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

            // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
            typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
                ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
        }

        /// <summary>
        /// Walk backwards from the node's start position over any directly preceding whitespace
        /// </summary>
        private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
        {
            var startIndex = nodeSheetQuoted.Span.Location.Position;
            while (startIndex > 0 && char.IsWhiteSpace(sourceText[startIndex - 1]))
            {
                startIndex--;
            }

            return startIndex;
        }

        /// <summary>
        /// Return s without its first removeFirst and last removeLast characters
        /// </summary>
        private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
        {
            return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
        }

        public override bool Equals(object other) => Equals(other as PrefixInfo);

        /// <summary>
        /// Case-insensitive comparison of file, sheet and multi-sheet parts; IsQuoted is not compared
        /// </summary>
        public bool Equals(PrefixInfo other)
        {
            if (ReferenceEquals(null, other)) return false;
            if (ReferenceEquals(this, other)) return true;

            return _fileNumber == other._fileNumber
                   && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Hash over the same fields, with the same case-insensitivity, as <see cref="Equals(PrefixInfo)"/>
        /// </summary>
        public override int GetHashCode()
        {
            unchecked
            {
                var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
                hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
                hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
                hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
                hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
                return hashCode;
            }
        }

        public static bool operator ==(PrefixInfo left, PrefixInfo right)
        {
            return Equals(left, right);
        }

        public static bool operator !=(PrefixInfo left, PrefixInfo right)
        {
            return !Equals(left, right);
        }

        /// <summary>
        /// Rebuild the prefix text from its parts, always ending in an exclamation mark
        /// </summary>
        public override string ToString()
        {
            var res = new StringBuilder();
            if (IsQuoted) res.Append("'");
            if (HasFilePath) res.Append(FilePath);
            if (HasFileNumber) res.Append($"[{FileNumber}]");
            if (HasFileName) res.Append($"[{FileName}]");
            if (HasSheet) res.Append(Sheet);
            if (HasMultipleSheets) res.Append(MultipleSheets);
            if (IsQuoted) res.Append("'");
            res.Append("!");
            return res.ToString();
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v152/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.IO;
using System.Reflection;

namespace XLParser.Web.XLParserVersions.v152
{
    /// <summary>
    /// Contains the XLParser grammar
    /// </summary>
    [Language("Excel Formulas", "1.5.2", "Grammar for Excel Formulas")]
    public class ExcelFormulaGrammar : Grammar
    {
        #region 1-Terminals

        #region Symbols and operators

        public Terminal at => ToTerm("@");
        public Terminal comma => ToTerm(",");
        public Terminal colon => ToTerm(":");
        public Terminal hash => ToTerm("#");
        public Terminal semicolon => ToTerm(";");
        public Terminal OpenParen => ToTerm("(");
        public Terminal CloseParen => ToTerm(")");
        public Terminal CloseSquareParen => ToTerm("]");
        public Terminal OpenSquareParen => ToTerm("[");
        public Terminal exclamationMark => ToTerm("!");
        public Terminal CloseCurlyParen => ToTerm("}");
        public Terminal OpenCurlyParen => ToTerm("{");
        public Terminal QuoteS => ToTerm("'");

        public Terminal mulop => ToTerm("*");
        public Terminal plusop => ToTerm("+");
        public Terminal divop => ToTerm("/");
        public Terminal minop => ToTerm("-");
        public Terminal concatop => ToTerm("&");
        public Terminal expop => ToTerm("^");

        // Intersect op is a single space,
which cannot be parsed normally so we need an ImpliedSymbolTerminal // Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect); public Terminal percentop => ToTerm("%"); public Terminal gtop => ToTerm(">"); public Terminal eqop => ToTerm("="); public Terminal ltop => ToTerm("<"); public Terminal neqop => ToTerm("<>"); public Terminal gteop => ToTerm(">="); public Terminal lteop => ToTerm("<="); #endregion #region Literals public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE") { Priority = TerminalPriority.Bool }; public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None) { DefaultIntTypes = new[] {TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt} }; public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes); public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes) { Priority = TerminalPriority.SingleQuotedString }; public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!"); public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError); #endregion #region Functions private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers private const string AllUdfChars = SpecialUdfChars + @"\\.\w"; private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)"; // The following regex uses the rather exotic feature Character Class Subtraction // 
https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\("; public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) {Priority = TerminalPriority.UDF}; public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelFunction { get; } = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + string.Join("|", excelFunctionList) + ")\\(") { Priority = TerminalPriority.ExcelFunction }; // Using this instead of Empty allows a more accurate tree public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument); #endregion #region References and names private const string ColumnPattern = @"(?:[A-Z]{1,2}|[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D])"; public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern); public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*"); private const string CellTokenRegex = "[$]?" 
+ ColumnPattern + "[$]?[1-9][0-9]*"; public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex) { Priority = TerminalPriority.CellToken }; // Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark private const string NameStartCharRegex = @"[\p{L}\\_]"; private const string NameValidCharacterRegex = @"[\w\\_\.\?€]"; public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*") { Priority = TerminalPriority.Name }; // Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference. // If we ever parse R1C1 references, make sure to include them here // TODO: Add all function names here private const string NameInvalidWordsRegex = "((TRUE|FALSE)" + NameValidCharacterRegex + "+)" // \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token + "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)" ; // To prevent e.g. 
"A1A1" being parsed as 2 cell tokens public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NameInvalidWordsRegex + NameValidCharacterRegex + "+") { Priority = TerminalPriority.NamedRangeCombination }; public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+") { Priority = TerminalPriority.ReservedName }; #region Structured References //public Terminal SRTableNameToken = new RegexBasedTerminal(GrammarNames.TokenSRTableName, @"[\w\\.]+\[") //{Priority = 0}; public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, @"[\w\\.]+") { Priority = TerminalPriority.SRColumn }; //public Terminal SREnclosedColumnToken = new RegexBasedTerminal(GrammarNames.TokenSREnclosedColumn, @"\[( )*[\w+\\.,:#'""{}$^&*+=->&+\-*/\^%, "; private const string notSheetNameChars = @"'*\[\]\\:/?"; //const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\""; //const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!"; private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+"; private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*"; //private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!"; public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!") { Priority = TerminalPriority.SheetToken }; public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!") { Priority = TerminalPriority.SheetQuotedToken }; private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!"; private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!"; public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex) { Priority = 
TerminalPriority.MultipleSheetsToken }; public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex) { Priority = TerminalPriority.MultipleSheetsToken }; private const string fileNameNumericRegex = @"\[[0-9]+\]"; public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex) { Priority = TerminalPriority.FileNameNumericToken }; private const string fileNameInBracketsRegex = @"\[[^\[\]]+\]"; public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex) { Priority = TerminalPriority.FileName }; // Source: https://stackoverflow.com/a/14632579 private const string fileNameRegex = @"[^\.\\]+\..{1,4}"; public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex) { Priority = TerminalPriority.FileName }; // Source: http://stackoverflow.com/a/6416209/572635 private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\-.$ ]+)\\(([^<>:\""/\|?*\\]| )+\\)*"; private const string urlPathRegex = @"http(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:(0-9)*)*[/]([a-zA-Z0-9\-\.\?\,\'+&%\$#_ ()]*[/])*"; private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")"; public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex) { Priority = TerminalPriority.FileNamePath }; #endregion #endregion #endregion #region 2-NonTerminals // Most non-terminals are first defined here, so they can be used anywhere in the rules // Otherwise you can only use non-terminals that have been defined previously public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument); public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments); public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns); public NonTerminal 
ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant); public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula); public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows); public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool); public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell); public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant); public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray); public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange); public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument); public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error); public NonTerminal File { get; } = new NonTerminal(GrammarNames.File); public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula); public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq); public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall); public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName); public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange); public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp); public NonTerminal MultiRangeFormula{ get; } = new NonTerminal(GrammarNames.MultiRangeFormula); public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange); public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number); public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp); public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix); public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp); public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet); public NonTerminal Reference{ 
get; } = new NonTerminal(GrammarNames.Reference);
public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError);
public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName);
public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName);
public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet);
public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart);
public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
public NonTerminal StructuredReferenceElement { get; } = new NonTerminal(GrammarNames.StructuredReferenceElement);
public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
public NonTerminal StructuredReferenceTable { get; } = new NonTerminal(GrammarNames.StructuredReferenceTable);
public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);
#endregion

// Wires up the grammar: marks bracket punctuation, assigns the production rule of every
// non-terminal declared above, and registers operator precedence/associativity.
// NOTE(review): `false` is handed to the base Grammar constructor; in Irony this is presumably
// the case-sensitivity flag (i.e. the grammar is case-insensitive) - confirm against the Irony version in use.
public ExcelFormulaGrammar() : base(false)
{
    #region Punctuation
    // Brackets are marked as punctuation; the parse-tree helpers in ExcelFormulaParser rely on
    // bracket tokens not being counted among a node's children (see IsParentheses).
    MarkPunctuation(OpenParen, CloseParen);
    MarkPunctuation(OpenSquareParen, CloseSquareParen);
    MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
    #endregion

    #region Rules

    #region Base rules
    Root = Start;

    // A formula may come with or without a leading '=', as an array formula "{=...}",
    // or as a bare union of ranges preceded by '='.
    Start.Rule =
          FormulaWithEq
        | Formula
        | ArrayFormula
        | MultiRangeFormula
        ;
    MarkTransient(Start);

    ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;

    MultiRangeFormula.Rule = eqop + Union;

    FormulaWithEq.Rule = eqop + Formula;

    Formula.Rule =
          Reference + ReduceHere()
        | Constant
        | FunctionCall
        | ConstantArray
        | OpenParen + Formula + CloseParen
        | ReservedName
        ;

    ReservedName.Rule = ReservedNameToken;

    Constant.Rule =
          Number
        | Text
        | Bool
        | Error
        ;

    // Each constant kind is a thin non-terminal wrapper around a single token.
    Text.Rule = TextToken;
    Number.Rule = NumberToken;
    Bool.Rule = BoolToken;
    Error.Rule = ErrorToken;
    RefError.Rule = RefErrorToken;
    #endregion

    #region Functions

    // Named calls and operator applications share the FunctionCall non-terminal.
    // The function-name token is followed directly by Arguments, so the opening
    // parenthesis is presumably part of that token (ExcelFormulaParser.GetFunction strips one trailing character).
    FunctionCall.Rule =
          FunctionName + Arguments + CloseParen
        | PrefixOp + Formula
        | Formula + PostfixOp
        | Formula + InfixOp + Formula
        ;

    FunctionName.Rule = ExcelFunction;

    Arguments.Rule = MakeStarRule(Arguments, comma, Argument);

    EmptyArgument.Rule = EmptyArgumentToken;
    Argument.Rule = Formula | EmptyArgument;

    PrefixOp.Rule =
          ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
        | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
        | ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
    MarkTransient(PrefixOp);

    InfixOp.Rule =
          expop
        | mulop
        | divop
        | plusop
        | minop
        | concatop
        | gtop
        | eqop
        | ltop
        | neqop
        | gteop
        | lteop;
    MarkTransient(InfixOp);

    // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
    PostfixOp.Rule = PreferShiftHere() + percentop;
    MarkTransient(PostfixOp);
    #endregion

    #region References

    Reference.Rule =
          ReferenceItem
        | ReferenceFunctionCall
        | OpenParen + Reference + PreferShiftHere() + CloseParen
        | Prefix + ReferenceItem
        | DynamicDataExchange
        ;

    // Reference-producing operations: range (':'), intersection, parenthesized union,
    // reference-returning functions and the postfix '#' operator.
    ReferenceFunctionCall.Rule =
          Reference + colon + Reference
        | Reference + intersectop + Reference
        | OpenParen + Union + CloseParen
        | RefFunctionName + Arguments + CloseParen
        | Reference + hash
        ;

    RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;

    Union.Rule = MakePlusRule(Union, comma, Reference);

    ReferenceItem.Rule =
          Cell
        | NamedRange
        | VRange
        | HRange
        | RefError
        | UDFunctionCall
        | StructuredReference
        ;
    MarkTransient(ReferenceItem);

    UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
    UDFName.Rule = UDFToken;

    VRange.Rule = VRangeToken;
    HRange.Rule = HRangeToken;

    Cell.Rule = CellToken;

    File.Rule =
          FileNameNumericToken
        | FileNameEnclosedInBracketsToken
        | FilePathToken + FileNameEnclosedInBracketsToken
        | FilePathToken + FileName
        ;

    DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;

    NamedRange.Rule = NameToken | NamedRangeCombinationToken;

    // Every supported combination of (quoted) sheet, file and multi-sheet prefixes.
    Prefix.Rule =
          SheetToken
        | QuoteS + SheetQuotedToken
        | File + SheetToken
        | QuoteS + File + SheetQuotedToken
        | File + exclamationMark
        | MultipleSheetsToken
        | QuoteS + MultipleSheetsQuotedToken
        | File + MultipleSheetsToken
        | QuoteS + File + MultipleSheetsQuotedToken
        | RefErrorToken
        ;

    StructuredReferenceElement.Rule =
          OpenSquareParen + SRColumnToken + CloseSquareParen
        | OpenSquareParen + NameToken + CloseSquareParen
        | FileNameEnclosedInBracketsToken;

    StructuredReferenceTable.Rule = NameToken;

    // Up to three comma-separated elements, where the last position may be an "a:b" element range;
    // the '@' forms are only allowed as the sole (range of) element(s).
    StructuredReferenceExpression.Rule =
          StructuredReferenceElement
        | at + StructuredReferenceElement
        | StructuredReferenceElement + colon + StructuredReferenceElement
        | at + StructuredReferenceElement + colon + StructuredReferenceElement
        | StructuredReferenceElement + comma + StructuredReferenceElement
        | StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement
        | StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement
        | StructuredReferenceElement + comma + StructuredReferenceElement + comma + StructuredReferenceElement + colon + StructuredReferenceElement
        ;

    StructuredReference.Rule =
          StructuredReferenceElement
        | OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
        | StructuredReferenceTable + StructuredReferenceElement
        | StructuredReferenceTable + OpenSquareParen + CloseSquareParen
        | StructuredReferenceTable + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
        ;
    #endregion

    #region Arrays
    // Array constant: columns separated by ';', each made of ','-separated constants.
    ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;

    ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
    ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);

    ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
    #endregion

    #endregion

    #region 5-Operator Precedence
    // Some of these operators are neutral associative instead of left associative,
    // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
    // structure of the parse tree, we like consistency.
    RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
    RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
    RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
    RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
    RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
    RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
    RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at);
    RegisterOperators(Precedence.Union, Associativity.Left, comma);
    RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
    RegisterOperators(Precedence.Range, Associativity.Left, colon);
    #endregion
}

#region Precedence and Priority constants
// Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
// Could also be an enum, but this way you don't need int casts
// Values run from Comparison (1) up to Range (11); value 8 is reserved by the commented-out Reference entry.
// NOTE(review): presumably a larger value binds tighter in Irony - confirm.
private static class Precedence
{
    // Don't use priority 0, Irony seems to view it as no priority set
    public const int Comparison = 1;
    public const int Concatenation = 2;
    public const int Addition = 3;
    public const int Multiplication = 4;
    public const int Exponentiation = 5;
    public const int UnaryPostFix = 6;
    public const int UnaryPreFix = 7;
    //public const int Reference = 8;
    public const int Union = 9;
    public const int Intersection = 10;
    public const int Range = 11;
}

// Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
// E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
// E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
private static class TerminalPriority
{
    // Irony Low value
    //public const int Low = -1000;

    public const int SRColumn = -900;

    public const int Name = -800;
    public const int ReservedName = -700;

    public const int FileName = -500;
    // Note: shares the value -800 with Name.
    public const int FileNamePath = -800;

    public const int SingleQuotedString = -100;

    // Irony Normal value, default value
    //public const int Normal = 0;
    public const int Bool = 0;

    public const int MultipleSheetsToken = 100;

    // Irony High value
    //public const int High = 1000;

    public const int CellToken = 1000;

    public const int NamedRangeCombination = 1100;

    public const int UDF = 1150;

    public const int ExcelFunction = 1200;
    public const int ExcelRefFunction = 1200;
    public const int FileNameNumericToken = 1200;
    public const int SheetToken = 1200;
    public const int SheetQuotedToken = 1200;
}
#endregion

// Expression-bodied property: every read calls GetExcelFunctionList() again and
// therefore re-splits the embedded resource; nothing is cached here.
private static string[] excelFunctionList => GetExcelFunctionList();

// Reads the embedded v152 built-in function list and returns its non-empty lines
// (split on both '\n' and '\r', so any line-ending style works).
private static string[] GetExcelFunctionList()
{
    var resource = Properties.Resources.ExcelBuiltinFunctionList_v152;
    using (var sr = new StringReader(resource))
        return sr.ReadToEnd().Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
}
}

#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
    #region Non-Terminals
    // Names of the non-terminals that appear as nodes in the parse tree.
    // Most constants equal their own identifier; HorizontalRange ("HRange") and
    // VerticalRange ("VRange") are the exceptions.
    public const string Argument = "Argument";
    public const string Arguments = "Arguments";
    public const string ArrayColumns = "ArrayColumns";
    public const string ArrayConstant = "ArrayConstant";
    public const string ArrayFormula = "ArrayFormula";
    public const string ArrayRows = "ArrayRows";
    public const string Bool = "Bool";
    public const string Cell = "Cell";
    public const string Constant = "Constant";
    public const string ConstantArray = "ConstantArray";
    public const string DynamicDataExchange = "DynamicDataExchange";
    public const string EmptyArgument = "EmptyArgument";
    public const string Error = "Error";
    public const string ExcelFunction = "ExcelFunction";
    public const string File = "File";
    public const string Formula = "Formula";
    public const string FormulaWithEq = "FormulaWithEq";
    public const string FunctionCall = "FunctionCall";
    public const string FunctionName = "FunctionName";
    public const string HorizontalRange = "HRange";
    public const string MultiRangeFormula = "MultiRangeFormula";
    public const string NamedRange = "NamedRange";
    public const string Number = "Number";
    public const string Prefix = "Prefix";
    public const string QuotedFileSheet = "QuotedFileSheet";
    public const string Range = "Range";
    public const string Reference = "Reference";
    public const string ReferenceFunctionCall = "ReferenceFunctionCall";
    public const string RefError = "RefError";
    public const string RefFunctionName = "RefFunctionName";
    public const string ReservedName = "ReservedName";
    public const string Sheet = "Sheet";
    public const string StructuredReference = "StructuredReference";
    public const string StructuredReferenceElement = "StructuredReferenceElement";
    public const string StructuredReferenceExpression = "StructuredReferenceExpression";
    public const string StructuredReferenceTable = "StructuredReferenceTable";
    public const string Text = "Text";
    public const string UDFName = "UDFName";
    public const string UDFunctionCall = "UDFunctionCall";
    public const string Union = "Union";
    public const string VerticalRange = "VRange";
    #endregion

    #region Transient Non-Terminals
    // Names carrying the "Transient" prefix; the grammar marks the corresponding
    // non-terminals with MarkTransient.
    public const string TransientStart = "Start";
    public const string TransientInfixOp = "InfixOp";
    public const string TransientPostfixOp = "PostfixOp";
    public const string TransientPrefixOp = "PrefixOp";
    public const string TransientReferenceItem = "ReferenceItem";
    #endregion

    #region Terminals
    // Token (terminal) names. Note that several values do not follow the "<Identifier>Token"
    // pattern (e.g. TokenIntersect, TokenSheet, TokenSingleQuotedString, the SR* names and
    // TokenUnionOperator), so always compare against the constant, never a hand-typed string.
    public const string TokenBool = "BoolToken";
    public const string TokenCell = "CellToken";
    public const string TokenEmptyArgument = "EmptyArgumentToken";
    public const string TokenError = "ErrorToken";
    public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
    public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
    public const string TokenFilePath = "FilePathToken";
    public const string TokenFileName = "FileNameToken";
    public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
    public const string TokenFileNameNumeric = "FileNameNumericToken";
    public const string TokenHRange = "HRangeToken";
    public const string TokenIntersect = "INTERSECT";
    public const string TokenMultipleSheets = "MultipleSheetsToken";
    public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
    public const string TokenName = "NameToken";
    public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
    public const string TokenNumber = "NumberToken";
    public const string TokenRefError = "RefErrorToken";
    public const string TokenReservedName = "ReservedNameToken";
    public const string TokenSingleQuotedString = "SingleQuotedString";
    public const string TokenSheet = "SheetNameToken";
    public const string TokenSheetQuoted = "SheetNameQuotedToken";
    public const string TokenSRTableName = "SRTableName";
    public const string TokenSRKeyword = "SRKeyword";
    public const string TokenSRColumn = "SRColumn";
    public const string TokenSREnclosedColumn = "SREnclosedColumn";
    public const string TokenText = "TextToken";
    public const string TokenUDF = "UDFToken";
    public const string TokenUnionOperator = ",";
    public const string TokenVRange = "VRangeToken";
    #endregion
}
#endregion
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v152/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v152
{
    ///
    /// Excel formula parser
/// Contains parser and utilities that operate directly on the parse tree, or make working with the parse tree easier.
///
public static class ExcelFormulaParser
{
    /// <summary>
    /// Thread-local singleton parser instance
    /// </summary>
    [ThreadStatic] private static Parser _p;

    /// <summary>
    /// Thread-safe parser: created lazily on first use, one instance per thread
    /// (the backing field is [ThreadStatic]).
    /// </summary>
    private static Parser P => _p ?? (_p = new Parser(new ExcelFormulaGrammar()));

    /// <summary>
    /// Parse a formula, return the tree's root node
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">
    /// If formula could not be parsed
    /// </exception>
    /// <returns>Parse tree root node</returns>
    public static ParseTreeNode Parse(string input)
    {
        return ParseToTree(input).Root;
    }

    /// <summary>
    /// Parse a formula, return the tree
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">
    /// If formula could not be parsed
    /// </exception>
    /// <returns>Parse tree</returns>
    public static ParseTree ParseToTree(string input)
    {
        var tree = P.Parse(input);

        if (tree.HasErrors())
        {
            throw new ArgumentException("Failed parsing input <<" + input + ">>");
        }

        // Post-process intersect operator tokens: move each span one character to the left
        // (position and column) and force its length to 1.
        var intersects = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenIntersect));
        foreach (ParseTreeNode intersect in intersects)
        {
            var newLocation = new SourceLocation(intersect.Span.Location.Position - 1, intersect.Span.Location.Line, intersect.Span.Location.Column - 1);
            intersect.Span = new SourceSpan(newLocation, 1);
        }

        // Post-process quoted sheet tokens; the correction itself lives in PrefixInfo.
        var quotedSheetNodes = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenSheetQuoted));
        foreach (ParseTreeNode quotedSheetNode in quotedSheetNodes)
        {
            PrefixInfo.FixQuotedSheetNodeForWhitespace(quotedSheetNode, input);
        }

        return tree;
    }

    /// <summary>
    /// All nodes (terminals and non-terminals) in depth-first pre-order, with a conditional stop
    /// </summary>
    /// <param name="root">The root node</param>
    /// <param name="stopAt">Don't process the children of a node matching this predicate</param>
    // inspiration taken from https://irony.codeplex.com/discussions/213938
    public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
    {
        var stack = new Stack<ParseTreeNode>();
        stack.Push(root);
        while (stack.Count > 0)
        {
            var node = stack.Pop();
            yield return node;

            // Check if we don't want to process the children of this node
            if (stopAt != null && stopAt(node)) continue;

            var children = node.ChildNodes;
            // Push children on in reverse order so that they will
            // be evaluated left -> right when popped.
            for (int i = children.Count - 1; i >= 0; i--)
            {
                stack.Push(children[i]);
            }
        }
    }

    /// <summary>
    /// All nodes (terminals and non-terminals) in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
    {
        return AllNodesConditional(root);
    }

    /// <summary>
    /// All nodes of a certain type in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
    {
        return AllNodes(root.AllNodes(), type);
    }

    /// <summary>
    /// Filters an already enumerated node sequence down to nodes of the given type
    /// </summary>
    internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
    {
        return allNodes.Where(node => node.Is(type));
    }

    /// <summary>
    /// Get the parent node of a node
    /// </summary>
    /// <remarks>
    /// This is an expensive operation, as the whole tree will be searched through
    /// </remarks>
    /// <exception cref="ArgumentException">If no node below <paramref name="treeRoot"/> has <paramref name="child"/> as a direct child</exception>
    public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
    {
        var parent = treeRoot.AllNodes()
            .FirstOrDefault(node => node.ChildNodes.Any(c => c == child));
        if(parent == null) throw new ArgumentException("Child is not part of the tree", nameof(child));
        return parent;
    }

    /// <summary>
    /// The node type/name
    /// </summary>
    public static string Type(this ParseTreeNode node)
    {
        return node.Term.Name;
    }

    /// <summary>
    /// Check if a node is of a particular type
    /// </summary>
    public static bool Is(this ParseTreeNode pt, string type)
    {
        return pt.Type() == type;
    }

    /// <summary>
    /// Checks whether this node is a function
    /// </summary>
    public static Boolean IsFunction(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.FunctionCall)
            || input.Is(GrammarNames.ReferenceFunctionCall)
            || input.Is(GrammarNames.UDFunctionCall)
            // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
            || (input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction())
            ;
    }

    /// <summary>
    /// Whether or not this node represents parentheses "(_)"
    /// </summary>
    public static bool IsParentheses(this ParseTreeNode input)
    {
        // Brackets are punctuation and therefore absent from the tree: a parenthesized
        // node shows up as a Formula/Reference whose only child has the same type.
        switch (input.Type())
        {
            case GrammarNames.Formula:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Formula);
            case GrammarNames.Reference:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Reference);
            default:
                return false;
        }
    }

    /// <summary>
    /// Whether this node is a function node with three children of which the middle one is an operator
    /// </summary>
    public static bool IsBinaryOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
            && input.ChildNodes.Count == 3
            && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// Binary operation that is a FunctionCall (i.e. not a reference operation)
    /// </summary>
    public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);
    }

    /// <summary>
    /// Binary operation that is a ReferenceFunctionCall
    /// </summary>
    public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
    }

    /// <summary>
    /// Whether this node is a unary prefix or postfix operation
    /// </summary>
    public static bool IsUnaryOperation(this ParseTreeNode input)
    {
        return IsUnaryPrefixOperation(input) || IsUnaryPostfixOperation(input);
    }

    /// <summary>
    /// Function node with two children of which the first is an operator
    /// </summary>
    public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
            && input.ChildNodes.Count == 2
            && input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// Function node with two children of which the second is an operator
    /// </summary>
    public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
            && input.ChildNodes.Count == 2
            && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    // Drops the last character of the input.
    private static string RemoveFinalSymbol(string input)
    {
        input = input.Substring(0, input.Length - 1);
        return input;
    }

    /// <summary>
    /// Get the function or operator name of this function call
    /// </summary>
    /// <exception cref="ArgumentException">If the node is none of the recognized function/operation shapes</exception>
    public static string GetFunction(this ParseTreeNode input)
    {
        if (input.IsIntersection())
        {
            return GrammarNames.TokenIntersect;
        }
        if (input.IsUnion())
        {
            return GrammarNames.TokenUnionOperator;
        }
        if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
        {
            return input.ChildNodes[1].Print();
        }
        if (input.IsUnaryPrefixOperation())
        {
            return input.ChildNodes[0].Print();
        }
        if (input.IsNamedFunction())
        {
            // Strip the trailing character of the printed name node and normalize the case.
            // Invariant casing: callers compare against fixed names such as "IF", which the
            // culture-sensitive ToUpper() would break under e.g. tr-TR ("if" -> "İF").
            return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
        }
        if (input.IsExternalUDFunction())
        {
            return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
        }

        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Check if this node is a specific function
    /// </summary>
    public static bool MatchFunction(this ParseTreeNode input, string functionName)
    {
        return IsFunction(input) && GetFunction(input) == functionName;
    }

    /// <summary>
    /// Get all the arguments of a function or operation
    /// </summary>
    /// <exception cref="ArgumentException">If the node is none of the recognized function/operation shapes</exception>
    public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
    {
        if (input.IsNamedFunction())
        {
            return input
                .ChildNodes[1] // "Arguments" non-terminal
                .ChildNodes    // "Argument" non-terminals
                .Select(node => node.ChildNodes[0])
                ;
        }
        if (input.IsBinaryOperation())
        {
            return new[] {input.ChildNodes[0], input.ChildNodes[2]};
        }
        if (input.IsUnaryPrefixOperation())
        {
            return new[] {input.ChildNodes[1]};
        }
        if (input.IsUnaryPostfixOperation())
        {
            return new[] {input.ChildNodes[0]};
        }
        if (input.IsUnion())
        {
            return input.ChildNodes[0].ChildNodes;
        }
        if (input.IsExternalUDFunction())
        {
            return input      // Reference
                .ChildNodes[1] // UDFunctionCall
                .ChildNodes[1] // Arguments
                .ChildNodes    // Argument non-terminals
                .Select(node => node.ChildNodes[0])
                ;
        }
        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Checks whether this node is a built-in excel function
    /// </summary>
    public static bool IsBuiltinFunction(this ParseTreeNode node)
    {
        return node.IsFunction() &&
            (node.ChildNodes[0].Is(GrammarNames.FunctionName) || node.ChildNodes[0].Is(GrammarNames.RefFunctionName));
    }

    /// <summary>
    /// Whether or not this node represents an intersection
    /// </summary>
    public static bool IsIntersection(this ParseTreeNode input)
    {
        return IsBinaryOperation(input) &&
            input.ChildNodes[1].Token.Terminal.Name == GrammarNames.TokenIntersect;
    }

    /// <summary>
    /// Whether or not this node represents an union
    /// </summary>
    public static bool IsUnion(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.ReferenceFunctionCall)
            && input.ChildNodes.Count == 1
            && input.ChildNodes[0].Is(GrammarNames.Union);
    }

    /// <summary>
    /// Checks whether this node is a function call with name, and not just a unary or binary operation
    /// </summary>
public static bool IsNamedFunction(this ParseTreeNode input)
{
    return (input.Is(GrammarNames.FunctionCall) && input.ChildNodes[0].Is(GrammarNames.FunctionName))
        || (input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes[0].Is(GrammarNames.RefFunctionName))
        || input.Is(GrammarNames.UDFunctionCall);
}

/// <summary>
/// Whether this node is a binary or unary operation
/// </summary>
public static bool IsOperation(this ParseTreeNode input)
{
    return input.IsBinaryOperation() || input.IsUnaryOperation();
}

/// <summary>
/// Whether this node is a Reference with two children of which the second is a named function
/// (a prefixed, i.e. external, user defined function call)
/// </summary>
public static bool IsExternalUDFunction(this ParseTreeNode input)
{
    return input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsNamedFunction();
}

/// <summary>
/// True if this node presents a number constant with a sign
/// </summary>
public static bool IsNumberWithSign(this ParseTreeNode input)
{
    return IsUnaryPrefixOperation(input)
        && input.ChildNodes[1].ChildNodes[0].Is(GrammarNames.Constant)
        && input.ChildNodes[1].ChildNodes[0].ChildNodes[0].Is(GrammarNames.Number);
}

/// <summary>
/// Extract all of the information from a Prefix non-terminal
/// </summary>
public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix) => PrefixInfo.From(prefix);

/// <summary>
/// Go to the first non-formula child node
/// </summary>
public static ParseTreeNode SkipFormula(this ParseTreeNode input)
{
    while (input.Is(GrammarNames.Formula))
    {
        input = input.ChildNodes.First();
    }
    return input;
}

/// <summary>
/// Get all child nodes that are references and aren't part of another reference expression
/// </summary>
public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
{
    // The traversal stops descending at every Reference node, so only the outermost
    // references are reported; wrapper layers are then stripped from each of them.
    return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
        .Where(node => node.Is(GrammarNames.Reference))
        .Select(node => node.SkipToRelevant())
        ;
}

/// <summary>
/// Gets the ParserReferences from the input parse tree node and its children
/// </summary>
/// <remarks>
/// 5 cases:
/// 1. ReferenceItem node: convert to ParserReference
/// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
///    (to include the references in the arguments of external UDFs)
/// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
/// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
/// 5. Other cases (RefFunctionCall, Union, Arguments):recursive call on the nodes returned from GetReferenceNodes(node)
/// </remarks>
public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
{
    // Strip every single-child Reference wrapper. Unwrapping only one level left a
    // parenthesized reference such as "(A1)" as a one-child Reference, which then hit the
    // prefix branch below and failed with "Not a prefix" because it has no Prefix child.
    while (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
    {
        node = node.ChildNodes[0];
    }

    var list = new List<ParserReference>();
    switch (node.Type())
    {
        case GrammarNames.Cell:
        case GrammarNames.NamedRange:
        case GrammarNames.HorizontalRange:
        case GrammarNames.VerticalRange:
        case GrammarNames.StructuredReference:
            list.Add(new ParserReference(node));
            break;
        case GrammarNames.Reference:
            // Only "Prefix + item" references (two children) get here.
            list.Add(new ParserReference(node));
            list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            break;
        default:
            if (node.IsRange())
            {
                var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray();
                var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray();
                if (IsCellReference(rangeStart) && IsCellReference(rangeEnd))
                {
                    // Cell:Cell - fold both limits into one CellRange reference,
                    // reusing the start reference object.
                    ParserReference range = rangeStart.First();
                    range.MaxLocation = rangeEnd.First().MinLocation;
                    range.ReferenceType = ReferenceType.CellRange;
                    range.LocationString = node.Print();
                    list.Add(range);
                }
                else
                {
                    list.AddRange(rangeStart);
                    list.AddRange(rangeEnd);
                }
            }
            else
            {
                list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            }
            break;
    }
    return list;
}

// True when the list holds exactly one reference and that reference is a single cell.
private static bool IsCellReference(IList<ParserReference> references)
{
    return references.Count == 1 && references.First().ReferenceType == ReferenceType.Cell;
}

/// <summary>
/// Whether or not this node represents a range
/// </summary>
public static bool IsRange(this ParseTreeNode input)
{
    return input.IsBinaryReferenceOperation() &&
        input.ChildNodes[1].Is(":");
}

/// <summary>
/// Go to the first "relevant" child node, i.e. skips wrapper nodes
/// </summary>
/// <param name="input">The input parse tree node</param>
/// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
/// <remarks>
/// Skips:
/// * FormulaWithEq and ArrayFormula nodes
/// * Formula nodes
/// * Parentheses
/// * Reference nodes which are just wrappers
/// </remarks>
public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
{
    while (true)
    {
        switch (input.Type())
        {
            case GrammarNames.FormulaWithEq:
            case GrammarNames.ArrayFormula:
                // Child 0 is the '=' operator, child 1 the wrapped formula
                input = input.ChildNodes[1];
                break;
            case GrammarNames.Argument:
            case GrammarNames.Formula:
                if (input.ChildNodes.Count == 1)
                {
                    input = input.ChildNodes[0];
                }
                else
                {
                    return input;
                }
                break;
            case GrammarNames.Reference:
                // Skip references which are parentheses
                // Skip references without a prefix (=> they only have one child node) if the option is set
                if ((skipReferencesWithoutPrefix && input.ChildNodes.Count == 1) || input.IsParentheses())
                {
                    input = input.ChildNodes[0];
                }
                else
                {
                    return input;
                }
                break;
            default:
                return input;
        }
    }
}

/// <summary>
/// Pretty-print a parse tree to a string
/// </summary>
/// <exception cref="ArgumentException">If the node type is not handled and the node does not have exactly one child</exception>
public static string Print(this ParseTreeNode input)
{
    // For terminals, just print the token text
    if (input.Term is Terminal)
    {
        return input.Token.Text;
    }

    // (Lazy) enumerable for printed children
    var children = input.ChildNodes.Select(Print);
    // Concrete list when needed
    List<string> childrenList;

    // Switch on non-terminals
    switch (input.Term.Name)
    {
        case GrammarNames.Formula:
            // Check if these are brackets, otherwise print first child
            return IsParentheses(input) ? $"({children.First()})" : children.First();

        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            childrenList = children.ToList();

            if (input.IsNamedFunction())
            {
                // The closing bracket is punctuation and not part of the tree, so re-add it
                return string.Join("", childrenList) + ")";
            }

            if (input.IsBinaryOperation())
            {
                // format string for "normal" binary operation
                string format = "{0}{1}{2}";
                if (input.IsIntersection())
                {
                    // Intersections are printed as a single space between the operands
                    format = "{0} {2}";
                }

                return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
            }

            if (input.IsUnion())
            {
                return $"({string.Join(",", childrenList)})";
            }

            if (input.IsUnaryOperation())
            {
                return string.Join("", childrenList);
            }

            throw new ArgumentException("Unknown function type.");

        case GrammarNames.Reference:
            return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);

        case GrammarNames.Prefix:
            var ret = string.Join("", children);
            // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
            if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
            {
                ret += "!";
            }
            return ret;

        case GrammarNames.ArrayFormula:
            return "{=" + children.ElementAt(1) + "}";

        case GrammarNames.StructuredReference:
            var sb = new StringBuilder();
            var hasTable = input.ChildNodes.Count >= 1 && input.ChildNodes[0].Is(GrammarNames.StructuredReferenceTable);
            // Index of the child holding the element/expression: it follows the table name, if any
            var contentsNode = hasTable ? 1 : 0;
            childrenList = children.ToList();
            if (hasTable)
            {
                sb.Append(childrenList[0]);
            }

            if (hasTable && input.ChildNodes.Count == 1)
            {
                // Full table reference
                sb.Append("[]");
            }
            else if (input.ChildNodes[contentsNode].Is(GrammarNames.StructuredReferenceElement))
            {
                sb.Append(childrenList[contentsNode]);
            }
            else
            {
                sb.Append($"[{childrenList[contentsNode]}]");
            }

            return sb.ToString();

        // Terms for which to print all child nodes concatenated
        case GrammarNames.ArrayConstant:
        case GrammarNames.DynamicDataExchange:
        case GrammarNames.FormulaWithEq:
        case GrammarNames.File:
        case GrammarNames.MultiRangeFormula:
        case GrammarNames.StructuredReferenceExpression:
            return string.Join("", children);

        // Terms for which we print the children comma-separated
        case GrammarNames.Arguments:
        case GrammarNames.ArrayRows:
        case GrammarNames.Union:
            return string.Join(",", children);

        case GrammarNames.ArrayColumns:
            return string.Join(";", children);

        case GrammarNames.ConstantArray:
            return $"{{{children.First()}}}";

        default:
            // If it is not defined above and the number of children is exactly one, we want to just print the first child
            if (input.ChildNodes.Count == 1)
            {
                return children.First();
            }
            throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine +
                                        "This probably means the Excel grammar was modified without the print function being modified");
    }
}
}
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v152/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v152
{
    // Kinds of reference a ParserReference can describe.
    public enum ReferenceType
    {
        Cell,
        CellRange,
        UserDefinedName,
        HorizontalRange,
        VerticalRange,
        RefError,
        Table
    }

    // Flat description of one reference found in a formula: its kind, optional
    // file/worksheet qualification and its location or name.
    public class ParserReference
    {
        public const int MaxRangeHeight = 1048576;
        public const int MaxRangeWidth = 16384;

        public ReferenceType ReferenceType { get; set; }
        public string LocationString { get; set; }
        public string Worksheet { get; set; }
        public string LastWorksheet { get; set; }
        public string FilePath { get; set; }
        public string FileName { get; set; }
        public string Name { get; private set; }
        public string MinLocation { get; set; } //Location as appearing in the formula, eg $A$1
        public string MaxLocation { get; set; }

        // Creates a reference from explicit values; when maxLocation is omitted (null),
        // MaxLocation defaults to minLocation.
        public ParserReference(ReferenceType referenceType, string locationString = null, string worksheet = null, string lastWorksheet = null, string filePath = null, string fileName = null, string name = null, string minLocation = null, string maxLocation = null)
        {
            ReferenceType = referenceType;
            LocationString = locationString;
            Worksheet = worksheet;
            LastWorksheet = lastWorksheet;
            FilePath = filePath;
            FileName = fileName;
            Name = name;
            MinLocation = minLocation;
            MaxLocation = maxLocation ?? minLocation;
        }

        // Creates a reference from a parse tree node, see InitializeReference.
        public ParserReference(ParseTreeNode node)
        {
            InitializeReference(node);
        }

        /// <summary>
        /// Initializes the current object based on the input ParseTreeNode
        /// </summary>
        ///
        /// For Reference nodes (Prefix ReferenceItem), it initialize the values derived from the Prefix node and
        /// is re-invoked for the ReferenceItem node.
///
public void InitializeReference(ParseTreeNode node)
{
    string nodeType = node.Type();

    if (nodeType == GrammarNames.Reference)
    {
        // Prefixed reference: child 0 is the Prefix, child 1 the referenced item.
        PrefixInfo info = node.ChildNodes[0].GetPrefixInfo();

        if (info.HasMultipleSheets)
        {
            // "First:Last" sheet span
            string[] sheetNames = info.MultipleSheets.Split(':');
            Worksheet = sheetNames[0];
            LastWorksheet = sheetNames[1];
        }
        else if (info.HasSheet)
        {
            // Doubled single quotes inside a sheet name are an escaped quote
            Worksheet = info.Sheet.Replace("''", "'");
        }
        else
        {
            Worksheet = "(Undefined sheet)";
        }

        if (info.HasFilePath)
        {
            FilePath = info.FilePath;
        }

        // A numeric file reference wins over a file name
        if (info.HasFileNumber)
        {
            FileName = info.FileNumber.ToString();
        }
        else if (info.HasFileName)
        {
            FileName = info.FileName;
        }

        // Fill in the item specific values from the second child
        InitializeReference(node.ChildNodes[1]);
    }
    else if (nodeType == GrammarNames.Cell)
    {
        ReferenceType = ReferenceType.Cell;
        MinLocation = node.ChildNodes[0].Token.ValueString;
        MaxLocation = MinLocation;
    }
    else if (nodeType == GrammarNames.NamedRange)
    {
        ReferenceType = ReferenceType.UserDefinedName;
        Name = node.ChildNodes[0].Token.ValueString;
    }
    else if (nodeType == GrammarNames.StructuredReference)
    {
        ReferenceType = ReferenceType.Table;
        // The table name is optional; Name stays null when the reference has none
        ParseTreeNode tableNode = node.ChildNodes.FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceTable);
        Name = tableNode?.ChildNodes[0].Token.ValueString;
    }
    else if (nodeType == GrammarNames.HorizontalRange || nodeType == GrammarNames.VerticalRange)
    {
        // Both range kinds are a single "min:max" token
        string[] limits = node.ChildNodes[0].Token.ValueString.Split(':');
        ReferenceType = nodeType == GrammarNames.HorizontalRange
            ? ReferenceType.HorizontalRange
            : ReferenceType.VerticalRange;
        MinLocation = limits[0];
        MaxLocation = limits[1];
    }
    else if (nodeType == GrammarNames.RefError)
    {
        ReferenceType = ReferenceType.RefError;
    }

    // Always (re)set last, so that for prefixed references the text of the whole
    // node replaces the one set by the nested call.
    LocationString = node.Print();
}

/// <summary>
/// Converts the column number to an Excel column string representation.
/// </summary>
/// <param name="columnNumber">The zero-based column number.</param>
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v152
{
    // TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23

    /// <summary>
    /// Simple data class that holds information about a Prefix.
    /// </summary>
    public class PrefixInfo : IEquatable<PrefixInfo>
    {
        public string FilePath { get; }
        public bool HasFilePath => FilePath != null;

        private readonly int? _fileNumber;
        /// <summary>The numeric file reference, or 0 when <see cref="HasFileNumber"/> is false.</summary>
        public int FileNumber => _fileNumber.GetValueOrDefault();
        public bool HasFileNumber => _fileNumber.HasValue;

        public string FileName { get; }
        public bool HasFileName => FileName != null;

        public bool HasFile => HasFileName || HasFileNumber;

        public string Sheet { get; }
        public bool HasSheet => Sheet != null;

        public string MultipleSheets { get; }
        public bool HasMultipleSheets => MultipleSheets != null;

        public bool IsQuoted { get; }

        public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
        {
            Sheet = sheet;
            _fileNumber = fileNumber;
            FileName = fileName;
            FilePath = filePath;
            MultipleSheets = multipleSheets;
            IsQuoted = isQuoted;
        }

        /// <summary>
        /// Create a PrefixInfo class from a parse tree node
        /// </summary>
        /// <exception cref="ArgumentException">The node is not a Prefix node.</exception>
        internal static PrefixInfo From(ParseTreeNode prefix)
        {
            if (prefix.Type() != GrammarNames.Prefix)
            {
                throw new ArgumentException("Not a prefix", nameof(prefix));
            }

            string filePath = null;
            int? fileNumber = null;
            string fileName = null;
            string sheetName = null;
            string multipleSheets = null;

            // Token number we're processing
            var cur = 0;

            // Check for quotes
            var isQuoted = prefix.ChildNodes[cur].Is("'");
            if (isQuoted)
            {
                cur++;
            }

            // Check and process file
            if (prefix.ChildNodes[cur].Is(GrammarNames.File))
            {
                ParseTreeNode file = prefix.ChildNodes[cur];

                if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
                {
                    // Numeric filename: strip the surrounding brackets before parsing
                    fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
                }
                else
                {
                    // String filename
                    var iCur = 0;

                    // Check if it includes a path
                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                    {
                        filePath = file.ChildNodes[iCur].Print();
                        iCur++;
                    }

                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                    {
                        // Strip the surrounding brackets
                        fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                    }
                    else
                    {
                        fileName = file.ChildNodes[iCur].Print();
                    }
                }

                cur++;
            }

            // Check for a non-quoted sheet
            if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheet))
            {
                // remove !
                sheetName = Substr(prefix.ChildNodes[cur].Print(), 1);
            }
            // Check for a quoted sheet
            else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheetQuoted))
            {
                // remove quote and !
                sheetName = Substr(prefix.ChildNodes[cur].Print(), 2);

                if (sheetName == "")
                {
                    // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                    // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                    sheetName = " ";
                }
            }
            // Check if multiple sheets
            else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenMultipleSheets))
            {
                // remove !
                multipleSheets = Substr(prefix.ChildNodes[cur].Print(), 1);
            }

            return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
        }

        /// <summary>
        /// Extends the span and token text of a quoted sheet node backwards so that they include the
        /// whitespace immediately preceding it in <paramref name="sourceText"/>.
        /// </summary>
        /// <remarks>
        /// Does nothing when no whitespace precedes the node.
        /// </remarks>
        internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
        {
            var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
            SourceLocation currentLocation = quotedSheetNode.Span.Location;
            if (newPosition == currentLocation.Position)
            {
                return;
            }

            // The start moves backwards, so the column has to move back by the same distance as the position.
            // NOTE(review): when the skipped whitespace contains a line break the line number would also
            // need adjusting; the column is clamped so it never becomes negative in that case.
            var shift = currentLocation.Position - newPosition;
            var newLocation = new SourceLocation(newPosition, currentLocation.Line, Math.Max(0, currentLocation.Column - shift));
            quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

            // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
            typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
                ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
        }

        /// <summary>
        /// Walks backwards from the node's start over any whitespace and returns the index of the first
        /// whitespace character of that run (or the node's own start when there is none).
        /// </summary>
        private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
        {
            var startIndex = nodeSheetQuoted.Span.Location.Position;
            while (startIndex > 0)
            {
                if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
                {
                    break;
                }

                startIndex--;
            }

            return startIndex;
        }

        /// <summary>
        /// Returns <paramref name="s"/> without its first <paramref name="removeFirst"/> and last
        /// <paramref name="removeLast"/> characters.
        /// </summary>
        private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
        {
            return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
        }

        public override bool Equals(object other) => Equals(other as PrefixInfo);

        /// <summary>
        /// Case-insensitive comparison of file number, file path, file name, sheet and multiple sheets.
        /// <see cref="IsQuoted"/> does not take part in the comparison.
        /// </summary>
        public bool Equals(PrefixInfo other)
        {
            if (ReferenceEquals(null, other)) return false;
            if (ReferenceEquals(this, other)) return true;
            return _fileNumber == other._fileNumber
                   && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
        }

        public override int GetHashCode()
        {
            unchecked
            {
                // Same case-insensitive comparer as Equals, so equal prefixes hash equally
                var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
                hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
                hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
                hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
                hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
                return hashCode;
            }
        }

        public static bool operator ==(PrefixInfo left, PrefixInfo right)
        {
            return Equals(left, right);
        }

        public static bool operator !=(PrefixInfo left, PrefixInfo right)
        {
            return !Equals(left, right);
        }

        /// <summary>
        /// Builds the prefix text from its parts: optional quote, file path, bracketed file number and
        /// file name, sheet or multiple sheets, optional closing quote, and a final "!".
        /// </summary>
        public override string ToString()
        {
            var res = new StringBuilder();
            if (IsQuoted) res.Append("'");
            if (HasFilePath) res.Append(FilePath);
            if (HasFileNumber) res.Append($"[{FileNumber}]");
            if (HasFileName) res.Append($"[{FileName}]");
            if (HasSheet) res.Append(Sheet);
            if (HasMultipleSheets) res.Append(MultipleSheets);
            if (IsQuoted) res.Append("'");
            res.Append("!");
            return res.ToString();
        }
    }
}
using Irony.Parsing;
using System;
using System.IO;
using System.Reflection;

namespace XLParser.Web.XLParserVersions.v160
{
    /// <summary>
    /// Contains the XLParser grammar
    /// </summary>
    /// <remarks>
    /// Terminals and non-terminals are declared as members; the constructor wires them into rules and
    /// registers operator precedence.
    /// </remarks>
    [Language("Excel Formulas", "1.6.0", "Grammar for Excel Formulas")]
    public class ExcelFormulaGrammar : Grammar
    {
        #region 1-Terminals

        #region Symbols and operators

        public Terminal at => ToTerm("@");
        public Terminal comma => ToTerm(",");
        public Terminal colon => ToTerm(":");
        public Terminal hash => ToTerm("#");
        public Terminal semicolon => ToTerm(";");
        public Terminal OpenParen => ToTerm("(");
        public Terminal CloseParen => ToTerm(")");
        public Terminal CloseSquareParen => ToTerm("]");
        public Terminal OpenSquareParen => ToTerm("[");
        public Terminal exclamationMark => ToTerm("!");
        public Terminal CloseCurlyParen => ToTerm("}");
        public Terminal OpenCurlyParen => ToTerm("{");
        public Terminal QuoteS => ToTerm("'");

        public Terminal mulop => ToTerm("*");
        public Terminal plusop => ToTerm("+");
        public Terminal divop => ToTerm("/");
        public Terminal minop => ToTerm("-");
        public Terminal concatop => ToTerm("&");
        public Terminal expop => ToTerm("^");

        // Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
        // Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
        public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);

        public Terminal percentop => ToTerm("%");

        public Terminal gtop => ToTerm(">");
        public Terminal eqop => ToTerm("=");
        public Terminal ltop => ToTerm("<");
        public Terminal neqop => ToTerm("<>");
        public Terminal gteop => ToTerm(">=");
        public Terminal lteop => ToTerm("<=");

        #endregion

        #region Literals

        public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE") { Priority = TerminalPriority.Bool };

        public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None) { DefaultIntTypes = new[] {TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt} };

        // Double-quoted text; a doubled quote stands for a literal quote and no escape sequences are processed
        public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes);

        // Same options as TextToken, but delimited by single quotes
        public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes) { Priority = TerminalPriority.SingleQuotedString };

        // All error literals except #REF!, which has its own terminal below
        public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!");
        public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError);

        #endregion

        #region Functions

        private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers
        private const string AllUdfChars = SpecialUdfChars + @"\\.\w";
        private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)";

        // The following regex uses the rather exotic feature Character Class Subtraction
        // https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction
        private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\(";

        public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) {Priority = TerminalPriority.UDF};

        public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(") { Priority = TerminalPriority.ExcelRefFunction };

        public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(") { Priority = TerminalPriority.ExcelRefFunction };

        // Alternation of every name in the built-in function list, followed by an opening parenthesis
        public Terminal ExcelFunction { get; } = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + string.Join("|", excelFunctionList) + ")\\(") { Priority = TerminalPriority.ExcelFunction };

        // Using this instead of Empty allows a more accurate tree
        public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);

        #endregion

        #region References and names

        // One to three capital letters, where three-letter columns are limited to XFD
        private const string ColumnPattern = @"(?:[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D]|[A-Z]{1,2})";

        public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern);
        public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*");

        private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?[1-9][0-9]*";
        public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex) { Priority = TerminalPriority.CellToken };

        // Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark
        private const string NameStartCharRegex = @"[\p{L}\\_]";
        private const string NameValidCharacterRegex = @"[\w\\_\.\?€]";

        public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*") { Priority = TerminalPriority.Name };

        // Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference.
        // If we ever parse R1C1 references, make sure to include them here
        // TODO: Add all function names here
        private const string NameInvalidWordsRegex =
            "((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
            // \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
            + "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
            ;

        // To prevent e.g. "A1A1" being parsed as 2 cell tokens
        public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NameInvalidWordsRegex + NameValidCharacterRegex + "+") { Priority = TerminalPriority.NamedRangeCombination };

        public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+") { Priority = TerminalPriority.ReservedName };

        #region Structured References

        private const string SRSpecifierRegex = @"#(All|Data|Headers|Totals|This Row)";
        public Terminal SRSpecifierToken = new RegexBasedTerminal(GrammarNames.TokenSRSpecifier, SRSpecifierRegex) { Priority = TerminalPriority.StructuredReference };

        // Any characters other than [ ] ' # @, or one of those characters preceded by a single quote
        private const string SRColumnRegex = @"(?:[^\[\]'#@]|(?:'['\[\]#@]))+";
        public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, SRColumnRegex) { Priority = TerminalPriority.StructuredReference };

        #endregion

        #region Prefixes

        private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
        private const string notSheetNameChars = @"'*\[\]\\:/?";
        //const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
        //const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
        private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
        private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*";
        //private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";

        public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!") { Priority = TerminalPriority.SheetToken };

        // The opening quote is a separate terminal (QuoteS); this token covers the name, the closing quote and the "!"
        public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!") { Priority = TerminalPriority.SheetQuotedToken };

        private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!";
        private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!";
        public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex) { Priority = TerminalPriority.MultipleSheetsToken };
        public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex) { Priority = TerminalPriority.MultipleSheetsToken };

        // The lookahead requires a "!" later on, with no further square brackets in between
        private const string fileNameNumericRegex = @"\[[0-9]+\](?=[^\[\]]*!)";
        public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex) { Priority = TerminalPriority.FileNameNumericToken };

        private const string fileNameInBracketsRegex = @"\[[^\[\]]+\](?=[^\[\]]*!)";
        public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex) { Priority = TerminalPriority.FileName };

        // Source: https://stackoverflow.com/a/14632579
        private const string fileNameRegex = @"[^\.\\\[\]]+\..{1,4}";
        public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex) { Priority = TerminalPriority.FileName };

        // Source: http://stackoverflow.com/a/6416209/572635
        private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\-.$ @]+)\\(([^<>\"" /\|?*\\']|( |''))*\\)*";
        private const string urlPathRegex = @"http(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:(0-9)*)*[/]([a-zA-Z0-9\-\.\?\,\'+&%\$#_ ()]*[/])*";
        private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")";
        public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex) { Priority = TerminalPriority.FileNamePath };

        #endregion

        #endregion

        #endregion

        #region 2-NonTerminals
        // Most non-terminals are first defined here, so they can be used anywhere in the rules
        // Otherwise you can only use non-terminals that have been defined previously

        public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument);
        public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments);
        public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns);
        public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant);
        public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula);
        public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows);
        public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool);
        public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell);
        public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant);
        public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray);
        public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
        public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument);
        public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error);
        public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
        public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula);
        public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq);
        public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall);
        public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName);
        public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange);
        public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp);
        public NonTerminal MultiRangeFormula{ get; } = new NonTerminal(GrammarNames.MultiRangeFormula);
        public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange);
        public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number);
        public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
        public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix);
        public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
        public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
        public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference);
        public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
        public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
        public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError);
        public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName);
        public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName);
        public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet);
        public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart);
        public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
        public NonTerminal StructuredReferenceColumn { get; } = new NonTerminal(GrammarNames.StructuredReferenceColumn);
        public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
        public NonTerminal StructuredReferenceSpecifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceSpecifier);
        public NonTerminal StructuredReferenceQualifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceQualifier);
        public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
        public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
        public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
        public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
        public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);
        #endregion

        /// <summary>
        /// Builds the grammar: marks punctuation, assigns the production rule of every non-terminal
        /// and registers operator precedence and associativity.
        /// </summary>
        public ExcelFormulaGrammar() : base(false)
        {

            #region Punctuation
            MarkPunctuation(OpenParen, CloseParen);
            MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
            #endregion

            #region Rules

            #region Base rules
            Root = Start;

            // A formula may appear with or without the leading "=", as an array formula, or as a union of references
            Start.Rule =
                FormulaWithEq
                | Formula
                | ArrayFormula
                | MultiRangeFormula
                ;
            MarkTransient(Start);

            ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;

            MultiRangeFormula.Rule = eqop + Union;

            FormulaWithEq.Rule = eqop + Formula;

            Formula.Rule =
                Reference + ReduceHere()
                | Constant
                | FunctionCall
                | ConstantArray
                | OpenParen + Formula + CloseParen
                | ReservedName
                ;

            ReservedName.Rule = ReservedNameToken;

            Constant.Rule =
                Number
                | Text
                | Bool
                | Error
                ;

            Text.Rule = TextToken;
            Number.Rule = NumberToken;
            Bool.Rule = BoolToken;
            Error.Rule = ErrorToken;
            RefError.Rule = RefErrorToken;
            #endregion

            #region Functions

            // Operator applications are modelled as function calls, next to regular built-in function calls
            FunctionCall.Rule =
                FunctionName + Arguments + CloseParen
                | PrefixOp + Formula
                | Formula + PostfixOp
                | Formula + InfixOp + Formula
                ;

            FunctionName.Rule = ExcelFunction;

            Arguments.Rule = MakeStarRule(Arguments, comma, Argument);

            EmptyArgument.Rule = EmptyArgumentToken;
            Argument.Rule = Formula | EmptyArgument;

            PrefixOp.Rule =
                ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
                | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
                | ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
            MarkTransient(PrefixOp);

            InfixOp.Rule =
                expop
                | mulop
                | divop
                | plusop
                | minop
                | concatop
                | gtop
                | eqop
                | ltop
                | neqop
                | gteop
                | lteop;
            MarkTransient(InfixOp);

            // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
            PostfixOp.Rule = PreferShiftHere() + percentop;
            MarkTransient(PostfixOp);
            #endregion

            #region References

            Reference.Rule =
                ReferenceItem
                | ReferenceFunctionCall
                | OpenParen + Reference + PreferShiftHere() + CloseParen
                | Prefix + ReferenceItem
                | DynamicDataExchange
                ;

            // Range (":"), intersection (implied space), parenthesised union, reference-returning functions and the "#" postfix
            ReferenceFunctionCall.Rule =
                Reference + colon + Reference
                | Reference + intersectop + Reference
                | OpenParen + Union + CloseParen
                | RefFunctionName + Arguments + CloseParen
                | Reference + hash
                ;

            RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;

            Union.Rule = MakePlusRule(Union, comma, Reference);

            ReferenceItem.Rule =
                Cell
                | NamedRange
                | VRange
                | HRange
                | RefError
                | UDFunctionCall
                | StructuredReference
                ;
            MarkTransient(ReferenceItem);

            UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
            UDFName.Rule = UDFToken;

            VRange.Rule = VRangeToken;
            HRange.Rule = HRangeToken;

            Cell.Rule = CellToken;

            File.Rule =
                FileNameNumericToken
                | FileNameEnclosedInBracketsToken
                | FilePathToken + FileNameEnclosedInBracketsToken
                | FilePathToken + FileName
                ;

            DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;

            NamedRange.Rule = NameToken | NamedRangeCombinationToken;

            // Quoted variants start with a separate QuoteS terminal; the closing quote is part of the quoted sheet tokens
            Prefix.Rule =
                SheetToken
                | QuoteS + SheetQuotedToken
                | File + SheetToken
                | QuoteS + File + SheetQuotedToken
                | File + exclamationMark
                | MultipleSheetsToken
                | QuoteS + MultipleSheetsQuotedToken
                | File + MultipleSheetsToken
                | QuoteS + File + MultipleSheetsQuotedToken
                | RefErrorToken
                ;

            StructuredReferenceQualifier.Rule = NameToken;

            StructuredReferenceSpecifier.Rule =
                SRSpecifierToken
                | OpenSquareParen + SRSpecifierToken + CloseSquareParen;

            StructuredReferenceColumn.Rule =
                SRColumnToken
                | OpenSquareParen + SRColumnToken + CloseSquareParen;

            StructuredReferenceExpression.Rule =
                StructuredReferenceColumn
                | StructuredReferenceColumn + colon + StructuredReferenceColumn
                | at + StructuredReferenceColumn
                | at + StructuredReferenceColumn + colon + StructuredReferenceColumn
                | StructuredReferenceSpecifier
                | StructuredReferenceSpecifier + comma + StructuredReferenceColumn
                | StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
                | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn
                | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
                ;

            StructuredReference.Rule =
                OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
                | StructuredReferenceQualifier + OpenSquareParen + CloseSquareParen
                | StructuredReferenceQualifier + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
                ;
            #endregion

            #region Arrays
            ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;

            ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
            ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);

            ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
            #endregion

            #endregion

            #region 5-Operator Precedence
            // Some of these operators are neutral associative instead of left associative,
            // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
            // structure of the parse tree, we like consistency.
            RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
            RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
            RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
            RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
            RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
            RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
            RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at);
            RegisterOperators(Precedence.Union, Associativity.Left, comma);
            RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
            RegisterOperators(Precedence.Range, Associativity.Left, colon);
            #endregion
        }

        #region Precedence and Priority constants
        // Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
        // Could also be an enum, but this way you don't need int casts
        private static class Precedence
        {
            // Don't use priority 0, Irony seems to view it as no priority set
            public const int Comparison = 1;
            public const int Concatenation = 2;
            public const int Addition = 3;
            public const int Multiplication = 4;
            public const int Exponentiation = 5;
            public const int UnaryPostFix = 6;
            public const int UnaryPreFix = 7;
            //public const int Reference = 8;
            public const int Union = 9;
            public const int Intersection = 10;
            public const int Range = 11;
        }

        // Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
        // E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
        // E.g. "A1Blah" is both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
        private static class TerminalPriority
        {
            // Irony Low value
            //public const int Low = -1000;

            public const int Name = -800;
            public const int ReservedName = -700;

            public const int StructuredReference = -500;

            public const int FileName = -500;
            public const int FileNamePath = -800;

            public const int SingleQuotedString = -100;

            // Irony Normal value, default value
            //public const int Normal = 0;
            public const int Bool = 0;

            public const int MultipleSheetsToken = 100;

            // Irony High value
            //public const int High = 1000;

            public const int CellToken = 1000;

            public const int NamedRangeCombination = 1100;

            public const int UDF = 1150;

            public const int ExcelFunction = 1200;
            public const int ExcelRefFunction = 1200;
            public const int FileNameNumericToken = 1200;
            public const int SheetToken = 1200;
            public const int SheetQuotedToken = 1200;
        }
        #endregion

        // Re-reads the embedded resource on every access
        private static string[] excelFunctionList => GetExcelFunctionList();

        /// <summary>
        /// Reads the built-in function list resource and splits it into its non-empty lines.
        /// </summary>
        private static string[] GetExcelFunctionList()
        {
            var resource = Properties.Resources.ExcelBuiltinFunctionList_v160;
            using (var sr = new StringReader(resource))
                return sr.ReadToEnd().Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
        }
    }

    #region Names
    /// <summary>
    /// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
    /// </summary>
    /// <remarks>
    /// Using these is strongly recommended, as these will change when breaking changes occur.
    /// It also allows you to see which code works on what grammar constructs.
    /// </remarks>
    // Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
    public static class GrammarNames
    {
        #region Non-Terminals
        public const string Argument = "Argument";
        public const string Arguments = "Arguments";
        public const string ArrayColumns = "ArrayColumns";
        public const string ArrayConstant = "ArrayConstant";
        public const string ArrayFormula = "ArrayFormula";
        public const string ArrayRows = "ArrayRows";
        public const string Bool = "Bool";
        public const string Cell = "Cell";
        public const string Constant = "Constant";
        public const string ConstantArray = "ConstantArray";
        public const string DynamicDataExchange = "DynamicDataExchange";
        public const string EmptyArgument = "EmptyArgument";
        public const string Error = "Error";
        public const string ExcelFunction = "ExcelFunction";
        public const string File = "File";
        public const string Formula = "Formula";
        public const string FormulaWithEq = "FormulaWithEq";
        public const string FunctionCall = "FunctionCall";
        public const string FunctionName = "FunctionName";
        public const string HorizontalRange = "HRange";
        public const string MultiRangeFormula = "MultiRangeFormula";
        public const string NamedRange = "NamedRange";
        public const string Number = "Number";
        public const string Prefix = "Prefix";
        public const string QuotedFileSheet = "QuotedFileSheet";
        public const string Range = "Range";
        public const string Reference = "Reference";
        public const string ReferenceFunctionCall = "ReferenceFunctionCall";
        public const string RefError = "RefError";
        public const string RefFunctionName = "RefFunctionName";
        public const string ReservedName = "ReservedName";
        public const string Sheet = "Sheet";
        public const string StructuredReference = "StructuredReference";
        public const string StructuredReferenceColumn = "StructuredReferenceColumn";
        public const string StructuredReferenceExpression = "StructuredReferenceExpression";
        public const string StructuredReferenceSpecifier = "StructuredReferenceSpecifier";
        public const string StructuredReferenceQualifier = "StructuredReferenceQualifier";
        public const string Text = "Text";
        public const string UDFName = "UDFName";
        public const string UDFunctionCall = "UDFunctionCall";
        public const string Union = "Union";
        public const string VerticalRange = "VRange";
        #endregion

        #region Transient Non-Terminals
        public const string TransientStart = "Start";
        public const string TransientInfixOp = "InfixOp";
        public const string TransientPostfixOp = "PostfixOp";
        public const string TransientPrefixOp = "PrefixOp";
        public const string TransientReferenceItem = "ReferenceItem";
        #endregion

        #region Terminals
        public const string TokenBool = "BoolToken";
        public const string TokenCell = "CellToken";
        public const string TokenEmptyArgument = "EmptyArgumentToken";
        public const string TokenError = "ErrorToken";
        public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
        public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
        public const string TokenFilePath = "FilePathToken";
        public const string TokenFileName = "FileNameToken";
        public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
        public const string TokenFileNameNumeric = "FileNameNumericToken";
        public const string TokenHRange = "HRangeToken";
        public const string TokenIntersect = "INTERSECT";
        public const string TokenMultipleSheets = "MultipleSheetsToken";
        public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
        public const string TokenName = "NameToken";
        public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
        public const string TokenNumber = "NumberToken";
        public const string TokenRefError = "RefErrorToken";
        public const string TokenReservedName = "ReservedNameToken";
        public const string TokenSingleQuotedString = "SingleQuotedString";
        public const string TokenSheet = "SheetNameToken";
        public const string TokenSheetQuoted = "SheetNameQuotedToken";
        public const string TokenSRColumn = "SRColumnToken";
        public const string TokenSRSpecifier = "SRSpecifierToken";
        public const string TokenText = "TextToken";
        public const string TokenUDF = "UDFToken";
        public const string TokenUnionOperator = ",";
        public const string TokenVRange = "VRangeToken";
        #endregion

    }
    #endregion
}
/// Contains parser and utilities that operate directly on the parse tree, or makes working with the parse tree easier. ///
public static class ExcelFormulaParser
{
    /// <summary>
    /// Thread-local singleton parser instance
    /// </summary>
    [ThreadStatic] private static Parser _p;

    /// <summary>
    /// Parser for the current thread; created on first use and cached in the [ThreadStatic] field
    /// </summary>
    private static Parser P => _p ?? (_p = new Parser(new ExcelFormulaGrammar()));

    /// <summary>
    /// Parse a formula, return the tree's root node
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">
    /// If formula could not be parsed
    /// </exception>
    /// <returns>Parse tree root node</returns>
    public static ParseTreeNode Parse(string input)
    {
        return ParseToTree(input).Root;
    }

    /// <summary>
    /// Parse a formula, return the tree
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">
    /// If formula could not be parsed
    /// </exception>
    /// <returns>Parse tree</returns>
    public static ParseTree ParseToTree(string input)
    {
        var tree = P.Parse(input);

        if (tree.HasErrors())
        {
            throw new ArgumentException("Failed parsing input <<" + input + ">>");
        }

        // Move every intersect token one character back and give it a length of 1
        var intersects = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenIntersect));
        foreach (ParseTreeNode intersect in intersects)
        {
            var newLocation = new SourceLocation(intersect.Span.Location.Position - 1, intersect.Span.Location.Line, intersect.Span.Location.Column - 1);
            intersect.Span = new SourceSpan(newLocation, 1);
        }

        // Let PrefixInfo re-include whitespace that directly precedes a quoted sheet token
        var quotedSheetNodes = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenSheetQuoted));
        foreach (ParseTreeNode quotedSheetNode in quotedSheetNodes)
        {
            PrefixInfo.FixQuotedSheetNodeForWhitespace(quotedSheetNode, input);
        }

        return tree;
    }

    /// <summary>
    /// All nodes (terminals included) in depth-first pre-order, with a conditional stop
    /// </summary>
    /// <param name="root">The root node</param>
    /// <param name="stopAt">Don't process the children of a node matching this predicate</param>
    // inspiration taken from https://irony.codeplex.com/discussions/213938
    public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
    {
        var stack = new Stack<ParseTreeNode>();
        stack.Push(root);
        while (stack.Count > 0)
        {
            var node = stack.Pop();
            yield return node;

            // Check if we don't want to process the children of this node
            if (stopAt != null && stopAt(node)) continue;

            var children = node.ChildNodes;
            // Push children on in reverse order so that they will
            // be evaluated left -> right when popped.
            for (int i = children.Count - 1; i >= 0; i--)
            {
                stack.Push(children[i]);
            }
        }
    }

    /// <summary>
    /// All nodes (terminals included) in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
    {
        return AllNodesConditional(root);
    }

    /// <summary>
    /// All nodes of a certain type in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
    {
        return AllNodes(root.AllNodes(), type);
    }

    internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
    {
        return allNodes.Where(node => node.Is(type));
    }

    /// <summary>
    /// Get the parent node of a node
    /// </summary>
    /// <remarks>
    /// This is an expensive operation, as the whole tree will be searched through
    /// </remarks>
    /// <exception cref="ArgumentException">If no node below <paramref name="treeRoot"/> has <paramref name="child"/> as a child</exception>
    public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
    {
        var parent = treeRoot.AllNodes()
            .FirstOrDefault(node => node.ChildNodes.Any(c => c == child));
        if (parent == null)
        {
            throw new ArgumentException("Child is not part of the tree", nameof(child));
        }
        return parent;
    }

    /// <summary>
    /// The node type/name
    /// </summary>
    public static string Type(this ParseTreeNode node)
    {
        return node.Term.Name;
    }

    /// <summary>
    /// Check if a node is of a particular type
    /// </summary>
    public static bool Is(this ParseTreeNode pt, string type)
    {
        return pt.Type() == type;
    }

    /// <summary>
    /// Checks whether this node is a function
    /// </summary>
    public static Boolean IsFunction(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.FunctionCall)
               || input.Is(GrammarNames.ReferenceFunctionCall)
               || input.Is(GrammarNames.UDFunctionCall)
               // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
               || (input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction());
    }

    /// <summary>
    /// Whether or not this node represents parentheses "(_)"
    /// </summary>
    public static bool IsParentheses(this ParseTreeNode input)
    {
        switch (input.Type())
        {
            case GrammarNames.Formula:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Formula);
            case GrammarNames.Reference:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Reference);
            default:
                return false;
        }
    }

    /// <summary>
    /// True for a function node with three children whose middle child is an operator
    /// </summary>
    public static bool IsBinaryOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 3
               && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// True for a binary operation that is a FunctionCall node
    /// </summary>
    public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);
    }

    /// <summary>
    /// True for a binary operation that is a ReferenceFunctionCall node
    /// </summary>
    public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
    }

    /// <summary>
    /// True for a unary prefix or unary postfix operation
    /// </summary>
    public static bool IsUnaryOperation(this ParseTreeNode input)
    {
        return IsUnaryPrefixOperation(input) || IsUnaryPostfixOperation(input);
    }

    /// <summary>
    /// True for a function node with two children whose first child is an operator
    /// </summary>
    public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 2
               && input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// True for a function node with two children whose second child is an operator
    /// </summary>
    public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 2
               && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    // Drops the last character of the input (used to strip the trailing "(" of a printed function name)
    private static string RemoveFinalSymbol(string input)
    {
        input = input.Substring(0, input.Length - 1);
        return input;
    }

    /// <summary>
    /// Get the function or operator name of this function call
    /// </summary>
    /// <exception cref="ArgumentException">If the node is not a function call</exception>
    public static string GetFunction(this ParseTreeNode input)
    {
        if (input.IsIntersection())
        {
            return GrammarNames.TokenIntersect;
        }
        if (input.IsUnion())
        {
            return GrammarNames.TokenUnionOperator;
        }
        if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
        {
            return input.ChildNodes[1].Print();
        }
        if (input.IsUnaryPrefixOperation())
        {
            return input.ChildNodes[0].Print();
        }
        if (input.IsNamedFunction())
        {
            // Invariant casing: function names must not depend on the current culture (e.g. Turkish "i" -> "İ")
            return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
        }
        if (input.IsExternalUDFunction())
        {
            return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
        }

        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Check if this node is a specific function
    /// </summary>
    public static bool MatchFunction(this ParseTreeNode input, string functionName)
    {
        return IsFunction(input) && GetFunction(input) == functionName;
    }

    /// <summary>
    /// Get all the arguments of a function or operation
    /// </summary>
    /// <exception cref="ArgumentException">If the node is not a function call</exception>
    public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
    {
        if (input.IsNamedFunction())
        {
            return input
                .ChildNodes[1] // "Arguments" non-terminal
                .ChildNodes    // "Argument" non-terminals
                .Select(node => node.ChildNodes[0]);
        }
        if (input.IsBinaryOperation())
        {
            return new[] {input.ChildNodes[0], input.ChildNodes[2]};
        }
        if (input.IsUnaryPrefixOperation())
        {
            return new[] {input.ChildNodes[1]};
        }
        if (input.IsUnaryPostfixOperation())
        {
            return new[] {input.ChildNodes[0]};
        }
        if (input.IsUnion())
        {
            return input.ChildNodes[0].ChildNodes;
        }
        if (input.IsExternalUDFunction())
        {
            return input         // Reference
                .ChildNodes[1]   // UDFunctionCall
                .ChildNodes[1]   // Arguments
                .ChildNodes      // Argument non-terminals
                .Select(node => node.ChildNodes[0]);
        }

        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Checks whether this node is a built-in excel function
    /// </summary>
    public static bool IsBuiltinFunction(this ParseTreeNode node)
    {
        return node.IsFunction()
               && (node.ChildNodes[0].Is(GrammarNames.FunctionName) || node.ChildNodes[0].Is(GrammarNames.RefFunctionName));
    }

    /// <summary>
    /// Whether or not this node represents an intersection
    /// </summary>
    public static bool IsIntersection(this ParseTreeNode input)
    {
        return IsBinaryOperation(input)
               && input.ChildNodes[1].Token.Terminal.Name == GrammarNames.TokenIntersect;
    }

    /// <summary>
    /// Whether or not this node represents an union
    /// </summary>
    public static bool IsUnion(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.ReferenceFunctionCall)
               && input.ChildNodes.Count == 1
               && input.ChildNodes[0].Is(GrammarNames.Union);
    }

    /// <summary>
    /// Checks whether this node is a function call with name, and not just a unary or binary operation
    /// </summary>
public static bool IsNamedFunction(this ParseTreeNode input)
{
    return (input.Is(GrammarNames.FunctionCall) && input.ChildNodes[0].Is(GrammarNames.FunctionName))
           || (input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes[0].Is(GrammarNames.RefFunctionName))
           || input.Is(GrammarNames.UDFunctionCall);
}

/// <summary>
/// True for a binary or unary operation
/// </summary>
public static bool IsOperation(this ParseTreeNode input)
{
    return input.IsBinaryOperation() || input.IsUnaryOperation();
}

/// <summary>
/// True for a Reference node with two children whose second child is a named function
/// </summary>
public static bool IsExternalUDFunction(this ParseTreeNode input)
{
    return input.Is(GrammarNames.Reference)
           && input.ChildNodes.Count == 2
           && input.ChildNodes[1].IsNamedFunction();
}

/// <summary>
/// True if this node presents a number constant with a sign
/// </summary>
public static bool IsNumberWithSign(this ParseTreeNode input)
{
    return IsUnaryPrefixOperation(input)
           && input.ChildNodes[1].ChildNodes[0].Is(GrammarNames.Constant)
           && input.ChildNodes[1].ChildNodes[0].ChildNodes[0].Is(GrammarNames.Number);
}

/// <summary>
/// Extract all of the information from a Prefix non-terminal
/// </summary>
public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix) => PrefixInfo.From(prefix);

/// <summary>
/// Go to the first non-formula child node
/// </summary>
public static ParseTreeNode SkipFormula(this ParseTreeNode input)
{
    // Descend through first children for as long as the current node is a Formula
    while (input.Is(GrammarNames.Formula))
    {
        input = input.ChildNodes.First();
    }
    return input;
}

/// <summary>
/// Get all child nodes that are references and aren't part of another reference expression
/// </summary>
public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
{
    // The traversal stops descending at each Reference node, so nested references are not visited
    return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
        .Where(node => node.Is(GrammarNames.Reference))
        .Select(node => node.SkipToRelevant());
}

/// Gets the ParserReferences from the input parse tree node and its children
///
/// 5 cases:
/// 1. ReferenceItem node: convert to ParserReference
/// 2.
/// Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
/// (to include the references in the arguments of external UDFs)
/// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
/// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
/// 5. Other cases (RefFunctionCall, Union, Arguments):recursive call on the nodes returned from GetReferenceNodes(node)
public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
{
    // A Reference with a single child is only a wrapper; look at the child instead
    if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
    {
        node = node.ChildNodes[0];
    }

    var list = new List<ParserReference>();

    switch (node.Type())
    {
        case GrammarNames.Cell:
        case GrammarNames.NamedRange:
        case GrammarNames.HorizontalRange:
        case GrammarNames.VerticalRange:
        case GrammarNames.StructuredReference:
            list.Add(new ParserReference(node));
            break;
        case GrammarNames.Reference:
            list.Add(new ParserReference(node));
            list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            break;
        default:
            if (node.IsRange())
            {
                var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray();
                var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray();
                if (IsCellReference(rangeStart) && IsCellReference(rangeEnd))
                {
                    // Both limits are single cells: merge them into one CellRange reference
                    ParserReference range = rangeStart.First();
                    range.MaxLocation = rangeEnd.First().MinLocation;
                    range.ReferenceType = ReferenceType.CellRange;
                    range.LocationString = node.Print();
                    list.Add(range);
                }
                else
                {
                    list.AddRange(rangeStart);
                    list.AddRange(rangeEnd);
                }
            }
            else
            {
                list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
            }
            break;
    }

    return list;
}

// True when the list holds exactly one reference and that reference is of type Cell
private static bool IsCellReference(IList<ParserReference> references)
{
    return references.Count == 1 && references.First().ReferenceType == ReferenceType.Cell;
}

/// <summary>
/// Whether or not this node represents a range
/// </summary>
public static bool IsRange(this ParseTreeNode input)
{
    return input.IsBinaryReferenceOperation() && input.ChildNodes[1].Is(":");
}

/// <summary>
/// Go to the first "relevant" child node, i.e. skips wrapper nodes
/// </summary>
/// <param name="input">The input parse tree node</param>
/// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
/// <remarks>
/// Skips:
/// * FormulaWithEq and ArrayFormula nodes
/// * Formula nodes
/// * Parentheses
/// * Reference nodes which are just wrappers
/// </remarks>
public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
{
    while (true)
    {
        switch (input.Type())
        {
            case GrammarNames.FormulaWithEq:
            case GrammarNames.ArrayFormula:
                input = input.ChildNodes[1];
                break;
            case GrammarNames.Argument:
            case GrammarNames.Formula:
                if (input.ChildNodes.Count == 1)
                {
                    input = input.ChildNodes[0];
                }
                else
                {
                    return input;
                }
                break;
            case GrammarNames.Reference:
                // Skip references which are parentheses
                // Skip references without a prefix (=> they only have one child node) if the option is set
                if ((skipReferencesWithoutPrefix && input.ChildNodes.Count == 1) || input.IsParentheses())
                {
                    input = input.ChildNodes[0];
                }
                else
                {
                    return input;
                }
                break;
            default:
                return input;
        }
    }
}

/// <summary>
/// Pretty-print a parse tree to a string
/// </summary>
/// <exception cref="ArgumentException">If a node type is not handled by the printer</exception>
public static string Print(this ParseTreeNode input)
{
    // For terminals, just print the token text
    if (input.Term is Terminal)
    {
        return input.Token.Text;
    }

    // (Lazy) enumerable for printed children
    var children = input.ChildNodes.Select(Print);
    // Concrete list when needed
    List<string> childrenList;

    // Switch on non-terminals
    switch (input.Term.Name)
    {
        case GrammarNames.Formula:
            // Check if these are brackets, otherwise print first child
            return IsParentheses(input) ? $"({children.First()})" : children.First();

        case GrammarNames.FunctionCall:
        case GrammarNames.ReferenceFunctionCall:
        case GrammarNames.UDFunctionCall:
            childrenList = children.ToList();

            if (input.IsNamedFunction())
            {
                return string.Join("", childrenList) + ")";
            }

            if (input.IsBinaryOperation())
            {
                // format string for "normal" binary operation
                string format = "{0}{1}{2}";
                if (input.IsIntersection())
                {
                    format = "{0} {2}";
                }

                return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
            }

            if (input.IsUnion())
            {
                return $"({string.Join(",", childrenList)})";
            }

            if (input.IsUnaryOperation())
            {
                return string.Join("", childrenList);
            }

            throw new ArgumentException("Unknown function type.");

        case GrammarNames.Reference:
            return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);

        case GrammarNames.Prefix:
            var ret = string.Join("", children);
            // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
            if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
            {
                ret += "!";
            }
            return ret;

        case GrammarNames.ArrayFormula:
            return "{=" + children.ElementAt(1) + "}";

        // Terms for which to print all child nodes concatenated
        case GrammarNames.ArrayConstant:
        case GrammarNames.DynamicDataExchange:
        case GrammarNames.FormulaWithEq:
        case GrammarNames.File:
        case GrammarNames.MultiRangeFormula:
        case GrammarNames.StructuredReference:
        case GrammarNames.StructuredReferenceColumn:
        case GrammarNames.StructuredReferenceExpression:
        case GrammarNames.StructuredReferenceSpecifier:
            return string.Join("", children);

        // Terms for which we print the children comma-separated
        case GrammarNames.Arguments:
        case GrammarNames.ArrayRows:
        case GrammarNames.Union:
            return string.Join(",", children);

        case GrammarNames.ArrayColumns:
            return string.Join(";", children);

        case GrammarNames.ConstantArray:
            return $"{{{children.First()}}}";

        default:
            // If it is not defined above and the number of children is exactly one, we
// want to just print the first child
                    if (input.ChildNodes.Count == 1)
                    {
                        return children.First();
                    }

                    throw new ArgumentException(
                        $"Could not print node of type '{input.Term.Name}'." + Environment.NewLine +
                        "This probably means the Excel grammar was modified without the print function being modified");
            }
        }
    }
}
================================================ FILE: app/XLParser.Web/XLParserVersions/v160/ParserReference.cs ================================================
using System.Linq;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v160
{
    public enum ReferenceType
    {
        Cell,
        CellRange,
        UserDefinedName,
        HorizontalRange,
        VerticalRange,
        RefError,
        Table
    }

    public class ParserReference
    {
        public const int MaxRangeHeight = 1048576;
        public const int MaxRangeWidth = 16384;

        public ReferenceType ReferenceType { get; set; }
        public string LocationString { get; set; }
        public string Worksheet { get; set; }
        public string LastWorksheet { get; set; }
        public string FilePath { get; set; }
        public string FileName { get; set; }
        public string Name { get; private set; }
        public string MinLocation { get; set; } //Location as appearing in the formula, eg $A$1
        public string MaxLocation { get; set; }
        public string[] TableSpecifiers { get; set; }
        public string[] TableColumns { get; set; }

        /// <summary>
        /// Creates a reference from explicit values; MaxLocation falls back to
        /// <paramref name="minLocation"/> when <paramref name="maxLocation"/> is null.
        /// </summary>
        public ParserReference(ReferenceType referenceType, string locationString = null, string worksheet = null, string lastWorksheet = null, string filePath = null, string fileName = null, string name = null, string minLocation = null, string maxLocation = null, string[] tableSpecifiers = null, string[] tableColumns = null)
        {
            ReferenceType = referenceType;
            LocationString = locationString;
            Worksheet = worksheet;
            LastWorksheet = lastWorksheet;
            FilePath = filePath;
            FileName = fileName;
            Name = name;
            MinLocation = minLocation;
            MaxLocation = maxLocation ?? minLocation;
            TableColumns = tableColumns;
            TableSpecifiers = tableSpecifiers;
        }

        /// <summary>
        /// Creates a reference from a parse tree node (see InitializeReference)
        /// </summary>
        public ParserReference(ParseTreeNode node)
        {
            InitializeReference(node);
        }

        /// <summary>
        /// Initializes the current object based on the input ParseTreeNode
        /// </summary>
        /// <remarks>
        /// For Reference nodes (Prefix ReferenceItem), it initialize the values derived from the Prefix node and
        /// is re-invoked for the ReferenceItem node.
        /// </remarks>
        public void InitializeReference(ParseTreeNode node)
        {
            switch (node.Type())
            {
                case GrammarNames.Reference:
                    PrefixInfo prefix = node.ChildNodes[0].GetPrefixInfo();

                    // Doubled single quotes in the sheet name are collapsed to one
                    Worksheet = prefix.HasSheet ? prefix.Sheet.Replace("''", "'") : "(Undefined sheet)";

                    if (prefix.HasMultipleSheets)
                    {
                        // "First:Last" overrides the single-sheet value set above
                        string[] sheets = prefix.MultipleSheets.Split(':');
                        Worksheet = sheets[0];
                        LastWorksheet = sheets[1];
                    }

                    if (prefix.HasFilePath)
                    {
                        FilePath = prefix.FilePath;
                    }

                    // A numeric file reference takes precedence over a file name
                    if (prefix.HasFileNumber)
                    {
                        FileName = prefix.FileNumber.ToString();
                    }
                    else if (prefix.HasFileName)
                    {
                        FileName = prefix.FileName;
                    }

                    InitializeReference(node.ChildNodes[1]);
                    break;
                case GrammarNames.Cell:
                    ReferenceType = ReferenceType.Cell;
                    MinLocation = node.ChildNodes[0].Token.ValueString;
                    MaxLocation = MinLocation;
                    break;
                case GrammarNames.NamedRange:
                    ReferenceType = ReferenceType.UserDefinedName;
                    Name = node.ChildNodes[0].Token.ValueString;
                    break;
                case GrammarNames.StructuredReference:
                    ReferenceType = ReferenceType.Table;
                    Name = node.ChildNodes
                        .FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceQualifier)
                        ?.ChildNodes[0].Token.ValueString;
                    TableSpecifiers = node.AllNodes()
                        .Where(x => x.Is(GrammarNames.TokenSRSpecifier) || x.Is("@"))
                        .Select(x => UnEscape(x.Token.ValueString, "'"))
                        .ToArray();
                    TableColumns = node.AllNodes()
                        .Where(x => x.Is(GrammarNames.TokenSRColumn))
                        .Select(x => UnEscape(x.Token.ValueString, "'"))
                        .ToArray();
                    break;
                case GrammarNames.HorizontalRange:
                    string[] horizontalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                    ReferenceType = ReferenceType.HorizontalRange;
                    MinLocation = horizontalLimits[0];
MaxLocation = horizontalLimits[1];
                    break;
                case GrammarNames.VerticalRange:
                    string[] verticalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                    ReferenceType = ReferenceType.VerticalRange;
                    MinLocation = verticalLimits[0];
                    MaxLocation = verticalLimits[1];
                    break;
                case GrammarNames.RefError:
                    ReferenceType = ReferenceType.RefError;
                    break;
            }

            LocationString = node.Print();
        }

        // Removes every occurrence of escapeCharacter that is not directly followed by another one
        private string UnEscape(string value, string escapeCharacter)
        {
            // Escape the character so regex metacharacters are matched literally
            var escaped = System.Text.RegularExpressions.Regex.Escape(escapeCharacter);
            return System.Text.RegularExpressions.Regex.Replace(value, $"{escaped}(?!{escaped})", "");
        }

        /// <summary>
        /// Converts the column number to an Excel column string representation.
        /// </summary>
        /// <param name="columnNumber">The zero-based column number.</param>
        private string ConvertColumnToStr(int columnNumber)
        {
            var sb = new System.Text.StringBuilder();
            while (columnNumber >= 0)
            {
                sb.Insert(0, (char)(65 + columnNumber % 26));
                columnNumber = columnNumber / 26 - 1;
            }

            return sb.ToString();
        }

        /// <summary>
        /// MinLocation for a single cell, otherwise "MinLocation:MaxLocation"
        /// </summary>
        public override string ToString()
        {
            // MinLocation may be null (e.g. set through the value constructor); do not dereference it
            return ReferenceType == ReferenceType.Cell
                ? MinLocation ?? string.Empty
                : string.Format("{0}:{1}", MinLocation, MaxLocation);
        }
    }
}
================================================ FILE: app/XLParser.Web/XLParserVersions/v160/PrefixInfo.cs ================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v160
{
    // TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23

    /// <summary>
    /// Simple data class that holds information about a Prefix.
    /// </summary>
    public class PrefixInfo : IEquatable<PrefixInfo>
    {
        public string FilePath { get; }
        public bool HasFilePath => FilePath != null;

        private readonly int? _fileNumber;
        public int FileNumber => _fileNumber.GetValueOrDefault();
        public bool HasFileNumber => _fileNumber.HasValue;

        public string FileName { get; }
        public bool HasFileName => FileName != null;

        public bool HasFile => HasFileName || HasFileNumber;

        public string Sheet { get; }
        public bool HasSheet => Sheet != null;

        public string MultipleSheets { get; }
        public bool HasMultipleSheets => MultipleSheets != null;

        public bool IsQuoted { get; }

        public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
        {
            Sheet = sheet;
            _fileNumber = fileNumber;
            FileName = fileName;
            FilePath = filePath;
            MultipleSheets = multipleSheets;
            IsQuoted = isQuoted;
        }

        /// <summary>
        /// Create a PrefixInfo class from a parse tree node
        /// </summary>
        /// <exception cref="ArgumentException">If the node is not a Prefix node</exception>
        internal static PrefixInfo From(ParseTreeNode prefix)
        {
            if (prefix.Type() != GrammarNames.Prefix)
            {
                throw new ArgumentException("Not a prefix", nameof(prefix));
            }

            string filePath = null;
            int? fileNumber = null;
            string fileName = null;
            string sheetName = null;
            string multipleSheets = null;

            var nodes = prefix.ChildNodes;

            // Token number we're processing
            var cur = 0;

            // Check for quotes
            var isQuoted = cur < nodes.Count && nodes[cur].Is("'");
            if (isQuoted)
            {
                cur++;
            }

            // Check and process file
            if (cur < nodes.Count && nodes[cur].Is(GrammarNames.File))
            {
                ParseTreeNode file = nodes[cur];

                if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
                {
                    // Numeric filename: strip the surrounding brackets before parsing
                    fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
                }
                else
                {
                    // String filename
                    var iCur = 0;
                    // Check if it includes a path
                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                    {
                        filePath = file.ChildNodes[iCur].Print();
                        iCur++;
                    }

                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                    {
                        fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                    }
                    else
                    {
                        fileName = file.ChildNodes[iCur].Print();
                    }
                }

                cur++;
            }

            // A prefix can consist of a file only (the "!" is not part of the parse tree),
            // so there may be no sheet node left to inspect.
            if (cur < nodes.Count)
            {
                ParseTreeNode sheetNode = nodes[cur];

                // Check for a non-quoted sheet
                if (sheetNode.Is(GrammarNames.TokenSheet))
                {
                    sheetName = Substr(sheetNode.Print(), 1);
                }
                // Check for a quoted sheet
                else if (sheetNode.Is(GrammarNames.TokenSheetQuoted))
                {
                    // remove quote and !
                    sheetName = Substr(sheetNode.Print(), 2);

                    if (sheetName == "")
                    {
                        // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                        // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                        sheetName = " ";
                    }
                }
                // Check if multiple sheets
                else if (sheetNode.Is(GrammarNames.TokenMultipleSheets))
                {
                    multipleSheets = Substr(sheetNode.Print(), 1);
                }
            }

            return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
        }

        /// <summary>
        /// Extends a quoted sheet node backwards over the whitespace that directly precedes it in the source text
        /// </summary>
        internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
        {
            var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
            SourceLocation currentLocation = quotedSheetNode.Span.Location;
            if (newPosition == currentLocation.Position)
            {
                return;
            }

            // The start moves backwards by `shift` characters, so the column has to move backwards as well.
            // NOTE(review): assumes the skipped whitespace is on the same line; the line number is left unchanged.
            var shift = currentLocation.Position - newPosition;
            var newLocation = new SourceLocation(newPosition, currentLocation.Line, Math.Max(0, currentLocation.Column - shift));
            quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

            // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
                ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
        }

        // Walks backwards from the node's start position over any directly preceding whitespace
        private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
        {
            var position = nodeSheetQuoted.Span.Location.Position;
            while (position > 0 && char.IsWhiteSpace(sourceText[position - 1]))
            {
                position--;
            }

            return position;
        }

        // Returns s without its first removeFirst and last removeLast characters
        private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
        {
            var length = s.Length - removeLast - removeFirst;
            return s.Substring(removeFirst, length);
        }

        public override bool Equals(object other) => Equals(other as PrefixInfo);

        /// <summary>
        /// Compares file number, file path, file name, sheet and multiple sheets; strings are compared case-insensitively
        /// </summary>
        public bool Equals(PrefixInfo other)
        {
            if (ReferenceEquals(null, other))
            {
                return false;
            }

            if (ReferenceEquals(this, other))
            {
                return true;
            }

            const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;
            return _fileNumber == other._fileNumber
                   && string.Equals(FilePath, other.FilePath, ignoreCase)
                   && string.Equals(FileName, other.FileName, ignoreCase)
                   && string.Equals(Sheet, other.Sheet, ignoreCase)
                   && string.Equals(MultipleSheets, other.MultipleSheets, ignoreCase);
        }

        public override int GetHashCode()
        {
            var comparer = StringComparer.OrdinalIgnoreCase;
            unchecked
            {
                var hashCode = comparer.GetHashCode(Sheet ?? "");
                hashCode = (hashCode*397) ^ (FilePath != null ? comparer.GetHashCode(FilePath) : 0);
                hashCode = (hashCode*397) ^ (FileName != null ? comparer.GetHashCode(FileName) : 0);
                hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
                hashCode = (hashCode*397) ^ (MultipleSheets != null ? comparer.GetHashCode(MultipleSheets) : 0);
                return hashCode;
            }
        }

        public static bool operator ==(PrefixInfo left, PrefixInfo right)
        {
            return Equals(left, right);
        }

        public static bool operator !=(PrefixInfo left, PrefixInfo right)
        {
            return !Equals(left, right);
        }

        /// <summary>
        /// Rebuilds the prefix text from its parts, always ending in "!"
        /// </summary>
        public override string ToString()
        {
            var res = new StringBuilder();

            if (IsQuoted)
            {
                res.Append("'");
            }

            if (HasFilePath)
            {
                res.Append(FilePath);
            }

            if (HasFileNumber)
            {
                res.Append($"[{FileNumber}]");
            }

            if (HasFileName)
            {
                res.Append($"[{FileName}]");
            }

            if (HasSheet)
            {
                res.Append(Sheet);
            }

            if (HasMultipleSheets)
            {
                res.Append(MultipleSheets);
            }

            if (IsQuoted)
            {
                res.Append("'");
            }

            res.Append("!");
            return res.ToString();
        }
    }
}
================================================ FILE: app/XLParser.Web/XLParserVersions/v161/ExcelFormulaGrammar.cs ================================================
using Irony.Parsing;
using System;
using System.IO;
using System.Reflection;

namespace XLParser.Web.XLParserVersions.v161
{
    /// <summary>
    /// Contains the XLParser grammar
    /// </summary>
    [Language("Excel Formulas", "1.6.1", "Grammar for Excel Formulas")]
    public class ExcelFormulaGrammar : Grammar
    {
        #region 1-Terminals

        #region Symbols and operators

        public Terminal at => ToTerm("@");
        public Terminal comma => ToTerm(",");
        public Terminal colon => ToTerm(":");
        public Terminal hash => ToTerm("#");
        public Terminal semicolon => ToTerm(";");
        public Terminal OpenParen => ToTerm("(");
        public Terminal CloseParen => ToTerm(")");
        public Terminal CloseSquareParen => ToTerm("]");
        public Terminal OpenSquareParen => ToTerm("[");
        public Terminal exclamationMark => ToTerm("!");
        public Terminal CloseCurlyParen => ToTerm("}");
        public Terminal OpenCurlyParen => ToTerm("{");
        public Terminal QuoteS => ToTerm("'");

        public Terminal mulop => ToTerm("*");
        public Terminal plusop => ToTerm("+");
        public Terminal divop => ToTerm("/");
        public Terminal minop => ToTerm("-");
        public Terminal concatop => ToTerm("&");
        public Terminal expop => ToTerm("^");

        // Intersect op is a single space,
which cannot be parsed normally so we need an ImpliedSymbolTerminal // Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect); public Terminal percentop => ToTerm("%"); public Terminal gtop => ToTerm(">"); public Terminal eqop => ToTerm("="); public Terminal ltop => ToTerm("<"); public Terminal neqop => ToTerm("<>"); public Terminal gteop => ToTerm(">="); public Terminal lteop => ToTerm("<="); #endregion #region Literals public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE") { Priority = TerminalPriority.Bool }; public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None) { DefaultIntTypes = new[] {TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt} }; public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes); public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes) { Priority = TerminalPriority.SingleQuotedString }; public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!"); public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError); #endregion #region Functions private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers private const string AllUdfChars = SpecialUdfChars + @"\\.\w"; private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)"; // The following regex uses the rather exotic feature Character Class Subtraction // 
// https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction
    // A UDF token is one of the following, always followed by an opening parenthesis:
    //  - a single character from AllUdfChars other than C, c, R or r (character class subtraction),
    //  - the UDF prefix followed by a single character from AllUdfChars,
    //  - an optional UDF prefix followed by 2 to 1023 characters from AllUdfChars.
    private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\(";
    public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) {Priority = TerminalPriority.UDF};

    // Functions that can return a reference; the token includes the opening parenthesis
    public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(") { Priority = TerminalPriority.ExcelRefFunction };

    public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(") { Priority = TerminalPriority.ExcelRefFunction };

    // Alternation of every name in the built-in function list, followed by an opening parenthesis
    public Terminal ExcelFunction { get; } = new RegexBasedTerminal(GrammarNames.ExcelFunction, "(" + string.Join("|", excelFunctionList) + ")\\(") { Priority = TerminalPriority.ExcelFunction };

    // Using this instead of Empty allows a more accurate tree
    public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);

    #endregion

    #region References and names

    // One to three column letters; the three-letter alternatives stop at XFD
    private const string ColumnPattern = @"(?:[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D]|[A-Z]{1,2})";

    // Column range such as A:B, each side optionally prefixed with '$'
    public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern);
    // Row range such as 1:2, each side optionally prefixed with '$'; row numbers cannot start with 0
    public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*");

    // Column followed by row, each optionally prefixed with '$'
    private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?[1-9][0-9]*";
    public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex) { Priority = TerminalPriority.CellToken };

    // Start with a letter, underscore or backslash; continue with word characters
    // (letters, numbers and underscore), backslash, dot, question mark or the euro sign
    private const string NameStartCharRegex = @"[\p{L}\\_]";
    private const string NameValidCharacterRegex = @"[\w\\_\.\?€]";
    public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*") { Priority = TerminalPriority.Name };

    // Words that would be valid names, but are disallowed by Excel.
    // E.g. "A1" would be a valid name, but it is not one because it is also a cell reference.
    // If we ever parse R1C1 references, make sure to include them here
    // TODO: Add all function names here
    private const string NameInvalidWordsRegex =
        "((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
        // \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
        + "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
        ;

    // To prevent e.g. "A1A1" being parsed as 2 cell tokens
    public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NameInvalidWordsRegex + NameValidCharacterRegex + "+") { Priority = TerminalPriority.NamedRangeCombination };

    // Names starting with the literal prefix "_xlnm."
    public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+") { Priority = TerminalPriority.ReservedName };

    #region Structured References

    private const string SRSpecifierRegex = @"#(All|Data|Headers|Totals|This Row)";
    public Terminal SRSpecifierToken = new RegexBasedTerminal(GrammarNames.TokenSRSpecifier, SRSpecifierRegex) { Priority = TerminalPriority.StructuredReference };

    // One or more characters; the characters [ ] ' # @ are only accepted when preceded by a single quote
    private const string SRColumnRegex = @"(?:[^\[\]'#@]|(?:'['\[\]#@]))+";
    public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, SRColumnRegex) { Priority = TerminalPriority.StructuredReference };

    #endregion

    #region Prefixes

    // Characters excluded from an unquoted sheet name (in addition to notSheetNameChars)
    private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
    // Characters excluded from both quoted and unquoted sheet names
    private const string notSheetNameChars = @"'*\[\]\\:/?";
    //const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
    //const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
    private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
    // Inside quotes a doubled single quote ('') is accepted as well
    private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*";
    //private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";

    public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!") { Priority = TerminalPriority.SheetToken };
    // Matches the sheet name, the closing quote and the '!'; the Prefix rule places QuoteS in front of this token
    public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!") { Priority = TerminalPriority.SheetQuotedToken };

    // Two sheet names separated by a colon, e.g. Sheet1:Sheet3!
    private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!";
    private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!";
    public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex) { Priority = TerminalPriority.MultipleSheetsToken };
    public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex) { Priority = TerminalPriority.MultipleSheetsToken };

    // A number in square brackets; the lookahead requires a '!' further on with no square bracket in between
    private const string fileNameNumericRegex = @"\[[0-9]+\](?=[^\[\]]*!)";
    public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex) { Priority = TerminalPriority.FileNameNumericToken };

    // Any bracket-free text in square brackets, with the same lookahead as the numeric variant
    private const string fileNameInBracketsRegex = @"\[[^\[\]]+\](?=[^\[\]]*!)";
    public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex) { Priority = TerminalPriority.FileName };

    // Source: https://stackoverflow.com/a/14632579
    private const string fileNameRegex = @"[^\.\\\[\]]+\..{1,4}";
    public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex) { Priority = TerminalPriority.FileName };

    // Source: http://stackoverflow.com/a/6416209/572635
    private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\-.$ @]+)\\(([^<>\"" /\|?*\\']|( |''))*\\)*";
    // NOTE(review): "(0-9)" is a group matching the literal text "0-9", not a digit class, so a port such as ":8080" is not matched - confirm whether "[0-9]" was intended
    private const string urlPathRegex = @"http(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:(0-9)*)*[/]([a-zA-Z0-9\-\.\?\,\'+&%\$#_ ()]*[/])*";
    private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")";
    public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex) { Priority = TerminalPriority.FileNamePath };

    #endregion

    #endregion

    #endregion

    #region 2-NonTerminals
    // Most non-terminals are first defined here, so they can be used anywhere in the rules
    // Otherwise you can only use non-terminals that have been defined previously

    public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument);
    public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments);
    public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns);
    public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant);
    public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula);
    public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows);
    public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool);
    public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell);
    public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant);
    public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray);
    public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
    public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument);
    public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error);
    public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
    public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula);
    public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq);
    public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall);
    public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName);
    public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange);
    public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp);
    public NonTerminal MultiRangeFormula{ get; } = new NonTerminal(GrammarNames.MultiRangeFormula);
    public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange);
    public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number);
    public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
    public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix);
    public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
    public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
    public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference);
    public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
    public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
    public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError);
    public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName);
    public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName);
    public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet);
    public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart);
    public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
    public NonTerminal StructuredReferenceColumn { get; } = new NonTerminal(GrammarNames.StructuredReferenceColumn);
    public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
    public NonTerminal StructuredReferenceSpecifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceSpecifier);
    public NonTerminal StructuredReferenceQualifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceQualifier);
    public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
    public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
    public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
    public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
    public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);

    #endregion

    // Builds the grammar: marks punctuation, assigns a rule to every non-terminal declared above,
    // sets the root and registers operator precedence.
    // NOTE(review): base(false) presumably selects a case-insensitive grammar - confirm against Irony's Grammar constructor.
    public ExcelFormulaGrammar() : base(false)
    {
        #region Punctuation
        MarkPunctuation(OpenParen, CloseParen);
        MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
        #endregion

        #region Rules

        #region Base rules
        Root = Start;

        // A formula may be given with or without a leading '=', as an array formula in curly braces, or as '=' followed by a union
        Start.Rule =
            FormulaWithEq
            | Formula
            | ArrayFormula
            | MultiRangeFormula
            ;
        MarkTransient(Start);

        ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;

        MultiRangeFormula.Rule = eqop + Union;

        FormulaWithEq.Rule = eqop + Formula;

        Formula.Rule =
            Reference + ReduceHere()
            | Constant
            | FunctionCall
            | ConstantArray
            | OpenParen + Formula + CloseParen
            | ReservedName
            ;

        ReservedName.Rule = ReservedNameToken;

        Constant.Rule =
            Number
            | Text
            | Bool
            | Error
            ;

        Text.Rule = TextToken;
        Number.Rule = NumberToken;
        Bool.Rule = BoolToken;
        Error.Rule = ErrorToken;
        RefError.Rule = RefErrorToken;
        #endregion

        #region Functions

        // Operator applications are represented as FunctionCall nodes too
        FunctionCall.Rule =
            FunctionName + Arguments + CloseParen
            | PrefixOp + Formula
            | Formula + PostfixOp
            | Formula + InfixOp + Formula
            ;

        FunctionName.Rule = ExcelFunction;

        Arguments.Rule = MakeStarRule(Arguments, comma, Argument);

        EmptyArgument.Rule = EmptyArgumentToken;
        Argument.Rule = Formula | EmptyArgument;

        PrefixOp.Rule =
            ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
            | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
            | ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
        MarkTransient(PrefixOp);

        InfixOp.Rule =
            expop
            | mulop
            | divop
            | plusop
            | minop
            | concatop
            | gtop
            | eqop
            | ltop
            | neqop
            | gteop
            | lteop;
        MarkTransient(InfixOp);

        // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority that shift will nearly always be the correct action
        PostfixOp.Rule = PreferShiftHere() + percentop;
        MarkTransient(PostfixOp);
        #endregion

        #region References

        Reference.Rule =
            ReferenceItem
            | ReferenceFunctionCall
            | OpenParen + Reference + PreferShiftHere() + CloseParen
            | Prefix + ReferenceItem
            | DynamicDataExchange
            ;

        // Range (colon), intersection, parenthesized union, reference-returning function call and the postfix '#'
        ReferenceFunctionCall.Rule =
            Reference + colon + Reference
            | Reference + intersectop + Reference
            | OpenParen + Union + CloseParen
            | RefFunctionName + Arguments + CloseParen
            | Reference + hash
            ;

        RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;

        Union.Rule = MakePlusRule(Union, comma, Reference);

        ReferenceItem.Rule =
            Cell
            | NamedRange
            | VRange
            | HRange
            | RefError
            | UDFunctionCall
            | StructuredReference
            ;
        MarkTransient(ReferenceItem);

        UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
        UDFName.Rule = UDFToken;

        VRange.Rule = VRangeToken;
        HRange.Rule = HRangeToken;

        Cell.Rule = CellToken;

        File.Rule =
            FileNameNumericToken
            | FileNameEnclosedInBracketsToken
            | FilePathToken + FileNameEnclosedInBracketsToken
            | FilePathToken + FileName
            ;

        DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;

        NamedRange.Rule = NameToken | NamedRangeCombinationToken;

        // Quoted variants start with QuoteS; the closing quote is part of the quoted sheet tokens
        Prefix.Rule =
            SheetToken
            | QuoteS + SheetQuotedToken
            | File + SheetToken
            | QuoteS + File + SheetQuotedToken
            | File + exclamationMark
            | MultipleSheetsToken
            | QuoteS + MultipleSheetsQuotedToken
            | File + MultipleSheetsToken
            | QuoteS + File + MultipleSheetsQuotedToken
            | RefErrorToken
            ;

        StructuredReferenceQualifier.Rule = NameToken;

        StructuredReferenceSpecifier.Rule =
            SRSpecifierToken
            | OpenSquareParen + SRSpecifierToken + CloseSquareParen;

        StructuredReferenceColumn.Rule =
            SRColumnToken
            | OpenSquareParen + SRColumnToken + CloseSquareParen;

        // Columns (single or colon-separated pair, optionally preceded by '@'), or one or two specifiers optionally followed by columns
        StructuredReferenceExpression.Rule =
            StructuredReferenceColumn
            | StructuredReferenceColumn + colon + StructuredReferenceColumn
            | at + StructuredReferenceColumn
            | at + StructuredReferenceColumn + colon + StructuredReferenceColumn
            | StructuredReferenceSpecifier
            | StructuredReferenceSpecifier + comma + StructuredReferenceColumn
            | StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
            | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier
            | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn
            | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
            ;

        StructuredReference.Rule =
            OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
            | StructuredReferenceQualifier + OpenSquareParen + CloseSquareParen
            | StructuredReferenceQualifier + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
            ;
        #endregion

        #region Arrays
        ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;

        // Semicolon-separated entries, each of which is a comma-separated list of array constants
        ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
        ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);

        ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
        #endregion

        #endregion

        #region 5-Operator Precedence
        // Some of these operators are neutral associative instead of left associative,
        // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
        // structure of the parse tree, we like consistency.
        RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
        RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
        RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
        RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
        RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
        RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
        RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at);
        RegisterOperators(Precedence.Union, Associativity.Left, comma);
        RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
        RegisterOperators(Precedence.Range, Associativity.Left, colon);
        #endregion
    }

    #region Precedence and Priority constants
    // Operator precedence levels passed to RegisterOperators and ImplyPrecedenceHere.
    // Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
    // Could also be an enum, but this way you don't need int casts
    private static class Precedence
    {
        // Don't use priority 0, Irony seems to view it as no priority set
        public const int Comparison = 1;
        public const int Concatenation = 2;
        public const int Addition = 3;
        public const int Multiplication = 4;
        public const int Exponentiation = 5;
        public const int UnaryPostFix = 6;
        public const int UnaryPreFix = 7;
        //public const int Reference = 8;
        public const int Union = 9;
        public const int Intersection = 10;
        public const int Range = 11;
    }

    // Terminal priorities, indicates to the lexer which token it should pick when multiple tokens can match
    // E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
    // E.g. "A1Blah" is both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
    private static class TerminalPriority
    {
        // Irony Low value
        //public const int Low = -1000;

        public const int Name = -800;
        public const int ReservedName = -700;

        public const int StructuredReference = -500;

        public const int FileName = -500;
        public const int FileNamePath = -800;

        public const int SingleQuotedString = -100;

        // Irony Normal value, default value
        //public const int Normal = 0;
        public const int Bool = 0;

        public const int MultipleSheetsToken = 100;

        // Irony High value
        //public const int High = 1000;

        public const int CellToken = 1000;

        public const int NamedRangeCombination = 1100;

        public const int UDF = 1150;

        public const int ExcelFunction = 1200;
        public const int ExcelRefFunction = 1200;
        public const int FileNameNumericToken = 1200;
        public const int SheetToken = 1200;
        public const int SheetQuotedToken = 1200;
    }
    #endregion

    // Expression-bodied property: the resource is read and split again on every access
    private static string[] excelFunctionList => GetExcelFunctionList();

    // Returns the entries of the embedded v161 built-in function list resource,
    // split on '\n' and '\r' with empty entries removed.
    private static string[] GetExcelFunctionList()
    {
        var resource = Properties.Resources.ExcelBuiltinFunctionList_v161;
        using (var sr = new StringReader(resource))
            return sr.ReadToEnd().Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
    }
}

#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
    #region Non-Terminals
    // Names given to the NonTerminal instances of the grammar; a parse tree node's type is compared against these
    public const string Argument = "Argument";
    public const string Arguments = "Arguments";
    public const string ArrayColumns = "ArrayColumns";
    public const string ArrayConstant = "ArrayConstant";
    public const string ArrayFormula = "ArrayFormula";
    public const string ArrayRows = "ArrayRows";
    public const string Bool = "Bool";
    public const string Cell = "Cell";
    public const string Constant = "Constant";
    public const string ConstantArray = "ConstantArray";
    public const string DynamicDataExchange = "DynamicDataExchange";
    public const string EmptyArgument = "EmptyArgument";
    public const string Error = "Error";
    // Used as the name of the built-in function terminal in the grammar, although it is listed in this region
    public const string ExcelFunction = "ExcelFunction";
    public const string File = "File";
    public const string Formula = "Formula";
    public const string FormulaWithEq = "FormulaWithEq";
    public const string FunctionCall = "FunctionCall";
    public const string FunctionName = "FunctionName";
    // The constant name and its value differ: the node is named "HRange"
    public const string HorizontalRange = "HRange";
    public const string MultiRangeFormula = "MultiRangeFormula";
    public const string NamedRange = "NamedRange";
    public const string Number = "Number";
    public const string Prefix = "Prefix";
    public const string QuotedFileSheet = "QuotedFileSheet";
    public const string Range = "Range";
    public const string Reference = "Reference";
    public const string ReferenceFunctionCall = "ReferenceFunctionCall";
    public const string RefError = "RefError";
    public const string RefFunctionName = "RefFunctionName";
    public const string ReservedName = "ReservedName";
    public const string Sheet = "Sheet";
    public const string StructuredReference = "StructuredReference";
    public const string StructuredReferenceColumn = "StructuredReferenceColumn";
    public const string StructuredReferenceExpression = "StructuredReferenceExpression";
    public const string StructuredReferenceSpecifier = "StructuredReferenceSpecifier";
    public const string StructuredReferenceQualifier = "StructuredReferenceQualifier";
    public const string Text = "Text";
    public const string UDFName = "UDFName";
    public const string UDFunctionCall = "UDFunctionCall";
    public const string Union = "Union";
    // The constant name and its value differ: the node is named "VRange"
    public const string VerticalRange = "VRange";
    #endregion

    #region Transient Non-Terminals
    // Names of the non-terminals that the grammar passes to MarkTransient
    public const string TransientStart = "Start";
    public const string TransientInfixOp = "InfixOp";
    public const string TransientPostfixOp = "PostfixOp";
    public const string TransientPrefixOp = "PrefixOp";
    public const string TransientReferenceItem = "ReferenceItem";
    #endregion

    #region Terminals
    // Names given to the terminals (tokens) of the grammar
    public const string TokenBool = "BoolToken";
    public const string TokenCell = "CellToken";
    public const string TokenEmptyArgument = "EmptyArgumentToken";
    public const string TokenError = "ErrorToken";
    public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
    public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
    public const string TokenFilePath = "FilePathToken";
    public const string TokenFileName = "FileNameToken";
    public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
    public const string TokenFileNameNumeric = "FileNameNumericToken";
    public const string TokenHRange = "HRangeToken";
    public const string TokenIntersect = "INTERSECT";
    public const string TokenMultipleSheets = "MultipleSheetsToken";
    public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
    public const string TokenName = "NameToken";
    public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
    public const string TokenNumber = "NumberToken";
    public const string TokenRefError = "RefErrorToken";
    public const string TokenReservedName = "ReservedNameToken";
    public const string TokenSingleQuotedString = "SingleQuotedString";
    public const string TokenSheet = "SheetNameToken";
    public const string TokenSheetQuoted = "SheetNameQuotedToken";
    public const string TokenSRColumn = "SRColumnToken";
    public const string TokenSRSpecifier = "SRSpecifierToken";
    public const string TokenText = "TextToken";
    public const string TokenUDF = "UDFToken";
    public const string TokenUnionOperator = ",";
    public const string TokenVRange = "VRangeToken";
    #endregion
}
#endregion
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v161/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v161
{
///
/// Excel formula parser
/// Contains the parser and utilities that operate directly on the parse tree or make working with the parse tree easier. ///
public static class ExcelFormulaParser
{
    /// <summary>
    /// Thread-local singleton parser instance
    /// </summary>
    [ThreadStatic] private static Parser _p;

    /// <summary>
    /// Parser for the current thread; created on first use because <see cref="_p"/> is thread-static
    /// </summary>
    private static Parser P => _p ?? (_p = new Parser(new ExcelFormulaGrammar()));

    /// <summary>
    /// Parse a formula, return the tree's root node
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">
    /// If formula could not be parsed
    /// </exception>
    /// <returns>Parse tree root node</returns>
    public static ParseTreeNode Parse(string input)
    {
        return ParseToTree(input).Root;
    }

    /// <summary>
    /// Parse a formula, return the tree
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">
    /// If formula could not be parsed
    /// </exception>
    /// <returns>Parse tree</returns>
    public static ParseTree ParseToTree(string input)
    {
        var tree = P.Parse(input);

        if (tree.HasErrors())
        {
            throw new ArgumentException("Failed parsing input <<" + input + ">>");
        }

        // Move every intersect token one position/column to the left and give it a span of length 1
        var intersects = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenIntersect));
        foreach (ParseTreeNode intersect in intersects)
        {
            var newLocation = new SourceLocation(intersect.Span.Location.Position - 1, intersect.Span.Location.Line, intersect.Span.Location.Column - 1);
            intersect.Span = new SourceSpan(newLocation, 1);
        }

        // Let PrefixInfo correct quoted sheet name nodes, using the original input text
        var quotedSheetNodes = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenSheetQuoted));
        foreach (ParseTreeNode quotedSheetNode in quotedSheetNodes)
        {
            PrefixInfo.FixQuotedSheetNodeForWhitespace(quotedSheetNode, input);
        }

        return tree;
    }

    /// <summary>
    /// Nodes in depth-first pre-order, with a conditional stop.
    /// Every node reachable through ChildNodes is returned; nodes are not filtered by kind.
    /// </summary>
    /// <param name="root">The root node</param>
    /// <param name="stopAt">Don't process the children of a node matching this predicate (the node itself is still returned)</param>
    // inspiration taken from https://irony.codeplex.com/discussions/213938
    public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
    {
        var stack = new Stack<ParseTreeNode>();
        stack.Push(root);
        while (stack.Count > 0)
        {
            var node = stack.Pop();
            yield return node;

            // Check if we don't want to process the children of this node
            if (stopAt != null && stopAt(node)) continue;

            var children = node.ChildNodes;
            // Push children on in reverse order so that they will
            // be evaluated left -> right when popped.
            for (int i = children.Count - 1; i >= 0; i--)
            {
                stack.Push(children[i]);
            }
        }
    }

    /// <summary>
    /// All nodes in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
    {
        return AllNodesConditional(root);
    }

    /// <summary>
    /// All nodes of a certain type in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
    {
        return AllNodes(root.AllNodes(), type);
    }

    // Filters an already enumerated node sequence down to the nodes of the given type
    internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
    {
        return allNodes.Where(node => node.Is(type));
    }

    /// <summary>
    /// Get the parent node of a node
    /// </summary>
    /// <remarks>
    /// This is an expensive operation, as the whole tree will be searched through
    /// </remarks>
    /// <exception cref="ArgumentException">If no node of the tree has <paramref name="child"/> as a child</exception>
    public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
    {
        var parent = treeRoot.AllNodes()
            .FirstOrDefault(node => node.ChildNodes.Any(c => c == child));
        if(parent == null) throw new ArgumentException("Child is not part of the tree", nameof(child));
        return parent;
    }

    /// <summary>
    /// The node type/name
    /// </summary>
    public static string Type(this ParseTreeNode node)
    {
        return node.Term.Name;
    }

    /// <summary>
    /// Check if a node is of a particular type
    /// </summary>
    public static bool Is(this ParseTreeNode pt, string type)
    {
        return pt.Type() == type;
    }

    /// <summary>
    /// Checks whether this node is a function
    /// </summary>
    public static Boolean IsFunction(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.FunctionCall)
               || input.Is(GrammarNames.ReferenceFunctionCall)
               || input.Is(GrammarNames.UDFunctionCall)
               // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
               || (input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction())
            ;
    }

    /// <summary>
    /// Whether or not this node represents parentheses "(_)"
    /// </summary>
    public static bool IsParentheses(this ParseTreeNode input)
    {
        switch (input.Type())
        {
            case GrammarNames.Formula:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Formula);
            case GrammarNames.Reference:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Reference);
            default:
                return false;
        }
    }

    /// <summary>
    /// Whether this node is a function node with three children of which the middle one is an operator
    /// </summary>
    public static bool IsBinaryOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 3
               && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// Whether this node is a binary operation represented by a FunctionCall node
    /// </summary>
    public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);
    }

    /// <summary>
    /// Whether this node is a binary operation represented by a ReferenceFunctionCall node
    /// </summary>
    public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
    }

    /// <summary>
    /// Whether this node is a unary prefix or a unary postfix operation
    /// </summary>
    public static bool IsUnaryOperation(this ParseTreeNode input)
    {
        return IsUnaryPrefixOperation(input)
               || IsUnaryPostfixOperation(input);
    }

    /// <summary>
    /// Whether this node is a function node with two children of which the first one is an operator
    /// </summary>
    public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 2
               && input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// Whether this node is a function node with two children of which the second one is an operator
    /// </summary>
    public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 2
               && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    // Drops the last character; used to strip the "(" that function tokens end with
    private static string RemoveFinalSymbol(string input)
    {
        input = input.Substring(0, input.Length - 1);
        return input;
    }

    /// <summary>
    /// Get the function or operator name of this function call
    /// </summary>
    /// <remarks>
    /// Names of named functions are returned in upper case, without the opening parenthesis.
    /// </remarks>
    /// <exception cref="ArgumentException">If the node is not a function call</exception>
    public static string GetFunction(this ParseTreeNode input)
    {
        if (input.IsIntersection())
        {
            return GrammarNames.TokenIntersect;
        }
        if (input.IsUnion())
        {
            return GrammarNames.TokenUnionOperator;
        }
        if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
        {
            return input.ChildNodes[1].Print();
        }
        if (input.IsUnaryPrefixOperation())
        {
            return input.ChildNodes[0].Print();
        }
        if (input.IsNamedFunction())
        {
            // Invariant upper-casing: the result must not depend on the current culture
            // (e.g. a Turkish culture would turn "i" into a dotted capital I)
            return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
        }
        if (input.IsExternalUDFunction())
        {
            return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
        }

        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Check if this node is a specific function
    /// </summary>
    public static bool MatchFunction(this ParseTreeNode input, string functionName)
    {
        return IsFunction(input) && GetFunction(input) == functionName;
    }

    /// <summary>
    /// Get all the arguments of a function or operation
    /// </summary>
    /// <exception cref="ArgumentException">If the node is not a function call</exception>
    public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
    {
        if (input.IsNamedFunction())
        {
            return input
                .ChildNodes[1] // "Arguments" non-terminal
                .ChildNodes    // "Argument" non-terminals
                .Select(node => node.ChildNodes[0])
                ;
        }
        if (input.IsBinaryOperation())
        {
            return new[] {input.ChildNodes[0], input.ChildNodes[2]};
        }
        if (input.IsUnaryPrefixOperation())
        {
            return new[] {input.ChildNodes[1]};
        }
        if (input.IsUnaryPostfixOperation())
        {
            return new[] {input.ChildNodes[0]};
        }
        if (input.IsUnion())
        {
            return input.ChildNodes[0].ChildNodes;
        }
        if (input.IsExternalUDFunction())
        {
            return input       // Reference
                .ChildNodes[1] // UDFunctionCall
                .ChildNodes[1] // Arguments
                .ChildNodes    // Argument non-terminals
                .Select(node => node.ChildNodes[0])
                ;
        }
        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Checks whether this node is a built-in excel function
    /// </summary>
    public static bool IsBuiltinFunction(this ParseTreeNode node)
    {
        return node.IsFunction() && (node.ChildNodes[0].Is(GrammarNames.FunctionName) || node.ChildNodes[0].Is(GrammarNames.RefFunctionName));
    }

    /// <summary>
    /// Whether or not this node represents an intersection
    /// </summary>
    public static bool IsIntersection(this ParseTreeNode input)
    {
        return IsBinaryOperation(input) &&
               input.ChildNodes[1].Token.Terminal.Name == GrammarNames.TokenIntersect;
    }

    /// <summary>
    /// Whether or not this node represents a union
    /// </summary>
    public static bool IsUnion(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.ReferenceFunctionCall)
               && input.ChildNodes.Count == 1
               && input.ChildNodes[0].Is(GrammarNames.Union);
    }

    /// <summary>
    /// Checks whether this node is a function call with name, and not just a unary or binary operation
    /// </summary>
    public static bool IsNamedFunction(this ParseTreeNode input)
    {
        return (input.Is(GrammarNames.FunctionCall) && input.ChildNodes[0].Is(GrammarNames.FunctionName))
               || (input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes[0].Is(GrammarNames.RefFunctionName))
               || input.Is(GrammarNames.UDFunctionCall);
    }

    /// <summary>
    /// Whether this node is a binary or unary operation
    /// </summary>
    public static bool IsOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() || input.IsUnaryOperation();
    }

    /// <summary>
    /// Whether this node is a Reference with two children of which the second one is a named function
    /// </summary>
    public static bool IsExternalUDFunction(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsNamedFunction();
    }

    /// <summary>
    /// True if this node represents a number constant with a sign
    /// </summary>
    public static bool IsNumberWithSign(this ParseTreeNode input)
    {
        return IsUnaryPrefixOperation(input)
               && input.ChildNodes[1].ChildNodes[0].Is(GrammarNames.Constant)
               && input.ChildNodes[1].ChildNodes[0].ChildNodes[0].Is(GrammarNames.Number);
    }

    /// <summary>
    /// Extract all of the information from a Prefix non-terminal
    /// </summary>
    public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix) => PrefixInfo.From(prefix);

    /// <summary>
    /// Go to the first non-formula child node
    /// </summary>
    public static ParseTreeNode SkipFormula(this ParseTreeNode input)
    {
        while (input.Is(GrammarNames.Formula))
        {
            input = input.ChildNodes.First();
        }
        return input;
    }

    /// <summary>
    /// Get all child nodes that are references and aren't part of another reference expression
    /// </summary>
    public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
    {
        // The traversal stops descending at each Reference node, so nested references are not returned
        return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
            .Where(node => node.Is(GrammarNames.Reference))
            .Select(node => node.SkipToRelevant())
            ;
    }

    /// /// Gets the ParserReferences from the input parse tree node and its children /// /// /// 5 cases: /// 1. ReferenceItem node: convert to ParserReference /// 2.
Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node) /// (to include the references in the arguments of external UDFs) /// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange /// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences /// 5. Other cases (RefFunctionCall, Union, Arguments):recursive call on the nodes returned from GetReferenceNodes(node) /// public static IEnumerable GetParserReferences(this ParseTreeNode node) { if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1) node = node.ChildNodes[0]; var list = new List(); switch (node.Type()) { case GrammarNames.Cell: case GrammarNames.NamedRange: case GrammarNames.HorizontalRange: case GrammarNames.VerticalRange: case GrammarNames.StructuredReference: list.Add(new ParserReference(node)); break; case GrammarNames.Reference: list.Add(new ParserReference(node)); list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences())); break; default: if (node.IsRange()) { var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray(); var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray(); if (IsCellReference(rangeStart) && IsCellReference(rangeEnd)) { ParserReference range = rangeStart.First(); range.MaxLocation = rangeEnd.First().MinLocation; range.ReferenceType = ReferenceType.CellRange; range.LocationString = node.Print(); list.Add(range); } else if (IsTableReference(rangeStart) && IsTableReference(rangeEnd) && rangeStart.First().Name == rangeEnd.First().Name && rangeStart.First().TableColumns.Length == 1 && rangeEnd.First().TableColumns.Length == 1) { ParserReference range = rangeStart.First(); range.TableColumns = rangeStart.First().TableColumns.Concat(rangeEnd.First().TableColumns).ToArray(); range.TableSpecifiers = 
rangeStart.First().TableSpecifiers.SequenceEqual(rangeEnd.First().TableSpecifiers) ? range.TableSpecifiers : new string[0]; list.Add(range); } else { list.AddRange(rangeStart); list.AddRange(rangeEnd); } } else { list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences())); } break; } return list; } private static bool IsCellReference(IList references) { return references.Count == 1 && references.First().ReferenceType == ReferenceType.Cell; } private static bool IsTableReference(IList references) { return references.Count == 1 && references.First().ReferenceType == ReferenceType.Table; } /// /// Whether or not this node represents a range /// public static bool IsRange(this ParseTreeNode input) { return input.IsBinaryReferenceOperation() && input.ChildNodes[1].Is(":"); } /// /// Go to the first "relevant" child node, i.e. skips wrapper nodes /// /// The input parse tree node /// If true, skip all reference nodes without a prefix instead of only parentheses /// /// Skips: /// * FormulaWithEq and ArrayFormula nodes /// * Formula nodes /// * Parentheses /// * Reference nodes which are just wrappers /// public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false) { while (true) { switch (input.Type()) { case GrammarNames.FormulaWithEq: case GrammarNames.ArrayFormula: input = input.ChildNodes[1]; break; case GrammarNames.Argument: case GrammarNames.Formula: if (input.ChildNodes.Count == 1) { input = input.ChildNodes[0]; } else { return input; } break; case GrammarNames.Reference: // Skip references which are parentheses // Skip references without a prefix (=> they only have one child node) if the option is set if ((skipReferencesWithoutPrefix && input.ChildNodes.Count == 1) || input.IsParentheses()) { input = input.ChildNodes[0]; } else { return input; } break; default: return input; } } } /// /// Pretty-print a parse tree to a string /// public static string Print(this ParseTreeNode input) { // For 
terminals, just print the token text if (input.Term is Terminal) { return input.Token.Text; } // (Lazy) enumerable for printed children var children = input.ChildNodes.Select(Print); // Concrete list when needed List childrenList; // Switch on non-terminals switch (input.Term.Name) { case GrammarNames.Formula: // Check if these are brackets, otherwise print first child return IsParentheses(input) ? $"({children.First()})" : children.First(); case GrammarNames.FunctionCall: case GrammarNames.ReferenceFunctionCall: case GrammarNames.UDFunctionCall: childrenList = children.ToList(); if (input.IsNamedFunction()) { return string.Join("", childrenList) + ")"; } if (input.IsBinaryOperation()) { // format string for "normal" binary operation string format = "{0}{1}{2}"; if (input.IsIntersection()) { format = "{0} {2}"; } return string.Format(format, childrenList[0], childrenList[1], childrenList[2]); } if (input.IsUnion()) { return $"({string.Join(",", childrenList)})"; } if (input.IsUnaryOperation()) { return string.Join("", childrenList); } throw new ArgumentException("Unknown function type."); case GrammarNames.Reference: return IsParentheses(input) ? 
$"({children.First()})" : string.Concat(children); case GrammarNames.Prefix: var ret = string.Join("", children); // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File)) { ret += "!"; } return ret; case GrammarNames.ArrayFormula: return "{=" + children.ElementAt(1) + "}"; // Terms for which to print all child nodes concatenated case GrammarNames.ArrayConstant: case GrammarNames.DynamicDataExchange: case GrammarNames.FormulaWithEq: case GrammarNames.File: case GrammarNames.MultiRangeFormula: case GrammarNames.StructuredReference: case GrammarNames.StructuredReferenceColumn: case GrammarNames.StructuredReferenceExpression: case GrammarNames.StructuredReferenceSpecifier: return string.Join("", children); // Terms for which we print the children comma-separated case GrammarNames.Arguments: case GrammarNames.ArrayRows: case GrammarNames.Union: return string.Join(",", children); case GrammarNames.ArrayColumns: return string.Join(";", children); case GrammarNames.ConstantArray: return $"{{{children.First()}}}"; default: // If it is not defined above and the number of children is exactly one, we want to just print the first child if (input.ChildNodes.Count == 1) { return children.First(); } throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." 
+ Environment.NewLine + "This probably means the Excel grammar was modified without the print function being modified"); } } } }

================================================
FILE: app/XLParser.Web/XLParserVersions/v161/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v161
{
    /// <summary>
    /// The kinds of reference a <see cref="ParserReference"/> can describe
    /// </summary>
    public enum ReferenceType
    {
        Cell,
        CellRange,
        UserDefinedName,
        HorizontalRange,
        VerticalRange,
        RefError,
        Table
    }

    /// <summary>
    /// Describes one reference found in a formula: its type, its location and the
    /// sheet/file information taken from its prefix.
    /// </summary>
    public class ParserReference
    {
        public const int MaxRangeHeight = 1048576;
        public const int MaxRangeWidth = 16384;

        public ReferenceType ReferenceType { get; set; }
        public string LocationString { get; set; }
        public string Worksheet { get; set; }
        public string LastWorksheet { get; set; }
        public string FilePath { get; set; }
        public string FileName { get; set; }
        public string Name { get; private set; }
        public string MinLocation { get; set; } //Location as appearing in the formula, eg $A$1
        public string MaxLocation { get; set; }
        public string[] TableSpecifiers { get; set; }
        public string[] TableColumns { get; set; }

        /// <summary>
        /// Creates a reference from explicit values.
        /// When <paramref name="maxLocation"/> is null, <paramref name="minLocation"/> is used for both limits.
        /// </summary>
        public ParserReference(ReferenceType referenceType, string locationString = null, string worksheet = null, string lastWorksheet = null,
            string filePath = null, string fileName = null, string name = null, string minLocation = null, string maxLocation = null,
            string[] tableSpecifiers = null, string[] tableColumns = null)
        {
            ReferenceType = referenceType;
            LocationString = locationString;
            Worksheet = worksheet;
            LastWorksheet = lastWorksheet;
            FilePath = filePath;
            FileName = fileName;
            Name = name;
            MinLocation = minLocation;
            MaxLocation = maxLocation ?? minLocation;
            TableColumns = tableColumns;
            TableSpecifiers = tableSpecifiers;
        }

        /// <summary>
        /// Creates a reference from a parse tree node, see <see cref="InitializeReference"/>
        /// </summary>
        public ParserReference(ParseTreeNode node)
        {
            InitializeReference(node);
        }

        /// <summary>
        /// Initializes the current object based on the input ParseTreeNode
        /// </summary>
        /// <remarks>
        /// For Reference nodes (Prefix ReferenceItem), it initialize the values derived from the Prefix node and
        /// is re-invoked for the ReferenceItem node.
        /// Node types not listed in the switch only get their <see cref="LocationString"/> set.
        /// </remarks>
        public void InitializeReference(ParseTreeNode node)
        {
            switch (node.Type())
            {
                case GrammarNames.Reference:
                    PrefixInfo prefix = node.ChildNodes[0].GetPrefixInfo();
                    // Doubled quotes in a sheet name stand for a single quote
                    Worksheet = prefix.HasSheet ? prefix.Sheet.Replace("''", "'") : "(Undefined sheet)";

                    if (prefix.HasMultipleSheets)
                    {
                        string[] sheets = prefix.MultipleSheets.Split(':');
                        Worksheet = sheets[0];
                        LastWorksheet = sheets[1];
                    }

                    if (prefix.HasFilePath)
                    {
                        FilePath = prefix.FilePath;
                    }

                    // A numeric file reference takes precedence over a file name
                    if (prefix.HasFileNumber)
                    {
                        FileName = prefix.FileNumber.ToString();
                    }
                    else if (prefix.HasFileName)
                    {
                        FileName = prefix.FileName;
                    }

                    InitializeReference(node.ChildNodes[1]);
                    break;

                case GrammarNames.Cell:
                    ReferenceType = ReferenceType.Cell;
                    MinLocation = node.ChildNodes[0].Token.ValueString;
                    MaxLocation = MinLocation;
                    break;

                case GrammarNames.NamedRange:
                    ReferenceType = ReferenceType.UserDefinedName;
                    Name = node.ChildNodes[0].Token.ValueString;
                    break;

                case GrammarNames.StructuredReference:
                    ReferenceType = ReferenceType.Table;
                    // The table name is optional; Name stays null when there is no qualifier
                    Name = node.ChildNodes.FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceQualifier)?.ChildNodes[0].Token.ValueString;
                    TableSpecifiers = node.AllNodes().Where(x => x.Is(GrammarNames.TokenSRSpecifier) || x.Is("@")).Select(x => UnEscape(x.Token.ValueString, "'")).ToArray();
                    TableColumns = node.AllNodes().Where(x => x.Is(GrammarNames.TokenSRColumn)).Select(x => UnEscape(x.Token.ValueString, "'")).ToArray();
                    break;

                case GrammarNames.HorizontalRange:
                    string[] horizontalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                    ReferenceType = ReferenceType.HorizontalRange;
                    MinLocation = horizontalLimits[0];
                    MaxLocation = horizontalLimits[1];
                    break;

                case GrammarNames.VerticalRange:
                    string[] verticalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                    ReferenceType = ReferenceType.VerticalRange;
                    MinLocation = verticalLimits[0];
                    MaxLocation = verticalLimits[1];
                    break;

                case GrammarNames.RefError:
                    ReferenceType = ReferenceType.RefError;
                    break;
            }

            // Runs for every node, so for a Reference the outer (prefixed) text wins over the inner item's
            LocationString = node.Print();
        }

        /// <summary>
        /// Removes every occurrence of <paramref name="escapeCharacter"/> that is not directly
        /// followed by another <paramref name="escapeCharacter"/>.
        /// </summary>
        private static string UnEscape(string value, string escapeCharacter)
        {
            // Escape the literal so that regex metacharacters in it are not interpreted as a pattern
            string literal = System.Text.RegularExpressions.Regex.Escape(escapeCharacter);
            return System.Text.RegularExpressions.Regex.Replace(value, $"{literal}(?!{literal})", "");
        }

        /// <summary>
        /// Converts the column number to an Excel column string representation.
        /// </summary>
        /// <param name="columnNumber">The zero-based column number.</param>
        private string ConvertColumnToStr(int columnNumber)
        {
            var sb = new System.Text.StringBuilder();
            while (columnNumber >= 0)
            {
                // 65 is 'A'; letters are prepended from least to most significant
                sb.Insert(0, (char)(65 + columnNumber % 26));
                columnNumber = columnNumber / 26 - 1;
            }

            return sb.ToString();
        }

        /// <summary>
        /// Returns the location for cells and "min:max" for every other reference type
        /// </summary>
        public override string ToString()
        {
            // MinLocation may be null (e.g. when built through the value constructor), so do not call into it
            return ReferenceType == ReferenceType.Cell
                ? (MinLocation ?? string.Empty)
                : string.Format("{0}:{1}", MinLocation, MaxLocation);
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v161/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v161
{
    // TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23

    /// <summary>
    /// Simple data class that holds information about a Prefix.
    /// </summary>
    public class PrefixInfo : IEquatable<PrefixInfo>
    {
        public string FilePath { get; }
        public bool HasFilePath => FilePath != null;

        private readonly int?
_fileNumber;
        public int FileNumber => _fileNumber.GetValueOrDefault();
        public bool HasFileNumber => _fileNumber.HasValue;

        public string FileName { get; }
        public bool HasFileName => FileName != null;

        public bool HasFile => HasFileName || HasFileNumber;

        public string Sheet { get; }
        public bool HasSheet => Sheet != null;

        public string MultipleSheets { get; }
        public bool HasMultipleSheets => MultipleSheets != null;

        public bool IsQuoted { get; }

        public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
        {
            Sheet = sheet;
            _fileNumber = fileNumber;
            FileName = fileName;
            FilePath = filePath;
            MultipleSheets = multipleSheets;
            IsQuoted = isQuoted;
        }

        /// <summary>
        /// Create a PrefixInfo class from a parse tree node
        /// </summary>
        /// <exception cref="ArgumentException"><paramref name="prefix"/> is not a Prefix node</exception>
        internal static PrefixInfo From(ParseTreeNode prefix)
        {
            if (prefix.Type() != GrammarNames.Prefix)
            {
                throw new ArgumentException("Not a prefix", nameof(prefix));
            }

            string filePath = null;
            int? fileNumber = null;
            string fileName = null;
            string sheetName = null;
            string multipleSheets = null;

            var children = prefix.ChildNodes;

            // Token number we're processing
            var cur = 0;

            // Check for quotes
            var isQuoted = cur < children.Count && children[cur].Is("'");
            if (isQuoted)
            {
                cur++;
            }

            // Check and process file
            if (cur < children.Count && children[cur].Is(GrammarNames.File))
            {
                ParseTreeNode file = children[cur];

                if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
                {
                    // Numeric filename, strip the first and last character before parsing
                    fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
                }
                else
                {
                    // String filename
                    var iCur = 0;
                    // Check if it includes a path
                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                    {
                        filePath = file.ChildNodes[iCur].Print();
                        iCur++;
                    }

                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                    {
                        fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                    }
                    else
                    {
                        fileName = file.ChildNodes[iCur].Print();
                    }
                }

                cur++;
            }

            // A prefix can consist of a file only; in that case there is no sheet node left to inspect
            if (cur < children.Count)
            {
                ParseTreeNode sheetNode = children[cur];

                // Check for a non-quoted sheet
                if (sheetNode.Is(GrammarNames.TokenSheet))
                {
                    // remove !
                    sheetName = Substr(sheetNode.Print(), 1);
                }
                // Check for a quoted sheet
                else if (sheetNode.Is(GrammarNames.TokenSheetQuoted))
                {
                    // remove quote and !
                    sheetName = Substr(sheetNode.Print(), 2);

                    if (sheetName == "")
                    {
                        // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                        // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                        sheetName = " ";
                    }
                }
                // Check if multiple sheets
                else if (sheetNode.Is(GrammarNames.TokenMultipleSheets))
                {
                    multipleSheets = Substr(sheetNode.Print(), 1);
                }
            }

            return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
        }

        /// <summary>
        /// Extends the span and token text of a quoted sheet node backwards so that it includes
        /// the whitespace directly preceding it in <paramref name="sourceText"/>.
        /// </summary>
        internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
        {
            var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
            SourceLocation currentLocation = quotedSheetNode.Span.Location;
            if (newPosition == currentLocation.Position)
            {
                return;
            }

            // The start moves back by `shift` characters, so the column has to move back by the same amount.
            // Clamped at 0 in case the skipped whitespace crossed a line boundary.
            var shift = currentLocation.Position - newPosition;
            var newColumn = Math.Max(0, currentLocation.Column - shift);

            var newLocation = new SourceLocation(newPosition, currentLocation.Line, newColumn);
            quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

            // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
            typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
                ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
        }

        /// <summary>
        /// Walks back from the node's start position over all directly preceding whitespace
        /// and returns the resulting index.
        /// </summary>
        private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
        {
            var startIndex = nodeSheetQuoted.Span.Location.Position;
            while (startIndex > 0 && char.IsWhiteSpace(sourceText[startIndex - 1]))
            {
                startIndex--;
            }

            return startIndex;
        }

        /// <summary>
        /// Returns <paramref name="s"/> without its first <paramref name="removeFirst"/> and
        /// last <paramref name="removeLast"/> characters.
        /// </summary>
        private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
        {
            return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
        }

        public override bool Equals(object other) => Equals(other as PrefixInfo);

        /// <summary>
        /// Two prefixes are equal when file number, path, file name, sheet and multiple sheets match;
        /// strings are compared case-insensitively and <see cref="IsQuoted"/> is not taken into account.
        /// </summary>
        public bool Equals(PrefixInfo other)
        {
            if (ReferenceEquals(null, other)) return false;
            if (ReferenceEquals(this, other)) return true;

            return _fileNumber == other._fileNumber
                   && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Hashes the same members, with the same case-insensitive comparer, as <see cref="Equals(PrefixInfo)"/>
        /// </summary>
        public override int GetHashCode()
        {
            unchecked
            {
                var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
                hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
                hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
                hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
                hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
                return hashCode;
            }
        }

        public static bool operator ==(PrefixInfo left, PrefixInfo right)
        {
            return Equals(left, right);
        }

        public static bool operator !=(PrefixInfo left, PrefixInfo right)
        {
            return !Equals(left, right);
        }

        /// <summary>
        /// Rebuilds the prefix text: optional quote, path, bracketed file number and/or name,
        /// sheet(s), optional closing quote and a final exclamation mark.
        /// </summary>
        public override string ToString()
        {
            var res = new StringBuilder();
            if (IsQuoted) res.Append("'");
            if (HasFilePath) res.Append(FilePath);
            if (HasFileNumber) res.Append($"[{FileNumber}]");
            if (HasFileName) res.Append($"[{FileName}]");
            if (HasSheet) res.Append(Sheet);
            if (HasMultipleSheets) res.Append(MultipleSheets);
            if (IsQuoted) res.Append("'");
            res.Append("!");
            return res.ToString();
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v162/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;

namespace XLParser.Web.XLParserVersions.v162
{
    /// <summary>
    /// Contains the XLParser grammar
    /// </summary>
    [Language("Excel Formulas", "1.6.2", "Grammar for Excel Formulas")]
    public class ExcelFormulaGrammar : Grammar
    {
        #region 1-Terminals

        #region Symbols and operators

        public Terminal at => ToTerm("@");
        public Terminal comma => ToTerm(",");
        public Terminal colon => ToTerm(":");
        public Terminal hash => ToTerm("#");
        public Terminal semicolon => ToTerm(";");
        public Terminal OpenParen => ToTerm("(");
        public Terminal CloseParen => ToTerm(")");
        public Terminal CloseSquareParen => ToTerm("]");
        public Terminal OpenSquareParen => ToTerm("[");
        public Terminal exclamationMark => ToTerm("!");
        public Terminal CloseCurlyParen => ToTerm("}");
        public Terminal OpenCurlyParen => ToTerm("{");
        public Terminal QuoteS => ToTerm("'");

        public Terminal mulop => ToTerm("*");
        public Terminal plusop => ToTerm("+");
        public Terminal divop => ToTerm("/");
        public Terminal minop => ToTerm("-");
        public Terminal concatop =>
ToTerm("&"); public Terminal expop => ToTerm("^"); // Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal // Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect); public Terminal percentop => ToTerm("%"); public Terminal gtop => ToTerm(">"); public Terminal eqop => ToTerm("="); public Terminal ltop => ToTerm("<"); public Terminal neqop => ToTerm("<>"); public Terminal gteop => ToTerm(">="); public Terminal lteop => ToTerm("<="); #endregion #region Literals public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE", "T", "F") { Priority = TerminalPriority.Bool }; public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None) { DefaultIntTypes = new[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt } }; public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes); public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes) { Priority = TerminalPriority.SingleQuotedString }; public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!", "#"); public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError); #endregion #region Functions private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers private const string AllUdfChars = SpecialUdfChars + @"\\.\w"; private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)"; // The 
following regex uses the rather exotic feature Character Class Subtraction // https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\("; public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) { Priority = TerminalPriority.UDF }; public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(", "I", "O") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(", "I", "C") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelFunction { get; } = new WordsTerminal(GrammarNames.ExcelFunction, excelFunctionList.Select(f => f + '(')) { Priority = TerminalPriority.ExcelFunction }; // Using this instead of Empty allows a more accurate tree public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument); #endregion #region References and names private const string ColumnPattern = @"(?:[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D]|[A-Z]{1,2})"; private static readonly string[] ColumnPrefix = Enumerable.Range('A', 'Z' - 'A' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray(); private static readonly string[] RowPrefix = Enumerable.Range('1', '9' - '1' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray(); public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" 
+ ColumnPattern, ColumnPrefix); public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*", RowPrefix); private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?[1-9][0-9]*"; public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex, ColumnPrefix) { Priority = TerminalPriority.CellToken }; private static readonly HashSet UnicodeLetterCategories = new HashSet { UnicodeCategory.UppercaseLetter, UnicodeCategory.LowercaseLetter, UnicodeCategory.TitlecaseLetter, UnicodeCategory.ModifierLetter, UnicodeCategory.OtherLetter }; // 48718 letters, but it allows parser to from tokens starting with digits, parentheses, operators... private static readonly string[] UnicodeLetters = Enumerable.Range(0, ushort.MaxValue).Where(codePoints => UnicodeLetterCategories.Contains(CharUnicodeInfo.GetUnicodeCategory((char)codePoints))).Select(codePoint => char.ToString((char)codePoint)).ToArray(); private static readonly string[] NameStartCharPrefix = UnicodeLetters.Concat(new[] { @"\", "_" }).ToArray(); // Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark private const string NameStartCharRegex = @"[\p{L}\\_]"; private const string NameValidCharacterRegex = @"[\w\\_\.\?€]"; public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*", NameStartCharPrefix) { Priority = TerminalPriority.Name }; // Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference. 
// If we ever parse R1C1 references, make sure to include them here // TODO: Add all function names here private const string NameInvalidWordsRegex = "((TRUE|FALSE)" + NameValidCharacterRegex + "+)" // \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token + "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)" ; // To prevent e.g. "A1A1" being parsed as 2 cell tokens public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NameInvalidWordsRegex, ColumnPrefix.Concat(new[] { "T", "F" }).ToArray()) { Priority = TerminalPriority.NamedRangeCombination }; public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+", "_") { Priority = TerminalPriority.ReservedName }; #region Structured References private const string SRSpecifierRegex = @"#(All|Data|Headers|Totals|This Row)"; public Terminal SRSpecifierToken = new RegexBasedTerminal(GrammarNames.TokenSRSpecifier, SRSpecifierRegex, "#") { Priority = TerminalPriority.StructuredReference }; private const string SRColumnRegex = @"(?:[^\[\]'#@]|(?:'['\[\]#@]))+"; public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, SRColumnRegex) { Priority = TerminalPriority.StructuredReference }; #endregion #region Prefixes private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, "; private const string notSheetNameChars = @"'*\[\]\\:/?"; //const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" 
+ "\\\""; //const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!"; private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+"; private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*"; //private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!"; public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!") { Priority = TerminalPriority.SheetToken }; public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!") { Priority = TerminalPriority.SheetQuotedToken }; private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!"; private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!"; public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex) { Priority = TerminalPriority.MultipleSheetsToken }; public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex) { Priority = TerminalPriority.MultipleSheetsToken }; private const string fileNameNumericRegex = @"\[[0-9]+\](?=[^\[\]]*!)"; public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex, "[") { Priority = TerminalPriority.FileNameNumericToken }; private const string fileNameInBracketsRegex = @"\[[^\[\]]+\](?=[^\[\]]*!)"; public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex, "[") { Priority = TerminalPriority.FileName }; // Source: https://stackoverflow.com/a/14632579 private const string fileNameRegex = @"[^\.\\\[\]]+\..{1,4}"; public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex) { Priority = TerminalPriority.FileName }; 
// Source: http://stackoverflow.com/a/6416209/572635
private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\-.$ @]+)\\(([^<>\"" /\|?*\\']|( |''))*\\)*";
// NOTE(review): in the port part "(:(0-9)*)*" the group "(0-9)" matches the literal text "0-9",
// not a digit, so a URL with a numeric port such as ":8080" does not match. "[0-9]" was
// probably intended. Left untouched here because changing it changes what lexes as FilePathToken.
private const string urlPathRegex = @"http(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:(0-9)*)*[/]([a-zA-Z0-9\-\.\?\,\'+&%\$#_ ()]*[/])*";
// Either a Windows path or a URL path.
private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")";
public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex) { Priority = TerminalPriority.FileNamePath };

#endregion

#endregion

#endregion

#region 2-NonTerminals
// Most non-terminals are first defined here, so they can be used anywhere in the rules
// Otherwise you can only use non-terminals that have been defined previously

public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument);
public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments);
public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns);
public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant);
public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula);
public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows);
public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool);
public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell);
public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant);
public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray);
public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument);
public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error);
public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula);
public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq);
public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall);
public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName);
public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange);
public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp);
public NonTerminal MultiRangeFormula{ get; } = new NonTerminal(GrammarNames.MultiRangeFormula);
public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange);
public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number);
public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix);
public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference);
public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError);
public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName);
public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName);
public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet);
public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart);
public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
public NonTerminal StructuredReferenceColumn { get; } = new NonTerminal(GrammarNames.StructuredReferenceColumn);
public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
public NonTerminal StructuredReferenceSpecifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceSpecifier);
public NonTerminal StructuredReferenceQualifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceQualifier);
public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);

#endregion

// Builds the grammar: marks punctuation, sets the root, assigns a rule to the
// non-terminals declared above, and registers operator precedence/associativity.
public ExcelFormulaGrammar()
{
    #region Punctuation
    // Parentheses and curly braces are marked as punctuation.
    MarkPunctuation(OpenParen, CloseParen);
    MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
    #endregion

    #region Rules

    #region Base rules
    Root = Start;

    // Start is transient; it accepts a formula with or without leading '=',
    // a {=...} array formula, or '=' followed by a union.
    Start.Rule =
        FormulaWithEq
        | Formula
        | ArrayFormula
        | MultiRangeFormula
        ;
    MarkTransient(Start);

    ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;

    MultiRangeFormula.Rule = eqop + Union;

    FormulaWithEq.Rule = eqop + Formula;

    Formula.Rule =
        Reference + ReduceHere()
        | Constant
        | FunctionCall
        | ConstantArray
        | OpenParen + Formula + CloseParen
        | ReservedName
        ;

    ReservedName.Rule = ReservedNameToken;

    Constant.Rule =
        Number
        | Text
        | Bool
        | Error
        ;

    Text.Rule = TextToken;
    Number.Rule = NumberToken;
    Bool.Rule = BoolToken;
    Error.Rule = ErrorToken;
    RefError.Rule = RefErrorToken;
    #endregion

    #region Functions

    // A FunctionCall is either a named call or a prefix/postfix/infix operation.
    FunctionCall.Rule =
        FunctionName + Arguments + CloseParen
        | PrefixOp + Formula
        | Formula + PostfixOp
        | Formula + InfixOp + Formula
        ;

    FunctionName.Rule = ExcelFunction;

    Arguments.Rule = MakeStarRule(Arguments, comma, Argument);

    EmptyArgument.Rule = EmptyArgumentToken;
    Argument.Rule = Formula | EmptyArgument;

    PrefixOp.Rule =
        ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
        | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
        | ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
    MarkTransient(PrefixOp);

    InfixOp.Rule =
        expop
        | mulop
        | divop
        | plusop
        | minop
        | concatop
        | gtop
        | eqop
        | ltop
        | neqop
        | gteop
        | lteop;
    MarkTransient(InfixOp);

    // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
    PostfixOp.Rule = PreferShiftHere() + percentop;
    MarkTransient(PostfixOp);
    #endregion

    #region References

    Reference.Rule =
        ReferenceItem
        | ReferenceFunctionCall
        | OpenParen + Reference + PreferShiftHere() + CloseParen
        | Prefix + ReferenceItem
        | DynamicDataExchange
        ;

    // Range (:), intersection, parenthesised union, reference-returning function, and the '#' postfix.
    ReferenceFunctionCall.Rule =
        Reference + colon + Reference
        | Reference + intersectop + Reference
        | OpenParen + Union + CloseParen
        | RefFunctionName + Arguments + CloseParen
        | Reference + hash
        ;

    RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;

    Union.Rule = MakePlusRule(Union, comma, Reference);

    ReferenceItem.Rule =
        Cell
        | NamedRange
        | VRange
        | HRange
        | RefError
        | UDFunctionCall
        | StructuredReference
        ;
    MarkTransient(ReferenceItem);

    UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
    UDFName.Rule = UDFToken;

    VRange.Rule = VRangeToken;
    HRange.Rule = HRangeToken;

    Cell.Rule = CellToken;

    File.Rule =
        FileNameNumericToken
        | FileNameEnclosedInBracketsToken
        | FilePathToken + FileNameEnclosedInBracketsToken
        | FilePathToken + FileName
        ;

    DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;

    NamedRange.Rule = NameToken | NamedRangeCombinationToken;

    Prefix.Rule =
        SheetToken
        | QuoteS + SheetQuotedToken
        | File + SheetToken
        | QuoteS + File + SheetQuotedToken
        | File + exclamationMark
        | MultipleSheetsToken
        | QuoteS + MultipleSheetsQuotedToken
        | File + MultipleSheetsToken
        | QuoteS + File + MultipleSheetsQuotedToken
        | RefErrorToken
        ;

    StructuredReferenceQualifier.Rule = NameToken;

    StructuredReferenceSpecifier.Rule =
        SRSpecifierToken
        | OpenSquareParen + SRSpecifierToken + CloseSquareParen;

    StructuredReferenceColumn.Rule =
        SRColumnToken
        | OpenSquareParen + SRColumnToken + CloseSquareParen;

    // Every accepted combination is spelled out: column, column range, '@' forms,
    // and one or two specifiers optionally followed by a column or column range.
    StructuredReferenceExpression.Rule =
        StructuredReferenceColumn
        | StructuredReferenceColumn + colon + StructuredReferenceColumn
        | at + StructuredReferenceColumn
        | at + StructuredReferenceColumn + colon + StructuredReferenceColumn
        | StructuredReferenceSpecifier
        | StructuredReferenceSpecifier + comma + StructuredReferenceColumn
        | StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
        | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier
        | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn
        | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
        ;

    StructuredReference.Rule =
        OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
        | StructuredReferenceQualifier + OpenSquareParen + CloseSquareParen
        | StructuredReferenceQualifier + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
        ;
    #endregion

    #region Arrays
    ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;

    // ArrayColumns is a ';'-separated list of ArrayRows; ArrayRows is a ','-separated list of ArrayConstant.
    ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
    ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);

    ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
    #endregion

    #endregion

    #region 5-Operator Precedence
    // Some of these operators are neutral associative instead of left associative,
    // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
    // structure of the parse tree, we like consistency.
    RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
    RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
    RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
    RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
    RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
    RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
    RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at);
    RegisterOperators(Precedence.Union, Associativity.Left, comma);
    RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
    RegisterOperators(Precedence.Range, Associativity.Left, colon);
    #endregion
}

#region Precedence and Priority constants
// Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
// Could also be an enum, but this way you don't need int casts
private static class Precedence
{
    // Don't use priority 0, Irony seems to view it as no priority set
    public const int Comparison = 1;
    public const int Concatenation = 2;
    public const int Addition = 3;
    public const int Multiplication = 4;
    public const int Exponentiation = 5;
    public const int UnaryPostFix = 6;
    public const int UnaryPreFix = 7;
    //public const int Reference = 8;
    public const int Union = 9;
    public const int Intersection = 10;
    public const int Range = 11;
}

// Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
// E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
// E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
private static class TerminalPriority
{
    // Irony Low value
    //public const int Low = -1000;

    public const int Name = -800;
    public const int ReservedName = -700;

    // StructuredReference and FileName share the same value.
    public const int StructuredReference = -500;

    public const int FileName = -500;
    public const int FileNamePath = -800;

    public const int SingleQuotedString = -100;

    // Irony Normal value, default value
    //public const int Normal = 0;
    public const int Bool = 0;

    public const int MultipleSheetsToken = 100;

    // Irony High value
    //public const int High = 1000;

    public const int CellToken = 1000;

    public const int NamedRangeCombination = 1100;

    public const int UDF = 1150;

    public const int ExcelFunction = 1200;
    public const int ExcelRefFunction = 1200;
    public const int FileNameNumericToken = 1200;
    public const int SheetToken = 1200;
    public const int SheetQuotedToken = 1200;
}
#endregion

// Expression-bodied property: every read calls GetExcelFunctionList() again.
private static string[] excelFunctionList => GetExcelFunctionList();

// Reads the ExcelBuiltinFunctionList_v162 resource string and splits it on '\n' and '\r',
// dropping empty entries, so each remaining line becomes one array element.
private static string[] GetExcelFunctionList()
{
    var resource = Properties.Resources.ExcelBuiltinFunctionList_v162;
    using (var sr = new StringReader(resource))
        return sr.ReadToEnd().Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
}
}

#region Names
/// <summary>
/// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
/// </summary>
/// <remarks>
/// Using these is strongly recommended, as these will change when breaking changes occur.
/// It also allows you to see which code works on what grammar constructs.
/// </remarks>
// Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
    #region Non-Terminals
    // Names of the non-terminals. Most values equal the constant's own name;
    // HorizontalRange ("HRange") and VerticalRange ("VRange") are the exceptions.
    public const string Argument = "Argument";
    public const string Arguments = "Arguments";
    public const string ArrayColumns = "ArrayColumns";
    public const string ArrayConstant = "ArrayConstant";
    public const string ArrayFormula = "ArrayFormula";
    public const string ArrayRows = "ArrayRows";
    public const string Bool = "Bool";
    public const string Cell = "Cell";
    public const string Constant = "Constant";
    public const string ConstantArray = "ConstantArray";
    public const string DynamicDataExchange = "DynamicDataExchange";
    public const string EmptyArgument = "EmptyArgument";
    public const string Error = "Error";
    public const string ExcelFunction = "ExcelFunction";
    public const string File = "File";
    public const string Formula = "Formula";
    public const string FormulaWithEq = "FormulaWithEq";
    public const string FunctionCall = "FunctionCall";
    public const string FunctionName = "FunctionName";
    public const string HorizontalRange = "HRange";
    public const string MultiRangeFormula = "MultiRangeFormula";
    public const string NamedRange = "NamedRange";
    public const string Number = "Number";
    public const string Prefix = "Prefix";
    public const string QuotedFileSheet = "QuotedFileSheet";
    public const string Range = "Range";
    public const string Reference = "Reference";
    public const string ReferenceFunctionCall = "ReferenceFunctionCall";
    public const string RefError = "RefError";
    public const string RefFunctionName = "RefFunctionName";
    public const string ReservedName = "ReservedName";
    public const string Sheet = "Sheet";
    public const string StructuredReference = "StructuredReference";
    public const string StructuredReferenceColumn = "StructuredReferenceColumn";
    public const string StructuredReferenceExpression = "StructuredReferenceExpression";
    public const string StructuredReferenceSpecifier = "StructuredReferenceSpecifier";
    public const string StructuredReferenceQualifier = "StructuredReferenceQualifier";
    public const string Text = "Text";
    public const string UDFName = "UDFName";
    public const string UDFunctionCall = "UDFunctionCall";
    public const string Union = "Union";
    public const string VerticalRange = "VRange";

    #endregion

    #region Transient Non-Terminals
    // Names of the non-terminals that the grammar marks as transient.
    public const string TransientStart = "Start";
    public const string TransientInfixOp = "InfixOp";
    public const string TransientPostfixOp = "PostfixOp";
    public const string TransientPrefixOp = "PrefixOp";
    public const string TransientReferenceItem = "ReferenceItem";

    #endregion

    #region Terminals
    // Names of the terminals. Note that not every value follows the "<Name>Token" pattern:
    // TokenIntersect is "INTERSECT", TokenSingleQuotedString is "SingleQuotedString",
    // TokenUnionOperator is ",", and the sheet tokens use "SheetName..." values.
    public const string TokenBool = "BoolToken";
    public const string TokenCell = "CellToken";
    public const string TokenEmptyArgument = "EmptyArgumentToken";
    public const string TokenError = "ErrorToken";
    public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
    public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
    public const string TokenFilePath = "FilePathToken";
    public const string TokenFileName = "FileNameToken";
    public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
    public const string TokenFileNameNumeric = "FileNameNumericToken";
    public const string TokenHRange = "HRangeToken";
    public const string TokenIntersect = "INTERSECT";
    public const string TokenMultipleSheets = "MultipleSheetsToken";
    public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
    public const string TokenName = "NameToken";
    public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
    public const string TokenNumber = "NumberToken";
    public const string TokenRefError = "RefErrorToken";
    public const string TokenReservedName = "ReservedNameToken";
    public const string TokenSingleQuotedString = "SingleQuotedString";
    public const string TokenSheet = "SheetNameToken";
    public const string TokenSheetQuoted = "SheetNameQuotedToken";
    public const string TokenSRColumn = "SRColumnToken";
    public const string TokenSRSpecifier = "SRSpecifierToken";
    public const string TokenText = "TextToken";
    public const string TokenUDF = "UDFToken";
    public const string TokenUnionOperator = ",";
    public const string TokenVRange = "VRangeToken";

    #endregion

}
#endregion
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v162/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v162
{
    ///
    /// Excel formula parser
/// Contains the parser and utilities that operate directly on the parse tree, or make working with the parse tree easier. ///
public static class ExcelFormulaParser { /// /// Thread-local singleton parser instance /// [ThreadStatic] private static Parser _p; /// /// Thread-safe parser /// private static Parser P => _p ?? (_p = new Parser(new ExcelFormulaGrammar())); /// /// Parse a formula, return the the tree's root node /// /// The formula to be parsed. /// /// If formula could not be parsed /// /// Parse tree root node public static ParseTreeNode Parse(string input) { return ParseToTree(input).Root; } /// /// Parse a formula, return the the tree /// /// The formula to be parsed. /// /// If formula could not be parsed /// /// Parse tree public static ParseTree ParseToTree(string input) { var tree = P.Parse(input); if (tree.HasErrors()) { throw new ArgumentException("Failed parsing input <<" + input + ">>"); } var intersects = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenIntersect)); foreach (ParseTreeNode intersect in intersects) { var newLocation = new SourceLocation(intersect.Span.Location.Position - 1, intersect.Span.Location.Line, intersect.Span.Location.Column - 1); intersect.Span = new SourceSpan(newLocation, 1); } var quotedSheetNodes = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenSheetQuoted)); foreach (ParseTreeNode quotedSheetNode in quotedSheetNodes) { PrefixInfo.FixQuotedSheetNodeForWhitespace(quotedSheetNode, input); } return tree; } /// /// Non-terminal nodes in depth-first pre-order, with a conditional stop /// /// The root node /// Don't process the children of a node matching this predicate // inspiration taken from https://irony.codeplex.com/discussions/213938 public static IEnumerable AllNodesConditional(this ParseTreeNode root, Predicate stopAt = null) { var stack = new Stack(); stack.Push(root); while (stack.Count > 0) { var node = stack.Pop(); yield return node; // Check if we don't want to process the children of this node if (stopAt != null && stopAt(node)) continue; var children = node.ChildNodes; // Push children on in reverse 
order so that they will // be evaluated left -> right when popped. for (int i = children.Count - 1; i >= 0; i--) { stack.Push(children[i]); } } } /// /// All non-terminal nodes in depth-first pre-order /// public static IEnumerable AllNodes(this ParseTreeNode root) { return AllNodesConditional(root); } /// /// All non-terminal nodes of a certain type in depth-first pre-order /// public static IEnumerable AllNodes(this ParseTreeNode root, string type) { return AllNodes(root.AllNodes(), type); } internal static IEnumerable AllNodes(IEnumerable allNodes, string type) { return allNodes.Where(node => node.Is(type)); } /// /// Get the parent node of a node /// /// /// This is an expensive operation, as the whole tree will be searched through /// public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot) { var parent = treeRoot.AllNodes() .FirstOrDefault(node => node.ChildNodes.Any(c => c == child)); if(parent == null) throw new ArgumentException("Child is not part of the tree", nameof(child)); return parent; } /// /// The node type/name /// public static string Type(this ParseTreeNode node) { return node.Term.Name; } /// /// Check if a node is of a particular type /// public static bool Is(this ParseTreeNode pt, string type) { return pt.Type() == type; } /// /// Checks whether this node is a function /// public static Boolean IsFunction(this ParseTreeNode input) { return input.Is(GrammarNames.FunctionCall) || input.Is(GrammarNames.ReferenceFunctionCall) || input.Is(GrammarNames.UDFunctionCall) // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable || (input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction()) ; } /// /// Whether or not this node represents parentheses "(_)" /// public static bool IsParentheses(this ParseTreeNode input) { switch (input.Type()) { case GrammarNames.Formula: return input.ChildNodes.Count == 1 && 
input.ChildNodes[0].Is(GrammarNames.Formula); case GrammarNames.Reference: return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Reference); default: return false; } } public static bool IsBinaryOperation(this ParseTreeNode input) { return input.IsFunction() && input.ChildNodes.Count == 3 && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator); } public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input) { return input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall); } public static bool IsBinaryReferenceOperation(this ParseTreeNode input) { return input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall); } public static bool IsUnaryOperation(this ParseTreeNode input) { return IsUnaryPrefixOperation(input) || IsUnaryPostfixOperation(input); } public static bool IsUnaryPrefixOperation(this ParseTreeNode input) { return input.IsFunction() && input.ChildNodes.Count == 2 && input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator); } public static bool IsUnaryPostfixOperation(this ParseTreeNode input) { return input.IsFunction() && input.ChildNodes.Count == 2 && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator); } private static string RemoveFinalSymbol(string input) { input = input.Substring(0, input.Length - 1); return input; } /// /// Get the function or operator name of this function call /// public static string GetFunction(this ParseTreeNode input) { if (input.IsIntersection()) { return GrammarNames.TokenIntersect; } if (input.IsUnion()) { return GrammarNames.TokenUnionOperator; } if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation()) { return input.ChildNodes[1].Print(); } if (input.IsUnaryPrefixOperation()) { return input.ChildNodes[0].Print(); } if (input.IsNamedFunction()) { return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpper(); } if (input.IsExternalUDFunction()) { return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}"; } throw new 
ArgumentException("Not a function call", nameof(input)); } /// /// Check if this node is a specific function /// public static bool MatchFunction(this ParseTreeNode input, string functionName) { return IsFunction(input) && GetFunction(input) == functionName; } /// /// Get all the arguments of a function or operation /// public static IEnumerable GetFunctionArguments(this ParseTreeNode input) { if (input.IsNamedFunction()) { return input .ChildNodes[1] // "Arguments" non-terminal .ChildNodes // "Argument" non-terminals .Select(node => node.ChildNodes[0]) ; } if (input.IsBinaryOperation()) { return new[] {input.ChildNodes[0], input.ChildNodes[2]}; } if (input.IsUnaryPrefixOperation()) { return new[] {input.ChildNodes[1]}; } if (input.IsUnaryPostfixOperation()) { return new[] {input.ChildNodes[0]}; } if (input.IsUnion()) { return input.ChildNodes[0].ChildNodes; } if (input.IsExternalUDFunction()) { return input // Reference .ChildNodes[1] // UDFunctionCall .ChildNodes[1] // Arguments .ChildNodes // Argument non-terminals .Select(node => node.ChildNodes[0]) ; } throw new ArgumentException("Not a function call", nameof(input)); } /// /// Checks whether this node is a built-in excel function /// public static bool IsBuiltinFunction(this ParseTreeNode node) { return node.IsFunction() && (node.ChildNodes[0].Is(GrammarNames.FunctionName) || node.ChildNodes[0].Is(GrammarNames.RefFunctionName)); } /// /// Whether or not this node represents an intersection /// public static bool IsIntersection(this ParseTreeNode input) { return IsBinaryOperation(input) && input.ChildNodes[1].Token.Terminal.Name == GrammarNames.TokenIntersect; } /// /// Whether or not this node represents an union /// public static bool IsUnion(this ParseTreeNode input) { return input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Union); } /// /// Checks whether this node is a function call with name, and not just a unary or binary operation /// 
public static bool IsNamedFunction(this ParseTreeNode input) { return (input.Is(GrammarNames.FunctionCall) && input.ChildNodes[0].Is(GrammarNames.FunctionName)) || (input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes[0].Is(GrammarNames.RefFunctionName)) || input.Is(GrammarNames.UDFunctionCall); } public static bool IsOperation(this ParseTreeNode input) { return input.IsBinaryOperation() || input.IsUnaryOperation(); } public static bool IsExternalUDFunction(this ParseTreeNode input) { return input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsNamedFunction(); } /// /// True if this node presents a number constant with a sign /// public static bool IsNumberWithSign(this ParseTreeNode input) { return IsUnaryPrefixOperation(input) && input.ChildNodes[1].ChildNodes[0].Is(GrammarNames.Constant) && input.ChildNodes[1].ChildNodes[0].ChildNodes[0].Is(GrammarNames.Number); } /// /// Extract all of the information from a Prefix non-terminal /// public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix) => PrefixInfo.From(prefix); /// /// Go to the first non-formula child node /// public static ParseTreeNode SkipFormula(this ParseTreeNode input) { while (input.Is(GrammarNames.Formula)) { input = input.ChildNodes.First(); } return input; } /// /// Get all child nodes that are references and aren't part of another reference expression /// public static IEnumerable GetReferenceNodes(this ParseTreeNode input) { return input.AllNodesConditional(node => node.Is(GrammarNames.Reference)) .Where(node => node.Is(GrammarNames.Reference)) .Select(node => node.SkipToRelevant()) ; } /// /// Gets the ParserReferences from the input parse tree node and its children /// /// /// 5 cases: /// 1. ReferenceItem node: convert to ParserReference /// 2. 
Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node) /// (to include the references in the arguments of external UDFs) /// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange /// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences /// 5. Other cases (RefFunctionCall, Union, Arguments):recursive call on the nodes returned from GetReferenceNodes(node) /// public static IEnumerable GetParserReferences(this ParseTreeNode node) { if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1) node = node.ChildNodes[0]; var list = new List(); switch (node.Type()) { case GrammarNames.Cell: case GrammarNames.NamedRange: case GrammarNames.HorizontalRange: case GrammarNames.VerticalRange: case GrammarNames.StructuredReference: list.Add(new ParserReference(node)); break; case GrammarNames.Reference: list.Add(new ParserReference(node)); list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences())); break; default: if (node.IsRange()) { var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray(); var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray(); if (IsCellReference(rangeStart) && IsCellReference(rangeEnd)) { ParserReference range = rangeStart.First(); range.MaxLocation = rangeEnd.First().MinLocation; range.ReferenceType = ReferenceType.CellRange; range.LocationString = node.Print(); list.Add(range); } else if (IsTableReference(rangeStart) && IsTableReference(rangeEnd) && rangeStart.First().Name == rangeEnd.First().Name && rangeStart.First().TableColumns.Length == 1 && rangeEnd.First().TableColumns.Length == 1) { ParserReference range = rangeStart.First(); range.TableColumns = rangeStart.First().TableColumns.Concat(rangeEnd.First().TableColumns).ToArray(); range.TableSpecifiers = 
rangeStart.First().TableSpecifiers.SequenceEqual(rangeEnd.First().TableSpecifiers) ? range.TableSpecifiers : new string[0];
                            list.Add(range);
                        }
                        else
                        {
                            list.AddRange(rangeStart);
                            list.AddRange(rangeEnd);
                        }
                    }
                    else
                    {
                        list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
                    }
                    break;
            }
            return list;
        }

        /// <summary>
        /// Whether the list consists of exactly one reference and that reference is a single cell
        /// </summary>
        /// <param name="references">The references to inspect</param>
        private static bool IsCellReference(IList<ParserReference> references)
        {
            return references.Count == 1 && references.First().ReferenceType == ReferenceType.Cell;
        }

        /// <summary>
        /// Whether the list consists of exactly one reference and that reference is a table (structured) reference
        /// </summary>
        /// <param name="references">The references to inspect</param>
        private static bool IsTableReference(IList<ParserReference> references)
        {
            return references.Count == 1 && references.First().ReferenceType == ReferenceType.Table;
        }

        /// <summary>
        /// Whether or not this node represents a range
        /// </summary>
        /// <remarks>
        /// A range is a binary reference operation whose operator (the second child node) is ":".
        /// </remarks>
        public static bool IsRange(this ParseTreeNode input)
        {
            return input.IsBinaryReferenceOperation() && input.ChildNodes[1].Is(":");
        }

        /// <summary>
        /// Go to the first "relevant" child node, i.e. skips wrapper nodes
        /// </summary>
        /// <param name="input">The input parse tree node</param>
        /// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
        /// <returns>The first node, starting at <paramref name="input"/>, that is not skipped</returns>
        /// <remarks>
        /// Skips:
        /// * FormulaWithEq and ArrayFormula nodes
        /// * Formula nodes
        /// * Parentheses
        /// * Reference nodes which are just wrappers
        /// </remarks>
        public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
        {
            while (true)
            {
                switch (input.Type())
                {
                    case GrammarNames.FormulaWithEq:
                    case GrammarNames.ArrayFormula:
                        // The wrapped formula is the second child of these nodes
                        input = input.ChildNodes[1];
                        break;
                    case GrammarNames.Argument:
                    case GrammarNames.Formula:
                        // Only descend when the node wraps exactly one child
                        if (input.ChildNodes.Count == 1)
                        {
                            input = input.ChildNodes[0];
                        }
                        else
                        {
                            return input;
                        }
                        break;
                    case GrammarNames.Reference:
                        // Skip references which are parentheses
                        // Skip references without a prefix (=> they only have one child node) if the option is set
                        if ((skipReferencesWithoutPrefix && input.ChildNodes.Count == 1) || input.IsParentheses())
                        {
                            input = input.ChildNodes[0];
                        }
                        else
                        {
                            return input;
                        }
                        break;
                    default:
                        return input;
                }
            }
        }

        /// <summary>
        /// Pretty-print a parse tree to a string
        /// </summary>
        /// <param name="input">The (sub)tree to print</param>
        /// <returns>The formula text represented by <paramref name="input"/></returns>
        /// <exception cref="ArgumentException">
        /// The node is a function call of an unknown kind, or a non-terminal this method has no printing rule for
        /// </exception>
        public static string Print(this ParseTreeNode input)
        {
            // For terminals, just print the token text
            if (input.Term is Terminal)
            {
                return input.Token.Text;
            }

            // (Lazy) enumerable for printed children
            var children = input.ChildNodes.Select(Print);
            // Concrete list when needed
            List<string> childrenList;

            // Switch on non-terminals
            switch (input.Term.Name)
            {
                case GrammarNames.Formula:
                    // Check if these are brackets, otherwise print first child
                    return IsParentheses(input) ? $"({children.First()})" : children.First();

                case GrammarNames.FunctionCall:
                case GrammarNames.ReferenceFunctionCall:
                case GrammarNames.UDFunctionCall:
                    childrenList = children.ToList();

                    if (input.IsNamedFunction())
                    {
                        // The children are joined as-is and only the closing parenthesis is appended
                        return string.Join("", childrenList) + ")";
                    }

                    if (input.IsBinaryOperation())
                    {
                        // format string for "normal" binary operation
                        string format = "{0}{1}{2}";
                        if (input.IsIntersection())
                        {
                            // An intersection is printed as a single space between its operands
                            format = "{0} {2}";
                        }

                        return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
                    }

                    if (input.IsUnion())
                    {
                        return $"({string.Join(",", childrenList)})";
                    }

                    if (input.IsUnaryOperation())
                    {
                        return string.Join("", childrenList);
                    }

                    throw new ArgumentException("Unknown function type.");

                case GrammarNames.Reference:
                    return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);

                case GrammarNames.Prefix:
                    var ret = string.Join("", children);
                    // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
                    if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
                    {
                        ret += "!";
                    }
                    return ret;

                case GrammarNames.ArrayFormula:
                    return "{=" + children.ElementAt(1) + "}";

                // Terms for which to print all child nodes concatenated
                case GrammarNames.ArrayConstant:
                case GrammarNames.DynamicDataExchange:
                case GrammarNames.FormulaWithEq:
                case GrammarNames.File:
                case GrammarNames.MultiRangeFormula:
                case GrammarNames.StructuredReference:
                case GrammarNames.StructuredReferenceColumn:
                case GrammarNames.StructuredReferenceExpression:
                case GrammarNames.StructuredReferenceSpecifier:
                    return string.Join("", children);

                // Terms for which we print the children comma-separated
                case GrammarNames.Arguments:
                case GrammarNames.ArrayRows:
                case GrammarNames.Union:
                    return string.Join(",", children);

                case GrammarNames.ArrayColumns:
                    return string.Join(";", children);

                case GrammarNames.ConstantArray:
                    return $"{{{children.First()}}}";

                default:
                    // If it is not defined above and the number of children is exactly one, we want to just print the first child
                    if (input.ChildNodes.Count == 1)
                    {
                        return children.First();
                    }
                    throw new ArgumentException($"Could not print node of type '{input.Term.Name}'."
+ Environment.NewLine + "This probably means the Excel grammar was modified without the print function being modified");
            }
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v162/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v162
{
    /// <summary>
    /// The kinds of reference a <see cref="ParserReference"/> can describe
    /// </summary>
    public enum ReferenceType
    {
        Cell,
        CellRange,
        UserDefinedName,
        HorizontalRange,
        VerticalRange,
        RefError,
        Table
    }

    /// <summary>
    /// Describes a single reference found in a formula: what kind of reference it is, where it points
    /// (file, worksheet(s), location or name) and how it was written in the formula.
    /// </summary>
    public class ParserReference
    {
        public const int MaxRangeHeight = 1048576;
        public const int MaxRangeWidth = 16384;

        public ReferenceType ReferenceType { get; set; }
        public string LocationString { get; set; }
        public string Worksheet { get; set; }
        public string LastWorksheet { get; set; }
        public string FilePath { get; set; }
        public string FileName { get; set; }
        public string Name { get; private set; }
        public string MinLocation { get; set; } //Location as appearing in the formula, eg $A$1
        public string MaxLocation { get; set; }
        public string[] TableSpecifiers { get; set; }
        public string[] TableColumns { get; set; }

        /// <summary>
        /// Creates a reference from explicit values
        /// </summary>
        /// <remarks>
        /// When <paramref name="maxLocation"/> is null, <see cref="MaxLocation"/> is set to <paramref name="minLocation"/>.
        /// </remarks>
        public ParserReference(ReferenceType referenceType, string locationString = null, string worksheet = null, string lastWorksheet = null,
            string filePath = null, string fileName = null, string name = null, string minLocation = null, string maxLocation = null,
            string[] tableSpecifiers = null, string[] tableColumns = null)
        {
            ReferenceType = referenceType;
            LocationString = locationString;
            Worksheet = worksheet;
            LastWorksheet = lastWorksheet;
            FilePath = filePath;
            FileName = fileName;
            Name = name;
            MinLocation = minLocation;
            MaxLocation = maxLocation ?? minLocation;
            TableColumns = tableColumns;
            TableSpecifiers = tableSpecifiers;
        }

        /// <summary>
        /// Creates a reference from a parse tree node, see <see cref="InitializeReference"/>
        /// </summary>
        public ParserReference(ParseTreeNode node)
        {
            InitializeReference(node);
        }

        /// <summary>
        /// Initializes the current object based on the input ParseTreeNode
        /// </summary>
        /// <param name="node">The parse tree node describing the reference</param>
        /// <remarks>
        /// For Reference nodes (Prefix ReferenceItem), it initialize the values derived from the Prefix node and
        /// is re-invoked for the ReferenceItem node.
        /// Node types that are not listed in the switch only get their <see cref="LocationString"/> set.
        /// </remarks>
        public void InitializeReference(ParseTreeNode node)
        {
            switch (node.Type())
            {
                case GrammarNames.Reference:
                    PrefixInfo prefix = node.ChildNodes[0].GetPrefixInfo();
                    // A doubled quote inside a sheet name stands for a single quote
                    Worksheet = prefix.HasSheet ? prefix.Sheet.Replace("''", "'") : "(Undefined sheet)";

                    if (prefix.HasMultipleSheets)
                    {
                        // "First:Last" replaces the worksheet determined above
                        string[] sheets = prefix.MultipleSheets.Split(':');
                        Worksheet = sheets[0];
                        LastWorksheet = sheets[1];
                    }

                    if (prefix.HasFilePath)
                    {
                        FilePath = prefix.FilePath;
                    }

                    // A numeric file reference takes precedence over a file name
                    if (prefix.HasFileNumber)
                    {
                        FileName = prefix.FileNumber.ToString();
                    }
                    else if (prefix.HasFileName)
                    {
                        FileName = prefix.FileName;
                    }

                    // Continue with the reference item that follows the prefix
                    InitializeReference(node.ChildNodes[1]);
                    break;
                case GrammarNames.Cell:
                    ReferenceType = ReferenceType.Cell;
                    MinLocation = node.ChildNodes[0].Token.ValueString;
                    MaxLocation = MinLocation;
                    break;
                case GrammarNames.NamedRange:
                    ReferenceType = ReferenceType.UserDefinedName;
                    Name = node.ChildNodes[0].Token.ValueString;
                    break;
                case GrammarNames.StructuredReference:
                    ReferenceType = ReferenceType.Table;
                    // The table name is optional; Name stays null when there is no qualifier node
                    Name = node.ChildNodes.FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceQualifier)?.ChildNodes[0].Token.ValueString;
                    TableSpecifiers = node.AllNodes().Where(x => x.Is(GrammarNames.TokenSRSpecifier) || x.Is("@")).Select(x => UnEscape(x.Token.ValueString, "'")).ToArray();
                    TableColumns = node.AllNodes().Where(x => x.Is(GrammarNames.TokenSRColumn)).Select(x => UnEscape(x.Token.ValueString, "'")).ToArray();
                    break;
                case GrammarNames.HorizontalRange:
                    string[] horizontalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                    ReferenceType = ReferenceType.HorizontalRange;
                    MinLocation = horizontalLimits[0];
                    MaxLocation = horizontalLimits[1];
                    break;
                case GrammarNames.VerticalRange:
                    string[] verticalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                    ReferenceType = ReferenceType.VerticalRange;
                    MinLocation = verticalLimits[0];
                    MaxLocation = verticalLimits[1];
                    break;
                case GrammarNames.RefError:
                    ReferenceType = ReferenceType.RefError;
                    break;
            }

            // Assigned last, so for Reference nodes the value set by the nested call is
            // replaced by the print of the complete (prefixed) reference
            LocationString = node.Print();
        }

        /// <summary>
        /// Removes every occurrence of <paramref name="escapeCharacter"/> that is not directly followed by another one
        /// </summary>
        /// <param name="value">The text to unescape</param>
        /// <param name="escapeCharacter">The escape marker, matched literally</param>
        private string UnEscape(string value, string escapeCharacter)
        {
            // The marker is spliced into a regular expression, so it has to be escaped to stay a literal
            // even if it is a regex metacharacter. For the "'" marker used in this class nothing changes.
            string marker = System.Text.RegularExpressions.Regex.Escape(escapeCharacter);
            return System.Text.RegularExpressions.Regex.Replace(value, $"{marker}(?!{marker})", "");
        }

        /// <summary>
        /// Converts the column number to an Excel column string representation.
        /// </summary>
        /// <param name="columnNumber">The zero-based column number.</param>
        private string ConvertColumnToStr(int columnNumber)
        {
            var sb = new System.Text.StringBuilder();
            while (columnNumber >= 0)
            {
                // 65 is 'A'; letters are produced from least to most significant, hence the insert at the front
                sb.Insert(0, (char)(65 + columnNumber % 26));
                columnNumber = columnNumber / 26 - 1;
            }
            return sb.ToString();
        }

        /// <summary>
        /// Returns the location for cell references and "min:max" for every other reference type
        /// </summary>
        public override string ToString()
        {
            // MinLocation may be null (e.g. when the object was built through the explicit constructor);
            // ToString should not throw in that case
            return ReferenceType == ReferenceType.Cell ? MinLocation ?? string.Empty : string.Format("{0}:{1}", MinLocation, MaxLocation);
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v162/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v162
{
    // TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23

    /// <summary>
    /// Simple data class that holds information about a Prefix.
    /// </summary>
    public class PrefixInfo : IEquatable<PrefixInfo>
    {
        public string FilePath { get; }
        public bool HasFilePath => FilePath != null;

        private readonly int?
_fileNumber;
        public int FileNumber => _fileNumber.GetValueOrDefault();
        public bool HasFileNumber => _fileNumber.HasValue;

        public string FileName { get; }
        public bool HasFileName => FileName != null;

        public bool HasFile => HasFileName || HasFileNumber;

        public string Sheet { get; }
        public bool HasSheet => Sheet != null;

        public string MultipleSheets { get; }
        public bool HasMultipleSheets => MultipleSheets != null;

        public bool IsQuoted { get; }

        /// <summary>
        /// Creates a prefix description from explicit values; every part that is left null counts as absent
        /// </summary>
        public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
        {
            Sheet = sheet;
            _fileNumber = fileNumber;
            FileName = fileName;
            FilePath = filePath;
            MultipleSheets = multipleSheets;
            IsQuoted = isQuoted;
        }

        /// <summary>
        /// Create a PrefixInfo class from a parse tree node
        /// </summary>
        /// <param name="prefix">A parse tree node of type Prefix</param>
        /// <returns>The file, sheet and quoting information found in the prefix</returns>
        /// <exception cref="ArgumentException"><paramref name="prefix"/> is not a Prefix node</exception>
        internal static PrefixInfo From(ParseTreeNode prefix)
        {
            if (prefix.Type() != GrammarNames.Prefix)
            {
                throw new ArgumentException("Not a prefix", nameof(prefix));
            }

            string filePath = null;
            int? fileNumber = null;
            string fileName = null;
            string sheetName = null;
            string multipleSheets = null;

            var children = prefix.ChildNodes;

            // Token number we're processing
            var cur = 0;

            // Check for quotes
            var isQuoted = cur < children.Count && children[cur].Is("'");
            if (isQuoted)
            {
                cur++;
            }

            // Check and process file
            if (cur < children.Count && children[cur].Is(GrammarNames.File))
            {
                ParseTreeNode file = children[cur];

                if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
                {
                    // Numeric filename: strip the first and last character around the number
                    fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
                }
                else
                {
                    // String filename
                    var iCur = 0;
                    // Check if it includes a path
                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                    {
                        filePath = file.ChildNodes[iCur].Print();
                        iCur++;
                    }

                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                    {
                        // Strip the first and last character around the name
                        fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                    }
                    else
                    {
                        fileName = file.ChildNodes[iCur].Print();
                    }
                }

                cur++;
            }

            // A prefix can consist of a file only; there is no sheet node to look at then,
            // and indexing past the last child would throw
            if (cur < children.Count)
            {
                ParseTreeNode sheetNode = children[cur];

                // Check for a non-quoted sheet
                if (sheetNode.Is(GrammarNames.TokenSheet))
                {
                    // remove the trailing character (the token regex ends in "!")
                    sheetName = Substr(sheetNode.Print(), 1);
                }
                // Check for a quoted sheet
                else if (sheetNode.Is(GrammarNames.TokenSheetQuoted))
                {
                    // remove quote and !
                    sheetName = Substr(sheetNode.Print(), 2);

                    if (sheetName == "")
                    {
                        // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                        // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                        sheetName = " ";
                    }
                }
                // Check if multiple sheets
                else if (sheetNode.Is(GrammarNames.TokenMultipleSheets))
                {
                    multipleSheets = Substr(sheetNode.Print(), 1);
                }
            }

            return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
        }

        /// <summary>
        /// Extends a quoted sheet node backwards so that it also covers the whitespace directly preceding it in the source text
        /// </summary>
        /// <param name="quotedSheetNode">The node whose span and token text are adjusted in place</param>
        /// <param name="sourceText">The text the node was parsed from</param>
        internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
        {
            var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
            SourceLocation currentLocation = quotedSheetNode.Span.Location;
            if (newPosition == currentLocation.Position)
            {
                // No preceding whitespace, nothing to fix
                return;
            }

            // The start moves backwards by (Position - newPosition) characters, so the column has to
            // decrease by the same amount.
            // NOTE(review): assumes the skipped whitespace is on the node's own line - confirm.
            var shift = currentLocation.Position - newPosition;
            var newLocation = new SourceLocation(newPosition, currentLocation.Line, currentLocation.Column - shift);
            // The end of the span stays where it was, so the length grows by the distance moved
            quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

            // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
            typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
                ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
        }

        /// <summary>
        /// Walks backwards from the start of the node over all whitespace directly in front of it
        /// </summary>
        /// <returns>The position of the first character of that whitespace run, or the node's own position if there is none</returns>
        private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
        {
            var startIndex = nodeSheetQuoted.Span.Location.Position;
            while (startIndex > 0)
            {
                if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
                {
                    break;
                }

                startIndex--;
            }

            return startIndex;
        }

        /// <summary>
        /// Returns <paramref name="s"/> without its first <paramref name="removeFirst"/> and last <paramref name="removeLast"/> characters
        /// </summary>
        private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
        {
            return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
        }

        public override bool Equals(object other) => Equals(other as PrefixInfo);

        /// <summary>
        /// Two prefixes are equal when they have the same file number and their file path, file name, sheet and
        /// multiple-sheets texts are equal ignoring case. <see cref="IsQuoted"/> is not part of the comparison.
        /// </summary>
        public bool Equals(PrefixInfo other)
        {
            if (ReferenceEquals(null, other)) return false;
            if (ReferenceEquals(this, other)) return true;
            return _fileNumber == other._fileNumber
                   && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
        }

        public override int GetHashCode()
        {
            unchecked
            {
                var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
                hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
                hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
                hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
                hashCode = (hashCode*397) ^ (MultipleSheets != null ?
StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
                return hashCode;
            }
        }

        public static bool operator ==(PrefixInfo left, PrefixInfo right)
        {
            return Equals(left, right);
        }

        public static bool operator !=(PrefixInfo left, PrefixInfo right)
        {
            return !Equals(left, right);
        }

        /// <summary>
        /// Builds the textual form of the prefix: optional quotes around path, file, and sheet(s), followed by "!"
        /// </summary>
        public override string ToString()
        {
            var res = new StringBuilder();
            if (IsQuoted) res.Append("'");
            if (HasFilePath) res.Append(FilePath);
            if (HasFileNumber) res.Append($"[{FileNumber}]");
            if (HasFileName) res.Append($"[{FileName}]");
            if (HasSheet) res.Append(Sheet);
            if (HasMultipleSheets) res.Append(MultipleSheets);
            if (IsQuoted) res.Append("'");
            res.Append("!");
            return res.ToString();
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v162/WordsTerminal.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;

namespace XLParser.Web.XLParserVersions.v162
{
    /// <summary>
    /// Terminal that determines whether the input starts with one of the expected words.
    /// </summary>
    /// <remarks>
    /// The words are stored in a character tree. Children of each node are represented as an array to allow
    /// direct indexation. Do not use for words that have a large difference between low and high character of a token.
    /// </remarks>
    public class WordsTerminal : Terminal
    {
        private readonly Node _rootNode;
        private readonly List<string> _words;
        private bool _caseSensitive;

        /// <param name="name">The name of the terminal</param>
        /// <param name="words">The words this terminal recognizes</param>
        public WordsTerminal(string name, IEnumerable<string> words) : base(name)
        {
            _rootNode = new Node(0);
            _words = new List<string>(words);
        }

        /// <summary>
        /// Builds the word tree, upper-casing the words first when the grammar is not case sensitive
        /// </summary>
        public override void Init(GrammarData grammarData)
        {
            base.Init(grammarData);
            _caseSensitive = Grammar.CaseSensitive;
            foreach (var word in _words)
            {
                AddWordToTree(_caseSensitive ? word : word.ToUpperInvariant());
            }

            if (EditorInfo == null)
            {
                EditorInfo = new TokenEditorInfo(TokenType.Unknown, TokenColor.Text, TokenTriggers.None);
            }
        }

        public override IList<string> GetFirsts() => _words;

        /// <summary>
        /// Tries to match the longest known word that starts at the current preview position
        /// </summary>
        /// <returns>A token for the matched word, or null when no word starts at that position</returns>
        public override Token TryMatch(ParsingContext context, ISourceStream source)
        {
            var input = source.Text;
            var node = _rootNode;
            // Deepest node seen so far that ends a complete word. Walking on may run into a dead end
            // below such a node (a word that is the prefix of a longer, only partially present word);
            // the shorter word is still a valid match then.
            Node lastMatch = null;

            for (var i = source.PreviewPosition; i < input.Length; ++i)
            {
                var c = _caseSensitive ? input[i] : char.ToUpperInvariant(input[i]);
                node = node[c];
                if (node is null)
                {
                    break;
                }

                if (node.IsTerminal)
                {
                    lastMatch = node;
                }
            }

            // The root is never considered, so an empty word cannot produce a zero-length token
            if (lastMatch is null)
            {
                return null;
            }

            // A node's length equals the number of characters consumed to reach it
            source.PreviewPosition += lastMatch.Length;
            return source.CreateToken(OutputTerminal);
        }

        /// <summary>
        /// Adds the characters of the word as a path in the tree and marks the last node as the end of a word
        /// </summary>
        private void AddWordToTree(string word)
        {
            var node = _rootNode;
            foreach (var c in word)
            {
                node = node.GetOrAddChild(c);
            }

            node.IsTerminal = true;
        }

        /// <summary>
        /// A node of the word tree. Its children are kept in an array that covers the character
        /// range from <c>_lowChar</c> up to and including <c>_highChar</c>.
        /// </summary>
        private class Node
        {
            private char _lowChar = '\0';
            private char _highChar = '\0';
            private Node[] _children;

            public Node(int length)
            {
                Length = length;
            }

            /// <summary>Whether a word ends at this node</summary>
            public bool IsTerminal { get; set; }

            /// <summary>The depth of the node, i.e. the number of characters on the path from the root</summary>
            public int Length { get; }

            /// <summary>
            /// The child for the character, or null when there is none
            /// </summary>
            public Node this[char c]
            {
                get
                {
                    if (_children is null)
                    {
                        return null;
                    }

                    if (c < _lowChar || c > _highChar)
                    {
                        return null;
                    }

                    return _children[c - _lowChar];
                }
            }

            /// <summary>
            /// Returns the child for the character, creating it (and growing the child array when the
            /// character lies outside the current range) if it does not exist yet
            /// </summary>
            internal Node GetOrAddChild(char c)
            {
                if (_children is null)
                {
                    // First child: the range consists of just this character
                    var node = new Node(Length + 1);
                    _children = new[] { node };
                    _lowChar = c;
                    _highChar = c;
                    return node;
                }

                var newLowChar = (char)Math.Min(_lowChar, c);
                if (newLowChar != _lowChar)
                {
                    // Grow at the front: enlarge, shift the existing children to the right, clear the gap
                    var newChildrenCount = _highChar - newLowChar + 1;
                    Array.Resize(ref _children, newChildrenCount);
                    var ofs = _lowChar - newLowChar;
                    Array.Copy(_children, 0, _children, ofs, newChildrenCount - ofs);
                    Array.Clear(_children, 0, ofs);
                    _lowChar = newLowChar;
                    return _children[0] = new Node(Length + 1);
                }

                var newHighChar = (char)Math.Max(_highChar, c);
                if (newHighChar != _highChar)
                {
                    // Grow at the end: the new child takes the last slot
                    var newChildrenCount = newHighChar - _lowChar + 1;
                    Array.Resize(ref _children, newChildrenCount);
                    _highChar = newHighChar;
                    return _children[newChildrenCount - 1] = new Node(Length + 1);
                }

                // Inside the current range: the slot may still be empty
                var charIdx = c - _lowChar;
                var child = _children[charIdx];
                if (child is null)
                {
                    return _children[charIdx] = new Node(Length + 1);
                }

                return child;
            }
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v163/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;

namespace XLParser.Web.XLParserVersions.v163
{
    /// <summary>
    /// Contains the XLParser grammar
    /// </summary>
    [Language("Excel Formulas", "1.6.3", "Grammar for Excel Formulas")]
    public class ExcelFormulaGrammar : Grammar
    {
        #region 1-Terminals

        #region Symbols and operators

        public Terminal at => ToTerm("@");
        public Terminal comma => ToTerm(",");
        public Terminal colon => ToTerm(":");
        public Terminal hash => ToTerm("#");
        public Terminal semicolon => ToTerm(";");
        public Terminal OpenParen => ToTerm("(");
        public Terminal CloseParen => ToTerm(")");
        public Terminal CloseSquareParen => ToTerm("]");
        public Terminal OpenSquareParen => ToTerm("[");
        public Terminal exclamationMark => ToTerm("!");
        public Terminal CloseCurlyParen => ToTerm("}");
        public Terminal OpenCurlyParen => ToTerm("{");
        public Terminal QuoteS => ToTerm("'");

        public Terminal mulop => ToTerm("*");
        public Terminal plusop => ToTerm("+");
        public Terminal divop => ToTerm("/");
        public Terminal minop => ToTerm("-");
        public Terminal concatop => ToTerm("&");
        public Terminal expop => ToTerm("^");

        // Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
        // Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
        public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);

        public Terminal percentop => ToTerm("%");

        public Terminal gtop => ToTerm(">");
        public Terminal eqop => ToTerm("=");
        public Terminal ltop => ToTerm("<");
        public Terminal neqop
=> ToTerm("<>"); public Terminal gteop => ToTerm(">="); public Terminal lteop => ToTerm("<="); #endregion #region Literals public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE", "T", "F") { Priority = TerminalPriority.Bool }; public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None) { DefaultIntTypes = new[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt } }; public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes); public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes) { Priority = TerminalPriority.SingleQuotedString }; public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!", "#"); public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError); #endregion #region Functions private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers private const string AllUdfChars = SpecialUdfChars + @"\\.\w"; private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)"; // The following regex uses the rather exotic feature Character Class Subtraction // https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\("; public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) { Priority = TerminalPriority.UDF }; public Terminal ExcelRefFunctionToken { get; } = 
new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(", "I", "O") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(", "I", "C") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelFunction { get; } = new WordsTerminal(GrammarNames.ExcelFunction, excelFunctionList.Select(f => f + '(')) { Priority = TerminalPriority.ExcelFunction }; // Using this instead of Empty allows a more accurate tree public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument); #endregion #region References and names private const string ColumnPattern = @"(?:[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D]|[A-Z]{1,2})"; private const string RowPattern = @"(?:104857[0-6]|10485[0-6][0-9]|1048[0-4][0-9]{2}|104[0-7][0-9]{3}|10[0-3][0-9]{4}|[1-9][0-9]{1,5}|[1-9])"; private static readonly string[] ColumnPrefix = Enumerable.Range('A', 'Z' - 'A' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray(); private static readonly string[] RowPrefix = Enumerable.Range('1', '9' - '1' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray(); public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern, ColumnPrefix); public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?" + RowPattern + ":[$]?" + RowPattern, RowPrefix); private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?" 
+ RowPattern;

        public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex, ColumnPrefix)
            { Priority = TerminalPriority.CellToken };

        private static readonly HashSet<UnicodeCategory> UnicodeLetterCategories = new HashSet<UnicodeCategory>
        {
            UnicodeCategory.UppercaseLetter,
            UnicodeCategory.LowercaseLetter,
            UnicodeCategory.TitlecaseLetter,
            UnicodeCategory.ModifierLetter,
            UnicodeCategory.OtherLetter
        };

        // 48718 letters, but it allows parser to from tokens starting with digits, parentheses, operators...
        private static readonly string[] UnicodeLetters = Enumerable.Range(0, ushort.MaxValue).Where(codePoints => UnicodeLetterCategories.Contains(CharUnicodeInfo.GetUnicodeCategory((char)codePoints))).Select(codePoint => char.ToString((char)codePoint)).ToArray();
        private static readonly string[] NameStartCharPrefix = UnicodeLetters.Concat(new[] { @"\", "_" }).ToArray();

        // Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark
        private const string NameStartCharRegex = @"[\p{L}\\_]";
        private const string NameValidCharacterRegex = @"[\w\\_\.\?€]";

        public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*", NameStartCharPrefix)
            { Priority = TerminalPriority.Name };

        // Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference.
        // If we ever parse R1C1 references, make sure to include them here
        // TODO: Add all function names here

        private const string NamedRangeCombinationRegex =
            "((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
            // \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
            + "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
            // allow large cell references (e.g. A1048577) as named range
            // The alternatives enumerate the 7-digit row numbers above 1048576; the last one covers every
            // number of 8 or more digits. It has to be a digit class: a bare "d{8,}" only matches the letter 'd',
            // which left names such as A10000000 unrecognized.
            + "|(" + ColumnPattern + @"(104857[7-9]|10485[89][0-9]|1048[6-9][0-9]{2}|1049[0-9]{3}|10[5-9][0-9]{4}|1[1-9][0-9]{5}|[2-9][0-9]{6}|[0-9]{8,})" + NameValidCharacterRegex + "*)"
            ;

        // To prevent e.g. "A1A1" being parsed as 2 cell tokens
        public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NamedRangeCombinationRegex, ColumnPrefix.Concat(new[] { "T", "F" }).ToArray())
            { Priority = TerminalPriority.NamedRangeCombination };

        public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+", "_")
            { Priority = TerminalPriority.ReservedName };

        #region Structured References

        private const string SRSpecifierRegex = @"#(All|Data|Headers|Totals|This Row)";
        public Terminal SRSpecifierToken = new RegexBasedTerminal(GrammarNames.TokenSRSpecifier, SRSpecifierRegex, "#")
            { Priority = TerminalPriority.StructuredReference };

        private const string SRColumnRegex = @"(?:[^\[\]'#@]|(?:'['\[\]#@]))+";
        public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, SRColumnRegex)
            { Priority = TerminalPriority.StructuredReference };

        #endregion

        #region Prefixes

        private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
        private const string notSheetNameChars = @"'*\[\]\\:/?";
        //const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
        //const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";

        private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
        private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*";
        //private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";

        public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!")
            { Priority = TerminalPriority.SheetToken };

        public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!")
            { Priority = TerminalPriority.SheetQuotedToken };

        private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!";
        private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!";

        public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex)
            { Priority = TerminalPriority.MultipleSheetsToken };

        public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex)
            { Priority = TerminalPriority.MultipleSheetsToken };

        // The lookahead only accepts a bracketed part that is still followed by a "!" without further brackets in between
        private const string fileNameNumericRegex = @"\[[0-9]+\](?=[^\[\]]*!)";
        public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex, "[")
            { Priority = TerminalPriority.FileNameNumericToken };

        private const string fileNameInBracketsRegex = @"\[[^\[\]]+\](?=[^\[\]]*!)";
        public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex, "[")
            { Priority = TerminalPriority.FileName };

        // Source: https://stackoverflow.com/a/14632579
        private const string fileNameRegex = @"[^\.\\\[\]]+\..{1,4}";
        public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex)
            { Priority = TerminalPriority.FileName };
// Source: http://stackoverflow.com/a/6416209/572635
        private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\-.$ @]+)\\(([^<>\"" /\|?*\\']|( |''))*\\)*";
        // The port part used to be "(:(0-9)*)*", which matches the literal text "0-9" instead of
        // digits, so URLs with an explicit port (e.g. http://host:8080/) never matched.
        private const string urlPathRegex = @"http(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:[0-9]*)*[/]([a-zA-Z0-9\-\.\?\,\'+&%\$#_ ()]*[/])*";
        private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")";
        public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex)
        { Priority = TerminalPriority.FileNamePath };

        #endregion

        #endregion

        #endregion

        #region 2-NonTerminals
        // Most non-terminals are first defined here, so they can be used anywhere in the rules
        // Otherwise you can only use non-terminals that have been defined previously

        public NonTerminal Argument { get; } = new NonTerminal(GrammarNames.Argument);
        public NonTerminal Arguments { get; } = new NonTerminal(GrammarNames.Arguments);
        public NonTerminal ArrayColumns { get; } = new NonTerminal(GrammarNames.ArrayColumns);
        public NonTerminal ArrayConstant { get; } = new NonTerminal(GrammarNames.ArrayConstant);
        public NonTerminal ArrayFormula { get; } = new NonTerminal(GrammarNames.ArrayFormula);
        public NonTerminal ArrayRows { get; } = new NonTerminal(GrammarNames.ArrayRows);
        public NonTerminal Bool { get; } = new NonTerminal(GrammarNames.Bool);
        public NonTerminal Cell { get; } = new NonTerminal(GrammarNames.Cell);
        public NonTerminal Constant { get; } = new NonTerminal(GrammarNames.Constant);
        public NonTerminal ConstantArray { get; } = new NonTerminal(GrammarNames.ConstantArray);
        public NonTerminal DynamicDataExchange { get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
        public NonTerminal EmptyArgument { get; } = new NonTerminal(GrammarNames.EmptyArgument);
        public NonTerminal Error { get; } = new NonTerminal(GrammarNames.Error);
        public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
        public NonTerminal Formula { get; } = new NonTerminal(GrammarNames.Formula);
        public NonTerminal FormulaWithEq { get; } = new NonTerminal(GrammarNames.FormulaWithEq);
        public NonTerminal FunctionCall { get; } = new NonTerminal(GrammarNames.FunctionCall);
        public NonTerminal FunctionName { get; } = new NonTerminal(GrammarNames.FunctionName);
        public NonTerminal HRange { get; } = new NonTerminal(GrammarNames.HorizontalRange);
        public NonTerminal InfixOp { get; } = new NonTerminal(GrammarNames.TransientInfixOp);
        public NonTerminal MultiRangeFormula { get; } = new NonTerminal(GrammarNames.MultiRangeFormula);
        public NonTerminal NamedRange { get; } = new NonTerminal(GrammarNames.NamedRange);
        public NonTerminal Number { get; } = new NonTerminal(GrammarNames.Number);
        public NonTerminal PostfixOp { get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
        public NonTerminal Prefix { get; } = new NonTerminal(GrammarNames.Prefix);
        public NonTerminal PrefixOp { get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
        // NOTE(review): QuotedFileSheet and Sheet get no rule in the constructor below.
        public NonTerminal QuotedFileSheet { get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
        public NonTerminal Reference { get; } = new NonTerminal(GrammarNames.Reference);
        public NonTerminal ReferenceItem { get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
        public NonTerminal ReferenceFunctionCall { get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
        public NonTerminal RefError { get; } = new NonTerminal(GrammarNames.RefError);
        public NonTerminal RefFunctionName { get; } = new NonTerminal(GrammarNames.RefFunctionName);
        public NonTerminal ReservedName { get; } = new NonTerminal(GrammarNames.ReservedName);
        public NonTerminal Sheet { get; } = new NonTerminal(GrammarNames.Sheet);
        public NonTerminal Start { get; } = new NonTerminal(GrammarNames.TransientStart);
        public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
        public NonTerminal StructuredReferenceColumn { get; } = new NonTerminal(GrammarNames.StructuredReferenceColumn);
        public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
        public NonTerminal StructuredReferenceSpecifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceSpecifier);
        public NonTerminal StructuredReferenceQualifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceQualifier);
        public NonTerminal Text { get; } = new NonTerminal(GrammarNames.Text);
        public NonTerminal UDFName { get; } = new NonTerminal(GrammarNames.UDFName);
        public NonTerminal UDFunctionCall { get; } = new NonTerminal(GrammarNames.UDFunctionCall);
        public NonTerminal Union { get; } = new NonTerminal(GrammarNames.Union);
        public NonTerminal VRange { get; } = new NonTerminal(GrammarNames.VerticalRange);

        #endregion

        /// <summary>
        /// Builds the grammar: marks punctuation, assigns the production rule of every
        /// non-terminal and registers operator precedence and associativity.
        /// </summary>
        public ExcelFormulaGrammar()
        {
            #region Punctuation
            // Parentheses and curly braces are punctuation, so they do not appear as child
            // nodes; tree-walking code relies on the resulting child counts.
            MarkPunctuation(OpenParen, CloseParen);
            MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
            #endregion

            #region Rules

            #region Base rules
            Root = Start;

            Start.Rule =
                  FormulaWithEq
                | Formula
                | ArrayFormula
                | MultiRangeFormula
                ;
            MarkTransient(Start);

            ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;

            MultiRangeFormula.Rule = eqop + Union;

            FormulaWithEq.Rule = eqop + Formula;

            Formula.Rule =
                  Reference + ReduceHere()
                | Constant
                | FunctionCall
                | ConstantArray
                | OpenParen + Formula + CloseParen
                | ReservedName
                ;

            ReservedName.Rule = ReservedNameToken;

            Constant.Rule =
                  Number
                | Text
                | Bool
                | Error
                ;

            Text.Rule = TextToken;
            Number.Rule = NumberToken;
            Bool.Rule = BoolToken;
            Error.Rule = ErrorToken;
            RefError.Rule = RefErrorToken;
            #endregion

            #region Functions

            // Named calls have no OpenParen in their rule; GetFunction strips the final
            // character of the printed function name before upper-casing it.
            FunctionCall.Rule =
                  FunctionName + Arguments + CloseParen
                | PrefixOp + Formula
                | Formula + PostfixOp
                | Formula + InfixOp + Formula
                ;

            FunctionName.Rule = ExcelFunction;

            Arguments.Rule = MakeStarRule(Arguments, comma, Argument);

            EmptyArgument.Rule = EmptyArgumentToken;
            Argument.Rule = Formula | EmptyArgument;

            PrefixOp.Rule =
                  ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
                | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
                | ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
            MarkTransient(PrefixOp);

            InfixOp.Rule =
                  expop
                | mulop
                | divop
                | plusop
                | minop
                | concatop
                | gtop
                | eqop
                | ltop
                | neqop
                | gteop
                | lteop;
            MarkTransient(InfixOp);

            // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
            PostfixOp.Rule = PreferShiftHere() + percentop;
            MarkTransient(PostfixOp);
            #endregion

            #region References

            Reference.Rule =
                  ReferenceItem
                | ReferenceFunctionCall
                | OpenParen + Reference + PreferShiftHere() + CloseParen
                | Prefix + ReferenceItem
                | DynamicDataExchange
                ;

            ReferenceFunctionCall.Rule =
                  Reference + colon + Reference
                | Reference + intersectop + Reference
                | OpenParen + Union + CloseParen
                | RefFunctionName + Arguments + CloseParen
                | Reference + hash
                ;

            RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;

            Union.Rule = MakePlusRule(Union, comma, Reference);

            ReferenceItem.Rule =
                  Cell
                | NamedRange
                | VRange
                | HRange
                | RefError
                | UDFunctionCall
                | StructuredReference
                ;
            MarkTransient(ReferenceItem);

            UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
            UDFName.Rule = UDFToken;

            VRange.Rule = VRangeToken;
            HRange.Rule = HRangeToken;

            Cell.Rule = CellToken;

            File.Rule =
                  FileNameNumericToken
                | FileNameEnclosedInBracketsToken
                | FilePathToken + FileNameEnclosedInBracketsToken
                | FilePathToken + FileName
                ;

            DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;

            NamedRange.Rule = NameToken | NamedRangeCombinationToken;

            Prefix.Rule =
                  SheetToken
                | QuoteS + SheetQuotedToken
                | File + SheetToken
                | QuoteS + File + SheetQuotedToken
                | File + exclamationMark
                | MultipleSheetsToken
                | QuoteS + MultipleSheetsQuotedToken
                | File + MultipleSheetsToken
                | QuoteS + File + MultipleSheetsQuotedToken
                | RefErrorToken
                ;

            StructuredReferenceQualifier.Rule = NameToken;

            StructuredReferenceSpecifier.Rule =
                  SRSpecifierToken
                | OpenSquareParen + SRSpecifierToken + CloseSquareParen;

            StructuredReferenceColumn.Rule =
                  SRColumnToken
                | OpenSquareParen + SRColumnToken + CloseSquareParen;

            StructuredReferenceExpression.Rule =
                  StructuredReferenceColumn
                | StructuredReferenceColumn + colon + StructuredReferenceColumn
                | at + StructuredReferenceColumn
                | at + StructuredReferenceColumn + colon + StructuredReferenceColumn
                | StructuredReferenceSpecifier
                | StructuredReferenceSpecifier + comma + StructuredReferenceColumn
                | StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
                | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier
                | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn
                | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
                ;

            StructuredReference.Rule =
                  OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
                | StructuredReferenceQualifier + OpenSquareParen + CloseSquareParen
                | StructuredReferenceQualifier + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
                ;
            #endregion

            #region Arrays
            ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;

            ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
            ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);

            ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
            #endregion

            #endregion

            #region 5-Operator Precedence
            // Some of these operators are neutral associative instead of left associative,
            // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
            // structure of the parse tree, we like consistency.
            RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
            RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
            RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
            RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
            RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
            RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
            RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at);
            RegisterOperators(Precedence.Union, Associativity.Left, comma);
            RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
            RegisterOperators(Precedence.Range, Associativity.Left, colon);
            #endregion
        }

        #region Precedence and Priority constants
        // Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
        // Could also be an enum, but this way you don't need int casts
        private static class Precedence
        {
            // Don't use priority 0, Irony seems to view it as no priority set
            public const int Comparison = 1;
            public const int Concatenation = 2;
            public const int Addition = 3;
            public const int Multiplication = 4;
            public const int Exponentiation = 5;
            public const int UnaryPostFix = 6;
            public const int UnaryPreFix = 7;
            //public const int Reference = 8;
            public const int Union = 9;
            public const int Intersection = 10;
            public const int Range = 11;
        }

        // Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
        // E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
        // E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
        private static class TerminalPriority
        {
            // Irony Low value
            //public const int Low = -1000;

            public const int Name = -800;
            public const int ReservedName = -700;

            public const int StructuredReference = -500;

            public const int FileName = -500;
            public const int FileNamePath = -800;

            public const int SingleQuotedString = -100;

            // Irony Normal value, default value
            //public const int Normal = 0;
            public const int Bool = 0;

            public const int MultipleSheetsToken = 100;

            // Irony High value
            //public const int High = 1000;

            public const int CellToken = 1000;

            public const int NamedRangeCombination = 1100;

            public const int UDF = 1150;

            public const int ExcelFunction = 1200;
            public const int ExcelRefFunction = 1200;
            public const int FileNameNumericToken = 1200;
            public const int SheetToken = 1200;
            public const int SheetQuotedToken = 1200;
        }
        #endregion

        // Expression-bodied property: the resource text is read and split again on every access.
        // NOTE(review): kept as a property on purpose - turning it into a static field could
        // change static initialization order for members declared earlier in the file; confirm
        // before caching.
        private static string[] excelFunctionList => GetExcelFunctionList();

        /// <summary>
        /// Reads the embedded v163 built-in function list and splits it on line breaks,
        /// dropping empty entries.
        /// </summary>
        private static string[] GetExcelFunctionList()
        {
            var resource = Properties.Resources.ExcelBuiltinFunctionList_v163;
            using (var sr = new StringReader(resource))
                return sr.ReadToEnd().Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
        }
    }

    #region Names
    /// <summary>
    /// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
    /// </summary>
    /// <remarks>
    /// Using these is strongly recommended, as these will change when breaking changes occur.
    /// It also allows you to see which code works on what grammar constructs.
    /// </remarks>
    // Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
    {
        #region Non-Terminals
        // Names passed to the NonTerminal constructors of the grammar; a parse tree node's
        // type is compared against these via node.Term.Name.
        public const string Argument = "Argument";
        public const string Arguments = "Arguments";
        public const string ArrayColumns = "ArrayColumns";
        public const string ArrayConstant = "ArrayConstant";
        public const string ArrayFormula = "ArrayFormula";
        public const string ArrayRows = "ArrayRows";
        public const string Bool = "Bool";
        public const string Cell = "Cell";
        public const string Constant = "Constant";
        public const string ConstantArray = "ConstantArray";
        public const string DynamicDataExchange = "DynamicDataExchange";
        public const string EmptyArgument = "EmptyArgument";
        public const string Error = "Error";
        public const string ExcelFunction = "ExcelFunction";
        public const string File = "File";
        public const string Formula = "Formula";
        public const string FormulaWithEq = "FormulaWithEq";
        public const string FunctionCall = "FunctionCall";
        public const string FunctionName = "FunctionName";
        // Note: the constant is called HorizontalRange but the node name is "HRange".
        public const string HorizontalRange = "HRange";
        public const string MultiRangeFormula = "MultiRangeFormula";
        public const string NamedRange = "NamedRange";
        public const string Number = "Number";
        public const string Prefix = "Prefix";
        public const string QuotedFileSheet = "QuotedFileSheet";
        public const string Range = "Range";
        public const string Reference = "Reference";
        public const string ReferenceFunctionCall = "ReferenceFunctionCall";
        public const string RefError = "RefError";
        public const string RefFunctionName = "RefFunctionName";
        public const string ReservedName = "ReservedName";
        public const string Sheet = "Sheet";
        public const string StructuredReference = "StructuredReference";
        public const string StructuredReferenceColumn = "StructuredReferenceColumn";
        public const string StructuredReferenceExpression = "StructuredReferenceExpression";
        public const string StructuredReferenceSpecifier = "StructuredReferenceSpecifier";
        public const string StructuredReferenceQualifier = "StructuredReferenceQualifier";
        public const string Text = "Text";
        public const string UDFName = "UDFName";
        public const string UDFunctionCall = "UDFunctionCall";
        public const string Union = "Union";
        // Note: the constant is called VerticalRange but the node name is "VRange".
        public const string VerticalRange = "VRange";
        #endregion

        #region Transient Non-Terminals
        // Names of the non-terminals that the grammar marks as transient (MarkTransient).
        public const string TransientStart = "Start";
        public const string TransientInfixOp = "InfixOp";
        public const string TransientPostfixOp = "PostfixOp";
        public const string TransientPrefixOp = "PrefixOp";
        public const string TransientReferenceItem = "ReferenceItem";
        #endregion

        #region Terminals
        // Names of the lexer tokens.
        public const string TokenBool = "BoolToken";
        public const string TokenCell = "CellToken";
        public const string TokenEmptyArgument = "EmptyArgumentToken";
        public const string TokenError = "ErrorToken";
        public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
        public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
        public const string TokenFilePath = "FilePathToken";
        public const string TokenFileName = "FileNameToken";
        public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
        public const string TokenFileNameNumeric = "FileNameNumericToken";
        public const string TokenHRange = "HRangeToken";
        // Also returned by GetFunction as the function name of an intersection.
        public const string TokenIntersect = "INTERSECT";
        public const string TokenMultipleSheets = "MultipleSheetsToken";
        public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
        public const string TokenName = "NameToken";
        public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
        public const string TokenNumber = "NumberToken";
        public const string TokenRefError = "RefErrorToken";
        public const string TokenReservedName = "ReservedNameToken";
        public const string TokenSingleQuotedString = "SingleQuotedString";
        public const string TokenSheet = "SheetNameToken";
        public const string TokenSheetQuoted = "SheetNameQuotedToken";
        public const string TokenSRColumn = "SRColumnToken";
        public const string TokenSRSpecifier = "SRSpecifierToken";
        public const string TokenText = "TextToken";
        public const string TokenUDF = "UDFToken";
        // Also returned by GetFunction as the function name of a union.
        public const string TokenUnionOperator = ",";
        public const string TokenVRange = "VRangeToken";
        #endregion

    }
    #endregion
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v163/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v163
{
    /// <summary>
    /// Excel formula parser <br/>
    /// Contains the parser and utilities that operate directly on the parse tree, or make working with the parse tree easier.
    /// </summary>
public static class ExcelFormulaParser
    {
        /// <summary>
        /// Thread-local singleton parser instance
        /// </summary>
        [ThreadStatic] private static Parser _p;

        /// <summary>
        /// Thread-safe parser: created lazily on first use, once per thread (the backing field is [ThreadStatic])
        /// </summary>
        private static Parser P => _p ?? (_p = new Parser(new ExcelFormulaGrammar()));

        /// <summary>
        /// Parse a formula, return the tree's root node
        /// </summary>
        /// <param name="input">The formula to be parsed.</param>
        /// <exception cref="ArgumentException">
        /// If formula could not be parsed
        /// </exception>
        /// <returns>Parse tree root node</returns>
        public static ParseTreeNode Parse(string input)
        {
            return ParseToTree(input).Root;
        }

        /// <summary>
        /// Parse a formula, return the tree
        /// </summary>
        /// <param name="input">The formula to be parsed.</param>
        /// <exception cref="ArgumentException">
        /// If formula could not be parsed
        /// </exception>
        /// <returns>Parse tree</returns>
        public static ParseTree ParseToTree(string input)
        {
            var tree = P.Parse(input);

            if (tree.HasErrors())
            {
                throw new ArgumentException("Failed parsing input <<" + input + ">>");
            }

            // Move the span of every intersect token one position/column to the left and give it length 1.
            // NOTE(review): presumably the reported location is one past the whitespace that acts
            // as the intersection operator - confirm against the intersect terminal.
            var intersects = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenIntersect));
            foreach (ParseTreeNode intersect in intersects)
            {
                var newLocation = new SourceLocation(intersect.Span.Location.Position - 1, intersect.Span.Location.Line, intersect.Span.Location.Column - 1);
                intersect.Span = new SourceSpan(newLocation, 1);
            }

            var quotedSheetNodes = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenSheetQuoted));
            foreach (ParseTreeNode quotedSheetNode in quotedSheetNodes)
            {
                PrefixInfo.FixQuotedSheetNodeForWhitespace(quotedSheetNode, input);
            }

            return tree;
        }

        /// <summary>
        /// Nodes in depth-first pre-order, with a conditional stop
        /// </summary>
        /// <param name="root">The root node</param>
        /// <param name="stopAt">Don't process the children of a node matching this predicate</param>
        // inspiration taken from https://irony.codeplex.com/discussions/213938
        public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
        {
            var stack = new Stack<ParseTreeNode>();
            stack.Push(root);
            while (stack.Count > 0)
            {
                var node = stack.Pop();
                // The matching node itself is still returned; only its children are skipped
                yield return node;

                // Check if we don't want to process the children of this node
                if (stopAt != null && stopAt(node)) continue;

                var children = node.ChildNodes;
                // Push children on in reverse order so that they will
                // be evaluated left -> right when popped.
                for (int i = children.Count - 1; i >= 0; i--)
                {
                    stack.Push(children[i]);
                }
            }
        }

        /// <summary>
        /// All nodes (terminals included) in depth-first pre-order
        /// </summary>
        public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
        {
            return AllNodesConditional(root);
        }

        /// <summary>
        /// All nodes of a certain type in depth-first pre-order
        /// </summary>
        public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
        {
            return AllNodes(root.AllNodes(), type);
        }

        internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
        {
            return allNodes.Where(node => node.Is(type));
        }

        /// <summary>
        /// Get the parent node of a node
        /// </summary>
        /// <remarks>
        /// This is an expensive operation, as the whole tree will be searched through
        /// </remarks>
        /// <exception cref="ArgumentException">If no node below <paramref name="treeRoot"/> has the child among its children</exception>
        public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
        {
            var parent = treeRoot.AllNodes()
                .FirstOrDefault(node => node.ChildNodes.Any(c => c == child));
            if (parent == null) throw new ArgumentException("Child is not part of the tree", nameof(child));
            return parent;
        }

        /// <summary>
        /// The node type/name
        /// </summary>
        public static string Type(this ParseTreeNode node)
        {
            return node.Term.Name;
        }

        /// <summary>
        /// Check if a node is of a particular type
        /// </summary>
        public static bool Is(this ParseTreeNode pt, string type)
        {
            return pt.Type() == type;
        }

        /// <summary>
        /// Checks whether this node is a function
        /// </summary>
        public static Boolean IsFunction(this ParseTreeNode input)
        {
            return input.Is(GrammarNames.FunctionCall)
                || input.Is(GrammarNames.ReferenceFunctionCall)
                || input.Is(GrammarNames.UDFunctionCall)
                // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
                || (input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction())
                ;
        }

        /// <summary>
        /// Whether or not this node represents parentheses "(_)"
        /// </summary>
        public static bool IsParentheses(this ParseTreeNode input)
        {
            // The parenthesis tokens are punctuation, so a parenthesised node has a single
            // child of its own type.
            switch (input.Type())
            {
                case GrammarNames.Formula:
                    return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Formula);
                case GrammarNames.Reference:
                    return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Reference);
                default:
                    return false;
            }
        }

        /// <summary>
        /// Whether this node is a function with three children of which the middle one is an operator
        /// </summary>
        public static bool IsBinaryOperation(this ParseTreeNode input)
        {
            return input.IsFunction()
                && input.ChildNodes.Count == 3
                && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
        }

        /// <summary>
        /// Whether this node is a binary operation of type FunctionCall
        /// </summary>
        public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
        {
            return input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);
        }

        /// <summary>
        /// Whether this node is a binary operation of type ReferenceFunctionCall
        /// </summary>
        public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
        {
            return input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
        }

        /// <summary>
        /// Whether this node is a unary prefix or unary postfix operation
        /// </summary>
        public static bool IsUnaryOperation(this ParseTreeNode input)
        {
            return IsUnaryPrefixOperation(input) || IsUnaryPostfixOperation(input);
        }

        /// <summary>
        /// Whether this node is a function with two children of which the first one is an operator
        /// </summary>
        public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
        {
            return input.IsFunction()
                && input.ChildNodes.Count == 2
                && input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator);
        }

        /// <summary>
        /// Whether this node is a function with two children of which the second one is an operator
        /// </summary>
        public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
        {
            return input.IsFunction()
                && input.ChildNodes.Count == 2
                && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
        }

        // Drops the last character of the input
        private static string RemoveFinalSymbol(string input)
        {
            input = input.Substring(0, input.Length - 1);
            return input;
        }

        /// <summary>
        /// Get the function or operator name of this function call
        /// </summary>
        /// <exception cref="ArgumentException">If the node is not a function call</exception>
        public static string GetFunction(this ParseTreeNode input)
        {
            if (input.IsIntersection())
            {
                return GrammarNames.TokenIntersect;
            }
            if (input.IsUnion())
            {
                return GrammarNames.TokenUnionOperator;
            }
            if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
            {
                return input.ChildNodes[1].Print();
            }
            if (input.IsUnaryPrefixOperation())
            {
                return input.ChildNodes[0].Print();
            }
            if (input.IsNamedFunction())
            {
                // Upper-case culture-independently: with ToUpper() a name such as "if" becomes
                // "İF" under a Turkish culture and no longer compares equal to "IF".
                return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
            }
            if (input.IsExternalUDFunction())
            {
                return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
            }

            throw new ArgumentException("Not a function call", nameof(input));
        }

        /// <summary>
        /// Check if this node is a specific function
        /// </summary>
        public static bool MatchFunction(this ParseTreeNode input, string functionName)
        {
            return IsFunction(input) && GetFunction(input) == functionName;
        }

        /// <summary>
        /// Get all the arguments of a function or operation
        /// </summary>
        /// <exception cref="ArgumentException">If the node is not a function call</exception>
        public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
        {
            if (input.IsNamedFunction())
            {
                return input
                    .ChildNodes[1] // "Arguments" non-terminal
                    .ChildNodes    // "Argument" non-terminals
                    .Select(node => node.ChildNodes[0])
                    ;
            }
            if (input.IsBinaryOperation())
            {
                return new[] {input.ChildNodes[0], input.ChildNodes[2]};
            }
            if (input.IsUnaryPrefixOperation())
            {
                return new[] {input.ChildNodes[1]};
            }
            if (input.IsUnaryPostfixOperation())
            {
                return new[] {input.ChildNodes[0]};
            }
            if (input.IsUnion())
            {
                return input.ChildNodes[0].ChildNodes;
            }
            if (input.IsExternalUDFunction())
            {
                return input // Reference
                    .ChildNodes[1] // UDFunctionCall
                    .ChildNodes[1] // Arguments
                    .ChildNodes // Argument non-terminals
                    .Select(node => node.ChildNodes[0])
                    ;
            }
            throw new ArgumentException("Not a function call", nameof(input));
        }

        /// <summary>
        /// Checks whether this node is a built-in excel function
        /// </summary>
        public static bool IsBuiltinFunction(this ParseTreeNode node)
        {
            return node.IsFunction() &&
                (node.ChildNodes[0].Is(GrammarNames.FunctionName) || node.ChildNodes[0].Is(GrammarNames.RefFunctionName));
        }

        /// <summary>
        /// Whether or not this node represents an intersection
        /// </summary>
        public static bool IsIntersection(this ParseTreeNode input)
        {
            return IsBinaryOperation(input) &&
                   input.ChildNodes[1].Token.Terminal.Name == GrammarNames.TokenIntersect;
        }

        /// <summary>
        /// Whether or not this node represents a union
        /// </summary>
        public static bool IsUnion(this ParseTreeNode input)
        {
            return input.Is(GrammarNames.ReferenceFunctionCall)
                   && input.ChildNodes.Count == 1
                   && input.ChildNodes[0].Is(GrammarNames.Union);
        }

        /// <summary>
        /// Checks whether this node is a function call with name, and not just a unary or binary operation
        /// </summary>
public static bool IsNamedFunction(this ParseTreeNode input)
        {
            return (input.Is(GrammarNames.FunctionCall) && input.ChildNodes[0].Is(GrammarNames.FunctionName))
                || (input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes[0].Is(GrammarNames.RefFunctionName))
                || input.Is(GrammarNames.UDFunctionCall);
        }

        /// <summary>
        /// Whether this node is a binary or unary operation
        /// </summary>
        public static bool IsOperation(this ParseTreeNode input)
        {
            return input.IsBinaryOperation() || input.IsUnaryOperation();
        }

        /// <summary>
        /// Whether this node is a Reference with two children of which the second is a named function
        /// </summary>
        public static bool IsExternalUDFunction(this ParseTreeNode input)
        {
            return input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsNamedFunction();
        }

        /// <summary>
        /// True if this node presents a number constant with a sign
        /// </summary>
        public static bool IsNumberWithSign(this ParseTreeNode input)
        {
            return IsUnaryPrefixOperation(input)
                   && input.ChildNodes[1].ChildNodes[0].Is(GrammarNames.Constant)
                   && input.ChildNodes[1].ChildNodes[0].ChildNodes[0].Is(GrammarNames.Number);
        }

        /// <summary>
        /// Extract all of the information from a Prefix non-terminal
        /// </summary>
        public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix) => PrefixInfo.From(prefix);

        /// <summary>
        /// Go to the first non-formula child node
        /// </summary>
        public static ParseTreeNode SkipFormula(this ParseTreeNode input)
        {
            while (input.Is(GrammarNames.Formula))
            {
                input = input.ChildNodes.First();
            }
            return input;
        }

        /// <summary>
        /// Get all child nodes that are references and aren't part of another reference expression
        /// </summary>
        public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
        {
            // Traversal stops at every Reference node, so references nested inside another
            // reference are not returned; the filter then keeps only the Reference nodes.
            return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
                .Where(node => node.Is(GrammarNames.Reference))
                .Select(node => node.SkipToRelevant())
                ;
        }

        /// <summary>
        /// Gets the ParserReferences from the input parse tree node and its children
        /// </summary>
        /// <remarks>
        /// 5 cases:
        /// 1. ReferenceItem node: convert to ParserReference
        /// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
        ///    (to include the references in the arguments of external UDFs)
        /// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
        /// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
        /// 5. Other cases (RefFunctionCall, Union, Arguments):recursive call on the nodes returned from GetReferenceNodes(node)
        /// </remarks>
        public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
        {
            // Unwrap every single-child Reference (plain wrappers and parentheses). Unwrapping
            // only one level left a parenthesised reference such as "(A1)" as a single-child
            // Reference, which then failed on ChildNodes[1] in the Reference case below.
            while (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
            {
                node = node.ChildNodes[0];
            }

            var list = new List<ParserReference>();

            switch (node.Type())
            {
                case GrammarNames.Cell:
                case GrammarNames.NamedRange:
                case GrammarNames.HorizontalRange:
                case GrammarNames.VerticalRange:
                case GrammarNames.StructuredReference:
                    list.Add(new ParserReference(node));
                    break;
                case GrammarNames.Reference:
                    // Prefix + ReferenceItem
                    list.Add(new ParserReference(node));
                    list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
                    break;
                default:
                    if (node.IsRange())
                    {
                        var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray();
                        var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray();
                        if (IsCellReference(rangeStart) && IsCellReference(rangeEnd))
                        {
                            // Cell:Cell - reuse the start reference and extend it to a cell range
                            ParserReference range = rangeStart.First();
                            range.MaxLocation = rangeEnd.First().MinLocation;
                            range.ReferenceType = ReferenceType.CellRange;
                            range.ReferenceNode = node;
                            range.LocationString = node.Print();
                            list.Add(range);
                        }
                        else if (IsTableReference(rangeStart) && IsTableReference(rangeEnd) && rangeStart.First().Name == rangeEnd.First().Name && rangeStart.First().TableColumns.Length == 1 && rangeEnd.First().TableColumns.Length == 1)
                        {
                            // Two single-column references to the same table: merge into one reference
                            ParserReference range = rangeStart.First();
                            range.TableColumns = rangeStart.First().TableColumns.Concat(rangeEnd.First().TableColumns).ToArray();
                            // Keep the specifiers only when both limits agree on them
                            range.TableSpecifiers = rangeStart.First().TableSpecifiers.SequenceEqual(rangeEnd.First().TableSpecifiers) ? range.TableSpecifiers : new string[0];
                            range.ReferenceNode = node;
                            range.LocationString = node.Print();
                            list.Add(range);
                        }
                        else
                        {
                            list.AddRange(rangeStart);
                            list.AddRange(rangeEnd);
                        }
                    }
                    else
                    {
                        list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
                    }
                    break;
            }
            return list;
        }

        // True when the list holds exactly one reference and it is of type Cell
        private static bool IsCellReference(IList<ParserReference> references)
        {
            return references.Count == 1 && references.First().ReferenceType == ReferenceType.Cell;
        }

        // True when the list holds exactly one reference and it is of type Table
        private static bool IsTableReference(IList<ParserReference> references)
        {
            return references.Count == 1 && references.First().ReferenceType == ReferenceType.Table;
        }

        /// <summary>
        /// Whether or not this node represents a range
        /// </summary>
        public static bool IsRange(this ParseTreeNode input)
        {
            return input.IsBinaryReferenceOperation() &&
                   input.ChildNodes[1].Is(":");
        }

        /// <summary>
        /// Go to the first "relevant" child node, i.e. skips wrapper nodes
        /// </summary>
        /// <param name="input">The input parse tree node</param>
        /// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
        /// <remarks>
        /// Skips:
        ///  * FormulaWithEq and ArrayFormula nodes
        ///  * Formula nodes
        ///  * Parentheses
        ///  * Reference nodes which are just wrappers
        /// </remarks>
        public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
        {
            while (true)
            {
                switch (input.Type())
                {
                    case GrammarNames.FormulaWithEq:
                    case GrammarNames.ArrayFormula:
                        // Child 0 is the "=" token, child 1 the formula
                        input = input.ChildNodes[1];
                        break;
                    case GrammarNames.Argument:
                    case GrammarNames.Formula:
                        if (input.ChildNodes.Count == 1)
                        {
                            input = input.ChildNodes[0];
                        }
                        else
                        {
                            return input;
                        }
                        break;
                    case GrammarNames.Reference:
                        // Skip references which are parentheses
                        // Skip references without a prefix (=> they only have one child node) if the option is set
                        if ((skipReferencesWithoutPrefix && input.ChildNodes.Count == 1) || input.IsParentheses())
                        {
                            input = input.ChildNodes[0];
                        }
                        else
                        {
                            return input;
                        }
                        break;
                    default:
                        return input;
                }
            }
        }

        ///
        /// Pretty-print a parse tree to a string
///
        public static string Print(this ParseTreeNode input)
        {
            // For terminals, just print the token text
            if (input.Term is Terminal)
            {
                return input.Token.Text;
            }

            // (Lazy) enumerable for printed children
            var children = input.ChildNodes.Select(Print);
            // Concrete list when needed
            List<string> childrenList;

            // Switch on non-terminals
            switch (input.Term.Name)
            {
                case GrammarNames.Formula:
                    // Check if these are brackets, otherwise print first child
                    return IsParentheses(input) ? $"({children.First()})" : children.First();

                case GrammarNames.FunctionCall:
                case GrammarNames.ReferenceFunctionCall:
                case GrammarNames.UDFunctionCall:
                    childrenList = children.ToList();

                    if (input.IsNamedFunction())
                    {
                        // Only the closing parenthesis is appended here; the children are joined as printed
                        return string.Join("", childrenList) + ")";
                    }

                    if (input.IsBinaryOperation())
                    {
                        // format string for "normal" binary operation
                        string format = "{0}{1}{2}";
                        if (input.IsIntersection())
                        {
                            // Intersections are printed as a single space between the operands
                            format = "{0} {2}";
                        }

                        return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
                    }

                    if (input.IsUnion())
                    {
                        return $"({string.Join(",", childrenList)})";
                    }

                    if (input.IsUnaryOperation())
                    {
                        return string.Join("", childrenList);
                    }

                    throw new ArgumentException("Unknown function type.");

                case GrammarNames.Reference:
                    return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);

                case GrammarNames.Prefix:
                    var ret = string.Join("", children);
                    // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
                    if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
                    {
                        ret += "!";
                    }

                    return ret;

                case GrammarNames.ArrayFormula:
                    return "{=" + children.ElementAt(1) + "}";

                // Terms for which to print all child nodes concatenated
                case GrammarNames.ArrayConstant:
                case GrammarNames.DynamicDataExchange:
                case GrammarNames.FormulaWithEq:
                case GrammarNames.File:
                case GrammarNames.MultiRangeFormula:
                case GrammarNames.StructuredReference:
                case GrammarNames.StructuredReferenceColumn:
                case GrammarNames.StructuredReferenceExpression:
                case GrammarNames.StructuredReferenceSpecifier:
                    return string.Join("", children);

                // Terms for which we print the children comma-separated
                case GrammarNames.Arguments:
                case GrammarNames.ArrayRows:
                case GrammarNames.Union:
                    return string.Join(",", children);

                case GrammarNames.ArrayColumns:
                    return string.Join(";", children);

                case GrammarNames.ConstantArray:
                    return $"{{{children.First()}}}";

                default:
                    // If it is not defined above and the number of children is exactly one, we want to just print the first child
                    if (input.ChildNodes.Count == 1)
                    {
                        return children.First();
                    }

                    throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine +
                                                "This probably means the Excel grammar was modified without the print function being modified");
            }
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v163/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v163
{
    /// <summary>
    /// The kinds of reference a <see cref="ParserReference"/> can describe
    /// </summary>
    public enum ReferenceType
    {
        Cell,
        CellRange,
        UserDefinedName,
        HorizontalRange,
        VerticalRange,
        RefError,
        Table
    }

    /// <summary>
    /// Flat description of a single reference found in a parse tree
    /// </summary>
    public class ParserReference
    {
        public ReferenceType ReferenceType { get; set; }
        public ParseTreeNode ReferenceNode { get; set; }
        public string LocationString { get; set; }
        public string Worksheet { get; set; }
        public string LastWorksheet { get; set; }
        public string FilePath { get; set; }
        public string FileName { get; set; }
        public string Name { get; set; }
        public string MinLocation { get; set; }
        public string MaxLocation { get; set; }
        public string[] TableSpecifiers { get; set; }
        public string[] TableColumns { get; set; }

        public ParserReference(ParseTreeNode node)
        {
            InitializeReference(node);
        }

        /// <summary>
        /// Initializes the current object based on the input ParseTreeNode
        /// </summary>
        /// <remarks>
        /// For Reference nodes (Prefix ReferenceItem), it initialize the values derived from the Prefix node and
        /// is re-invoked for the ReferenceItem node.
        /// </remarks>
        public void InitializeReference(ParseTreeNode node)
        {
            switch (node.Type())
            {
                case GrammarNames.Reference:
                    PrefixInfo prefix = node.ChildNodes[0].GetPrefixInfo();
                    // A doubled single quote in a sheet name stands for one literal quote
                    Worksheet = prefix.HasSheet ? prefix.Sheet.Replace("''", "'") : "(Undefined sheet)";

                    if (prefix.HasMultipleSheets)
                    {
                        // "First:Last" => first and last sheet of the sheet range
                        string[] sheets = prefix.MultipleSheets.Split(':');
                        Worksheet = sheets[0];
                        LastWorksheet = sheets[1];
                    }

                    if (prefix.HasFilePath)
                    {
                        FilePath = prefix.FilePath;
                    }

                    if (prefix.HasFileNumber)
                    {
                        FileName = prefix.FileNumber.ToString();
                    }
                    else if (prefix.HasFileName)
                    {
                        FileName = prefix.FileName;
                    }

                    // Fill in the values that come from the ReferenceItem; ReferenceNode and LocationString
                    // are overwritten below so that they describe the whole (prefixed) reference.
                    InitializeReference(node.ChildNodes[1]);
                    break;
                case GrammarNames.Cell:
                    ReferenceType = ReferenceType.Cell;
                    MinLocation = node.ChildNodes[0].Token.ValueString;
                    MaxLocation = MinLocation;
                    break;
                case GrammarNames.NamedRange:
                    ReferenceType = ReferenceType.UserDefinedName;
                    Name = node.ChildNodes[0].Token.ValueString;
                    break;
                case GrammarNames.StructuredReference:
                    ReferenceType = ReferenceType.Table;
                    // Name stays null when the structured reference has no qualifier node
                    Name = node.ChildNodes.FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceQualifier)?.ChildNodes[0].Token.ValueString;
                    TableSpecifiers = node.AllNodes().Where(x => x.Is(GrammarNames.TokenSRSpecifier) || x.Is("@")).Select(x => UnEscape(x.Token.ValueString, "'")).ToArray();
                    TableColumns = node.AllNodes().Where(x => x.Is(GrammarNames.TokenSRColumn)).Select(x => UnEscape(x.Token.ValueString, "'")).ToArray();
                    break;
                case GrammarNames.HorizontalRange:
                    string[] horizontalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                    ReferenceType = ReferenceType.HorizontalRange;
                    MinLocation = horizontalLimits[0];
                    MaxLocation = horizontalLimits[1];
                    break;
                case GrammarNames.VerticalRange:
                    string[] verticalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                    ReferenceType = ReferenceType.VerticalRange;
                    MinLocation = verticalLimits[0];
                    MaxLocation = verticalLimits[1];
                    break;
                case GrammarNames.RefError:
                    ReferenceType = ReferenceType.RefError;
                    break;
            }

            ReferenceNode = node;
            LocationString = node.Print();
        }

        /// <summary>
        /// Removes every occurrence of the escape character that is not directly followed by another
        /// escape character.
        /// </summary>
        /// <param name="value">The text to un-escape</param>
        /// <param name="escapeCharacter">The escape character; it is matched literally</param>
        private string UnEscape(string value, string escapeCharacter)
        {
            // Regex.Escape keeps the pattern valid should the escape character ever be a regex metacharacter
            string escaped = System.Text.RegularExpressions.Regex.Escape(escapeCharacter);
            return System.Text.RegularExpressions.Regex.Replace(value, $"{escaped}(?!{escaped})", "");
        }

        public override string ToString()
        {
            return LocationString;
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v163/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v163
{
    // TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23
    /// <summary>
    /// Simple data class that holds information about a Prefix.
    /// </summary>
    ///
    public class PrefixInfo : IEquatable<PrefixInfo>
    {
        public string FilePath { get; }
        public bool HasFilePath => FilePath != null;

        private readonly int? _fileNumber;
        // 0 when no file number is set; check HasFileNumber first
        public int FileNumber => _fileNumber.GetValueOrDefault();
        public bool HasFileNumber => _fileNumber.HasValue;

        public string FileName { get; }
        public bool HasFileName => FileName != null;

        public bool HasFile => HasFileName || HasFileNumber;

        public string Sheet { get; }
        public bool HasSheet => Sheet != null;

        public string MultipleSheets { get; }
        public bool HasMultipleSheets => MultipleSheets != null;

        public bool IsQuoted { get; }

        public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
        {
            Sheet = sheet;
            _fileNumber = fileNumber;
            FileName = fileName;
            FilePath = filePath;
            MultipleSheets = multipleSheets;
            IsQuoted = isQuoted;
        }

        /// <summary>
        /// Create a PrefixInfo class from a parse tree node
        /// </summary>
        /// <param name="prefix">A parse tree node of type Prefix</param>
        /// <exception cref="ArgumentException">The node is not a Prefix node</exception>
        internal static PrefixInfo From(ParseTreeNode prefix)
        {
            if (prefix.Type() != GrammarNames.Prefix)
            {
                throw new ArgumentException("Not a prefix", nameof(prefix));
            }

            string filePath = null;
            int? fileNumber = null;
            string fileName = null;
            string sheetName = null;
            string multipleSheets = null;

            // Token number we're processing
            var cur = 0;

            // Check for quotes
            var isQuoted = prefix.ChildNodes[cur].Is("'");
            if (isQuoted)
            {
                cur++;
            }

            // Check and process file
            if (prefix.ChildNodes[cur].Is(GrammarNames.File))
            {
                ParseTreeNode file = prefix.ChildNodes[cur];

                if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
                {
                    // Numeric filename: strip the surrounding brackets before parsing
                    fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
                }
                else
                {
                    // String filename
                    var iCur = 0;
                    // Check if it includes a path
                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                    {
                        filePath = file.ChildNodes[iCur].Print();
                        iCur++;
                    }

                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                    {
                        // Strip the surrounding brackets
                        fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                    }
                    else
                    {
                        fileName = file.ChildNodes[iCur].Print();
                    }
                }

                cur++;
            }

            // A prefix can consist of a file only (Print handles a Prefix whose single child is a File node).
            // In that case there is no node left to inspect, and indexing ChildNodes[cur] would throw.
            if (cur < prefix.ChildNodes.Count)
            {
                ParseTreeNode sheetNode = prefix.ChildNodes[cur];

                // Check for a non-quoted sheet
                if (sheetNode.Is(GrammarNames.TokenSheet))
                {
                    // remove !
                    sheetName = Substr(sheetNode.Print(), 1);
                }
                // Check for a quoted sheet
                else if (sheetNode.Is(GrammarNames.TokenSheetQuoted))
                {
                    // remove quote and !
                    sheetName = Substr(sheetNode.Print(), 2);

                    if (sheetName == "")
                    {
                        // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                        // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                        sheetName = " ";
                    }
                }
                // Check if multiple sheets
                else if (sheetNode.Is(GrammarNames.TokenMultipleSheets))
                {
                    // remove !
                    multipleSheets = Substr(sheetNode.Print(), 1);
                }
            }

            return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
        }

        /// <summary>
        /// Extends a quoted sheet node backwards so that it also covers the whitespace directly in front of it
        /// in the source text, updating both the node's span and its token text.
        /// </summary>
        /// <param name="quotedSheetNode">The quoted sheet node to adjust</param>
        /// <param name="sourceText">The complete source text the node was parsed from</param>
        internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
        {
            var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
            SourceLocation currentLocation = quotedSheetNode.Span.Location;
            if (newPosition == currentLocation.Position)
            {
                return;
            }

            // The start moves backwards by this many characters, so the column has to move backwards as well.
            // The line number is kept as is; the column is clamped so it can never become negative.
            var shift = currentLocation.Position - newPosition;
            var newLocation = new SourceLocation(newPosition, currentLocation.Line, Math.Max(0, currentLocation.Column - shift));
            quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

            // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
            typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
                ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
        }

        /// <summary>
        /// Walks backwards from the node's start position over all directly preceding whitespace
        /// and returns the position of the first whitespace character (or the original position if there is none).
        /// </summary>
        private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
        {
            var startIndex = nodeSheetQuoted.Span.Location.Position;
            while (startIndex > 0)
            {
                if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
                {
                    break;
                }

                startIndex--;
            }

            return startIndex;
        }

        /// <summary>
        /// Returns <paramref name="s"/> without its first <paramref name="removeFirst"/> and last
        /// <paramref name="removeLast"/> characters.
        /// </summary>
        private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
        {
            return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
        }

        public override bool Equals(object other) => Equals(other as PrefixInfo);

        /// <summary>
        /// Two prefixes are equal when file number, file path, file name, sheet and multiple sheets match
        /// (strings are compared case-insensitively). IsQuoted is not part of the comparison.
        /// </summary>
        public bool Equals(PrefixInfo other)
        {
            if (ReferenceEquals(null, other)) return false;
            if (ReferenceEquals(this, other)) return true;
            return _fileNumber == other._fileNumber
                   && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
        }

        public override int GetHashCode()
        {
            // Uses the same case-insensitive comparison as Equals so that equal prefixes hash alike
            unchecked
            {
                var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
                hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
                hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
                hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
                hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
                return hashCode;
            }
        }

        public static bool operator ==(PrefixInfo left, PrefixInfo right)
        {
            return Equals(left, right);
        }

        public static bool operator !=(PrefixInfo left, PrefixInfo right)
        {
            return !Equals(left, right);
        }

        public override string ToString()
        {
            var res = new StringBuilder();
            if (IsQuoted) res.Append("'");
            if (HasFilePath) res.Append(FilePath);
            if (HasFileNumber) res.Append($"[{FileNumber}]");
            if (HasFileName) res.Append($"[{FileName}]");
            if (HasSheet) res.Append(Sheet);
            if (HasMultipleSheets) res.Append(MultipleSheets);
            if (IsQuoted) res.Append("'");
            res.Append("!");
            return res.ToString();
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v163/WordsTerminal.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;

namespace XLParser.Web.XLParserVersions.v163
{
    /// <summary>
    /// Terminal that determines whether the input contains one of the expected words.
    /// </summary>
    /// <remarks>
    /// Children of each node are represented as an array to allow direct indexation. Do not use
    /// for words that have a large difference between low and high character of a token.
    /// </remarks>
public class WordsTerminal : Terminal
    {
        // Root of the character tree (trie) that holds all words
        private readonly Node _rootNode;
        private readonly List<string> _words;
        private bool _caseSensitive;

        /// <summary>
        /// Creates a terminal that matches any of the given words.
        /// </summary>
        /// <param name="name">Name of the terminal</param>
        /// <param name="words">The words to match; they are added to the tree in <see cref="Init"/></param>
        public WordsTerminal(string name, IEnumerable<string> words) : base(name)
        {
            _rootNode = new Node(0);
            _words = new List<string>(words);
        }

        public override void Init(GrammarData grammarData)
        {
            base.Init(grammarData);
            _caseSensitive = Grammar.CaseSensitive;

            // For case-insensitive grammars the tree stores upper-cased words; TryMatch upper-cases the input likewise
            foreach (var word in _words)
            {
                AddWordToTree(_caseSensitive ? word : word.ToUpperInvariant());
            }

            if (EditorInfo == null)
            {
                EditorInfo = new TokenEditorInfo(TokenType.Unknown, TokenColor.Text, TokenTriggers.None);
            }
        }

        public override IList<string> GetFirsts() => _words;

        /// <summary>
        /// Tries to match the longest word that starts at the current preview position.
        /// </summary>
        /// <returns>A token for the matched word, or null when no word matches</returns>
        public override Token TryMatch(ParsingContext context, ISourceStream source)
        {
            var node = _rootNode;
            // Deepest node on the walked path that completes a word. Remembering it allows falling back to a
            // shorter word when a longer candidate turns out not to match. The root is never recorded, so a
            // zero-length token is never produced.
            Node longestMatch = null;
            var input = source.Text;

            for (var i = source.PreviewPosition; i < input.Length; ++i)
            {
                var c = _caseSensitive ? input[i] : char.ToUpperInvariant(input[i]);
                node = node[c];
                if (node is null)
                {
                    break;
                }

                if (node.IsTerminal)
                {
                    longestMatch = node;
                }
            }

            if (longestMatch is null)
            {
                return null;
            }

            // Node.Length is the depth in the tree, i.e. the number of characters of the matched word
            source.PreviewPosition += longestMatch.Length;
            return source.CreateToken(OutputTerminal);
        }

        /// <summary>
        /// Adds a word to the tree, one node per character, and marks the last node as the end of a word.
        /// </summary>
        private void AddWordToTree(string word)
        {
            var node = _rootNode;
            foreach (var c in word)
            {
                node = node.GetOrAddChild(c);
            }

            node.IsTerminal = true;
        }

        /// <summary>
        /// Tree node. Children are stored in an array covering the character range
        /// [_lowChar, _highChar], indexed by (character - _lowChar).
        /// </summary>
        private class Node
        {
            private char _lowChar = '\0';
            private char _highChar = '\0';
            private Node[] _children;

            public Node(int length)
            {
                Length = length;
            }

            // True when the path from the root to this node spells a complete word
            public bool IsTerminal { get; set; }

            // Depth of the node, i.e. the number of characters on the path from the root
            public int Length { get; }

            /// <summary>
            /// The child for the given character, or null when there is none.
            /// </summary>
            public Node this[char c]
            {
                get
                {
                    if (_children is null)
                    {
                        return null;
                    }

                    if (c < _lowChar || c > _highChar)
                    {
                        return null;
                    }

                    return _children[c - _lowChar];
                }
            }

            /// <summary>
            /// Returns the child for the given character, creating it (and growing the child array) when needed.
            /// </summary>
            internal Node GetOrAddChild(char c)
            {
                // First child: the array covers exactly this one character
                if (_children is null)
                {
                    var node = new Node(Length + 1);
                    _children = new[] { node };
                    _lowChar = c;
                    _highChar = c;
                    return node;
                }

                // Character below the current range: grow the array and shift the existing children up
                var newLowChar = (char)Math.Min(_lowChar, c);
                if (newLowChar != _lowChar)
                {
                    var newChildrenCount = _highChar - newLowChar + 1;
                    Array.Resize(ref _children, newChildrenCount);
                    var ofs = _lowChar - newLowChar;
                    Array.Copy(_children, 0, _children, ofs, newChildrenCount - ofs);
                    Array.Clear(_children, 0, ofs);
                    _lowChar = newLowChar;
                    return _children[0] = new Node(Length + 1);
                }

                // Character above the current range: grow the array at the end
                var newHighChar = (char)Math.Max(_highChar, c);
                if (newHighChar != _highChar)
                {
                    var newChildrenCount = newHighChar - _lowChar + 1;
                    Array.Resize(ref _children, newChildrenCount);
                    _highChar = newHighChar;
                    return _children[newChildrenCount - 1] = new Node(Length + 1);
                }

                // Character inside the current range: reuse the slot or fill it
                var charIdx = c - _lowChar;
                var child = _children[charIdx];
                if (child is null)
                {
                    return _children[charIdx] = new Node(Length + 1);
                }

                return child;
            }
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v170/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;

namespace XLParser.Web.XLParserVersions.v170
{
    /// <summary>
    /// Contains the XLParser grammar
    /// </summary>
    [Language("Excel Formulas", "1.7.0", "Grammar for Excel Formulas")]
    public class ExcelFormulaGrammar : Grammar
    {
        #region 1-Terminals

        #region Symbols and operators

        public Terminal at => ToTerm("@");
        public Terminal comma => ToTerm(",");
        public Terminal colon => ToTerm(":");
        public Terminal hash => ToTerm("#");
        public Terminal semicolon => ToTerm(";");
        public Terminal OpenParen => ToTerm("(");
        public Terminal CloseParen => ToTerm(")");
        public Terminal CloseSquareParen => ToTerm("]");
        public Terminal OpenSquareParen => ToTerm("[");
        public Terminal exclamationMark => ToTerm("!");
        public Terminal CloseCurlyParen => ToTerm("}");
        public Terminal OpenCurlyParen => ToTerm("{");
        public Terminal QuoteS => ToTerm("'");

        public Terminal mulop => ToTerm("*");
        public Terminal plusop => ToTerm("+");
        public Terminal divop => ToTerm("/");
        public Terminal minop => ToTerm("-");
        public Terminal concatop => ToTerm("&");
        public Terminal expop => ToTerm("^");

        // Intersect op is a single space, which cannot
be parsed normally so we need an ImpliedSymbolTerminal // Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect); public Terminal percentop => ToTerm("%"); public Terminal gtop => ToTerm(">"); public Terminal eqop => ToTerm("="); public Terminal ltop => ToTerm("<"); public Terminal neqop => ToTerm("<>"); public Terminal gteop => ToTerm(">="); public Terminal lteop => ToTerm("<="); #endregion #region Literals public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE", "T", "F") { Priority = TerminalPriority.Bool }; public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None) { DefaultIntTypes = new[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt } }; public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes); public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes) { Priority = TerminalPriority.SingleQuotedString }; public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!", "#"); public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError); #endregion #region Functions private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers private const string AllUdfChars = SpecialUdfChars + @"\\.\w"; private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)"; // The following regex uses the rather exotic feature Character Class Subtraction // 
https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\("; public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) { Priority = TerminalPriority.UDF }; public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(", "I", "O") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(", "I", "C") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelFunction { get; } = new WordsTerminal(GrammarNames.ExcelFunction, excelFunctionList.Select(f => f + '(')) { Priority = TerminalPriority.ExcelFunction }; // Using this instead of Empty allows a more accurate tree public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument); #endregion #region References and names private const string ColumnPattern = @"(?:[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D]|[A-Z]{1,2})"; private const string RowPattern = @"(?:104857[0-6]|10485[0-6][0-9]|1048[0-4][0-9]{2}|104[0-7][0-9]{3}|10[0-3][0-9]{4}|[1-9][0-9]{1,5}|[1-9])"; private static readonly string[] ColumnPrefix = Enumerable.Range('A', 'Z' - 'A' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray(); private static readonly string[] RowPrefix = Enumerable.Range('1', '9' - '1' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray(); public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern, ColumnPrefix); public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?" 
+ RowPattern + ":[$]?" + RowPattern, RowPrefix); private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?" + RowPattern; public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex, ColumnPrefix) { Priority = TerminalPriority.CellToken }; private static readonly HashSet UnicodeLetterCategories = new HashSet { UnicodeCategory.UppercaseLetter, UnicodeCategory.LowercaseLetter, UnicodeCategory.TitlecaseLetter, UnicodeCategory.ModifierLetter, UnicodeCategory.OtherLetter }; // 48718 letters, but it allows parser to from tokens starting with digits, parentheses, operators... private static readonly string[] UnicodeLetters = Enumerable.Range(0, ushort.MaxValue).Where(codePoints => UnicodeLetterCategories.Contains(CharUnicodeInfo.GetUnicodeCategory((char)codePoints))).Select(codePoint => char.ToString((char)codePoint)).ToArray(); private static readonly string[] NameStartCharPrefix = UnicodeLetters.Concat(new[] { @"\", "_" }).ToArray(); // Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark private const string NameStartCharRegex = @"[\p{L}\\_]"; private const string NameValidCharacterRegex = @"[\w\\_\.\?€]"; public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*", NameStartCharPrefix) { Priority = TerminalPriority.Name }; // Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference. 
// If we ever parse R1C1 references, make sure to include them here // TODO: Add all function names here private const string NamedRangeCombinationRegex = "((TRUE|FALSE)" + NameValidCharacterRegex + "+)" // \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token + "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)" // allow large cell references (e.g. A1048577) as named range + "|(" + ColumnPattern + @"(104857[7-9]|10485[89][0-9]|1048[6-9][0-9]{2}|1049[0-9]{3}|10[5-9][0-9]{4}|1[1-9][0-9]{5}|[2-9][0-9]{6}|d{8,})" + NameValidCharacterRegex + "*)" ; // To prevent e.g. "A1A1" being parsed as 2 cell tokens public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NamedRangeCombinationRegex, ColumnPrefix.Concat(new[] { "T", "F" }).ToArray()) { Priority = TerminalPriority.NamedRangeCombination }; public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+", "_") { Priority = TerminalPriority.ReservedName }; #region Structured References private const string SRSpecifierRegex = @"#(All|Data|Headers|Totals|This Row)"; public Terminal SRSpecifierToken = new RegexBasedTerminal(GrammarNames.TokenSRSpecifier, SRSpecifierRegex, "#") { Priority = TerminalPriority.StructuredReference }; private const string SRColumnRegex = @"(?:[^\[\]'#@]|(?:'['\[\]#@]))+"; public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, SRColumnRegex) { Priority = TerminalPriority.StructuredReference }; #endregion #region Prefixes private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, "; private const string notSheetNameChars = @"'*\[\]\\:/?"; //const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" 
+ "\\\""; //const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!"; private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+"; private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*"; //private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!"; public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!") { Priority = TerminalPriority.SheetToken }; public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!") { Priority = TerminalPriority.SheetQuotedToken }; private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!"; private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!"; public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex) { Priority = TerminalPriority.MultipleSheetsToken }; public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex) { Priority = TerminalPriority.MultipleSheetsToken }; private const string fileNameNumericRegex = @"\[[0-9]+\](?!,)(?=.*!)"; public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex, "[") { Priority = TerminalPriority.FileNameNumericToken }; private const string fileNameInBracketsRegex = @"\[[^\[\]]+\](?!,)(?=.*!)"; public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex, "[") { Priority = TerminalPriority.FileName }; // Source: https://stackoverflow.com/a/14632579 private const string fileNameRegex = @"[^\.\\\[\]]+\..{1,4}"; public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex) { Priority = TerminalPriority.FileName }; // 
Source: http://stackoverflow.com/a/6416209/572635 private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\-.$ @]+)\\(([^<>\"" /\|?*\\']|( |''))*\\)*"; private const string urlPathRegex = @"http(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:(0-9)*)*[/]([a-zA-Z0-9\-\.\?\,\'+&%\$#_ ()]*[/])*"; private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")"; public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex) { Priority = TerminalPriority.FileNamePath }; #endregion #endregion #endregion #region 2-NonTerminals // Most non-terminals are first defined here, so they can be used anywhere in the rules // Otherwise you can only use non-terminals that have been defined previously public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument); public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments); public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns); public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant); public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula); public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows); public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool); public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell); public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant); public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray); public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange); public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument); public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error); public NonTerminal File { get; } = new NonTerminal(GrammarNames.File); public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula); public 
NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq); public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall); public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName); public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange); public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp); public NonTerminal MultiRangeFormula{ get; } = new NonTerminal(GrammarNames.MultiRangeFormula); public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange); public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number); public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp); public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix); public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp); public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet); public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference); public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem); public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall); public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError); public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName); public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName); public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet); public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart); public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference); public NonTerminal StructuredReferenceColumn { get; } = new NonTerminal(GrammarNames.StructuredReferenceColumn); public NonTerminal StructuredReferenceExpression { get; } = new 
NonTerminal(GrammarNames.StructuredReferenceExpression);
        public NonTerminal StructuredReferenceSpecifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceSpecifier);
        public NonTerminal StructuredReferenceQualifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceQualifier);
        public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
        public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
        public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
        public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
        public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);

        #endregion

        // Builds the grammar: marks punctuation, assigns a production rule to every
        // non-terminal declared above, and finally registers operator precedence.
        public ExcelFormulaGrammar()
        {
            #region Punctuation
            MarkPunctuation(OpenParen, CloseParen);
            MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
            #endregion

            #region Rules

            #region Base rules
            Root = Start;

            // A parse starts with one of four top-level shapes; Start itself is marked transient.
            Start.Rule =
                FormulaWithEq
                | Formula
                | ArrayFormula
                | MultiRangeFormula
                ;
            MarkTransient(Start);

            // "{=Formula}"
            ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;

            // "=Reference,Reference,..."
            MultiRangeFormula.Rule = eqop + Union;

            // "=Formula"
            FormulaWithEq.Rule = eqop + Formula;

            Formula.Rule =
                Reference + ReduceHere()
                | Constant
                | FunctionCall
                | ConstantArray
                | OpenParen + Formula + CloseParen // parenthesized formula
                | ReservedName
                ;

            ReservedName.Rule = ReservedNameToken;

            Constant.Rule =
                Number
                | Text
                | Bool
                | Error
                ;

            // Each constant non-terminal wraps exactly one token.
            Text.Rule = TextToken;
            Number.Rule = NumberToken;
            Bool.Rule = BoolToken;
            Error.Rule = ErrorToken;
            RefError.Rule = RefErrorToken;
            #endregion

            #region Functions
            // Named function calls and all prefix/postfix/infix operations share the FunctionCall non-terminal.
            // There is no OpenParen in the first alternative; NOTE(review): presumably the function-name
            // token already contains the opening parenthesis - confirm against the terminal definitions.
            FunctionCall.Rule =
                FunctionName + Arguments + CloseParen
                | PrefixOp + Formula
                | Formula + PostfixOp
                | Formula + InfixOp + Formula
                ;

            FunctionName.Rule = ExcelFunction;

            // Zero or more comma-separated arguments.
            Arguments.Rule = MakeStarRule(Arguments, comma, Argument);

            EmptyArgument.Rule = EmptyArgumentToken;
            Argument.Rule = Formula | EmptyArgument;

            PrefixOp.Rule =
                ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
                | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
                | ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
            MarkTransient(PrefixOp);

            InfixOp.Rule =
                expop
                | mulop
                | divop
                | plusop
                | minop
                | concatop
                | gtop
                | eqop
                | ltop
                | neqop
                | gteop
                | lteop;
            MarkTransient(InfixOp);

            // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
            PostfixOp.Rule = PreferShiftHere() + percentop;
            MarkTransient(PostfixOp);
            #endregion

            #region References
            Reference.Rule =
                ReferenceItem
                | ReferenceFunctionCall
                | OpenParen + Reference + PreferShiftHere() + CloseParen // parenthesized reference
                | Prefix + ReferenceItem                                 // prefix (sheet/file) followed by the item it qualifies
                | DynamicDataExchange
                ;

            // Reference-producing operations: range (:), intersection, union in parentheses,
            // reference-returning functions and the postfix hash operator.
            ReferenceFunctionCall.Rule =
                Reference + colon + Reference
                | Reference + intersectop + Reference
                | OpenParen + Union + CloseParen
                | RefFunctionName + Arguments + CloseParen
                | Reference + hash
                ;

            RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;

            // One or more comma-separated references.
            Union.Rule = MakePlusRule(Union, comma, Reference);

            ReferenceItem.Rule =
                Cell
                | NamedRange
                | VRange
                | HRange
                | RefError
                | UDFunctionCall
                | StructuredReference
                ;
            MarkTransient(ReferenceItem);

            UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
            UDFName.Rule = UDFToken;

            VRange.Rule = VRangeToken;
            HRange.Rule = HRangeToken;

            Cell.Rule = CellToken;

            File.Rule =
                FileNameNumericToken
                | FileNameEnclosedInBracketsToken
                | FilePathToken + FileNameEnclosedInBracketsToken
                | FilePathToken + FileName
                ;

            DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;

            NamedRange.Rule = NameToken | NamedRangeCombinationToken;

            // Every way a reference can be qualified; quoted variants start with the QuoteS token.
            Prefix.Rule =
                SheetToken
                | QuoteS + SheetQuotedToken
                | File + SheetToken
                | QuoteS + File + SheetQuotedToken
                | File + exclamationMark
                | MultipleSheetsToken
                | QuoteS + MultipleSheetsQuotedToken
                | File + MultipleSheetsToken
                | QuoteS + File + MultipleSheetsQuotedToken
                | RefErrorToken
                ;

            StructuredReferenceQualifier.Rule = NameToken;

            StructuredReferenceSpecifier.Rule =
                SRSpecifierToken
                | at
                | OpenSquareParen + SRSpecifierToken + CloseSquareParen;

            StructuredReferenceColumn.Rule =
                SRColumnToken
                | OpenSquareParen + SRColumnToken + CloseSquareParen;

            // Enumerates the accepted combinations explicitly: a column or column range (optionally
            // preceded by "at"), or up to two specifiers optionally followed by a column or column range.
            StructuredReferenceExpression.Rule =
                StructuredReferenceColumn
                | StructuredReferenceColumn + colon + StructuredReferenceColumn
                | at + StructuredReferenceColumn
                | at + StructuredReferenceColumn + colon + StructuredReferenceColumn
                | StructuredReferenceSpecifier
                | StructuredReferenceSpecifier + comma + StructuredReferenceColumn
                | StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
                | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier
                | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn
                | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
                ;

            // "[expr]", "Qualifier[]" or "Qualifier[expr]"
            StructuredReference.Rule =
                OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
                | StructuredReferenceQualifier + OpenSquareParen + CloseSquareParen
                | StructuredReferenceQualifier + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
                ;
            #endregion

            #region Arrays
            // "{row;row;...}" where each row is a comma-separated list of array constants.
            ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;
            ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
            ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);
            ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
            #endregion

            #endregion

            #region 5-Operator Precedence
            // Some of these operators are neutral associative instead of left associative,
            // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
            // structure of the parse tree, we like consistency.
RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
            RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
            RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
            RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
            RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
            RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
            RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at);
            RegisterOperators(Precedence.Union, Associativity.Left, comma);
            RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
            RegisterOperators(Precedence.Range, Associativity.Left, colon);
            #endregion
        }

        #region Precedence and Priority constants
        // Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
        // Could also be an enum, but this way you don't need int casts
        // Operator precedence levels passed to RegisterOperators/ImplyPrecedenceHere; a larger value binds tighter.
        private static class Precedence
        {
            // Don't use priority 0, Irony seems to view it as no priority set
            public const int Comparison = 1;
            public const int Concatenation = 2;
            public const int Addition = 3;
            public const int Multiplication = 4;
            public const int Exponentiation = 5;
            public const int UnaryPostFix = 6;
            public const int UnaryPreFix = 7;
            // Level 8 is intentionally left unused (kept for reference).
            //public const int Reference = 8;
            public const int Union = 9;
            public const int Intersection = 10;
            public const int Range = 11;
        }

        // Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
        // E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
        // E.g. "A1Blah" is both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
        private static class TerminalPriority
        {
            // Irony Low value
            //public const int Low = -1000;

            public const int Name = -800;
            public const int ReservedName = -700;

            public const int StructuredReference = -500;

            public const int FileName = -500;
            // NOTE(review): shares the value of Name (-800) - confirm this tie is intended.
            public const int FileNamePath = -800;

            public const int SingleQuotedString = -100;

            // Irony Normal value, default value
            //public const int Normal = 0;
            public const int Bool = 0;

            public const int MultipleSheetsToken = 100;

            // Irony High value
            //public const int High = 1000;

            public const int CellToken = 1000;

            public const int NamedRangeCombination = 1100;

            public const int UDF = 1150;

            // The following five terminals all share the highest priority used in this grammar.
            public const int ExcelFunction = 1200;
            public const int ExcelRefFunction = 1200;
            public const int FileNameNumericToken = 1200;
            public const int SheetToken = 1200;
            public const int SheetQuotedToken = 1200;
        }
        #endregion

        // Expression-bodied property: every access calls GetExcelFunctionList() again,
        // i.e. the resource is re-read and re-split each time.
        private static string[] excelFunctionList => GetExcelFunctionList();

        // Returns the entries of the embedded ExcelBuiltinFunctionList_v170 resource,
        // split on '\n' and '\r' with empty entries removed.
        private static string[] GetExcelFunctionList()
        {
            var resource = Properties.Resources.ExcelBuiltinFunctionList_v170;
            using (var sr = new StringReader(resource))
                return sr.ReadToEnd().Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
        }
    }

    #region Names
    /// <summary>
    /// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
    /// </summary>
    /// <remarks>
    /// Using these is strongly recommended, as these will change when breaking changes occur.
    /// It also allows you to see which code works on what grammar constructs.
    /// </remarks>
    // Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
    {
        #region Non-Terminals
        // Names of the non-terminals that appear as nodes in the parse tree.
        public const string Argument = "Argument";
        public const string Arguments = "Arguments";
        public const string ArrayColumns = "ArrayColumns";
        public const string ArrayConstant = "ArrayConstant";
        public const string ArrayFormula = "ArrayFormula";
        public const string ArrayRows = "ArrayRows";
        public const string Bool = "Bool";
        public const string Cell = "Cell";
        public const string Constant = "Constant";
        public const string ConstantArray = "ConstantArray";
        public const string DynamicDataExchange = "DynamicDataExchange";
        public const string EmptyArgument = "EmptyArgument";
        public const string Error = "Error";
        public const string ExcelFunction = "ExcelFunction";
        public const string File = "File";
        public const string Formula = "Formula";
        public const string FormulaWithEq = "FormulaWithEq";
        public const string FunctionCall = "FunctionCall";
        public const string FunctionName = "FunctionName";
        // Constant name and string value differ: HorizontalRange -> "HRange".
        public const string HorizontalRange = "HRange";
        public const string MultiRangeFormula = "MultiRangeFormula";
        public const string NamedRange = "NamedRange";
        public const string Number = "Number";
        public const string Prefix = "Prefix";
        public const string QuotedFileSheet = "QuotedFileSheet";
        public const string Range = "Range";
        public const string Reference = "Reference";
        public const string ReferenceFunctionCall = "ReferenceFunctionCall";
        public const string RefError = "RefError";
        public const string RefFunctionName = "RefFunctionName";
        public const string ReservedName = "ReservedName";
        public const string Sheet = "Sheet";
        public const string StructuredReference = "StructuredReference";
        public const string StructuredReferenceColumn = "StructuredReferenceColumn";
        public const string StructuredReferenceExpression = "StructuredReferenceExpression";
        public const string StructuredReferenceSpecifier = "StructuredReferenceSpecifier";
        public const string StructuredReferenceQualifier = "StructuredReferenceQualifier";
        public const string Text = "Text";
        public const string UDFName = "UDFName";
        public const string UDFunctionCall = "UDFunctionCall";
        public const string Union = "Union";
        // Constant name and string value differ: VerticalRange -> "VRange".
        public const string VerticalRange = "VRange";
        #endregion

        #region Transient Non-Terminals
        // Names of the non-terminals that the grammar marks as transient.
        public const string TransientStart = "Start";
        public const string TransientInfixOp = "InfixOp";
        public const string TransientPostfixOp = "PostfixOp";
        public const string TransientPrefixOp = "PrefixOp";
        public const string TransientReferenceItem = "ReferenceItem";
        #endregion

        #region Terminals
        // Names of the terminals (tokens). Most values end in "Token"; the exceptions are
        // TokenIntersect, TokenSingleQuotedString and TokenUnionOperator.
        public const string TokenBool = "BoolToken";
        public const string TokenCell = "CellToken";
        public const string TokenEmptyArgument = "EmptyArgumentToken";
        public const string TokenError = "ErrorToken";
        public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
        public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
        public const string TokenFilePath = "FilePathToken";
        public const string TokenFileName = "FileNameToken";
        public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
        public const string TokenFileNameNumeric = "FileNameNumericToken";
        public const string TokenHRange = "HRangeToken";
        public const string TokenIntersect = "INTERSECT";
        public const string TokenMultipleSheets = "MultipleSheetsToken";
        public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
        public const string TokenName = "NameToken";
        public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
        public const string TokenNumber = "NumberToken";
        public const string TokenRefError = "RefErrorToken";
        public const string TokenReservedName = "ReservedNameToken";
        public const string TokenSingleQuotedString = "SingleQuotedString";
        // Constant name and string value differ: TokenSheet -> "SheetNameToken".
        public const string TokenSheet = "SheetNameToken";
        // Constant name and string value differ: TokenSheetQuoted -> "SheetNameQuotedToken".
        public const string TokenSheetQuoted = "SheetNameQuotedToken";
        public const string TokenSRColumn = "SRColumnToken";
        public const string TokenSRSpecifier = "SRSpecifierToken";
        public const string TokenText = "TextToken";
        public const string TokenUDF = "UDFToken";
        public const string TokenUnionOperator = ",";
        public const string TokenVRange = "VRangeToken";
        #endregion
    }
    #endregion
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v170/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v170
{
    ///
    /// Excel formula parser
/// Contains the parser and utilities that operate directly on the parse tree, or make working with the parse tree easier. ///
public static class ExcelFormulaParser
    {
        /// <summary>
        /// Thread-local singleton parser instance
        /// </summary>
        [ThreadStatic] private static Parser _p;

        /// <summary>
        /// Thread-safe parser: every thread lazily creates and then reuses its own instance.
        /// </summary>
        private static Parser P => _p ?? (_p = new Parser(new ExcelFormulaGrammar()));

        /// <summary>
        /// Parse a formula, return the tree's root node
        /// </summary>
        /// <param name="input">The formula to be parsed.</param>
        /// <exception cref="ArgumentException">
        /// If formula could not be parsed
        /// </exception>
        /// <returns>Parse tree root node</returns>
        public static ParseTreeNode Parse(string input)
        {
            return ParseToTree(input).Root;
        }

        /// <summary>
        /// Parse a formula, return the tree
        /// </summary>
        /// <param name="input">The formula to be parsed.</param>
        /// <exception cref="ArgumentException">
        /// If formula could not be parsed
        /// </exception>
        /// <returns>Parse tree</returns>
        public static ParseTree ParseToTree(string input)
        {
            var tree = P.Parse(input);

            if (tree.HasErrors())
            {
                throw new ArgumentException("Failed parsing input <<" + input + ">>");
            }

            // Post-process intersect tokens: move each span one position/column to the left
            // and make it exactly one character long.
            var intersects = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenIntersect));
            foreach (ParseTreeNode intersect in intersects)
            {
                var newLocation = new SourceLocation(intersect.Span.Location.Position - 1, intersect.Span.Location.Line, intersect.Span.Location.Column - 1);
                intersect.Span = new SourceSpan(newLocation, 1);
            }

            // Post-process quoted sheet tokens so that whitespace directly in front of them is included.
            var quotedSheetNodes = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenSheetQuoted));
            foreach (ParseTreeNode quotedSheetNode in quotedSheetNodes)
            {
                PrefixInfo.FixQuotedSheetNodeForWhitespace(quotedSheetNode, input);
            }

            return tree;
        }

        /// <summary>
        /// All nodes (terminals and non-terminals) in depth-first pre-order, with a conditional stop
        /// </summary>
        /// <param name="root">The root node</param>
        /// <param name="stopAt">Don't process the children of a node matching this predicate</param>
        // inspiration taken from https://irony.codeplex.com/discussions/213938
        public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
        {
            var stack = new Stack<ParseTreeNode>();
            stack.Push(root);
            while (stack.Count > 0)
            {
                var node = stack.Pop();
                yield return node;

                // Check if we don't want to process the children of this node
                if (stopAt != null && stopAt(node)) continue;

                var children = node.ChildNodes;
                // Push children on in reverse order so that they will
                // be evaluated left -> right when popped.
                for (int i = children.Count - 1; i >= 0; i--)
                {
                    stack.Push(children[i]);
                }
            }
        }

        /// <summary>
        /// All nodes in depth-first pre-order
        /// </summary>
        public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
        {
            return AllNodesConditional(root);
        }

        /// <summary>
        /// All nodes of a certain type in depth-first pre-order
        /// </summary>
        public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
        {
            return AllNodes(root.AllNodes(), type);
        }

        /// <summary>
        /// Filters an already enumerated node sequence down to the nodes of the given type
        /// </summary>
        internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
        {
            return allNodes.Where(node => node.Is(type));
        }

        /// <summary>
        /// Get the parent node of a node
        /// </summary>
        /// <remarks>
        /// This is an expensive operation, as the whole tree will be searched through
        /// </remarks>
        /// <exception cref="ArgumentException">If <paramref name="child"/> is not a child of any node below <paramref name="treeRoot"/></exception>
        public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
        {
            var parent = treeRoot.AllNodes()
                .FirstOrDefault(node => node.ChildNodes.Any(c => c == child));
            if(parent == null) throw new ArgumentException("Child is not part of the tree", nameof(child));
            return parent;
        }

        /// <summary>
        /// The node type/name
        /// </summary>
        public static string Type(this ParseTreeNode node)
        {
            return node.Term.Name;
        }

        /// <summary>
        /// Check if a node is of a particular type
        /// </summary>
        public static bool Is(this ParseTreeNode pt, string type)
        {
            return pt.Type() == type;
        }

        /// <summary>
        /// Checks whether this node is a function
        /// </summary>
        public static Boolean IsFunction(this ParseTreeNode input)
        {
            return input.Is(GrammarNames.FunctionCall)
                || input.Is(GrammarNames.ReferenceFunctionCall)
                || input.Is(GrammarNames.UDFunctionCall)
                // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
                || (input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction())
                ;
        }

        /// <summary>
        /// Whether or not this node represents parentheses "(_)"
        /// </summary>
        public static bool IsParentheses(this ParseTreeNode input)
        {
            switch (input.Type())
            {
                // A Formula/Reference whose only child is again a Formula/Reference
                case GrammarNames.Formula:
                    return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Formula);
                case GrammarNames.Reference:
                    return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Reference);
                default:
                    return false;
            }
        }

        /// <summary>
        /// Whether this node is a function node with three children of which the middle one is an operator
        /// </summary>
        public static bool IsBinaryOperation(this ParseTreeNode input)
        {
            return input.IsFunction()
                && input.ChildNodes.Count == 3
                && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
        }

        /// <summary>
        /// Whether this node is a binary operation on a FunctionCall node
        /// </summary>
        public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
        {
            return input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);
        }

        /// <summary>
        /// Whether this node is a binary operation on a ReferenceFunctionCall node
        /// </summary>
        public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
        {
            return input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
        }

        /// <summary>
        /// Whether this node is a unary prefix or postfix operation
        /// </summary>
        public static bool IsUnaryOperation(this ParseTreeNode input)
        {
            return IsUnaryPrefixOperation(input)
                || IsUnaryPostfixOperation(input);
        }

        /// <summary>
        /// Whether this node is a function node with two children of which the first one is an operator
        /// </summary>
        public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
        {
            return input.IsFunction()
                && input.ChildNodes.Count == 2
                && input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator);
        }

        /// <summary>
        /// Whether this node is a function node with two children of which the second one is an operator
        /// </summary>
        public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
        {
            return input.IsFunction()
                && input.ChildNodes.Count == 2
                && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
        }

        // Drops the last character of the input (used to strip the final symbol of a printed function name)
        private static string RemoveFinalSymbol(string input)
        {
            input = input.Substring(0, input.Length - 1);
            return input;
        }

        /// <summary>
        /// Get the function or operator name of this function call
        /// </summary>
        /// <exception cref="ArgumentException">If the node is not a function call</exception>
        public static string GetFunction(this ParseTreeNode input)
        {
            if (input.IsIntersection())
            {
                return GrammarNames.TokenIntersect;
            }
            if (input.IsUnion())
            {
                return GrammarNames.TokenUnionOperator;
            }
            if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
            {
                return input.ChildNodes[1].Print();
            }
            if (input.IsUnaryPrefixOperation())
            {
                return input.ChildNodes[0].Print();
            }
            if (input.IsNamedFunction())
            {
                return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpper();
            }
            if (input.IsExternalUDFunction())
            {
                // Prefix text followed by the function name of the wrapped call
                return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
            }

            throw new ArgumentException("Not a function call", nameof(input));
        }

        /// <summary>
        /// Check if this node is a specific function
        /// </summary>
        public static bool MatchFunction(this ParseTreeNode input, string functionName)
        {
            return IsFunction(input) && GetFunction(input) == functionName;
        }

        /// <summary>
        /// Get all the arguments of a function or operation
        /// </summary>
        /// <exception cref="ArgumentException">If the node is not a function call</exception>
        public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
        {
            if (input.IsNamedFunction())
            {
                return input
                    .ChildNodes[1] // "Arguments" non-terminal
                    .ChildNodes    // "Argument" non-terminals
                    .Select(node => node.ChildNodes[0])
                    ;
            }
            if (input.IsBinaryOperation())
            {
                return new[] {input.ChildNodes[0], input.ChildNodes[2]};
            }
            if (input.IsUnaryPrefixOperation())
            {
                return new[] {input.ChildNodes[1]};
            }
            if (input.IsUnaryPostfixOperation())
            {
                return new[] {input.ChildNodes[0]};
            }
            if (input.IsUnion())
            {
                return input.ChildNodes[0].ChildNodes;
            }
            if (input.IsExternalUDFunction())
            {
                return input       // Reference
                    .ChildNodes[1] // UDFunctionCall
                    .ChildNodes[1] // Arguments
                    .ChildNodes    // Argument non-terminals
                    .Select(node => node.ChildNodes[0])
                    ;
            }
            throw new ArgumentException("Not a function call", nameof(input));
        }

        /// <summary>
        /// Checks whether this node is a built-in excel function
        /// </summary>
        public static bool IsBuiltinFunction(this ParseTreeNode node)
        {
            return node.IsFunction() &&
                (node.ChildNodes[0].Is(GrammarNames.FunctionName) || node.ChildNodes[0].Is(GrammarNames.RefFunctionName));
        }

        /// <summary>
        /// Whether or not this node represents an intersection
        /// </summary>
        public static bool IsIntersection(this ParseTreeNode input)
        {
            return IsBinaryOperation(input) &&
                   input.ChildNodes[1].Token.Terminal.Name == GrammarNames.TokenIntersect;
        }

        /// <summary>
        /// Whether or not this node represents a union
        /// </summary>
        public static bool IsUnion(this ParseTreeNode input)
        {
            return input.Is(GrammarNames.ReferenceFunctionCall)
                   && input.ChildNodes.Count == 1
                   && input.ChildNodes[0].Is(GrammarNames.Union);
        }

        /// <summary>
        /// Checks whether this node is a function call with name, and not just a unary or binary operation
        /// </summary>
public static bool IsNamedFunction(this ParseTreeNode input)
        {
            return (input.Is(GrammarNames.FunctionCall) && input.ChildNodes[0].Is(GrammarNames.FunctionName))
                || (input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes[0].Is(GrammarNames.RefFunctionName))
                || input.Is(GrammarNames.UDFunctionCall);
        }

        /// <summary>
        /// Whether this node is a unary or binary operation
        /// </summary>
        public static bool IsOperation(this ParseTreeNode input)
        {
            return input.IsBinaryOperation() || input.IsUnaryOperation();
        }

        /// <summary>
        /// Whether this node is a Reference with two children of which the second one is a named function call
        /// </summary>
        public static bool IsExternalUDFunction(this ParseTreeNode input)
        {
            return input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsNamedFunction();
        }

        /// <summary>
        /// True if this node represents a number constant with a sign
        /// </summary>
        public static bool IsNumberWithSign(this ParseTreeNode input)
        {
            // prefix operator, followed by a node whose first child is a Constant wrapping a Number
            return IsUnaryPrefixOperation(input)
                   && input.ChildNodes[1].ChildNodes[0].Is(GrammarNames.Constant)
                   && input.ChildNodes[1].ChildNodes[0].ChildNodes[0].Is(GrammarNames.Number);
        }

        /// <summary>
        /// Extract all of the information from a Prefix non-terminal
        /// </summary>
        public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix) => PrefixInfo.From(prefix);

        /// <summary>
        /// Go to the first non-formula child node
        /// </summary>
        public static ParseTreeNode SkipFormula(this ParseTreeNode input)
        {
            while (input.Is(GrammarNames.Formula))
            {
                input = input.ChildNodes.First();
            }
            return input;
        }

        /// <summary>
        /// Get all child nodes that are references and aren't part of another reference expression
        /// </summary>
        public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
        {
            // Stop descending as soon as a Reference is found, then keep only those Reference nodes
            return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
                .Where(node => node.Is(GrammarNames.Reference))
                .Select(node => node.SkipToRelevant())
                ;
        }

        /// <summary>
        /// Gets the ParserReferences from the input parse tree node and its children
        /// </summary>
        /// <remarks>
        /// 5 cases:
        /// 1. ReferenceItem node: convert to ParserReference
        /// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
        ///    (to include the references in the arguments of external UDFs)
        /// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
        /// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
        /// 5. Other cases (RefFunctionCall, Union, Arguments): recursive call on the nodes returned from GetReferenceNodes(node)
        /// </remarks>
        public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
        {
            // Unwrap a Reference that only wraps a single child
            if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
                node = node.ChildNodes[0];

            var list = new List<ParserReference>();
            switch (node.Type())
            {
                // NOTE(review): RefError items are not listed here, so they fall through to the
                // default branch and yield no ParserReference - confirm this is intended.
                case GrammarNames.Cell:
                case GrammarNames.NamedRange:
                case GrammarNames.HorizontalRange:
                case GrammarNames.VerticalRange:
                case GrammarNames.StructuredReference:
                    list.Add(new ParserReference(node));
                    break;
                case GrammarNames.Reference:
                    list.Add(new ParserReference(node));
                    list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
                    break;
                default:
                    if (node.IsRange())
                    {
                        var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).ToArray();
                        var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).ToArray();
                        if (IsCellReference(rangeStart) && IsCellReference(rangeEnd))
                        {
                            // Cell:Cell => reuse the start reference and widen it into a cell range
                            ParserReference range = rangeStart.First();
                            range.MaxLocation = rangeEnd.First().MinLocation;
                            range.ReferenceType = ReferenceType.CellRange;
                            range.ReferenceNode = node;
                            range.LocationString = node.Print();
                            list.Add(range);
                        }
                        else if (IsTableReference(rangeStart) && IsTableReference(rangeEnd) && rangeStart.First().Name == rangeEnd.First().Name && rangeStart.First().TableColumns.Length == 1 && rangeEnd.First().TableColumns.Length == 1)
                        {
                            // Two single-column references into the same table => one reference with both columns
                            ParserReference range = rangeStart.First();
                            range.TableColumns = rangeStart.First().TableColumns.Concat(rangeEnd.First().TableColumns).ToArray();
                            // Specifiers are only kept when both limits agree on them
                            range.TableSpecifiers = rangeStart.First().TableSpecifiers.SequenceEqual(rangeEnd.First().TableSpecifiers) ? range.TableSpecifiers : new string[0];
                            range.ReferenceNode = node;
                            range.LocationString = node.Print();
                            list.Add(range);
                        }
                        else
                        {
                            list.AddRange(rangeStart);
                            list.AddRange(rangeEnd);
                        }
                    }
                    else
                    {
                        list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
                    }
                    break;
            }
            return list;
        }

        // True if the list holds exactly one reference and it is a single cell
        private static bool IsCellReference(IList<ParserReference> references)
        {
            return references.Count == 1 && references.First().ReferenceType == ReferenceType.Cell;
        }

        // True if the list holds exactly one reference and it is a table (structured) reference
        private static bool IsTableReference(IList<ParserReference> references)
        {
            return references.Count == 1 && references.First().ReferenceType == ReferenceType.Table;
        }

        /// <summary>
        /// Whether or not this node represents a range
        /// </summary>
        public static bool IsRange(this ParseTreeNode input)
        {
            return input.IsBinaryReferenceOperation() &&
                   input.ChildNodes[1].Is(":");
        }

        /// <summary>
        /// Go to the first "relevant" child node, i.e. skips wrapper nodes
        /// </summary>
        /// <param name="input">The input parse tree node</param>
        /// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
        /// <remarks>
        /// Skips:
        /// * FormulaWithEq and ArrayFormula nodes
        /// * Formula nodes
        /// * Parentheses
        /// * Reference nodes which are just wrappers
        /// </remarks>
        public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
        {
            while (true)
            {
                switch (input.Type())
                {
                    case GrammarNames.FormulaWithEq:
                    case GrammarNames.ArrayFormula:
                        // Second child is the formula; the first one is the leading token
                        input = input.ChildNodes[1];
                        break;
                    case GrammarNames.Argument:
                    case GrammarNames.Formula:
                        if (input.ChildNodes.Count == 1)
                        {
                            input = input.ChildNodes[0];
                        }
                        else
                        {
                            return input;
                        }
                        break;
                    case GrammarNames.Reference:
                        // Skip references which are parentheses
                        // Skip references without a prefix (=> they only have one child node) if the option is set
                        if ((skipReferencesWithoutPrefix && input.ChildNodes.Count == 1) || input.IsParentheses())
                        {
                            input = input.ChildNodes[0];
                        }
                        else
                        {
                            return input;
                        }
                        break;
                    default:
                        return input;
                }
            }
        }

        ///
        /// Pretty-print a parse tree to a string
/// public static string Print(this ParseTreeNode input) { // For terminals, just print the token text if (input.Term is Terminal) { return input.Token.Text; } // (Lazy) enumerable for printed children var children = input.ChildNodes.Select(Print); // Concrete list when needed List childrenList; // Switch on non-terminals switch (input.Term.Name) { case GrammarNames.Formula: // Check if these are brackets, otherwise print first child return IsParentheses(input) ? $"({children.First()})" : children.First(); case GrammarNames.FunctionCall: case GrammarNames.ReferenceFunctionCall: case GrammarNames.UDFunctionCall: childrenList = children.ToList(); if (input.IsNamedFunction()) { return string.Join("", childrenList) + ")"; } if (input.IsBinaryOperation()) { // format string for "normal" binary operation string format = "{0}{1}{2}"; if (input.IsIntersection()) { format = "{0} {2}"; } return string.Format(format, childrenList[0], childrenList[1], childrenList[2]); } if (input.IsUnion()) { return $"({string.Join(",", childrenList)})"; } if (input.IsUnaryOperation()) { return string.Join("", childrenList); } throw new ArgumentException("Unknown function type."); case GrammarNames.Reference: return IsParentheses(input) ? 
$"({children.First()})" : string.Concat(children); case GrammarNames.Prefix: var ret = string.Join("", children); // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File)) { ret += "!"; } return ret; case GrammarNames.ArrayFormula: return "{=" + children.ElementAt(1) + "}"; // Terms for which to print all child nodes concatenated case GrammarNames.ArrayConstant: case GrammarNames.DynamicDataExchange: case GrammarNames.FormulaWithEq: case GrammarNames.File: case GrammarNames.MultiRangeFormula: case GrammarNames.StructuredReference: case GrammarNames.StructuredReferenceColumn: case GrammarNames.StructuredReferenceExpression: case GrammarNames.StructuredReferenceSpecifier: return string.Join("", children); // Terms for which we print the children comma-separated case GrammarNames.Arguments: case GrammarNames.ArrayRows: case GrammarNames.Union: return string.Join(",", children); case GrammarNames.ArrayColumns: return string.Join(";", children); case GrammarNames.ConstantArray: return $"{{{children.First()}}}"; default: // If it is not defined above and the number of children is exactly one, we want to just print the first child if (input.ChildNodes.Count == 1) { return children.First(); } throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." 
+ Environment.NewLine + "This probably means the Excel grammar was modified without the print function being modified"); } } } } ================================================ FILE: app/XLParser.Web/XLParserVersions/v170/ParserReference.cs ================================================ using System.Linq; using Irony.Parsing; namespace XLParser.Web.XLParserVersions.v170 { public enum ReferenceType { Cell, CellRange, UserDefinedName, HorizontalRange, VerticalRange, RefError, Table } public class ParserReference { public ReferenceType ReferenceType { get; set; } public ParseTreeNode ReferenceNode { get; set; } public string LocationString { get; set; } public string Worksheet { get; set; } public string LastWorksheet { get; set; } public string FilePath { get; set; } public string FileName { get; set; } public string Name { get; set; } public string MinLocation { get; set; } public string MaxLocation { get; set; } public string[] TableSpecifiers { get; set; } public string[] TableColumns { get; set; } public ParserReference(ParseTreeNode node) { InitializeReference(node); } /// /// Initializes the current object based on the input ParseTreeNode /// /// /// For Reference nodes (Prefix ReferenceItem), it initialize the values derived from the Prefix node and /// is re-invoked for the ReferenceItem node. /// public void InitializeReference(ParseTreeNode node) { switch (node.Type()) { case GrammarNames.Reference: PrefixInfo prefix = node.ChildNodes[0].GetPrefixInfo(); Worksheet = prefix.HasSheet ? 
prefix.Sheet.Replace("''", "'") : "(Undefined sheet)"; if (prefix.HasMultipleSheets) { string[] sheets = prefix.MultipleSheets.Split(':'); Worksheet = sheets[0]; LastWorksheet = sheets[1]; } if (prefix.HasFilePath) { FilePath = prefix.FilePath; } if (prefix.HasFileNumber) { FileName = prefix.FileNumber.ToString(); } else if (prefix.HasFileName) { FileName = prefix.FileName; } InitializeReference(node.ChildNodes[1]); break; case GrammarNames.Cell: ReferenceType = ReferenceType.Cell; MinLocation = node.ChildNodes[0].Token.ValueString; MaxLocation = MinLocation; break; case GrammarNames.NamedRange: ReferenceType = ReferenceType.UserDefinedName; Name = node.ChildNodes[0].Token.ValueString; break; case GrammarNames.StructuredReference: ReferenceType = ReferenceType.Table; Name = node.ChildNodes.FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceQualifier)?.ChildNodes[0].Token.ValueString; TableSpecifiers = node.AllNodes().Where(x => x.Is(GrammarNames.TokenSRSpecifier) || x.Is("@")).Select(x => UnEscape(x.Token.ValueString, "'")).ToArray(); TableColumns = node.AllNodes().Where(x => x.Is(GrammarNames.TokenSRColumn)).Select(x => UnEscape(x.Token.ValueString, "'")).ToArray(); break; case GrammarNames.HorizontalRange: string[] horizontalLimits = node.ChildNodes[0].Token.ValueString.Split(':'); ReferenceType = ReferenceType.HorizontalRange; MinLocation = horizontalLimits[0]; MaxLocation = horizontalLimits[1]; break; case GrammarNames.VerticalRange: string[] verticalLimits = node.ChildNodes[0].Token.ValueString.Split(':'); ReferenceType = ReferenceType.VerticalRange; MinLocation = verticalLimits[0]; MaxLocation = verticalLimits[1]; break; case GrammarNames.RefError: ReferenceType = ReferenceType.RefError; break; } ReferenceNode = node; LocationString = node.Print(); } private string UnEscape(string value, string escapeCharacter) { return System.Text.RegularExpressions.Regex.Replace(value, $"{escapeCharacter}(?!{escapeCharacter})", ""); } public override string 
// TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23

/// <summary>
/// Simple data class that holds information about a Prefix:
/// an optional file (path plus name, or a numeric index), and either a single sheet or a multi-sheet range.
/// </summary>
/// <remarks>
/// Equality and hashing compare file path, file name, file number, sheet and multiple sheets
/// (strings case-insensitively, ordinal). <see cref="IsQuoted"/> does not take part in equality.
/// </remarks>
public class PrefixInfo : IEquatable<PrefixInfo>
{
    /// <summary>Path part of the file, or null when the prefix has none.</summary>
    public string FilePath { get; }
    public bool HasFilePath => FilePath != null;

    private readonly int? _fileNumber;
    /// <summary>Numeric file index; 0 when <see cref="HasFileNumber"/> is false.</summary>
    public int FileNumber => _fileNumber.GetValueOrDefault();
    public bool HasFileNumber => _fileNumber.HasValue;

    /// <summary>File name (without enclosing brackets), or null when the prefix has none.</summary>
    public string FileName { get; }
    public bool HasFileName => FileName != null;

    public bool HasFile => HasFileName || HasFileNumber;

    /// <summary>Sheet name, or null when the prefix has no single sheet.</summary>
    public string Sheet { get; }
    public bool HasSheet => Sheet != null;

    /// <summary>Multi-sheet text as it appeared in the formula (without the trailing '!'), or null.</summary>
    public string MultipleSheets { get; }
    public bool HasMultipleSheets => MultipleSheets != null;

    /// <summary>True when the prefix started with a single quote.</summary>
    public bool IsQuoted { get; }

    public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
    {
        Sheet = sheet;
        _fileNumber = fileNumber;
        FileName = fileName;
        FilePath = filePath;
        MultipleSheets = multipleSheets;
        IsQuoted = isQuoted;
    }

    /// <summary>
    /// Create a PrefixInfo class from a parse tree node
    /// </summary>
    /// <param name="prefix">A parse tree node of type <c>GrammarNames.Prefix</c>.</param>
    /// <exception cref="ArgumentException">The node is not a prefix node.</exception>
    internal static PrefixInfo From(ParseTreeNode prefix)
    {
        if (prefix.Type() != GrammarNames.Prefix)
        {
            throw new ArgumentException("Not a prefix", nameof(prefix));
        }

        string filePath = null;
        int? fileNumber = null;
        string fileName = null;
        string sheetName = null;
        string multipleSheets = null;

        // Token number we're processing
        var cur = 0;

        // Check for quotes
        var isQuoted = prefix.ChildNodes[cur].Is("'");
        if (isQuoted)
        {
            cur++;
        }

        // Check and process file
        if (prefix.ChildNodes[cur].Is(GrammarNames.File))
        {
            ParseTreeNode file = prefix.ChildNodes[cur];

            if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
            {
                // Numeric filename: strip the first and last character (the brackets) and parse the rest.
                // A value that does not fit an int leaves the file number unset.
                fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
            }
            else
            {
                // String filename
                var iCur = 0;

                // Check if it includes a path
                if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                {
                    filePath = file.ChildNodes[iCur].Print();
                    iCur++;
                }

                if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                {
                    // Strip the first and last character (the brackets)
                    fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                }
                else
                {
                    fileName = file.ChildNodes[iCur].Print();
                }
            }

            cur++;
        }

        // Check for a non-quoted sheet
        if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheet))
        {
            // remove !
            sheetName = Substr(prefix.ChildNodes[cur].Print(), 1);
        }
        // Check for a quoted sheet
        else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheetQuoted))
        {
            // remove quote and !
            sheetName = Substr(prefix.ChildNodes[cur].Print(), 2);

            if (sheetName == "")
            {
                // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                sheetName = " ";
            }
        }
        // Check if multiple sheets
        else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenMultipleSheets))
        {
            // remove !
            multipleSheets = Substr(prefix.ChildNodes[cur].Print(), 1);
        }

        return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
    }

    /// <summary>
    /// Extends a quoted-sheet node backwards so that it also covers the whitespace directly preceding it
    /// in <paramref name="sourceText"/>, updating both the node's span and the token text.
    /// </summary>
    /// <param name="quotedSheetNode">The quoted sheet token node to adjust (modified in place).</param>
    /// <param name="sourceText">The complete text the node was parsed from.</param>
    internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
    {
        var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
        SourceLocation currentLocation = quotedSheetNode.Span.Location;
        if (newPosition == currentLocation.Position)
        {
            return;
        }

        // The start moves backwards by `shift` characters, so the column has to move backwards by the same
        // amount. (The previous code added the shift, pushing the column further to the right.)
        // NOTE(review): assumes the absorbed whitespace is on the same line as the token; Line is kept as is.
        var shift = currentLocation.Position - newPosition;
        var newLocation = new SourceLocation(newPosition, currentLocation.Line, currentLocation.Column - shift);
        quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

        // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
        typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
            ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
    }

    /// <summary>
    /// Walks backwards from the node's start position while the preceding character is whitespace
    /// and returns the resulting start index.
    /// </summary>
    private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
    {
        var startIndex = nodeSheetQuoted.Span.Location.Position;
        while (startIndex > 0)
        {
            if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
            {
                break;
            }

            startIndex--;
        }

        return startIndex;
    }

    /// <summary>
    /// Returns <paramref name="s"/> without its first <paramref name="removeFirst"/>
    /// and last <paramref name="removeLast"/> characters.
    /// </summary>
    private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
    {
        return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
    }

    public override bool Equals(object other) => Equals(other as PrefixInfo);

    public bool Equals(PrefixInfo other)
    {
        if (ReferenceEquals(null, other)) return false;
        if (ReferenceEquals(this, other)) return true;
        return _fileNumber == other._fileNumber
               && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase)
               && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase)
               && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase)
               && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
    }

    public override int GetHashCode()
    {
        unchecked
        {
            // Same fields and same case-insensitive comparer as Equals
            var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
            hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
            hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
            hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
            hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
            return hashCode;
        }
    }

    public static bool operator ==(PrefixInfo left, PrefixInfo right)
    {
        return Equals(left, right);
    }

    public static bool operator !=(PrefixInfo left, PrefixInfo right)
    {
        return !Equals(left, right);
    }

    /// <summary>
    /// Rebuilds the prefix text: optional quote, file path, bracketed file number/name,
    /// sheet or multiple sheets, optional closing quote, and a final '!'.
    /// </summary>
    public override string ToString()
    {
        var res = new StringBuilder();
        if (IsQuoted) res.Append("'");
        if (HasFilePath) res.Append(FilePath);
        if (HasFileNumber) res.Append($"[{FileNumber}]");
        if (HasFileName) res.Append($"[{FileName}]");
        if (HasSheet) res.Append(Sheet);
        if (HasMultipleSheets) res.Append(MultipleSheets);
        if (IsQuoted) res.Append("'");
        res.Append("!");
        return res.ToString();
    }
}
/// <summary>
/// Terminal that determines whether the input at the current position starts with one of the expected words.
/// The words are stored in a character trie; when several words match, the longest one wins.
/// </summary>
/// <remarks>
/// Children of each node are represented as an array to allow direct indexation. Do not use
/// for words that have a large difference between low and high character of a token.
/// </remarks>
public class WordsTerminal : Terminal
{
    private readonly Node _rootNode;
    private readonly List<string> _words;
    private bool _caseSensitive;

    /// <param name="name">Name of the terminal.</param>
    /// <param name="words">The words this terminal recognizes; copied into an internal list.</param>
    public WordsTerminal(string name, IEnumerable<string> words) : base(name)
    {
        _rootNode = new Node(0);
        _words = new List<string>(words);
    }

    /// <summary>
    /// Builds the trie. Case sensitivity is taken from the grammar; for a case-insensitive grammar the
    /// words are stored upper-cased (invariant) and the input is upper-cased the same way in <see cref="TryMatch"/>.
    /// </summary>
    public override void Init(GrammarData grammarData)
    {
        base.Init(grammarData);
        _caseSensitive = Grammar.CaseSensitive;
        foreach (var word in _words)
        {
            AddWordToTree(_caseSensitive ? word : word.ToUpperInvariant());
        }

        if (EditorInfo == null)
        {
            EditorInfo = new TokenEditorInfo(TokenType.Unknown, TokenColor.Text, TokenTriggers.None);
        }
    }

    public override IList<string> GetFirsts() => _words;

    /// <summary>
    /// Tries to match the longest known word starting at the preview position of <paramref name="source"/>.
    /// </summary>
    /// <returns>A token covering the matched word, or null when no word matches.</returns>
    public override Token TryMatch(ParsingContext context, ISourceStream source)
    {
        var node = _rootNode;
        // Deepest node seen so far that ends a word. Tracking it (instead of only testing the node where
        // the walk stopped) keeps a shorter word matchable when the input continues partway down the
        // branch of a longer word, e.g. words "AB" and "ABCD" against input "ABCX".
        var lastMatch = node.IsTerminal ? node : null;
        var input = source.Text;
        for (var i = source.PreviewPosition; i < input.Length; ++i)
        {
            var c = _caseSensitive ? input[i] : char.ToUpperInvariant(input[i]);
            node = node[c];
            if (node is null)
            {
                break;
            }

            if (node.IsTerminal)
            {
                lastMatch = node;
            }
        }

        if (lastMatch is null)
        {
            return null;
        }

        // Node.Length is the depth in the trie, i.e. the number of characters of the matched word
        source.PreviewPosition += lastMatch.Length;
        return source.CreateToken(OutputTerminal);
    }

    /// <summary>Inserts a word into the trie and marks its last node as the end of a word.</summary>
    private void AddWordToTree(string word)
    {
        var node = _rootNode;
        foreach (var c in word)
        {
            node = node.GetOrAddChild(c);
        }

        node.IsTerminal = true;
    }

    /// <summary>
    /// Trie node. Children are kept in an array indexed by (character - <c>_lowChar</c>), covering the
    /// inclusive character range [<c>_lowChar</c>, <c>_highChar</c>]; unused slots are null.
    /// </summary>
    private class Node
    {
        private char _lowChar = '\0';
        private char _highChar = '\0';
        private Node[] _children;

        public Node(int length)
        {
            Length = length;
        }

        /// <summary>True when a word ends at this node.</summary>
        public bool IsTerminal { get; set; }

        /// <summary>Number of characters on the path from the root to this node.</summary>
        public int Length { get; }

        /// <summary>Child for character <paramref name="c"/>, or null when there is none.</summary>
        public Node this[char c]
        {
            get
            {
                if (_children is null)
                {
                    return null;
                }

                if (c < _lowChar || c > _highChar)
                {
                    return null;
                }

                return _children[c - _lowChar];
            }
        }

        /// <summary>
        /// Returns the child for <paramref name="c"/>, creating it (and growing the child array
        /// downwards or upwards as needed) when it does not exist yet.
        /// </summary>
        internal Node GetOrAddChild(char c)
        {
            if (_children is null)
            {
                var node = new Node(Length + 1);
                _children = new[] { node };
                _lowChar = c;
                _highChar = c;
                return node;
            }

            var newLowChar = (char)Math.Min(_lowChar, c);
            if (newLowChar != _lowChar)
            {
                // Grow at the front: resize, shift the existing children up by the offset, clear the gap
                var newChildrenCount = _highChar - newLowChar + 1;
                Array.Resize(ref _children, newChildrenCount);
                var ofs = _lowChar - newLowChar;
                Array.Copy(_children, 0, _children, ofs, newChildrenCount - ofs);
                Array.Clear(_children, 0, ofs);
                _lowChar = newLowChar;
                return _children[0] = new Node(Length + 1);
            }

            var newHighChar = (char)Math.Max(_highChar, c);
            if (newHighChar != _highChar)
            {
                // Grow at the end: the new character takes the last slot
                var newChildrenCount = newHighChar - _lowChar + 1;
                Array.Resize(ref _children, newChildrenCount);
                _highChar = newHighChar;
                return _children[newChildrenCount - 1] = new Node(Length + 1);
            }

            // Character is inside the current range: reuse the slot or fill it
            var charIdx = c - _lowChar;
            var child = _children[charIdx];
            if (child is null)
            {
                return _children[charIdx] = new Node(Length + 1);
            }

            return child;
        }
    }
}
#region 1-Terminals

#region Symbols and operators

public Terminal at => ToTerm("@");
public Terminal comma => ToTerm(",");
public Terminal colon => ToTerm(":");
public Terminal hash => ToTerm("#");
public Terminal semicolon => ToTerm(";");
public Terminal OpenParen => ToTerm("(");
public Terminal CloseParen => ToTerm(")");
public Terminal CloseSquareParen => ToTerm("]");
public Terminal OpenSquareParen => ToTerm("[");
public Terminal exclamationMark => ToTerm("!");
public Terminal CloseCurlyParen => ToTerm("}");
public Terminal OpenCurlyParen => ToTerm("{");
public Terminal QuoteS => ToTerm("'");

public Terminal mulop => ToTerm("*");
public Terminal plusop => ToTerm("+");
public Terminal divop => ToTerm("/");
public Terminal minop => ToTerm("-");
public Terminal concatop => ToTerm("&");
public Terminal expop => ToTerm("^");

// Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
// Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);

public Terminal percentop => ToTerm("%");

public Terminal gtop => ToTerm(">");
public Terminal eqop => ToTerm("=");
public Terminal ltop => ToTerm("<");
public Terminal neqop => ToTerm("<>");
public Terminal gteop => ToTerm(">=");
public Terminal lteop => ToTerm("<=");

#endregion

#region Literals

public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE", "T", "F")
{
    Priority = TerminalPriority.Bool
};

public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None)
{
    DefaultIntTypes = new[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt }
};

public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"",
    StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes);

public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'",
    StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes)
{
    Priority = TerminalPriority.SingleQuotedString
};

public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!", "#");
public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError);

#endregion

#region Functions

private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers
private const string AllUdfChars = SpecialUdfChars + @"\\.\w";
private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)";

// The following regex uses the rather exotic feature Character Class Subtraction
// https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction
private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\(";

public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex)
{
    Priority = TerminalPriority.UDF
};

public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(", "I", "O")
{
    Priority = TerminalPriority.ExcelRefFunction
};

public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(", "I", "C")
{
    Priority = TerminalPriority.ExcelRefFunction
};

// Every built-in function name is matched together with its opening parenthesis
public Terminal ExcelFunction { get; } = new WordsTerminal(GrammarNames.ExcelFunction, excelFunctionList.Select(f => f + '('))
{
    Priority = TerminalPriority.ExcelFunction
};

// Using this instead of Empty allows a more accurate tree
public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);

#endregion

#region References and names

// Column letters A..XFD and row numbers 1..1048576
private const string ColumnPattern = @"(?:[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D]|[A-Z]{1,2})";
private const string RowPattern = @"(?:104857[0-6]|10485[0-6][0-9]|1048[0-4][0-9]{2}|104[0-7][0-9]{3}|10[0-3][0-9]{4}|[1-9][0-9]{1,5}|[1-9])";

// Possible first characters of column-based / row-based tokens: "A".."Z" or "1".."9", plus "$"
private static readonly string[] ColumnPrefix = Enumerable.Range('A', 'Z' - 'A' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray();
private static readonly string[] RowPrefix = Enumerable.Range('1', '9' - '1' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray();

public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern, ColumnPrefix);
public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?" + RowPattern + ":[$]?" + RowPattern, RowPrefix);

private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?" + RowPattern;
public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex, ColumnPrefix)
{
    Priority = TerminalPriority.CellToken
};

private static readonly HashSet<UnicodeCategory> UnicodeLetterCategories = new HashSet<UnicodeCategory>
{
    UnicodeCategory.UppercaseLetter,
    UnicodeCategory.LowercaseLetter,
    UnicodeCategory.TitlecaseLetter,
    UnicodeCategory.ModifierLetter,
    UnicodeCategory.OtherLetter
};

// 48718 letters, but it allows parser to from tokens starting with digits, parentheses, operators...
private static readonly string[] UnicodeLetters = Enumerable.Range(0, ushort.MaxValue).Where(codePoints => UnicodeLetterCategories.Contains(CharUnicodeInfo.GetUnicodeCategory((char)codePoints))).Select(codePoint => char.ToString((char)codePoint)).ToArray();
private static readonly string[] NameStartCharPrefix = UnicodeLetters.Concat(new[] { @"\", "_" }).ToArray();

// Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark
private const string NameStartCharRegex = @"[\p{L}\\_]";
private const string NameValidCharacterRegex = @"[\w\\_\.\?€]";
public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*", NameStartCharPrefix)
{
    Priority = TerminalPriority.Name
};

// Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference.
// If we ever parse R1C1 references, make sure to include them here
// TODO: Add all function names here
private const string NamedRangeCombinationRegex =
    "((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
    // \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
    + "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
    // allow large cell references (e.g. A1048577) as named range
    // The last alternative covers row numbers of eight or more digits; it was written "d{8,}" (eight literal 'd's) before.
    + "|(" + ColumnPattern + @"(104857[7-9]|10485[89][0-9]|1048[6-9][0-9]{2}|1049[0-9]{3}|10[5-9][0-9]{4}|1[1-9][0-9]{5}|[2-9][0-9]{6}|\d{8,})" + NameValidCharacterRegex + "*)"
    ;

// To prevent e.g. "A1A1" being parsed as 2 cell tokens
public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NamedRangeCombinationRegex, ColumnPrefix.Concat(new[] { "T", "F" }).ToArray())
{
    Priority = TerminalPriority.NamedRangeCombination
};

public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+", "_")
{
    Priority = TerminalPriority.ReservedName
};

#region Structured References

private const string SRSpecifierRegex = @"#(All|Data|Headers|Totals|This Row)";
public Terminal SRSpecifierToken = new RegexBasedTerminal(GrammarNames.TokenSRSpecifier, SRSpecifierRegex, "#")
{
    Priority = TerminalPriority.StructuredReference
};

// Any character except [ ] ' # @, or one of ' [ ] # @ escaped with a leading single quote
private const string SRColumnRegex = @"(?:[^\[\]'#@]|(?:'['\[\]#@]))+";
public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, SRColumnRegex)
{
    Priority = TerminalPriority.StructuredReference
};

#endregion

#region Prefixes

private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
private const string notSheetNameChars = @"'*\[\]\\:/?";
//const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
//const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*";
//private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";

public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!")
{
    Priority = TerminalPriority.SheetToken
};

public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!")
{
    Priority = TerminalPriority.SheetQuotedToken
};

private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!";
private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!";

public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex)
{
    Priority = TerminalPriority.MultipleSheetsToken
};

public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex)
{
    Priority = TerminalPriority.MultipleSheetsToken
};

// Both bracketed file patterns must not be followed by a comma and need a '!' somewhere later in the input
private const string fileNameNumericRegex = @"\[[0-9]+\](?!,)(?=.*!)";
public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex, "[")
{
    Priority = TerminalPriority.FileNameNumericToken
};

private const string fileNameInBracketsRegex = @"\[[^\[\]]+\](?!,)(?=.*!)";
public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex, "[")
{
    Priority = TerminalPriority.FileName
};

// Source: https://stackoverflow.com/a/14632579
private const string fileNameRegex = @"[^\.\\\[\]]+\..{1,4}";
public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex)
{
    Priority = TerminalPriority.FileName
};

// Source: http://stackoverflow.com/a/6416209/572635
private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\-.$ @]+)\\(([^<>\"" /\|?*\\']|( |''))*\\)*";
// The port part is ":" followed by digits; it was written "(0-9)" (the literal text 0-9) before,
// so URLs with a port number never matched. The group is kept so group numbering is unchanged.
private const string urlPathRegex = @"http(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:([0-9])*)*[/]([a-zA-Z0-9\-\.\?\,\'+&%\$#_ ()]*[/])*";
private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")";
public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex)
{
    Priority = TerminalPriority.FileNamePath
};

#endregion

#endregion

#endregion
#region 2-NonTerminals
// Most non-terminals are first defined here, so they can be used anywhere in the rules
// Otherwise you can only use non-terminals that have been defined previously
// Each property only creates the NonTerminal under its GrammarNames name; the production rules are
// assigned afterwards in the constructor.

public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument);
public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments);
public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns);
public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant);
public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula);
public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows);
public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool);
public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell);
public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant);
public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray);
public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument);
public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error);
public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula);
public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq);
public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall);
public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName);
// Property name and grammar name differ here: HRange is registered as GrammarNames.HorizontalRange
public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange);
public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp);
public NonTerminal MultiRangeFormula{ get; } = new NonTerminal(GrammarNames.MultiRangeFormula);
public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange);
public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number);
public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix);
public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference);
public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError);
public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName);
public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName);
public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet);
public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart);
public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
public NonTerminal StructuredReferenceColumn { get; } = new NonTerminal(GrammarNames.StructuredReferenceColumn);
public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
public NonTerminal StructuredReferenceSpecifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceSpecifier);
public NonTerminal StructuredReferenceQualifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceQualifier);
public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
// Registered as GrammarNames.VerticalRange
public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);

#endregion
/// <summary>
/// Builds the grammar: marks parentheses and curly braces as punctuation, assigns the production rule
/// of every non-terminal, marks the helper non-terminals as transient, and registers operator
/// precedence and associativity.
/// </summary>
public ExcelFormulaGrammar()
{
    #region Punctuation
    MarkPunctuation(OpenParen, CloseParen);
    MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
    #endregion

    #region Rules

    #region Base rules
    Root = Start;

    // A formula may be given with or without a leading '=', as an array formula "{=...}",
    // or as '=' followed by a union of references
    Start.Rule =
        FormulaWithEq
        | Formula
        | ArrayFormula
        | MultiRangeFormula
        ;
    MarkTransient(Start);

    ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;

    MultiRangeFormula.Rule = eqop + Union;

    FormulaWithEq.Rule = eqop + Formula;

    Formula.Rule =
        Reference + ReduceHere()
        | Constant
        | FunctionCall
        | ConstantArray
        | OpenParen + Formula + CloseParen
        | ReservedName
        ;

    ReservedName.Rule = ReservedNameToken;

    Constant.Rule =
        Number
        | Text
        | Bool
        | Error
        ;

    Text.Rule = TextToken;
    Number.Rule = NumberToken;
    Bool.Rule = BoolToken;
    Error.Rule = ErrorToken;
    RefError.Rule = RefErrorToken;
    #endregion

    #region Functions

    // Operators are modelled as function calls: prefix, postfix and infix applications
    // all produce a FunctionCall node
    FunctionCall.Rule =
        FunctionName + Arguments + CloseParen
        | PrefixOp + Formula
        | Formula + PostfixOp
        | Formula + InfixOp + Formula
        ;

    FunctionName.Rule = ExcelFunction;

    Arguments.Rule = MakeStarRule(Arguments, comma, Argument);

    EmptyArgument.Rule = EmptyArgumentToken;
    Argument.Rule = Formula | EmptyArgument;

    PrefixOp.Rule =
        ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
        | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
        | ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
    MarkTransient(PrefixOp);

    InfixOp.Rule =
        expop
        | mulop
        | divop
        | plusop
        | minop
        | concatop
        | gtop
        | eqop
        | ltop
        | neqop
        | gteop
        | lteop;
    MarkTransient(InfixOp);

    // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
    PostfixOp.Rule = PreferShiftHere() + percentop;
    MarkTransient(PostfixOp);
    #endregion

    #region References

    Reference.Rule =
        ReferenceItem
        | ReferenceFunctionCall
        | OpenParen + Reference + PreferShiftHere() + CloseParen
        | Prefix + ReferenceItem
        | DynamicDataExchange
        ;

    // Range (':'), intersection (implied space), parenthesized union, reference-returning
    // functions, and the postfix '#' operator
    ReferenceFunctionCall.Rule =
        Reference + colon + Reference
        | Reference + intersectop + Reference
        | OpenParen + Union + CloseParen
        | RefFunctionName + Arguments + CloseParen
        | Reference + hash
        ;

    RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;

    Union.Rule = MakePlusRule(Union, comma, Reference);

    ReferenceItem.Rule =
        Cell
        | NamedRange
        | VRange
        | HRange
        | RefError
        | UDFunctionCall
        | StructuredReference
        ;
    MarkTransient(ReferenceItem);

    UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
    UDFName.Rule = UDFToken;

    VRange.Rule = VRangeToken;
    HRange.Rule = HRangeToken;

    Cell.Rule = CellToken;

    File.Rule =
        FileNameNumericToken
        | FileNameEnclosedInBracketsToken
        | FilePathToken + FileNameEnclosedInBracketsToken
        | FilePathToken + FileName
        ;

    DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;

    NamedRange.Rule = NameToken | NamedRangeCombinationToken;

    // Every combination of optional quote, optional file, and single sheet / multiple sheets,
    // plus a file followed directly by '!' and a bare #REF! prefix
    Prefix.Rule =
        SheetToken
        | QuoteS + SheetQuotedToken
        | File + SheetToken
        | QuoteS + File + SheetQuotedToken
        | File + exclamationMark
        | MultipleSheetsToken
        | QuoteS + MultipleSheetsQuotedToken
        | File + MultipleSheetsToken
        | QuoteS + File + MultipleSheetsQuotedToken
        | RefErrorToken
        ;

    StructuredReferenceQualifier.Rule = NameToken;

    StructuredReferenceSpecifier.Rule =
        SRSpecifierToken
        | at
        | OpenSquareParen + SRSpecifierToken + CloseSquareParen;

    StructuredReferenceColumn.Rule =
        SRColumnToken
        | OpenSquareParen + SRColumnToken + CloseSquareParen;

    // Up to two specifiers, optionally followed by a column or a column range
    StructuredReferenceExpression.Rule =
        StructuredReferenceColumn
        | StructuredReferenceColumn + colon + StructuredReferenceColumn
        | at + StructuredReferenceColumn
        | at + StructuredReferenceColumn + colon + StructuredReferenceColumn
        | StructuredReferenceSpecifier
        | StructuredReferenceSpecifier + comma + StructuredReferenceColumn
        | StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
        | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier
        | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn
        | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
        ;

    StructuredReference.Rule =
        OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
        | StructuredReferenceQualifier + OpenSquareParen + CloseSquareParen
        | StructuredReferenceQualifier + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
        ;
    #endregion

    #region Arrays
    ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;

    // Naming follows the source: ArrayColumns is a ';'-separated list of ArrayRows,
    // and ArrayRows is a ','-separated list of ArrayConstant
    ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
    ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);

    ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
    #endregion

    #endregion

    #region 5-Operator Precedence
    // Some of these operators are neutral associative instead of left associative,
    // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
    // structure of the parse tree, we like consistency.
    RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
    RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
    RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
    RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
    RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
    RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
    RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at);
    RegisterOperators(Precedence.Union, Associativity.Left, comma);
    RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
    RegisterOperators(Precedence.Range, Associativity.Left, colon);
    #endregion
}

#region Precedence and Priority constants
// Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
// Could also be an enum, but this way you don't need int casts
// Operator precedence levels passed to RegisterOperators / ImplyPrecedenceHere in the constructor;
// a larger number is used for the tighter-binding operators (Range is the highest, Comparison the lowest).
private static class Precedence
{
    // Don't use priority 0, Irony seems to view it as no priority set
    public const int Comparison = 1;
    public const int Concatenation = 2;
    public const int Addition = 3;
    public const int Multiplication = 4;
    public const int Exponentiation = 5;
    public const int UnaryPostFix = 6;
    public const int UnaryPreFix = 7;
    //public const int Reference = 8;
    public const int Union = 9;
    public const int Intersection = 10;
    public const int Range = 11;
}

// Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
// E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
// E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
private static class TerminalPriority
{
    // Irony Low value
    //public const int Low = -1000;

    public const int Name = -800;
    public const int ReservedName = -700;

    public const int StructuredReference = -500;

    public const int FileName = -500;
    public const int FileNamePath = -800;

    public const int SingleQuotedString = -100;

    // Irony Normal value, default value
    //public const int Normal = 0;
    public const int Bool = 0;

    public const int MultipleSheetsToken = 100;

    // Irony High value
    //public const int High = 1000;

    public const int CellToken = 1000;

    public const int NamedRangeCombination = 1100;

    public const int UDF = 1150;

    public const int ExcelFunction = 1200;
    public const int ExcelRefFunction = 1200;
    public const int FileNameNumericToken = 1200;
    public const int SheetToken = 1200;
    public const int SheetQuotedToken = 1200;
}
#endregion

// Names of the built-in Excel functions. This is an expression-bodied property, so every access
// calls GetExcelFunctionList() again.
private static string[] excelFunctionList => GetExcelFunctionList();

/// <summary>
/// Reads the embedded function list resource for this grammar version and splits it on
/// '\n' and '\r', dropping empty entries, so each remaining entry is one line of the resource.
/// </summary>
private static string[] GetExcelFunctionList()
{
    var resource = Properties.Resources.ExcelBuiltinFunctionList_v171;
    using (var sr = new StringReader(resource))
        return sr.ReadToEnd().Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
}
public static class GrammarNames
{
    // Names of the grammar's non-terminals.
    #region Non-Terminals
    public const string Argument = "Argument";
    public const string Arguments = "Arguments";
    public const string ArrayColumns = "ArrayColumns";
    public const string ArrayConstant = "ArrayConstant";
    public const string ArrayFormula = "ArrayFormula";
    public const string ArrayRows = "ArrayRows";
    public const string Bool = "Bool";
    public const string Cell = "Cell";
    public const string Constant = "Constant";
    public const string ConstantArray = "ConstantArray";
    public const string DynamicDataExchange = "DynamicDataExchange";
    public const string EmptyArgument = "EmptyArgument";
    public const string Error = "Error";
    public const string ExcelFunction = "ExcelFunction";
    public const string File = "File";
    public const string Formula = "Formula";
    public const string FormulaWithEq = "FormulaWithEq";
    public const string FunctionCall = "FunctionCall";
    public const string FunctionName = "FunctionName";
    // Note: the value ("HRange") is abbreviated and differs from the constant's identifier
    public const string HorizontalRange = "HRange";
    public const string MultiRangeFormula = "MultiRangeFormula";
    public const string NamedRange = "NamedRange";
    public const string Number = "Number";
    public const string Prefix = "Prefix";
    public const string QuotedFileSheet = "QuotedFileSheet";
    public const string Range = "Range";
    public const string Reference = "Reference";
    public const string ReferenceFunctionCall = "ReferenceFunctionCall";
    public const string RefError = "RefError";
    public const string RefFunctionName = "RefFunctionName";
    public const string ReservedName = "ReservedName";
    public const string Sheet = "Sheet";
    public const string StructuredReference = "StructuredReference";
    public const string StructuredReferenceColumn = "StructuredReferenceColumn";
    public const string StructuredReferenceExpression = "StructuredReferenceExpression";
    public const string StructuredReferenceSpecifier = "StructuredReferenceSpecifier";
    public const string StructuredReferenceQualifier = "StructuredReferenceQualifier";
    public const string Text = "Text";
    public const string UDFName = "UDFName";
    public const string UDFunctionCall = "UDFunctionCall";
    public const string Union = "Union";
    // Note: the value ("VRange") is abbreviated and differs from the constant's identifier
    public const string VerticalRange = "VRange";
    #endregion

    // Names of transient non-terminals.
    #region Transient Non-Terminals
    public const string TransientStart = "Start";
    public const string TransientInfixOp = "InfixOp";
    public const string TransientPostfixOp = "PostfixOp";
    public const string TransientPrefixOp = "PrefixOp";
    public const string TransientReferenceItem = "ReferenceItem";
    #endregion

    // Names of the grammar's terminals (tokens).
    #region Terminals
    public const string TokenBool = "BoolToken";
    public const string TokenCell = "CellToken";
    public const string TokenEmptyArgument = "EmptyArgumentToken";
    public const string TokenError = "ErrorToken";
    public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
    public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
    public const string TokenFilePath = "FilePathToken";
    public const string TokenFileName = "FileNameToken";
    public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
    public const string TokenFileNameNumeric = "FileNameNumericToken";
    public const string TokenHRange = "HRangeToken";
    // Name of the intersection operator terminal; also returned as the function name of an intersection
    public const string TokenIntersect = "INTERSECT";
    public const string TokenMultipleSheets = "MultipleSheetsToken";
    public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
    public const string TokenName = "NameToken";
    public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
    public const string TokenNumber = "NumberToken";
    public const string TokenRefError = "RefErrorToken";
    public const string TokenReservedName = "ReservedNameToken";
    // Note: unlike most other terminal names this value has no "Token" suffix
    public const string TokenSingleQuotedString = "SingleQuotedString";
    public const string TokenSheet = "SheetNameToken";
    public const string TokenSheetQuoted = "SheetNameQuotedToken";
    public const string TokenSRColumn = "SRColumnToken";
    public const string TokenSRSpecifier = "SRSpecifierToken";
    public const string TokenText = "TextToken";
    public const string TokenUDF = "UDFToken";
    // The union operator is named after its literal symbol; also returned as the function name of a union
    public const string TokenUnionOperator = ",";
    public const string TokenVRange = "VRangeToken";
    #endregion
}
#endregion
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v171/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v171
{
    ///
    /// Excel formula parser
/// Contains the parser and utilities that operate directly on the parse tree or make working with the parse tree easier. ///
public static class ExcelFormulaParser
{
    /// <summary>
    /// Thread-local singleton parser instance
    /// </summary>
    [ThreadStatic] private static Parser _p;

    /// <summary>
    /// Lazily created parser; the backing field is [ThreadStatic], so every thread gets its own instance
    /// </summary>
    private static Parser P => _p ?? (_p = new Parser(new ExcelFormulaGrammar()));

    /// <summary>
    /// Parse a formula, return the tree's root node
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">
    /// If formula could not be parsed
    /// </exception>
    /// <returns>Parse tree root node</returns>
    public static ParseTreeNode Parse(string input)
    {
        return ParseToTree(input).Root;
    }

    /// <summary>
    /// Parse a formula, return the tree
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">
    /// If formula could not be parsed
    /// </exception>
    /// <returns>Parse tree</returns>
    public static ParseTree ParseToTree(string input)
    {
        var tree = P.Parse(input);

        if (tree.HasErrors())
        {
            throw new ArgumentException("Failed parsing input <<" + input + ">>");
        }

        // Move the span of every intersection operator one character to the left and give it length 1
        var intersects = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenIntersect));
        foreach (ParseTreeNode intersect in intersects)
        {
            var newLocation = new SourceLocation(intersect.Span.Location.Position - 1, intersect.Span.Location.Line, intersect.Span.Location.Column - 1);
            intersect.Span = new SourceSpan(newLocation, 1);
        }

        // Let quoted sheet name tokens include the whitespace that directly precedes them in the source text
        var quotedSheetNodes = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenSheetQuoted));
        foreach (ParseTreeNode quotedSheetNode in quotedSheetNodes)
        {
            PrefixInfo.FixQuotedSheetNodeForWhitespace(quotedSheetNode, input);
        }

        return tree;
    }

    /// <summary>
    /// All nodes (terminals and non-terminals) in depth-first pre-order, with a conditional stop
    /// </summary>
    /// <param name="root">The root node</param>
    /// <param name="stopAt">Don't process the children of a node matching this predicate</param>
    // inspiration taken from https://irony.codeplex.com/discussions/213938
    public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
    {
        var stack = new Stack<ParseTreeNode>();
        stack.Push(root);
        while (stack.Count > 0)
        {
            var node = stack.Pop();
            yield return node;

            // Check if we don't want to process the children of this node
            if (stopAt != null && stopAt(node)) continue;

            var children = node.ChildNodes;
            // Push children on in reverse order so that they will
            // be evaluated left -> right when popped.
            for (int i = children.Count - 1; i >= 0; i--)
            {
                stack.Push(children[i]);
            }
        }
    }

    /// <summary>
    /// All nodes (terminals and non-terminals) in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
    {
        return AllNodesConditional(root);
    }

    /// <summary>
    /// All nodes of a certain type in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
    {
        return AllNodes(root.AllNodes(), type);
    }

    /// <summary>
    /// Filter a node sequence down to the nodes of a certain type
    /// </summary>
    internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
    {
        return allNodes.Where(node => node.Is(type));
    }

    /// <summary>
    /// Get the parent node of a node
    /// </summary>
    /// <remarks>
    /// This is an expensive operation, as the whole tree will be searched through
    /// </remarks>
    /// <exception cref="ArgumentException">If no node below <paramref name="treeRoot"/> has the child as a direct child</exception>
    public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
    {
        var parent = treeRoot.AllNodes()
            .FirstOrDefault(node => node.ChildNodes.Any(c => c == child));
        if(parent == null) throw new ArgumentException("Child is not part of the tree", nameof(child));
        return parent;
    }

    /// <summary>
    /// The node type/name
    /// </summary>
    public static string Type(this ParseTreeNode node)
    {
        return node.Term.Name;
    }

    /// <summary>
    /// Check if a node is of a particular type
    /// </summary>
    public static bool Is(this ParseTreeNode pt, string type)
    {
        return pt.Type() == type;
    }

    /// <summary>
    /// Checks whether this node is a function
    /// </summary>
    public static Boolean IsFunction(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.FunctionCall)
            || input.Is(GrammarNames.ReferenceFunctionCall)
            || input.Is(GrammarNames.UDFunctionCall)
            // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
            || (input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction())
            ;
    }

    /// <summary>
    /// Whether or not this node represents parentheses "(_)"
    /// </summary>
    public static bool IsParentheses(this ParseTreeNode input)
    {
        switch (input.Type())
        {
            case GrammarNames.Formula:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Formula);
            case GrammarNames.Reference:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Reference);
            default:
                return false;
        }
    }

    /// <summary>
    /// Whether this node is a function with three children of which the middle one is an operator
    /// </summary>
    public static bool IsBinaryOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 3
               && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// Whether this node is a binary operation of type FunctionCall
    /// </summary>
    public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);
    }

    /// <summary>
    /// Whether this node is a binary operation of type ReferenceFunctionCall
    /// </summary>
    public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
    }

    /// <summary>
    /// Whether this node is a unary prefix or unary postfix operation
    /// </summary>
    public static bool IsUnaryOperation(this ParseTreeNode input)
    {
        return IsUnaryPrefixOperation(input) || IsUnaryPostfixOperation(input);
    }

    /// <summary>
    /// Whether this node is a function with two children of which the first one is an operator
    /// </summary>
    public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 2
               && input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// Whether this node is a function with two children of which the second one is an operator
    /// </summary>
    public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 2
               && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// Drop the last character of a string
    /// </summary>
    private static string RemoveFinalSymbol(string input)
    {
        input = input.Substring(0, input.Length - 1);
        return input;
    }

    /// <summary>
    /// Get the function or operator name of this function call
    /// </summary>
    /// <exception cref="ArgumentException">If the node is not a function call</exception>
    public static string GetFunction(this ParseTreeNode input)
    {
        if (input.IsIntersection())
        {
            return GrammarNames.TokenIntersect;
        }
        if (input.IsUnion())
        {
            return GrammarNames.TokenUnionOperator;
        }
        if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
        {
            return input.ChildNodes[1].Print();
        }
        if (input.IsUnaryPrefixOperation())
        {
            return input.ChildNodes[0].Print();
        }
        if (input.IsNamedFunction())
        {
            // Strip the trailing symbol of the printed name (presumably the opening parenthesis that is
            // part of the function name token) and normalise the casing. The invariant culture is used so
            // the result does not depend on the current thread culture (e.g. Turkish dotted/dotless i).
            return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
        }
        if (input.IsExternalUDFunction())
        {
            return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
        }

        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Check if this node is a specific function
    /// </summary>
    public static bool MatchFunction(this ParseTreeNode input, string functionName)
    {
        return IsFunction(input) && GetFunction(input) == functionName;
    }

    /// <summary>
    /// Get all the arguments of a function or operation
    /// </summary>
    /// <exception cref="ArgumentException">If the node is not a function call</exception>
    public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
    {
        if (input.IsNamedFunction())
        {
            return input
                .ChildNodes[1] // "Arguments" non-terminal
                .ChildNodes    // "Argument" non-terminals
                .Select(node => node.ChildNodes[0])
                ;
        }
        if (input.IsBinaryOperation())
        {
            return new[] {input.ChildNodes[0], input.ChildNodes[2]};
        }
        if (input.IsUnaryPrefixOperation())
        {
            return new[] {input.ChildNodes[1]};
        }
        if (input.IsUnaryPostfixOperation())
        {
            return new[] {input.ChildNodes[0]};
        }
        if (input.IsUnion())
        {
            return input.ChildNodes[0].ChildNodes;
        }
        if (input.IsExternalUDFunction())
        {
            return input // Reference
                .ChildNodes[1] // UDFunctionCall
                .ChildNodes[1] // Arguments
                .ChildNodes // Argument non-terminals
                .Select(node => node.ChildNodes[0])
                ;
        }
        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Checks whether this node is a built-in excel function
    /// </summary>
    public static bool IsBuiltinFunction(this ParseTreeNode node)
    {
        return node.IsFunction() && (node.ChildNodes[0].Is(GrammarNames.FunctionName) || node.ChildNodes[0].Is(GrammarNames.RefFunctionName));
    }

    /// <summary>
    /// Whether or not this node represents an intersection
    /// </summary>
    public static bool IsIntersection(this ParseTreeNode input)
    {
        return IsBinaryOperation(input) &&
               input.ChildNodes[1].Token.Terminal.Name == GrammarNames.TokenIntersect;
    }

    /// <summary>
    /// Whether or not this node represents a union
    /// </summary>
    public static bool IsUnion(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.ReferenceFunctionCall)
               && input.ChildNodes.Count == 1
               && input.ChildNodes[0].Is(GrammarNames.Union);
    }

    /// <summary>
    /// Checks whether this node is a function call with name, and not just a unary or binary operation
    /// </summary>
    public static bool IsNamedFunction(this ParseTreeNode input)
    {
        return (input.Is(GrammarNames.FunctionCall) && input.ChildNodes[0].Is(GrammarNames.FunctionName))
               || (input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes[0].Is(GrammarNames.RefFunctionName))
               || input.Is(GrammarNames.UDFunctionCall);
    }

    /// <summary>
    /// Whether this node is a binary or unary operation
    /// </summary>
    public static bool IsOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() || input.IsUnaryOperation();
    }

    /// <summary>
    /// Whether this node is a Reference with two children of which the second one is a named function
    /// </summary>
    public static bool IsExternalUDFunction(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsNamedFunction();
    }

    /// <summary>
    /// True if this node presents a number constant with a sign
    /// </summary>
    public static bool IsNumberWithSign(this ParseTreeNode input)
    {
        return IsUnaryPrefixOperation(input)
               && input.ChildNodes[1].ChildNodes[0].Is(GrammarNames.Constant)
               && input.ChildNodes[1].ChildNodes[0].ChildNodes[0].Is(GrammarNames.Number);
    }

    /// <summary>
    /// Extract all of the information from a Prefix non-terminal
    /// </summary>
    public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix) => PrefixInfo.From(prefix);

    /// <summary>
    /// Go to the first non-formula child node
    /// </summary>
    public static ParseTreeNode SkipFormula(this ParseTreeNode input)
    {
        while (input.Is(GrammarNames.Formula))
        {
            input = input.ChildNodes.First();
        }
        return input;
    }

    /// <summary>
    /// Get all child nodes that are references and aren't part of another reference expression
    /// </summary>
    public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
    {
        return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
            .Where(node => node.Is(GrammarNames.Reference))
            .Select(node => node.SkipToRelevant())
            ;
    }

    /// <summary>
    /// Gets the ParserReferences from the input parse tree node and its children
    /// </summary>
    /// <remarks>
    /// 5 cases:
    /// 1. ReferenceItem node: convert to ParserReference
    /// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
    ///    (to include the references in the arguments of external UDFs)
    /// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
    /// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
    /// 5. Other cases (RefFunctionCall, Union, Arguments):recursive call on the nodes returned from GetReferenceNodes(node)
    /// </remarks>
    public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
    {
        if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
            node = node.ChildNodes[0];

        var list = new List<ParserReference>();
        switch (node.Type())
        {
            case GrammarNames.Cell:
            case GrammarNames.NamedRange:
            case GrammarNames.HorizontalRange:
            case GrammarNames.VerticalRange:
            case GrammarNames.StructuredReference:
                list.Add(new ParserReference(node));
                break;
            case GrammarNames.Reference:
                list.Add(new ParserReference(node));
                list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
                break;
            default:
                if (node.IsRange())
                {
                    var rangeStart = GetParserReferences(node.ChildNodes[0].SkipToRelevant()).First();
                    var rangeEnd = GetParserReferences(node.ChildNodes[2].SkipToRelevant()).First();
                    if (rangeStart.ReferenceType == ReferenceType.Cell && rangeEnd.ReferenceType == ReferenceType.Cell)
                    {
                        // Cell:Cell, merge both limits into a single CellRange reference
                        ParserReference range = rangeStart;
                        range.MaxLocation = rangeEnd.MinLocation;
                        range.ReferenceType = ReferenceType.CellRange;
                        range.ReferenceNode = node;
                        range.LocationString = node.Print();
                        list.Add(range);
                        break;
                    }
                    if (rangeStart.ReferenceType == ReferenceType.Table && rangeEnd.ReferenceType == ReferenceType.Table && rangeStart.Name == rangeEnd.Name && rangeStart.TableColumns.Length == 1 && rangeEnd.TableColumns.Length == 1)
                    {
                        // Two single-column references into the same table, merge them into one table reference
                        ParserReference range = rangeStart;
                        range.TableColumns = rangeStart.TableColumns.Concat(rangeEnd.TableColumns).ToArray();
                        range.TableSpecifiers = rangeStart.TableSpecifiers.SequenceEqual(rangeEnd.TableSpecifiers) ? range.TableSpecifiers : new string[0];
                        range.ReferenceNode = node;
                        range.LocationString = node.Print();
                        list.Add(range);
                        break;
                    }
                }
                list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
                break;
        }
        return list;
    }

    /// <summary>
    /// Whether or not this node represents a range
    /// </summary>
    public static bool IsRange(this ParseTreeNode input)
    {
        return input.IsBinaryReferenceOperation() &&
               input.ChildNodes[1].Is(":") &&
               input.ChildNodes[0].ChildNodes.Last().Type() == input.ChildNodes[2].ChildNodes.Last().Type();
    }

    /// <summary>
    /// Go to the first "relevant" child node, i.e. skips wrapper nodes
    /// </summary>
    /// <param name="input">The input parse tree node</param>
    /// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
    /// <remarks>
    /// Skips:
    ///  * FormulaWithEq and ArrayFormula nodes
    ///  * Formula nodes
    ///  * Parentheses
    ///  * Reference nodes which are just wrappers
    /// </remarks>
    public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
    {
        while (true)
        {
            switch (input.Type())
            {
                case GrammarNames.FormulaWithEq:
                case GrammarNames.ArrayFormula:
                    input = input.ChildNodes[1];
                    break;
                case GrammarNames.Argument:
                case GrammarNames.Formula:
                    if (input.ChildNodes.Count == 1)
                    {
                        input = input.ChildNodes[0];
                    }
                    else
                    {
                        return input;
                    }
                    break;
                case GrammarNames.Reference:
                    // Skip references which are parentheses
                    // Skip references without a prefix (=> they only have one child node) if the option is set
                    if ((skipReferencesWithoutPrefix && input.ChildNodes.Count == 1) || input.IsParentheses())
                    {
                        input = input.ChildNodes[0];
                    }
                    else
                    {
                        return input;
                    }
                    break;
                default:
                    return input;
            }
        }
    }

    /// <summary>
    /// Pretty-print a parse tree to a string
    /// </summary>
    /// <exception cref="ArgumentException">If a node type is encountered for which no print rule exists</exception>
    public static string Print(this ParseTreeNode input)
    {
        // For terminals, just print the token text
        if (input.Term is Terminal)
        {
            return input.Token.Text;
        }

        // (Lazy) enumerable for printed children
        var children = input.ChildNodes.Select(Print);
        // Concrete list when needed
        List<string> childrenList;

        // Switch on non-terminals
        switch (input.Term.Name)
        {
            case GrammarNames.Formula:
                // Check if these are brackets, otherwise print first child
                return IsParentheses(input) ? $"({children.First()})" : children.First();

            case GrammarNames.FunctionCall:
            case GrammarNames.ReferenceFunctionCall:
            case GrammarNames.UDFunctionCall:
                childrenList = children.ToList();

                if (input.IsNamedFunction())
                {
                    return string.Join("", childrenList) + ")";
                }

                if (input.IsBinaryOperation())
                {
                    // format string for "normal" binary operation
                    string format = "{0}{1}{2}";
                    if (input.IsIntersection())
                    {
                        format = "{0} {2}";
                    }

                    return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
                }

                if (input.IsUnion())
                {
                    return $"({string.Join(",", childrenList)})";
                }

                if (input.IsUnaryOperation())
                {
                    return string.Join("", childrenList);
                }

                throw new ArgumentException("Unknown function type.");

            case GrammarNames.Reference:
                return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);

            case GrammarNames.Prefix:
                var ret = string.Join("", children);
                // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
                if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
                {
                    ret += "!";
                }
                return ret;

            case GrammarNames.ArrayFormula:
                return "{=" + children.ElementAt(1) + "}";

            // Terms for which to print all child nodes concatenated
            case GrammarNames.ArrayConstant:
            case GrammarNames.DynamicDataExchange:
            case GrammarNames.FormulaWithEq:
            case GrammarNames.File:
            case GrammarNames.MultiRangeFormula:
            case GrammarNames.StructuredReference:
            case GrammarNames.StructuredReferenceColumn:
            case GrammarNames.StructuredReferenceExpression:
            case GrammarNames.StructuredReferenceSpecifier:
                return string.Join("", children);

            // Terms for which we print the children comma-separated
            case GrammarNames.Arguments:
            case GrammarNames.ArrayRows:
            case GrammarNames.Union:
                return string.Join(",", children);

            case GrammarNames.ArrayColumns:
                return string.Join(";", children);

            case GrammarNames.ConstantArray:
                return $"{{{children.First()}}}";

            default:
                // If it is not defined above and the number of children is exactly one, we want to just print the first child
                if (input.ChildNodes.Count == 1)
                {
                    return children.First();
                }
                throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine +
                                            "This probably means the Excel grammar was modified without the print function being modified");
        }
    }
}
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v171/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v171
{
    public enum ReferenceType
    {
        Cell,
        CellRange,
        UserDefinedName,
        HorizontalRange,
        VerticalRange,
        RefError,
        Table,
        UserDefinedFunction
    }

    public class ParserReference
    {
        public ReferenceType ReferenceType { get; set; }
        public ParseTreeNode ReferenceNode { get; set; }
        public string LocationString { get; set; }
        public string Worksheet { get; set; }
        public string LastWorksheet { get; set; }
        public string FilePath { get; set; }
        public string FileName { get; set; }
        public string Name { get; set; }
        public string MinLocation { get; set; }
        public string MaxLocation { get; set; }
        public string[] TableSpecifiers { get; set; }
        public string[] TableColumns { get; set; }

        public ParserReference(ParseTreeNode node)
        {
            InitializeReference(node);
        }

        ///
        /// Initializes the current object based on the input ParseTreeNode
        ///
        ///
        /// For Reference nodes (Prefix ReferenceItem), it initialize the values derived from the Prefix node and
        /// is re-invoked for the ReferenceItem node.
        ///
        public void InitializeReference(ParseTreeNode node)
        {
            switch (node.Type())
            {
                case GrammarNames.Reference:
                    PrefixInfo prefix = node.ChildNodes[0].GetPrefixInfo();
                    Worksheet = prefix.HasSheet ?
prefix.Sheet.Replace("''", "'") : "(Undefined sheet)"; if (prefix.HasMultipleSheets) { string[] sheets = prefix.MultipleSheets.Split(':'); Worksheet = sheets[0]; LastWorksheet = sheets[1]; } if (prefix.HasFilePath) { FilePath = prefix.FilePath; } if (prefix.HasFileNumber) { FileName = prefix.FileNumber.ToString(); } else if (prefix.HasFileName) { FileName = prefix.FileName; } InitializeReference(node.ChildNodes[1]); break; case GrammarNames.Cell: ReferenceType = ReferenceType.Cell; MinLocation = node.ChildNodes[0].Token.ValueString; MaxLocation = MinLocation; break; case GrammarNames.NamedRange: ReferenceType = ReferenceType.UserDefinedName; Name = node.ChildNodes[0].Token.ValueString; break; case GrammarNames.StructuredReference: ReferenceType = ReferenceType.Table; Name = node.ChildNodes.FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceQualifier)?.ChildNodes[0].Token.ValueString; TableSpecifiers = node.AllNodes().Where(x => x.Is(GrammarNames.TokenSRSpecifier) || x.Is("@")).Select(x => UnEscape(x.Token.ValueString, "'")).ToArray(); TableColumns = node.AllNodes().Where(x => x.Is(GrammarNames.TokenSRColumn)).Select(x => UnEscape(x.Token.ValueString, "'")).ToArray(); break; case GrammarNames.HorizontalRange: string[] horizontalLimits = node.ChildNodes[0].Token.ValueString.Split(':'); ReferenceType = ReferenceType.HorizontalRange; MinLocation = horizontalLimits[0]; MaxLocation = horizontalLimits[1]; break; case GrammarNames.VerticalRange: string[] verticalLimits = node.ChildNodes[0].Token.ValueString.Split(':'); ReferenceType = ReferenceType.VerticalRange; MinLocation = verticalLimits[0]; MaxLocation = verticalLimits[1]; break; case GrammarNames.RefError: ReferenceType = ReferenceType.RefError; break; case GrammarNames.UDFunctionCall: ReferenceType = ReferenceType.UserDefinedFunction; Name = node.ChildNodes[0].ChildNodes[0].Token.ValueString.TrimEnd('('); break; } ReferenceNode = node; LocationString = node.Print(); if (ReferenceType == 
ReferenceType.UserDefinedFunction && Name != null)
            {
                LocationString = LocationString.Substring(0, LocationString.IndexOf('(', LocationString.LastIndexOf(Name, System.StringComparison.Ordinal)));
            }
        }

        private string UnEscape(string value, string escapeCharacter)
        {
            return System.Text.RegularExpressions.Regex.Replace(value, $"{escapeCharacter}(?!{escapeCharacter})", "");
        }

        public override string ToString()
        {
            return LocationString;
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v171/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v171
{
    // TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23
    /// <summary>
    /// Simple data class that holds information about a Prefix.
    /// </summary>
    /// <remarks>
    /// Equality and hashing ignore case and do not take <see cref="IsQuoted"/> into account.
    /// </remarks>
    public class PrefixInfo : IEquatable<PrefixInfo>
    {
        /// <summary>Path of the referenced file, or null if the prefix has none</summary>
        public string FilePath { get; }
        public bool HasFilePath => FilePath != null;

        private readonly int? _fileNumber;
        /// <summary>Numeric file reference; 0 if <see cref="HasFileNumber"/> is false</summary>
        public int FileNumber => _fileNumber.GetValueOrDefault();
        public bool HasFileNumber => _fileNumber.HasValue;

        /// <summary>Name of the referenced file, or null if the prefix has none</summary>
        public string FileName { get; }
        public bool HasFileName => FileName != null;

        public bool HasFile => HasFileName || HasFileNumber;

        /// <summary>Sheet name, or null if the prefix has no single sheet</summary>
        public string Sheet { get; }
        public bool HasSheet => Sheet != null;

        /// <summary>Multiple-sheets specification, or null if the prefix has none</summary>
        public string MultipleSheets { get; }
        public bool HasMultipleSheets => MultipleSheets != null;

        /// <summary>Whether the prefix started with a single quote</summary>
        public bool IsQuoted { get; }

        public PrefixInfo(string sheet = null, int? fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
        {
            Sheet = sheet;
            _fileNumber = fileNumber;
            FileName = fileName;
            FilePath = filePath;
            MultipleSheets = multipleSheets;
            IsQuoted = isQuoted;
        }

        /// <summary>
        /// Create a PrefixInfo class from a parse tree node
        /// </summary>
        /// <exception cref="ArgumentException">If the node is not a Prefix non-terminal</exception>
        internal static PrefixInfo From(ParseTreeNode prefix)
        {
            if (prefix.Type() != GrammarNames.Prefix)
            {
                throw new ArgumentException("Not a prefix", nameof(prefix));
            }

            string filePath = null;
            int? fileNumber = null;
            string fileName = null;
            string sheetName = null;
            string multipleSheets = null;

            // Token number we're processing
            var cur = 0;

            // Check for quotes
            var isQuoted = prefix.ChildNodes[cur].Is("'");
            if (isQuoted)
            {
                cur++;
            }

            // Check and process file
            if (prefix.ChildNodes[cur].Is(GrammarNames.File))
            {
                ParseTreeNode file = prefix.ChildNodes[cur];

                if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
                {
                    // Numeric filename, strip the first and last character of the printed token
                    fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
                }
                else
                {
                    // String filename
                    var iCur = 0;
                    // Check if it includes a path
                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                    {
                        filePath = file.ChildNodes[iCur].Print();
                        iCur++;
                    }

                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                    {
                        fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                    }
                    else
                    {
                        fileName = file.ChildNodes[iCur].Print();
                    }
                }

                cur++;
            }

            // A prefix can consist of a file only (the exclamation mark is not part of the parse tree),
            // in which case there is no further child node to inspect for a sheet.
            if (cur < prefix.ChildNodes.Count)
            {
                // Check for a non-quoted sheet
                if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheet))
                {
                    sheetName = Substr(prefix.ChildNodes[cur].Print(), 1);
                }
                // Check for a quoted sheet
                else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheetQuoted))
                {
                    // remove quote and !
                    sheetName = Substr(prefix.ChildNodes[cur].Print(), 2);

                    if (sheetName == "")
                    {
                        // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                        // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                        sheetName = " ";
                    }
                }
                // Check if multiple sheets
                else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenMultipleSheets))
                {
                    multipleSheets = Substr(prefix.ChildNodes[cur].Print(), 1);
                }
            }

            return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
        }

        /// <summary>
        /// Extend the span and text of a quoted sheet token to the left so that it includes the
        /// whitespace that directly precedes it in the source text. Does nothing if there is none.
        /// </summary>
        /// <param name="quotedSheetNode">The quoted sheet token node to adjust</param>
        /// <param name="sourceText">The text that was parsed</param>
        internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
        {
            var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
            SourceLocation currentLocation = quotedSheetNode.Span.Location;
            if (newPosition == currentLocation.Position)
            {
                return;
            }

            // The start only ever moves to the left, so the column has to decrease by the same amount as the position
            var shift = currentLocation.Position - newPosition;
            var newLocation = new SourceLocation(newPosition, currentLocation.Line, currentLocation.Column - shift);
            quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

            // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
            typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
                ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
        }

        /// <summary>
        /// Walk backwards from the start of the token over all directly preceding whitespace characters
        /// </summary>
        /// <returns>The position of the first character of that whitespace run, or the token position if there is none</returns>
        private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
        {
            var startIndex = nodeSheetQuoted.Span.Location.Position;
            while (startIndex > 0)
            {
                if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
                {
                    break;
                }

                startIndex--;
            }

            return startIndex;
        }

        /// <summary>
        /// Return the string without its first <paramref name="removeFirst"/> and last <paramref name="removeLast"/> characters
        /// </summary>
        private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
        {
            return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
        }

        public override bool Equals(object other) => Equals(other as PrefixInfo);

        /// <summary>
        /// Case-insensitive comparison of file number, path, file name, sheet and multiple sheets
        /// </summary>
        public bool Equals(PrefixInfo other)
        {
            if (ReferenceEquals(null, other)) return false;
            if (ReferenceEquals(this, other)) return true;
            return _fileNumber == other._fileNumber
                   && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
        }

        public override int GetHashCode()
        {
            unchecked
            {
                var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
                hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
                hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
                hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
                hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
                return hashCode;
            }
        }

        public static bool operator ==(PrefixInfo left, PrefixInfo right)
        {
            return Equals(left, right);
        }

        public static bool operator !=(PrefixInfo left, PrefixInfo right)
        {
            return !Equals(left, right);
        }

        /// <summary>
        /// Rebuild the textual prefix from its parts, always terminated by an exclamation mark
        /// </summary>
        public override string ToString()
        {
            var res = new StringBuilder();
            if (IsQuoted) res.Append("'");
            if (HasFilePath) res.Append(FilePath);
            if (HasFileNumber) res.Append($"[{FileNumber}]");
            if (HasFileName) res.Append($"[{FileName}]");
            if (HasSheet) res.Append(Sheet);
            if (HasMultipleSheets) res.Append(MultipleSheets);
            if (IsQuoted) res.Append("'");
            res.Append("!");
            return res.ToString();
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v171/WordsTerminal.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;

namespace XLParser.Web.XLParserVersions.v171
{
    ///
    /// Terminal that can determine, if there the input contains a one of expected words.
    ///
    /// Children of each node are represented as an array to allow direct indexation. Do not use
    /// for words that have a large difference between low and high character of a token.
    public class WordsTerminal : Terminal
    {
        private readonly Node _rootNode;
        private readonly List _words;
        private bool _caseSensitive;

        public WordsTerminal(string name, IEnumerable words) : base(name)
        {
            _rootNode = new Node(0);
            _words = new List(words);
        }

        public override void Init(GrammarData grammarData)
        {
            base.Init(grammarData);
            _caseSensitive = Grammar.CaseSensitive;
            foreach (var word in _words)
            {
                AddWordToTree(_caseSensitive ?
word : word.ToUpperInvariant()); } if (EditorInfo == null) { EditorInfo = new TokenEditorInfo(TokenType.Unknown, TokenColor.Text, TokenTriggers.None); } } public override IList GetFirsts() => _words; public override Token TryMatch(ParsingContext context, ISourceStream source) { var node = _rootNode; var input = source.Text; for (var i = source.PreviewPosition; i < input.Length; ++i) { var c = _caseSensitive ? input[i] : char.ToUpperInvariant(input[i]); var nextNode = node[c]; if (nextNode is null) { break; } node = nextNode; } if (!node.IsTerminal) { return null; } source.PreviewPosition += node.Length; return source.CreateToken(OutputTerminal); } private void AddWordToTree(string word) { var node = _rootNode; foreach (var c in word) { node = node.GetOrAddChild(c); } node.IsTerminal = true; } private class Node { private char _lowChar = '\0'; private char _highChar = '\0'; private Node[] _children; public Node(int length) { Length = length; } public bool IsTerminal { get; set; } public int Length { get; } public Node this[char c] { get { if (_children is null) { return null; } if (c < _lowChar || c > _highChar) { return null; } return _children[c - _lowChar]; } } internal Node GetOrAddChild(char c) { if (_children is null) { var node = new Node(Length + 1); _children = new[] { node }; _lowChar = c; _highChar = c; return node; } var newLowChar = (char)Math.Min(_lowChar, c); if (newLowChar != _lowChar) { var newChildrenCount = _highChar - newLowChar + 1; Array.Resize(ref _children, newChildrenCount); var ofs = _lowChar - newLowChar; Array.Copy(_children, 0, _children, ofs, newChildrenCount - ofs); Array.Clear(_children, 0, ofs); _lowChar = newLowChar; return _children[0] = new Node(Length + 1); } var newHighChar = (char)Math.Max(_highChar, c); if (newHighChar != _highChar) { var newChildrenCount = newHighChar - _lowChar + 1; Array.Resize(ref _children, newChildrenCount); _highChar = newHighChar; return _children[newChildrenCount - 1] = new Node(Length + 1); } var 
charIdx = c - _lowChar; var child = _children[charIdx]; if (child is null) { return _children[charIdx] = new Node(Length + 1); } return child; } } } } ================================================ FILE: app/XLParser.Web/XLParserVersions/v172/ExcelFormulaGrammar.cs ================================================ using Irony.Parsing; using System; using System.Collections.Generic; using System.Globalization; using System.IO; using System.Linq; using System.Reflection; namespace XLParser.Web.XLParserVersions.v172 { /// /// Contains the XLParser grammar /// [Language("Excel Formulas", "1.7.2", "Grammar for Excel Formulas")] public class ExcelFormulaGrammar : Grammar { #region 1-Terminals #region Symbols and operators public Terminal at => ToTerm("@"); public Terminal comma => ToTerm(","); public Terminal colon => ToTerm(":"); public Terminal hash => ToTerm("#"); public Terminal semicolon => ToTerm(";"); public Terminal OpenParen => ToTerm("("); public Terminal CloseParen => ToTerm(")"); public Terminal CloseSquareParen => ToTerm("]"); public Terminal OpenSquareParen => ToTerm("["); public Terminal exclamationMark => ToTerm("!"); public Terminal CloseCurlyParen => ToTerm("}"); public Terminal OpenCurlyParen => ToTerm("{"); public Terminal QuoteS => ToTerm("'"); public Terminal mulop => ToTerm("*"); public Terminal plusop => ToTerm("+"); public Terminal divop => ToTerm("/"); public Terminal minop => ToTerm("-"); public Terminal concatop => ToTerm("&"); public Terminal expop => ToTerm("^"); // Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal // Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect); public Terminal percentop => ToTerm("%"); public Terminal gtop => ToTerm(">"); public Terminal eqop => ToTerm("="); public Terminal ltop => ToTerm("<"); public Terminal neqop 
=> ToTerm("<>");
        public Terminal gteop => ToTerm(">=");
        public Terminal lteop => ToTerm("<=");

        #endregion

        #region Literals

        // Boolean literal: TRUE or FALSE.
        public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE", "T", "F")
        {
            Priority = TerminalPriority.Bool
        };

        // Numeric literal; integer values may be typed as Int32, Int64 or big integer.
        public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.None)
        {
            DefaultIntTypes = new[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt }
        };

        // Double-quoted text: a doubled quote is allowed inside, line breaks are allowed, no escape sequences.
        public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"",
            StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes);

        // Same options as TextToken, but delimited by single quotes and with a lower priority.
        public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'",
            StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes)
        { Priority = TerminalPriority.SingleQuotedString };

        // Error literals other than #REF!, which has its own token below.
        public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!", "#");
        public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError);

        #endregion

        #region Functions

        private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers
        private const string AllUdfChars = SpecialUdfChars + @"\\.\w";
        private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)";

        // The following regex uses the rather exotic feature Character Class Subtraction
        // https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction
        // Alternatives, in order: a single UDF character other than C/c/R/r; a prefix followed by a single
        // UDF character; an optional prefix followed by 2 to 1023 UDF characters. All must be followed by "(".
        private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\(";

        public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) { Priority = TerminalPriority.UDF };

        // Function tokens include the opening parenthesis.
        public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(", "I", "O")
        {
            Priority = TerminalPriority.ExcelRefFunction
        };

        public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(", "I", "C")
        {
            Priority = TerminalPriority.ExcelRefFunction
        };

        // Built-in function names from the embedded list, each with "(" appended.
        public Terminal ExcelFunction { get; } = new WordsTerminal(GrammarNames.ExcelFunction, excelFunctionList.Select(f => f + '('))
        {
            Priority = TerminalPriority.ExcelFunction
        };

        // Using this instead of Empty allows a more accurate tree
        public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);

        #endregion

        #region References and names

        // Column letters A..XFD.
        private const string ColumnPattern = @"(?:[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D]|[A-Z]{1,2})";
        // Row numbers 1..1048576.
        private const string RowPattern = @"(?:104857[0-6]|10485[0-6][0-9]|1048[0-4][0-9]{2}|104[0-7][0-9]{3}|10[0-3][0-9]{4}|[1-9][0-9]{1,5}|[1-9])";

        // "A".."Z" plus "$"
        private static readonly string[] ColumnPrefix = Enumerable.Range('A', 'Z' - 'A' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray();
        // "1".."9" plus "$"
        private static readonly string[] RowPrefix = Enumerable.Range('1', '9' - '1' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray();

        // Whole-column range (column:column) and whole-row range (row:row), each part optionally "$"-prefixed.
        public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern, ColumnPrefix);
        public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?" + RowPattern + ":[$]?" + RowPattern, RowPrefix);

        private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?"
                                              + RowPattern;
        public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex, ColumnPrefix)
        {
            Priority = TerminalPriority.CellToken
        };

        // NOTE(review): HashSet appears here without a type argument (presumably HashSet<UnicodeCategory>,
        // lost when this text was extracted) - confirm against the original file.
        private static readonly HashSet UnicodeLetterCategories = new HashSet
        {
            UnicodeCategory.UppercaseLetter,
            UnicodeCategory.LowercaseLetter,
            UnicodeCategory.TitlecaseLetter,
            UnicodeCategory.ModifierLetter,
            UnicodeCategory.OtherLetter
        };

        // 48718 letters, but it allows parser to from tokens starting with digits, parentheses, operators...
        // (Every UTF-16 code unit below ushort.MaxValue whose Unicode category is one of the letter categories above.)
        private static readonly string[] UnicodeLetters = Enumerable.Range(0, ushort.MaxValue).Where(codePoints => UnicodeLetterCategories.Contains(CharUnicodeInfo.GetUnicodeCategory((char)codePoints))).Select(codePoint => char.ToString((char)codePoint)).ToArray();
        private static readonly string[] NameStartCharPrefix = UnicodeLetters.Concat(new[] { @"\", "_" }).ToArray();

        // Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark
        private const string NameStartCharRegex = @"[\p{L}\\_]";
        private const string NameValidCharacterRegex = @"[\w\\_\.\?€]";

        public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*", NameStartCharPrefix)
        {
            Priority = TerminalPriority.Name
        };

        // Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference.
        // If we ever parse R1C1 references, make sure to include them here
        // TODO: Add all function names here
        private const string NamedRangeCombinationRegex =
            "((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
            // \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
            + "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
            // allow large cell references (e.g. A1048577) as named range
            // NOTE(review): the last alternative "d{8,}" matches eight or more literal 'd' characters;
            // it was presumably meant to be "\d{8,}" (eight or more digits). As written, a column followed by
            // an 8-digit number starting with "100".."103" (e.g. A10000000) matches none of the alternatives - confirm.
            + "|(" + ColumnPattern + @"(104857[7-9]|10485[89][0-9]|1048[6-9][0-9]{2}|1049[0-9]{3}|10[5-9][0-9]{4}|1[1-9][0-9]{5}|[2-9][0-9]{6}|d{8,})" + NameValidCharacterRegex + "*)"
            ;

        // To prevent e.g. "A1A1" being parsed as 2 cell tokens
        public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NamedRangeCombinationRegex, ColumnPrefix.Concat(new[] { "T", "F" }).ToArray())
        {
            Priority = TerminalPriority.NamedRangeCombination
        };

        // Names of the form "_xlnm." followed by letters/underscores.
        public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+", "_")
        {
            Priority = TerminalPriority.ReservedName
        };

        #region Structured References

        private const string SRSpecifierRegex = @"#(All|Data|Headers|Totals|This Row)";
        public Terminal SRSpecifierToken = new RegexBasedTerminal(GrammarNames.TokenSRSpecifier, SRSpecifierRegex, "#")
        {
            Priority = TerminalPriority.StructuredReference
        };

        // Column name: any run of characters other than [ ] ' # @, where those characters may appear
        // when preceded by a single quote.
        private const string SRColumnRegex = @"(?:[^\[\]'#@]|(?:'['\[\]#@]))+";
        public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, SRColumnRegex)
        {
            Priority = TerminalPriority.StructuredReference
        };

        #endregion

        #region Prefixes

        // Characters that exclude a sheet name from the unquoted form (they are only excluded from normalSheetName).
        private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
        // Characters excluded from both the unquoted and the quoted sheet-name patterns.
        private const string notSheetNameChars = @"'*\[\]\\:/?";
        //const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
        //const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
        private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
        // Quoted sheet name body; a doubled single quote ('') is accepted inside.
        private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*";
        //private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";

        // Sheet tokens include the trailing "!"; the quoted variant also includes the closing quote
        // (the opening quote is the separate QuoteS terminal in the Prefix rule).
        public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!")
        {
            Priority = TerminalPriority.SheetToken
        };

        public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!")
        {
            Priority = TerminalPriority.SheetQuotedToken
        };

        private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!";
        private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!";

        public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex)
        {
            Priority = TerminalPriority.MultipleSheetsToken
        };

        public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex)
        {
            Priority = TerminalPriority.MultipleSheetsToken
        };

        // "[digits]" not followed by a comma, and only when a "!" occurs somewhere later in the input.
        private const string fileNameNumericRegex = @"\[[0-9]+\](?!,)(?=.*!)";
        public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex, "[")
        {
            Priority = TerminalPriority.FileNameNumericToken
        };

        // "[anything without brackets]" under the same two lookahead conditions.
        private const string fileNameInBracketsRegex = @"\[[^\[\]]+\](?!,)(?=.*!)";
        public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex, "[")
        {
            Priority = TerminalPriority.FileName
        };

        // Source: https://stackoverflow.com/a/14632579
        private const string fileNameRegex = @"[^\.\\\[\]]+\..{1,4}";
        public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex)
        {
            Priority = TerminalPriority.FileName
        };

        // Source: http://stackoverflow.com/a/6416209/572635
        private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\-.$ @]+)\\(([^<>\"" /\|?*\\']|( |''))*\\)*";
        // NOTE(review): "(:(0-9)*)*" matches the literal text "0-9" after the colon, not digits;
        // a port number was presumably intended ("[0-9]") - confirm.
        private const string urlPathRegex = @"http(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:(0-9)*)*[/]([a-zA-Z0-9\-\.\?\,\'+&%\$#_ ()]*[/])*";
        private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")";
        public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex)
        {
            Priority = TerminalPriority.FileNamePath
        };

        #endregion

        #endregion

        #endregion

        #region 2-NonTerminals
        // Most non-terminals are first defined here, so they can be used anywhere in the rules
        // Otherwise you can only use non-terminals that have been defined previously

        public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument);
        public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments);
        public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns);
        public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant);
        public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula);
        public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows);
        public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool);
        public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell);
        public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant);
        public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray);
        public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
        public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument);
        public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error);
        public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
        public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula);
        public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq);
        public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall);
        public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName);
        public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange);
        public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp);
        public NonTerminal MultiRangeFormula{ get; } = new NonTerminal(GrammarNames.MultiRangeFormula);
        public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange);
        public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number);
        public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
        public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix);
        public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
        public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
        public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference);
        public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
        public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
        public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError);
        public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName);
        public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName);
        public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet);
        public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart);
        public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
        public NonTerminal StructuredReferenceColumn { get; } = new NonTerminal(GrammarNames.StructuredReferenceColumn);
        public NonTerminal StructuredReferenceExpression { get; } = new
NonTerminal(GrammarNames.StructuredReferenceExpression);
        public NonTerminal StructuredReferenceSpecifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceSpecifier);
        public NonTerminal StructuredReferenceQualifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceQualifier);
        public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
        public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
        public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
        public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
        public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);

        #endregion

        /// <summary>
        /// Builds the grammar: marks punctuation, assigns the production rule of every non-terminal
        /// declared above and registers operator precedence/associativity.
        /// </summary>
        public ExcelFormulaGrammar()
        {
            #region Punctuation
            // Round and curly parentheses are marked as punctuation.
            MarkPunctuation(OpenParen, CloseParen);
            MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
            #endregion

            #region Rules

            #region Base rules
            Root = Start;

            // A formula may appear with or without leading "=", as an array formula "{=...}",
            // or as "=" followed by a union of references.
            Start.Rule =
                  FormulaWithEq
                | Formula
                | ArrayFormula
                | MultiRangeFormula
                ;
            MarkTransient(Start);

            ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;

            MultiRangeFormula.Rule = eqop + Union;

            FormulaWithEq.Rule = eqop + Formula;

            Formula.Rule =
                  Reference + ReduceHere()
                | Constant
                | FunctionCall
                | ConstantArray
                | OpenParen + Formula + CloseParen
                | ReservedName
                ;

            ReservedName.Rule = ReservedNameToken;

            Constant.Rule =
                  Number
                | Text
                | Bool
                | Error
                ;

            Text.Rule = TextToken;
            Number.Rule = NumberToken;
            Bool.Rule = BoolToken;
            Error.Rule = ErrorToken;
            RefError.Rule = RefErrorToken;
            #endregion

            #region Functions

            // Operators are modelled as function calls: prefix, postfix and infix forms
            // sit next to the named-function form.
            FunctionCall.Rule =
                  FunctionName + Arguments + CloseParen
                | PrefixOp + Formula
                | Formula + PostfixOp
                | Formula + InfixOp + Formula
                ;

            FunctionName.Rule = ExcelFunction;

            // Zero or more comma-separated arguments; an argument may be empty.
            Arguments.Rule = MakeStarRule(Arguments, comma, Argument);

            EmptyArgument.Rule = EmptyArgumentToken;
            Argument.Rule = Formula | EmptyArgument;

            PrefixOp.Rule =
                  ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
                | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
                | ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
            MarkTransient(PrefixOp);

            InfixOp.Rule =
                  expop
                | mulop
                | divop
                | plusop
                | minop
                | concatop
                | gtop
                | eqop
                | ltop
                | neqop
                | gteop
                | lteop;
            MarkTransient(InfixOp);

            // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
            PostfixOp.Rule = PreferShiftHere() + percentop;
            MarkTransient(PostfixOp);
            #endregion

            #region References

            Reference.Rule =
                  ReferenceItem
                | ReferenceFunctionCall
                | OpenParen + Reference + PreferShiftHere() + CloseParen
                | Prefix + ReferenceItem
                | DynamicDataExchange
                ;

            // Reference-producing operations: range (:), intersection (implied space token),
            // parenthesized union, reference functions and the "#" postfix.
            ReferenceFunctionCall.Rule =
                  Reference + colon + Reference
                | Reference + intersectop + Reference
                | OpenParen + Union + CloseParen
                | RefFunctionName + Arguments + CloseParen
                | Reference + hash
                ;

            RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;

            // One or more comma-separated references.
            Union.Rule = MakePlusRule(Union, comma, Reference);

            ReferenceItem.Rule =
                  Cell
                | NamedRange
                | VRange
                | HRange
                | RefError
                | UDFunctionCall
                | StructuredReference
                ;
            MarkTransient(ReferenceItem);

            UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
            UDFName.Rule = UDFToken;

            VRange.Rule = VRangeToken;
            HRange.Rule = HRangeToken;

            Cell.Rule = CellToken;

            File.Rule =
                  FileNameNumericToken
                | FileNameEnclosedInBracketsToken
                | FilePathToken + FileNameEnclosedInBracketsToken
                | FilePathToken + FileName
                ;

            DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;

            NamedRange.Rule = NameToken | NamedRangeCombinationToken;

            // Quoted variants start with the separate QuoteS terminal; the closing quote and "!"
            // are part of the quoted sheet tokens themselves.
            Prefix.Rule =
                  SheetToken
                | QuoteS + SheetQuotedToken
                | File + SheetToken
                | QuoteS + File + SheetQuotedToken
                | File + exclamationMark
                | MultipleSheetsToken
                | QuoteS + MultipleSheetsQuotedToken
                | File + MultipleSheetsToken
                | QuoteS + File + MultipleSheetsQuotedToken
                | RefErrorToken
                ;

            StructuredReferenceQualifier.Rule = NameToken;

            StructuredReferenceSpecifier.Rule =
                  SRSpecifierToken
                | at
                | OpenSquareParen + SRSpecifierToken + CloseSquareParen;

            StructuredReferenceColumn.Rule =
                  SRColumnToken
                | OpenSquareParen + SRColumnToken + CloseSquareParen;

            // Accepted shapes: column, column range, "@"-prefixed column or column range, and up to two
            // specifiers optionally followed by a column or column range.
            StructuredReferenceExpression.Rule =
                  StructuredReferenceColumn
                | StructuredReferenceColumn + colon + StructuredReferenceColumn
                | at + StructuredReferenceColumn
                | at + StructuredReferenceColumn + colon + StructuredReferenceColumn
                | StructuredReferenceSpecifier
                | StructuredReferenceSpecifier + comma + StructuredReferenceColumn
                | StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
                | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier
                | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn
                | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
                ;

            StructuredReference.Rule =
                  OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
                | StructuredReferenceQualifier + OpenSquareParen + CloseSquareParen
                | StructuredReferenceQualifier + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
                ;
            #endregion

            #region Arrays
            // Array constant: rows of comma-separated constants, rows separated by semicolons.
            ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;

            ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
            ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);

            ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
            #endregion

            #endregion

            #region 5-Operator Precedence
            // Some of these operators are neutral associative instead of left associative,
            // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
            // structure of the parse tree, we like consistency.
            RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
            RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
            RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
            RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
            RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
            RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
            RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at);
            RegisterOperators(Precedence.Union, Associativity.Left, comma);
            RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
            RegisterOperators(Precedence.Range, Associativity.Left, colon);
            #endregion
        }

        #region Precedence and Priority constants
        // Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
        // Could also be an enum, but this way you don't need int casts
        // Values increase from Comparison (lowest) to Range (highest).
        private static class Precedence
        {
            // Don't use priority 0, Irony seems to view it as no priority set
            public const int Comparison = 1;
            public const int Concatenation = 2;
            public const int Addition = 3;
            public const int Multiplication = 4;
            public const int Exponentiation = 5;
            public const int UnaryPostFix = 6;
            public const int UnaryPreFix = 7;
            //public const int Reference = 8;
            public const int Union = 9;
            public const int Intersection = 10;
            public const int Range = 11;
        }

        // Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
        // E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
        // E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
        private static class TerminalPriority
        {
            // Irony Low value
            //public const int Low = -1000;

            public const int Name = -800;
            public const int ReservedName = -700;

            public const int StructuredReference = -500;

            public const int FileName = -500;
            public const int FileNamePath = -800;

            public const int SingleQuotedString = -100;

            // Irony Normal value, default value
            //public const int Normal = 0;
            public const int Bool = 0;

            public const int MultipleSheetsToken = 100;

            // Irony High value
            //public const int High = 1000;

            public const int CellToken = 1000;

            public const int NamedRangeCombination = 1100;

            public const int UDF = 1150;

            public const int ExcelFunction = 1200;
            public const int ExcelRefFunction = 1200;
            public const int FileNameNumericToken = 1200;
            public const int SheetToken = 1200;
            public const int SheetQuotedToken = 1200;
        }
        #endregion

        // Re-reads and re-splits the embedded resource on every access.
        private static string[] excelFunctionList => GetExcelFunctionList();

        /// <summary>
        /// Reads the embedded v172 built-in function list and splits it on CR/LF, dropping empty entries.
        /// </summary>
        private static string[] GetExcelFunctionList()
        {
            var resource = Properties.Resources.ExcelBuiltinFunctionList_v172;
            using (var sr = new StringReader(resource))
                return sr.ReadToEnd().Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
        }
    }

    #region Names
    /// <summary>
    /// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
    /// </summary>
    /// <remarks>
    /// Using these is strongly recommended, as these will change when breaking changes occur.
    /// It also allows you to see which code works on what grammar constructs.
    /// </remarks>
    // Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
    {
        #region Non-Terminals
        // Names given to the grammar's non-terminals. Note that HorizontalRange and VerticalRange
        // map to the shorter strings "HRange" and "VRange".
        public const string Argument = "Argument";
        public const string Arguments = "Arguments";
        public const string ArrayColumns = "ArrayColumns";
        public const string ArrayConstant = "ArrayConstant";
        public const string ArrayFormula = "ArrayFormula";
        public const string ArrayRows = "ArrayRows";
        public const string Bool = "Bool";
        public const string Cell = "Cell";
        public const string Constant = "Constant";
        public const string ConstantArray = "ConstantArray";
        public const string DynamicDataExchange = "DynamicDataExchange";
        public const string EmptyArgument = "EmptyArgument";
        public const string Error = "Error";
        public const string ExcelFunction = "ExcelFunction";
        public const string File = "File";
        public const string Formula = "Formula";
        public const string FormulaWithEq = "FormulaWithEq";
        public const string FunctionCall = "FunctionCall";
        public const string FunctionName = "FunctionName";
        public const string HorizontalRange = "HRange";
        public const string MultiRangeFormula = "MultiRangeFormula";
        public const string NamedRange = "NamedRange";
        public const string Number = "Number";
        public const string Prefix = "Prefix";
        public const string QuotedFileSheet = "QuotedFileSheet";
        public const string Range = "Range";
        public const string Reference = "Reference";
        public const string ReferenceFunctionCall = "ReferenceFunctionCall";
        public const string RefError = "RefError";
        public const string RefFunctionName = "RefFunctionName";
        public const string ReservedName = "ReservedName";
        public const string Sheet = "Sheet";
        public const string StructuredReference = "StructuredReference";
        public const string StructuredReferenceColumn = "StructuredReferenceColumn";
        public const string StructuredReferenceExpression = "StructuredReferenceExpression";
        public const string StructuredReferenceSpecifier = "StructuredReferenceSpecifier";
        public const string StructuredReferenceQualifier = "StructuredReferenceQualifier";
        public const string Text = "Text";
        public const string UDFName = "UDFName";
        public const string UDFunctionCall = "UDFunctionCall";
        public const string Union = "Union";
        public const string VerticalRange = "VRange";
        #endregion

        #region Transient Non-Terminals
        // Names of the non-terminals that the grammar passes to MarkTransient.
        public const string TransientStart = "Start";
        public const string TransientInfixOp = "InfixOp";
        public const string TransientPostfixOp = "PostfixOp";
        public const string TransientPrefixOp = "PrefixOp";
        public const string TransientReferenceItem = "ReferenceItem";
        #endregion

        #region Terminals
        // Names given to the grammar's terminals (tokens). The constant name and its value do not
        // always line up: e.g. TokenSheet is "SheetNameToken" and TokenIntersect is "INTERSECT".
        public const string TokenBool = "BoolToken";
        public const string TokenCell = "CellToken";
        public const string TokenEmptyArgument = "EmptyArgumentToken";
        public const string TokenError = "ErrorToken";
        public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
        public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
        public const string TokenFilePath = "FilePathToken";
        public const string TokenFileName = "FileNameToken";
        public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
        public const string TokenFileNameNumeric = "FileNameNumericToken";
        public const string TokenHRange = "HRangeToken";
        public const string TokenIntersect = "INTERSECT";
        public const string TokenMultipleSheets = "MultipleSheetsToken";
        public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
        public const string TokenName = "NameToken";
        public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
        public const string TokenNumber = "NumberToken";
        public const string TokenRefError = "RefErrorToken";
        public const string TokenReservedName = "ReservedNameToken";
        public const string TokenSingleQuotedString = "SingleQuotedString";
        public const string TokenSheet = "SheetNameToken";
        public const string TokenSheetQuoted = "SheetNameQuotedToken";
        public const string TokenSRColumn = "SRColumnToken";
        public const string TokenSRSpecifier = "SRSpecifierToken";
        public const string TokenText = "TextToken";
        public const string TokenUDF = "UDFToken";
        public const string TokenUnionOperator = ",";
        public const string TokenVRange = "VRangeToken";
        #endregion

    }
    #endregion
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v172/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v172
{
    ///
    /// Excel formula parser
/// Contains parser and utilities that operate directly on the parse tree, or makes working with the parse tree easier. ///
public static class ExcelFormulaParser
{
    /// <summary>
    /// Thread-local singleton parser instance
    /// </summary>
    [ThreadStatic] private static Parser _p;

    /// <summary>
    /// Lazily created parser; one instance per thread because the backing field is [ThreadStatic]
    /// </summary>
    private static Parser P => _p ?? (_p = new Parser(new ExcelFormulaGrammar()));

    /// <summary>
    /// Parse a formula, return the tree's root node
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">
    /// If formula could not be parsed
    /// </exception>
    /// <returns>Parse tree root node</returns>
    public static ParseTreeNode Parse(string input)
    {
        return ParseToTree(input).Root;
    }

    /// <summary>
    /// Parse a formula, return the tree
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">
    /// If formula could not be parsed
    /// </exception>
    /// <returns>Parse tree</returns>
    public static ParseTree ParseToTree(string input)
    {
        var tree = P.Parse(input);

        if (tree.HasErrors())
        {
            throw new ArgumentException("Failed parsing input <<" + input + ">>");
        }

        // Move every intersect token one character to the left and give it a span of length 1
        var intersects = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenIntersect));
        foreach (ParseTreeNode intersect in intersects)
        {
            var newLocation = new SourceLocation(intersect.Span.Location.Position - 1, intersect.Span.Location.Line, intersect.Span.Location.Column - 1);
            intersect.Span = new SourceSpan(newLocation, 1);
        }

        // Let quoted sheet tokens include the whitespace that directly precedes them in the source text
        var quotedSheetNodes = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenSheetQuoted));
        foreach (ParseTreeNode quotedSheetNode in quotedSheetNodes)
        {
            PrefixInfo.FixQuotedSheetNodeForWhitespace(quotedSheetNode, input);
        }

        return tree;
    }

    /// <summary>
    /// Nodes in depth-first pre-order, with a conditional stop
    /// </summary>
    /// <param name="root">The root node</param>
    /// <param name="stopAt">Don't process the children of a node matching this predicate</param>
    // inspiration taken from https://irony.codeplex.com/discussions/213938
    public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
    {
        var stack = new Stack<ParseTreeNode>();
        stack.Push(root);
        while (stack.Count > 0)
        {
            var node = stack.Pop();
            yield return node;

            // Check if we don't want to process the children of this node
            if (stopAt != null && stopAt(node)) continue;

            var children = node.ChildNodes;
            // Push children on in reverse order so that they will
            // be evaluated left -> right when popped.
            for (int i = children.Count - 1; i >= 0; i--)
            {
                stack.Push(children[i]);
            }
        }
    }

    /// <summary>
    /// All nodes in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
    {
        return AllNodesConditional(root);
    }

    /// <summary>
    /// All nodes of a certain type in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
    {
        return AllNodes(root.AllNodes(), type);
    }

    /// <summary>
    /// Filter an already enumerated node sequence on node type
    /// </summary>
    internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
    {
        return allNodes.Where(node => node.Is(type));
    }

    /// <summary>
    /// Get the parent node of a node
    /// </summary>
    /// <remarks>
    /// This is an expensive operation, as the whole tree will be searched through
    /// </remarks>
    /// <exception cref="ArgumentException">If no node below <paramref name="treeRoot"/> has <paramref name="child"/> as a direct child</exception>
    public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
    {
        var parent = treeRoot.AllNodes()
            .FirstOrDefault(node => node.ChildNodes.Any(c => c == child));
        if (parent == null) throw new ArgumentException("Child is not part of the tree", nameof(child));
        return parent;
    }

    /// <summary>
    /// The node type/name
    /// </summary>
    public static string Type(this ParseTreeNode node)
    {
        return node.Term.Name;
    }

    /// <summary>
    /// Check if a node is of a particular type
    /// </summary>
    public static bool Is(this ParseTreeNode pt, string type)
    {
        return pt.Type() == type;
    }

    /// <summary>
    /// Checks whether this node is a function
    /// </summary>
    public static Boolean IsFunction(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.FunctionCall)
               || input.Is(GrammarNames.ReferenceFunctionCall)
               || input.Is(GrammarNames.UDFunctionCall)
               // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
               || (input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction())
            ;
    }

    /// <summary>
    /// Whether or not this node represents parentheses "(_)"
    /// </summary>
    public static bool IsParentheses(this ParseTreeNode input)
    {
        switch (input.Type())
        {
            case GrammarNames.Formula:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Formula);
            case GrammarNames.Reference:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Reference);
            default:
                return false;
        }
    }

    /// <summary>
    /// Whether this node is a function with three children of which the middle one is an operator
    /// </summary>
    public static bool IsBinaryOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 3
               && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// Whether this node is a binary operation of type FunctionCall
    /// </summary>
    public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);
    }

    /// <summary>
    /// Whether this node is a binary operation of type ReferenceFunctionCall
    /// </summary>
    public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
    }

    /// <summary>
    /// Whether this node is a unary prefix or unary postfix operation
    /// </summary>
    public static bool IsUnaryOperation(this ParseTreeNode input)
    {
        return IsUnaryPrefixOperation(input) || IsUnaryPostfixOperation(input);
    }

    /// <summary>
    /// Whether this node is a function with two children of which the first one is an operator
    /// </summary>
    public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 2
               && input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// Whether this node is a function with two children of which the second one is an operator
    /// </summary>
    public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 2
               && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// Drop the last character of a string (used to strip the "(" from a printed function name)
    /// </summary>
    private static string RemoveFinalSymbol(string input)
    {
        input = input.Substring(0, input.Length - 1);
        return input;
    }

    /// <summary>
    /// Get the function or operator name of this function call
    /// </summary>
    /// <exception cref="ArgumentException">If the node is not a function call</exception>
    public static string GetFunction(this ParseTreeNode input)
    {
        if (input.IsIntersection())
        {
            return GrammarNames.TokenIntersect;
        }

        if (input.IsUnion())
        {
            return GrammarNames.TokenUnionOperator;
        }

        if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
        {
            return input.ChildNodes[1].Print();
        }

        if (input.IsUnaryPrefixOperation())
        {
            return input.ChildNodes[0].Print();
        }

        if (input.IsNamedFunction())
        {
            // Function names are identifiers, not natural language: upper-case them independent of the
            // current culture so that e.g. "if" never becomes "İF" under a Turkish culture.
            return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
        }

        if (input.IsExternalUDFunction())
        {
            return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
        }

        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Check if this node is a specific function
    /// </summary>
    public static bool MatchFunction(this ParseTreeNode input, string functionName)
    {
        return IsFunction(input) && GetFunction(input) == functionName;
    }

    /// <summary>
    /// Get all the arguments of a function or operation
    /// </summary>
    /// <exception cref="ArgumentException">If the node is not a function call</exception>
    public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
    {
        if (input.IsNamedFunction())
        {
            return input
                .ChildNodes[1] // "Arguments" non-terminal
                .ChildNodes    // "Argument" non-terminals
                .Select(node => node.ChildNodes[0])
                ;
        }

        if (input.IsBinaryOperation())
        {
            return new[] {input.ChildNodes[0], input.ChildNodes[2]};
        }

        if (input.IsUnaryPrefixOperation())
        {
            return new[] {input.ChildNodes[1]};
        }

        if (input.IsUnaryPostfixOperation())
        {
            return new[] {input.ChildNodes[0]};
        }

        if (input.IsUnion())
        {
            return input.ChildNodes[0].ChildNodes;
        }

        if (input.IsExternalUDFunction())
        {
            return input       // Reference
                .ChildNodes[1] // UDFunctionCall
                .ChildNodes[1] // Arguments
                .ChildNodes    // Argument non-terminals
                .Select(node => node.ChildNodes[0])
                ;
        }

        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Checks whether this node is a built-in excel function
    /// </summary>
    public static bool IsBuiltinFunction(this ParseTreeNode node)
    {
        return node.IsFunction()
               && (node.ChildNodes[0].Is(GrammarNames.FunctionName) || node.ChildNodes[0].Is(GrammarNames.RefFunctionName));
    }

    /// <summary>
    /// Whether or not this node represents an intersection
    /// </summary>
    public static bool IsIntersection(this ParseTreeNode input)
    {
        return IsBinaryOperation(input)
               && input.ChildNodes[1].Token.Terminal.Name == GrammarNames.TokenIntersect;
    }

    /// <summary>
    /// Whether or not this node represents a union
    /// </summary>
    public static bool IsUnion(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.ReferenceFunctionCall)
               && input.ChildNodes.Count == 1
               && input.ChildNodes[0].Is(GrammarNames.Union);
    }

    /// <summary>
    /// Checks whether this node is a function call with name, and not just a unary or binary operation
    /// </summary>
    public static bool IsNamedFunction(this ParseTreeNode input)
    {
        return (input.Is(GrammarNames.FunctionCall) && input.ChildNodes[0].Is(GrammarNames.FunctionName))
               || (input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes[0].Is(GrammarNames.RefFunctionName))
               || input.Is(GrammarNames.UDFunctionCall);
    }

    /// <summary>
    /// Whether this node is a binary or unary operation
    /// </summary>
    public static bool IsOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() || input.IsUnaryOperation();
    }

    /// <summary>
    /// Whether this node is a Reference with two children of which the second one is a named function
    /// </summary>
    public static bool IsExternalUDFunction(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.Reference)
               && input.ChildNodes.Count == 2
               && input.ChildNodes[1].IsNamedFunction();
    }

    /// <summary>
    /// True if this node presents a number constant with a sign
    /// </summary>
    public static bool IsNumberWithSign(this ParseTreeNode input)
    {
        return IsUnaryPrefixOperation(input)
               && input.ChildNodes[1].ChildNodes[0].Is(GrammarNames.Constant)
               && input.ChildNodes[1].ChildNodes[0].ChildNodes[0].Is(GrammarNames.Number);
    }

    /// <summary>
    /// Extract all of the information from a Prefix non-terminal
    /// </summary>
    public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix) => PrefixInfo.From(prefix);

    /// <summary>
    /// Go to the first non-formula child node
    /// </summary>
    public static ParseTreeNode SkipFormula(this ParseTreeNode input)
    {
        while (input.Is(GrammarNames.Formula))
        {
            input = input.ChildNodes.First();
        }

        return input;
    }

    /// <summary>
    /// Get all child nodes that are references and aren't part of another reference expression
    /// </summary>
    public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
    {
        return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
            .Where(node => node.Is(GrammarNames.Reference))
            .Select(node => node.SkipToRelevant())
            ;
    }

    /// <summary>
    /// Gets the ParserReferences from the input parse tree node and its children
    /// </summary>
    /// <remarks>
    /// 5 cases:
    /// 1. ReferenceItem node: convert to ParserReference
    /// 2. Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
    ///    (to include the references in the arguments of external UDFs)
    /// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
    /// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
    /// 5. Other cases (RefFunctionCall, Union, Arguments):recursive call on the nodes returned from GetReferenceNodes(node)
    /// </remarks>
    public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
    {
        // Unwrap a Reference that merely wraps a single child
        if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
        {
            node = node.ChildNodes[0];
        }

        var list = new List<ParserReference>();
        switch (node.Type())
        {
            case GrammarNames.Cell:
            case GrammarNames.NamedRange:
            case GrammarNames.HorizontalRange:
            case GrammarNames.VerticalRange:
            case GrammarNames.StructuredReference:
            case GrammarNames.RefError:
                list.Add(new ParserReference(node));
                break;
            case GrammarNames.Reference:
                list.Add(new ParserReference(node));
                list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
                break;
            default:
                if (node.IsRange())
                {
                    var rangeStart = GetParserReferences(node.ChildNodes[0]).FirstOrDefault();
                    var rangeEnd = GetParserReferences(node.ChildNodes[2]).FirstOrDefault();

                    // Cell:Cell => reuse the start reference and widen it into a CellRange
                    if (rangeStart?.ReferenceType == ReferenceType.Cell && rangeEnd?.ReferenceType == ReferenceType.Cell)
                    {
                        ParserReference range = rangeStart;
                        range.MaxLocation = rangeEnd.MinLocation;
                        range.ReferenceType = ReferenceType.CellRange;
                        range.ReferenceNode = node;
                        range.LocationString = node.Print();
                        list.Add(range);
                        break;
                    }

                    // Two single-column references into the same table => merge them into one table reference
                    if (rangeStart?.ReferenceType == ReferenceType.Table && rangeEnd?.ReferenceType == ReferenceType.Table
                        && rangeStart.Name == rangeEnd.Name
                        && rangeStart.TableColumns.Length == 1 && rangeEnd.TableColumns.Length == 1)
                    {
                        ParserReference range = rangeStart;
                        range.TableColumns = rangeStart.TableColumns.Concat(rangeEnd.TableColumns).ToArray();
                        range.TableSpecifiers = rangeStart.TableSpecifiers.SequenceEqual(rangeEnd.TableSpecifiers) ? range.TableSpecifiers : new string[0];
                        range.ReferenceNode = node;
                        range.LocationString = node.Print();
                        list.Add(range);
                        break;
                    }
                }

                list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
                break;
        }

        return list;
    }

    /// <summary>
    /// Whether or not this node represents a range
    /// </summary>
    public static bool IsRange(this ParseTreeNode input)
    {
        return input.IsBinaryReferenceOperation()
               && input.ChildNodes[1].Is(":")
               && input.ChildNodes[0].ChildNodes.Last().Type() == input.ChildNodes[2].ChildNodes.Last().Type();
    }

    /// <summary>
    /// Go to the first "relevant" child node, i.e. skips wrapper nodes
    /// </summary>
    /// <param name="input">The input parse tree node</param>
    /// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
    /// <remarks>
    /// Skips:
    ///  * FormulaWithEq and ArrayFormula nodes
    ///  * Formula nodes
    ///  * Parentheses
    ///  * Reference nodes which are just wrappers
    /// </remarks>
    public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
    {
        while (true)
        {
            switch (input.Type())
            {
                case GrammarNames.FormulaWithEq:
                case GrammarNames.ArrayFormula:
                    input = input.ChildNodes[1];
                    break;
                case GrammarNames.Argument:
                case GrammarNames.Formula:
                    if (input.ChildNodes.Count == 1)
                    {
                        input = input.ChildNodes[0];
                    }
                    else
                    {
                        return input;
                    }

                    break;
                case GrammarNames.Reference:
                    // Skip references which are parentheses
                    // Skip references without a prefix (=> they only have one child node) if the option is set
                    if ((skipReferencesWithoutPrefix && input.ChildNodes.Count == 1) || input.IsParentheses())
                    {
                        input = input.ChildNodes[0];
                    }
                    else
                    {
                        return input;
                    }

                    break;
                default:
                    return input;
            }
        }
    }

    /// <summary>
    /// Pretty-print a parse tree to a string
    /// </summary>
    /// <exception cref="ArgumentException">If a node type is encountered for which no print rule exists</exception>
    public static string Print(this ParseTreeNode input)
    {
        // For terminals, just print the token text
        if (input.Term is Terminal)
        {
            return input.Token.Text;
        }

        // (Lazy) enumerable for printed children
        var children = input.ChildNodes.Select(Print);
        // Concrete list when needed
        List<string> childrenList;

        // Switch on non-terminals
        switch (input.Term.Name)
        {
            case GrammarNames.Formula:
                // Check if these are brackets, otherwise print first child
                return IsParentheses(input) ? $"({children.First()})" : children.First();

            case GrammarNames.FunctionCall:
            case GrammarNames.ReferenceFunctionCall:
            case GrammarNames.UDFunctionCall:
                childrenList = children.ToList();

                if (input.IsNamedFunction())
                {
                    return string.Join("", childrenList) + ")";
                }

                if (input.IsBinaryOperation())
                {
                    // format string for "normal" binary operation
                    string format = "{0}{1}{2}";
                    if (input.IsIntersection())
                    {
                        format = "{0} {2}";
                    }

                    return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
                }

                if (input.IsUnion())
                {
                    return $"({string.Join(",", childrenList)})";
                }

                if (input.IsUnaryOperation())
                {
                    return string.Join("", childrenList);
                }

                throw new ArgumentException("Unknown function type.");

            case GrammarNames.Reference:
                return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);

            case GrammarNames.Prefix:
                var ret = string.Join("", children);
                // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
                if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
                {
                    ret += "!";
                }

                return ret;

            case GrammarNames.ArrayFormula:
                return "{=" + children.ElementAt(1) + "}";

            // Terms for which to print all child nodes concatenated
            case GrammarNames.ArrayConstant:
            case GrammarNames.DynamicDataExchange:
            case GrammarNames.FormulaWithEq:
            case GrammarNames.File:
            case GrammarNames.MultiRangeFormula:
            case GrammarNames.StructuredReference:
            case GrammarNames.StructuredReferenceColumn:
            case GrammarNames.StructuredReferenceExpression:
            case GrammarNames.StructuredReferenceSpecifier:
                return string.Join("", children);

            // Terms for which we print the children comma-separated
            case GrammarNames.Arguments:
            case GrammarNames.ArrayRows:
            case GrammarNames.Union:
                return string.Join(",", children);

            case GrammarNames.ArrayColumns:
                return string.Join(";", children);

            case GrammarNames.ConstantArray:
                return $"{{{children.First()}}}";

            default:
                // If it is not defined above and the number of children is exactly one, we want to just print the first child
                if (input.ChildNodes.Count == 1)
                {
                    return children.First();
                }

                throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine + "This probably means the Excel grammar was modified without the print function being modified");
        }
    }
}
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v172/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v172
{
    public enum ReferenceType
    {
        Cell,
        CellRange,
        UserDefinedName,
        HorizontalRange,
        VerticalRange,
        RefError,
        Table,
        UserDefinedFunction
    }

    public class ParserReference
    {
        public ReferenceType ReferenceType { get; set; }
        public ParseTreeNode ReferenceNode { get; set; }
        public string LocationString { get; set; }
        public string Worksheet { get; set; }
        public string LastWorksheet { get; set; }
        public string FilePath { get; set; }
        public string FileName { get; set; }
        public string Name { get; set; }
        public string MinLocation { get; set; }
        public string MaxLocation { get; set; }
        public string[] TableSpecifiers { get; set; }
        public string[] TableColumns { get; set; }

        public ParserReference(ParseTreeNode node)
        {
            InitializeReference(node);
        }

        /// <summary>
        /// Initializes the current object based on the input ParseTreeNode
        /// </summary>
        /// <remarks>
        /// For Reference nodes (Prefix ReferenceItem), it initialize the values derived from the Prefix node and
        /// is re-invoked for the ReferenceItem node.
        /// </remarks>
        public void InitializeReference(ParseTreeNode node)
        {
            switch (node.Type())
            {
                case GrammarNames.Reference:
                    PrefixInfo prefix = node.ChildNodes[0].GetPrefixInfo();
                    Worksheet = prefix.HasSheet ?
prefix.Sheet.Replace("''", "'") : "(Undefined sheet)";

                    // A multi-sheet prefix ("First:Last") overrides the single worksheet set above
                    if (prefix.HasMultipleSheets)
                    {
                        var sheetBounds = prefix.MultipleSheets.Split(':');
                        Worksheet = sheetBounds[0];
                        LastWorksheet = sheetBounds[1];
                    }

                    if (prefix.HasFilePath)
                    {
                        FilePath = prefix.FilePath;
                    }

                    // A numeric file reference takes precedence over a file name
                    if (prefix.HasFileNumber)
                    {
                        FileName = prefix.FileNumber.ToString();
                    }
                    else if (prefix.HasFileName)
                    {
                        FileName = prefix.FileName;
                    }

                    // Fill in the remaining values from the ReferenceItem child
                    InitializeReference(node.ChildNodes[1]);
                    break;

                case GrammarNames.Cell:
                    ReferenceType = ReferenceType.Cell;
                    MinLocation = node.ChildNodes[0].Token.ValueString;
                    MaxLocation = MinLocation;
                    break;

                case GrammarNames.NamedRange:
                    ReferenceType = ReferenceType.UserDefinedName;
                    Name = node.ChildNodes[0].Token.ValueString;
                    break;

                case GrammarNames.StructuredReference:
                    ReferenceType = ReferenceType.Table;
                    Name = node.ChildNodes
                        .FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceQualifier)
                        ?.ChildNodes[0].Token.ValueString;
                    TableSpecifiers = node.AllNodes()
                        .Where(x => x.Is(GrammarNames.TokenSRSpecifier) || x.Is("@"))
                        .Select(x => UnEscape(x.Token.ValueString, "'"))
                        .ToArray();
                    TableColumns = node.AllNodes()
                        .Where(x => x.Is(GrammarNames.TokenSRColumn))
                        .Select(x => UnEscape(x.Token.ValueString, "'"))
                        .ToArray();
                    break;

                case GrammarNames.HorizontalRange:
                    var rowBounds = node.ChildNodes[0].Token.ValueString.Split(':');
                    ReferenceType = ReferenceType.HorizontalRange;
                    MinLocation = rowBounds[0];
                    MaxLocation = rowBounds[1];
                    break;

                case GrammarNames.VerticalRange:
                    var columnBounds = node.ChildNodes[0].Token.ValueString.Split(':');
                    ReferenceType = ReferenceType.VerticalRange;
                    MinLocation = columnBounds[0];
                    MaxLocation = columnBounds[1];
                    break;

                case GrammarNames.RefError:
                    ReferenceType = ReferenceType.RefError;
                    break;

                case GrammarNames.UDFunctionCall:
                    ReferenceType = ReferenceType.UserDefinedFunction;
                    Name = node.ChildNodes[0].ChildNodes[0].Token.ValueString.TrimEnd('(');
                    break;
            }

            // Always record the node this call was made for; for Reference nodes this
            // overwrites what the recursive call stored for the ReferenceItem child.
            ReferenceNode = node;
            LocationString = node.Print();

            // For user defined functions, keep only the text up to (not including) the
            // opening parenthesis that follows the last occurrence of the function name.
            if (ReferenceType == ReferenceType.UserDefinedFunction && Name != null)
            {
                var nameIndex = LocationString.LastIndexOf(Name, System.StringComparison.Ordinal);
                var parenIndex = LocationString.IndexOf('(', nameIndex);
                LocationString = LocationString.Substring(0, parenIndex);
            }
        }

        /// <summary>
        /// Removes every occurrence of the escape character that is not directly followed by another escape character
        /// </summary>
        private string UnEscape(string value, string escapeCharacter)
        {
            var pattern = $"{escapeCharacter}(?!{escapeCharacter})";
            return System.Text.RegularExpressions.Regex.Replace(value, pattern, "");
        }

        /// <summary>
        /// The location string of this reference
        /// </summary>
        public override string ToString()
        {
            return LocationString;
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v172/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v172
{
    // TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23

    /// <summary>
    /// Simple data class that holds information about a Prefix.
    /// </summary>
    /// <seealso cref="ExcelFormulaParser.GetPrefixInfo"/>
    public class PrefixInfo : IEquatable<PrefixInfo>
    {
        public string FilePath { get; }
        public bool HasFilePath => FilePath != null;

        private readonly int? _fileNumber;
        public int FileNumber => _fileNumber.GetValueOrDefault();
        public bool HasFileNumber => _fileNumber.HasValue;

        public string FileName { get; }
        public bool HasFileName => FileName != null;

        public bool HasFile => HasFileName || HasFileNumber;

        public string Sheet { get; }
        public bool HasSheet => Sheet != null;

        public string MultipleSheets { get; }
        public bool HasMultipleSheets => MultipleSheets != null;

        public bool IsQuoted { get; }

        public PrefixInfo(string sheet = null, int?
fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
        {
            Sheet = sheet;
            _fileNumber = fileNumber;
            FileName = fileName;
            FilePath = filePath;
            MultipleSheets = multipleSheets;
            IsQuoted = isQuoted;
        }

        /// <summary>
        /// Create a PrefixInfo class from a parse tree node
        /// </summary>
        /// <exception cref="ArgumentException">If the node is not a Prefix node</exception>
        internal static PrefixInfo From(ParseTreeNode prefix)
        {
            if (prefix.Type() != GrammarNames.Prefix)
            {
                throw new ArgumentException("Not a prefix", nameof(prefix));
            }

            string filePath = null;
            int? fileNumber = null;
            string fileName = null;
            string sheetName = null;
            string multipleSheets = null;

            // Token number we're processing
            var cur = 0;

            // Check for quotes
            var isQuoted = prefix.ChildNodes[cur].Is("'");
            if (isQuoted)
            {
                cur++;
            }

            // Check and process file
            if (prefix.ChildNodes[cur].Is(GrammarNames.File))
            {
                ParseTreeNode file = prefix.ChildNodes[cur];

                if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
                {
                    // Numeric filename: strip the first and last character before parsing the number
                    fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
                }
                else
                {
                    // String filename
                    var iCur = 0;
                    // Check if it includes a path
                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                    {
                        filePath = file.ChildNodes[iCur].Print();
                        iCur++;
                    }

                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                    {
                        fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                    }
                    else
                    {
                        fileName = file.ChildNodes[iCur].Print();
                    }
                }

                cur++;
            }

            // A prefix can consist of a file only (ExcelFormulaParser.Print special-cases a Prefix whose
            // single child is a File node); in that case there is no sheet token left to inspect.
            if (cur < prefix.ChildNodes.Count)
            {
                ParseTreeNode sheetNode = prefix.ChildNodes[cur];

                // Check for a non-quoted sheet
                if (sheetNode.Is(GrammarNames.TokenSheet))
                {
                    // remove the trailing character of the token
                    sheetName = Substr(sheetNode.Print(), 1);
                }
                // Check for a quoted sheet
                else if (sheetNode.Is(GrammarNames.TokenSheetQuoted))
                {
                    // remove quote and !
                    sheetName = Substr(sheetNode.Print(), 2);

                    if (sheetName == "")
                    {
                        // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                        // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                        sheetName = " ";
                    }
                }
                // Check if multiple sheets
                else if (sheetNode.Is(GrammarNames.TokenMultipleSheets))
                {
                    multipleSheets = Substr(sheetNode.Print(), 1);
                }
            }

            return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
        }

        /// <summary>
        /// Extend a quoted sheet token to the left so that it includes the whitespace directly preceding it in the source text
        /// </summary>
        /// <param name="quotedSheetNode">The quoted sheet token node; its span and token text are modified in place</param>
        /// <param name="sourceText">The complete text that was parsed</param>
        internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
        {
            var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
            SourceLocation currentLocation = quotedSheetNode.Span.Location;
            if (newPosition == currentLocation.Position)
            {
                return;
            }

            // The start only ever moves towards the beginning of the text, so the column has to move left by
            // the same amount as the position (it used to be moved right).
            // NOTE(review): if the skipped whitespace contains a line break, line and column would need a
            // full recomputation; the column is clamped at 0 for that case - confirm whether that matters.
            var shift = currentLocation.Position - newPosition;
            var newColumn = Math.Max(0, currentLocation.Column - shift);
            var newLocation = new SourceLocation(newPosition, currentLocation.Line, newColumn);
            quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

            // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
            typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
                ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
        }

        /// <summary>
        /// Walk back from the start of the token over all directly preceding whitespace characters
        /// </summary>
        /// <returns>The position of the first character of that whitespace run, or the token start if there is none</returns>
        private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
        {
            var startIndex = nodeSheetQuoted.Span.Location.Position;
            while (startIndex > 0 && char.IsWhiteSpace(sourceText[startIndex - 1]))
            {
                startIndex--;
            }

            return startIndex;
        }

        /// <summary>
        /// Return <paramref name="s"/> without its first <paramref name="removeFirst"/> and last <paramref name="removeLast"/> characters
        /// </summary>
        private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
        {
            return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
        }

        public override bool Equals(object other) => Equals(other as PrefixInfo);

        /// <summary>
        /// Two prefixes are equal when file number, file path, file name, sheet and multiple sheets match
        /// (strings are compared ordinal, ignoring case). IsQuoted does not take part in the comparison.
        /// </summary>
        public bool Equals(PrefixInfo other)
        {
            if (ReferenceEquals(null, other)) return false;
            if (ReferenceEquals(this, other)) return true;
            return _fileNumber == other._fileNumber
                   && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
        }

        public override int GetHashCode()
        {
            unchecked
            {
                var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
                hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
                hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
                hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
                hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
                return hashCode;
            }
        }

        public static bool operator ==(PrefixInfo left, PrefixInfo right)
        {
            return Equals(left, right);
        }

        public static bool operator !=(PrefixInfo left, PrefixInfo right)
        {
            return !Equals(left, right);
        }

        /// <summary>
        /// Rebuild the textual prefix from its parts, always terminated by an exclamation mark
        /// </summary>
        public override string ToString()
        {
            var res = new StringBuilder();
            if (IsQuoted) res.Append("'");
            if (HasFilePath) res.Append(FilePath);
            if (HasFileNumber) res.Append($"[{FileNumber}]");
            if (HasFileName) res.Append($"[{FileName}]");
            if (HasSheet) res.Append(Sheet);
            if (HasMultipleSheets) res.Append(MultipleSheets);
            if (IsQuoted) res.Append("'");
            res.Append("!");
            return res.ToString();
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v172/WordsTerminal.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;

namespace XLParser.Web.XLParserVersions.v172
{
    /// <summary>
    /// Terminal that can determine if the input contains one of the expected words.
    /// </summary>
    /// <remarks>Children of each node are represented as an array to allow direct indexation. Do not use
    /// for words that have a large difference between low and high character of a token.</remarks>
    public class WordsTerminal : Terminal
    {
        private readonly Node _rootNode;
        private readonly List<string> _words;
        private bool _caseSensitive;

        public WordsTerminal(string name, IEnumerable<string> words) : base(name)
        {
            _rootNode = new Node(0);
            _words = new List<string>(words);
        }

        public override void Init(GrammarData grammarData)
        {
            base.Init(grammarData);
            _caseSensitive = Grammar.CaseSensitive;
            foreach (var word in _words)
            {
                AddWordToTree(_caseSensitive ?
word : word.ToUpperInvariant());
            }

            if (EditorInfo == null)
            {
                EditorInfo = new TokenEditorInfo(TokenType.Unknown, TokenColor.Text, TokenTriggers.None);
            }
        }

        public override IList<string> GetFirsts() => _words;

        /// <summary>
        /// Try to match the longest registered word that starts at the current preview position
        /// </summary>
        /// <returns>A token for the matched word, or null if no registered word starts at this position</returns>
        public override Token TryMatch(ParsingContext context, ISourceStream source)
        {
            var node = _rootNode;
            // Deepest node seen so far that completes a word. Remembering it (instead of only checking the
            // node where the walk stops) keeps a shorter word matchable when the input continues along the
            // path of a longer word and then diverges.
            Node lastTerminal = node.IsTerminal ? node : null;
            var input = source.Text;
            for (var i = source.PreviewPosition; i < input.Length; ++i)
            {
                var c = _caseSensitive ? input[i] : char.ToUpperInvariant(input[i]);
                node = node[c];
                if (node is null)
                {
                    break;
                }

                if (node.IsTerminal)
                {
                    lastTerminal = node;
                }
            }

            if (lastTerminal is null)
            {
                return null;
            }

            // The node's length equals the number of characters of the matched word
            source.PreviewPosition += lastTerminal.Length;
            return source.CreateToken(OutputTerminal);
        }

        /// <summary>
        /// Insert a word into the tree, marking the node of its last character as terminal
        /// </summary>
        private void AddWordToTree(string word)
        {
            var node = _rootNode;
            foreach (var c in word)
            {
                node = node.GetOrAddChild(c);
            }

            node.IsTerminal = true;
        }

        /// <summary>
        /// Tree node whose children are stored in an array indexed by (character - lowest child character)
        /// </summary>
        private class Node
        {
            private char _lowChar = '\0';
            private char _highChar = '\0';
            private Node[] _children;

            public Node(int length)
            {
                Length = length;
            }

            /// <summary>True if a registered word ends at this node</summary>
            public bool IsTerminal { get; set; }

            /// <summary>Number of characters on the path from the root to this node</summary>
            public int Length { get; }

            /// <summary>The child for the character, or null if there is none</summary>
            public Node this[char c]
            {
                get
                {
                    if (_children is null)
                    {
                        return null;
                    }

                    if (c < _lowChar || c > _highChar)
                    {
                        return null;
                    }

                    return _children[c - _lowChar];
                }
            }

            /// <summary>
            /// Get the child for the character, creating it (and growing the child array) when needed
            /// </summary>
            internal Node GetOrAddChild(char c)
            {
                // First child: the array covers exactly this one character
                if (_children is null)
                {
                    var node = new Node(Length + 1);
                    _children = new[] { node };
                    _lowChar = c;
                    _highChar = c;
                    return node;
                }

                // Character below the current range: grow the array and shift the existing children up
                var newLowChar = (char)Math.Min(_lowChar, c);
                if (newLowChar != _lowChar)
                {
                    var newChildrenCount = _highChar - newLowChar + 1;
                    Array.Resize(ref _children, newChildrenCount);
                    var ofs = _lowChar - newLowChar;
                    Array.Copy(_children, 0, _children, ofs, newChildrenCount - ofs);
                    Array.Clear(_children, 0, ofs);
                    _lowChar = newLowChar;
                    return _children[0] = new Node(Length + 1);
                }

                // Character above the current range: grow the array at the end
                var newHighChar = (char)Math.Max(_highChar, c);
                if (newHighChar != _highChar)
                {
                    var newChildrenCount = newHighChar - _lowChar + 1;
                    Array.Resize(ref _children, newChildrenCount);
                    _highChar = newHighChar;
                    return _children[newChildrenCount - 1] = new Node(Length + 1);
                }

                // Character inside the current range: reuse the slot, creating the child if it is still empty
                var
charIdx = c - _lowChar; var child = _children[charIdx]; if (child is null) { return _children[charIdx] = new Node(Length + 1); } return child; } } } } ================================================ FILE: app/XLParser.Web/XLParserVersions/v173/ExcelFormulaGrammar.cs ================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;

namespace XLParser.Web.XLParserVersions.v173
{
    /// <summary>
    /// Contains the XLParser grammar
    /// </summary>
    [Language("Excel Formulas", "1.7.3", "Grammar for Excel Formulas")]
    public class ExcelFormulaGrammar : Grammar
    {
        #region 1-Terminals

        #region Symbols and operators

        public Terminal at => ToTerm("@");
        public Terminal comma => ToTerm(",");
        public Terminal colon => ToTerm(":");
        public Terminal hash => ToTerm("#");
        public Terminal semicolon => ToTerm(";");
        public Terminal OpenParen => ToTerm("(");
        public Terminal CloseParen => ToTerm(")");
        public Terminal CloseSquareParen => ToTerm("]");
        public Terminal OpenSquareParen => ToTerm("[");
        public Terminal exclamationMark => ToTerm("!");
        public Terminal CloseCurlyParen => ToTerm("}");
        public Terminal OpenCurlyParen => ToTerm("{");
        public Terminal QuoteS => ToTerm("'");

        public Terminal mulop => ToTerm("*");
        public Terminal plusop => ToTerm("+");
        public Terminal divop => ToTerm("/");
        public Terminal minop => ToTerm("-");
        public Terminal concatop => ToTerm("&");
        public Terminal expop => ToTerm("^");

        // Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
        // Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
        public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);

        public Terminal percentop => ToTerm("%");

        public Terminal gtop => ToTerm(">");
        public Terminal eqop => ToTerm("=");
        public Terminal ltop => ToTerm("<");
        public Terminal neqop => ToTerm("<>");
        public Terminal gteop => ToTerm(">=");
        public Terminal lteop => ToTerm("<=");

        #endregion

        #region Literals

        public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE", "T", "F")
        {
            Priority = TerminalPriority.Bool
        };

        public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.AllowStartEndDot)
        {
            DefaultIntTypes = new[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt }
        };

        public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"",
            StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes);

        public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'",
            StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes)
        {
            Priority = TerminalPriority.SingleQuotedString
        };

        public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!", "#");
        public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError);

        #endregion

        #region Functions

        private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers
        private const string AllUdfChars = SpecialUdfChars + @"\\.\w";
        private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)";

        // The following regex uses the rather exotic feature Character Class Subtraction
        // https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction
        private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\(";

        public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex)
        {
            Priority = TerminalPriority.UDF
        };

        public Terminal ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(", "I", "O")
        {
            Priority = TerminalPriority.ExcelRefFunction
        };

        public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(", "I", "C")
        {
            Priority = TerminalPriority.ExcelRefFunction
        };

        public Terminal ExcelFunction { get; } = new WordsTerminal(GrammarNames.ExcelFunction, excelFunctionList.Select(f => f + '('))
        {
            Priority = TerminalPriority.ExcelFunction
        };

        // Using this instead of Empty allows a more accurate tree
        public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument);

        #endregion

        #region References and names

        // Columns A..XFD and rows 1..1048576 (the ranges spelled out by the alternatives below)
        private const string ColumnPattern = @"(?:[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D]|[A-Z]{1,2})";
        private const string RowPattern = @"(?:104857[0-6]|10485[0-6][0-9]|1048[0-4][0-9]{2}|104[0-7][0-9]{3}|10[0-3][0-9]{4}|[1-9][0-9]{1,5}|[1-9])";

        private static readonly string[] ColumnPrefix = Enumerable.Range('A', 'Z' - 'A' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray();
        private static readonly string[] RowPrefix = Enumerable.Range('1', '9' - '1' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray();

        public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern, ColumnPrefix);
        public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?" + RowPattern + ":[$]?" + RowPattern, RowPrefix);

        private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?" + RowPattern;

        public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex, ColumnPrefix)
        {
            Priority = TerminalPriority.CellToken
        };

        private static readonly HashSet<UnicodeCategory> UnicodeLetterCategories = new HashSet<UnicodeCategory>
        {
            UnicodeCategory.UppercaseLetter,
            UnicodeCategory.LowercaseLetter,
            UnicodeCategory.TitlecaseLetter,
            UnicodeCategory.ModifierLetter,
            UnicodeCategory.OtherLetter
        };

        // 48718 letters: a long prefix list, but it is passed to NameToken as its set of possible first characters,
        // presumably so the scanner can skip that terminal for tokens starting with digits, parentheses, operators...
        private static readonly string[] UnicodeLetters = Enumerable.Range(0, ushort.MaxValue)
            .Where(codePoints => UnicodeLetterCategories.Contains(CharUnicodeInfo.GetUnicodeCategory((char)codePoints)))
            .Select(codePoint => char.ToString((char)codePoint))
            .ToArray();

        private static readonly string[] NameStartCharPrefix = UnicodeLetters.Concat(new[] { @"\", "_" }).ToArray();

        // Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark
        private const string NameStartCharRegex = @"[\p{L}\\_]";
        private const string NameValidCharacterRegex = @"[\w\\_\.\?€]";

        public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*", NameStartCharPrefix)
        {
            Priority = TerminalPriority.Name
        };

        // Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference.
        // If we ever parse R1C1 references, make sure to include them here
        // TODO: Add all function names here
        private const string NamedRangeCombinationRegex =
            "((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
            // \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
            + "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
            // allow large cell references (e.g. A1048577) as named range.
            // The alternatives enumerate every row number above 1048576; the last one covers rows of 8 or more digits
            // (it must be the digit class \d - a bare "d" would only match the literal letter).
            + "|(" + ColumnPattern + @"(104857[7-9]|10485[89][0-9]|1048[6-9][0-9]{2}|1049[0-9]{3}|10[5-9][0-9]{4}|1[1-9][0-9]{5}|[2-9][0-9]{6}|\d{8,})" + NameValidCharacterRegex + "*)"
            ;

        // To prevent e.g. "A1A1" being parsed as 2 cell tokens
        public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NamedRangeCombinationRegex, ColumnPrefix.Concat(new[] { "T", "F" }).ToArray())
        {
            Priority = TerminalPriority.NamedRangeCombination
        };

        public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+", "_")
        {
            Priority = TerminalPriority.ReservedName
        };

        #region Structured References

        private const string SRSpecifierRegex = @"#(All|Data|Headers|Totals|This Row)";

        public Terminal SRSpecifierToken = new RegexBasedTerminal(GrammarNames.TokenSRSpecifier, SRSpecifierRegex, "#")
        {
            Priority = TerminalPriority.StructuredReference
        };

        private const string SRColumnRegex = @"(?:[^\[\]'#@]|(?:'['\[\]#@]))+";

        public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, SRColumnRegex)
        {
            Priority = TerminalPriority.StructuredReference
        };

        #endregion

        #region Prefixes

        private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
        private const string notSheetNameChars = @"'*\[\]\\:/?";
        //const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?" + "\\\"";
        //const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!";
        private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+";
        private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*";
        //private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!";

        public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!")
        {
            Priority = TerminalPriority.SheetToken
        };

        public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!")
        {
            Priority = TerminalPriority.SheetQuotedToken
        };

        private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!";
        private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!";

        public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex)
        {
            Priority = TerminalPriority.MultipleSheetsToken
        };

        public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex)
        {
            Priority = TerminalPriority.MultipleSheetsToken
        };

        private const string fileNameNumericRegex = @"\[[0-9]+\](?!,)(?=.*!)";

        public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex, "[")
        {
            Priority = TerminalPriority.FileNameNumericToken
        };

        private const string fileNameInBracketsRegex = @"\[[^\[\]]+\](?!,)(?=.*!)";

        public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex, "[")
        {
            Priority = TerminalPriority.FileName
        };

        // Source: https://stackoverflow.com/a/14632579
        private const string fileNameRegex = @"[^\.\\\[\]]+\..{1,4}";

        public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex)
        {
            Priority = TerminalPriority.FileName
        };

        // Source: http://stackoverflow.com/a/6416209/572635
        private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\-.$ @]+)\\(([^<>\"" /\|?*\\']|( |''))*\\)*";
        private const string urlPathRegex = @"http(s?)\://([\p{L}\p{N}-_]+\.[\p{L}\p{N}-_]*)+(:[0-9]+)?/([\p{L}\p{N}\-\.\?\,\'+&%\$#_ ()]*/)*";
        private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")";

        public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex)
        {
            Priority = TerminalPriority.FileNamePath
        };

        #endregion

        #endregion

        #endregion

        #region 2-NonTerminals

        // Most non-terminals are first defined here, so they can be used anywhere in the rules
        // Otherwise you can only use non-terminals that have been defined previously

        public NonTerminal Argument { get; } = new NonTerminal(GrammarNames.Argument);
        public NonTerminal Arguments { get; } = new NonTerminal(GrammarNames.Arguments);
        public NonTerminal ArrayColumns { get; } = new NonTerminal(GrammarNames.ArrayColumns);
        public NonTerminal ArrayConstant { get; } = new NonTerminal(GrammarNames.ArrayConstant);
        public NonTerminal ArrayFormula { get; } = new NonTerminal(GrammarNames.ArrayFormula);
        public NonTerminal ArrayRows { get; } = new NonTerminal(GrammarNames.ArrayRows);
        public NonTerminal Bool { get; } = new NonTerminal(GrammarNames.Bool);
        public NonTerminal Cell { get; } = new NonTerminal(GrammarNames.Cell);
        public NonTerminal Constant { get; } = new NonTerminal(GrammarNames.Constant);
        public NonTerminal ConstantArray { get; } = new NonTerminal(GrammarNames.ConstantArray);
        public NonTerminal DynamicDataExchange { get; } = new NonTerminal(GrammarNames.DynamicDataExchange);
        public NonTerminal EmptyArgument { get; } = new NonTerminal(GrammarNames.EmptyArgument);
        public NonTerminal Error { get; } = new NonTerminal(GrammarNames.Error);
        public NonTerminal File { get; } = new NonTerminal(GrammarNames.File);
        public NonTerminal Formula { get; } = new NonTerminal(GrammarNames.Formula);
        public NonTerminal FormulaWithEq { get; } = new NonTerminal(GrammarNames.FormulaWithEq);
        public NonTerminal FunctionCall { get; } = new NonTerminal(GrammarNames.FunctionCall);
        public NonTerminal FunctionName { get; } = new NonTerminal(GrammarNames.FunctionName);
        public NonTerminal HRange { get; } = new NonTerminal(GrammarNames.HorizontalRange);
        public NonTerminal InfixOp { get; } = new NonTerminal(GrammarNames.TransientInfixOp);
        public NonTerminal MultiRangeFormula { get; } = new NonTerminal(GrammarNames.MultiRangeFormula);
        public NonTerminal NamedRange { get; } = new NonTerminal(GrammarNames.NamedRange);
        public NonTerminal Number { get; } = new NonTerminal(GrammarNames.Number);
        public NonTerminal PostfixOp { get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
        public NonTerminal Prefix { get; } = new NonTerminal(GrammarNames.Prefix);
        public NonTerminal PrefixOp { get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
        public NonTerminal QuotedFileSheet { get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
        public NonTerminal Reference { get; } = new NonTerminal(GrammarNames.Reference);
        public NonTerminal ReferenceItem { get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
        public NonTerminal ReferenceFunctionCall { get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
        public NonTerminal RefError { get; } = new NonTerminal(GrammarNames.RefError);
        public NonTerminal RefFunctionName { get; } = new NonTerminal(GrammarNames.RefFunctionName);
        public NonTerminal ReservedName { get; } = new NonTerminal(GrammarNames.ReservedName);
        public NonTerminal Sheet { get; } = new NonTerminal(GrammarNames.Sheet);
        public NonTerminal Start { get; } = new NonTerminal(GrammarNames.TransientStart);
        public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
        public NonTerminal StructuredReferenceColumn { get; } = new NonTerminal(GrammarNames.StructuredReferenceColumn);
        public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
        public NonTerminal StructuredReferenceSpecifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceSpecifier);
        public NonTerminal StructuredReferenceQualifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceQualifier);
        public NonTerminal Text { get; } = new NonTerminal(GrammarNames.Text);
        public NonTerminal UDFName { get; } = new NonTerminal(GrammarNames.UDFName);
        public NonTerminal UDFunctionCall { get; } = new NonTerminal(GrammarNames.UDFunctionCall);
        public NonTerminal Union { get; } = new NonTerminal(GrammarNames.Union);
        public NonTerminal VRange { get; } = new NonTerminal(GrammarNames.VerticalRange);

        #endregion

        /// <summary>
        /// Builds the grammar: marks punctuation, wires up every production rule and registers operator precedence.
        /// </summary>
        public ExcelFormulaGrammar()
        {
            #region Punctuation

            MarkPunctuation(OpenParen, CloseParen);
            MarkPunctuation(OpenCurlyParen, CloseCurlyParen);

            #endregion

            #region Rules

            #region Base rules

            Root = Start;

            Start.Rule =
                  FormulaWithEq
                | Formula
                | ArrayFormula
                | MultiRangeFormula
                ;
            MarkTransient(Start);

            ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;

            MultiRangeFormula.Rule = eqop + Union;

            FormulaWithEq.Rule = eqop + Formula;

            Formula.Rule =
                  Reference + ReduceHere()
                | Constant
                | FunctionCall
                | ConstantArray
                | OpenParen + Formula + CloseParen
                | ReservedName
                ;

            ReservedName.Rule = ReservedNameToken;

            Constant.Rule =
                  Number
                | Text
                | Bool
                | Error
                ;

            Text.Rule = TextToken;
            Number.Rule = NumberToken;
            Bool.Rule = BoolToken;
            Error.Rule = ErrorToken;
            RefError.Rule = RefErrorToken;

            #endregion

            #region Functions

            FunctionCall.Rule =
                  FunctionName + Arguments + CloseParen
                | PrefixOp + Formula
                | Formula + PostfixOp
                | Formula + InfixOp + Formula
                ;

            FunctionName.Rule = ExcelFunction;

            Arguments.Rule = MakeStarRule(Arguments, comma, Argument);

            EmptyArgument.Rule = EmptyArgumentToken;
            Argument.Rule = Formula | EmptyArgument;

            PrefixOp.Rule =
                  ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
                | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
                | ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
            MarkTransient(PrefixOp);

            InfixOp.Rule =
                  expop
                | mulop
                | divop
                | plusop
                | minop
                | concatop
                | gtop
                | eqop
                | ltop
                | neqop
                | gteop
                | lteop;
            MarkTransient(InfixOp);

            // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
            PostfixOp.Rule = PreferShiftHere() + percentop;
            MarkTransient(PostfixOp);

            #endregion

            #region References

            Reference.Rule =
                  ReferenceItem
                | ReferenceFunctionCall
                | OpenParen + Reference + PreferShiftHere() + CloseParen
                | Prefix + ReferenceItem
                | DynamicDataExchange
                ;

            ReferenceFunctionCall.Rule =
                  Reference + colon + Reference
                | Reference + intersectop + Reference
                | OpenParen + Union + CloseParen
                | RefFunctionName + Arguments + CloseParen
                | Reference + hash
                ;

            RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;

            Union.Rule = MakePlusRule(Union, comma, Reference);

            ReferenceItem.Rule =
                  Cell
                | NamedRange
                | VRange
                | HRange
                | RefError
                | UDFunctionCall
                | StructuredReference
                ;
            MarkTransient(ReferenceItem);

            UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
            UDFName.Rule = UDFToken;

            VRange.Rule = VRangeToken;
            HRange.Rule = HRangeToken;

            Cell.Rule = CellToken;

            File.Rule =
                  FileNameNumericToken
                | FileNameEnclosedInBracketsToken
                | FilePathToken + FileNameEnclosedInBracketsToken
                | FilePathToken + FileName
                ;

            DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;

            NamedRange.Rule = NameToken | NamedRangeCombinationToken;

            Prefix.Rule =
                  SheetToken
                | QuoteS + SheetQuotedToken
                | File + SheetToken
                | QuoteS + File + SheetQuotedToken
                | File + exclamationMark
                | MultipleSheetsToken
                | QuoteS + MultipleSheetsQuotedToken
                | File + MultipleSheetsToken
                | QuoteS + File + MultipleSheetsQuotedToken
                | RefErrorToken
                ;

            StructuredReferenceQualifier.Rule = NameToken;

            StructuredReferenceSpecifier.Rule =
                  SRSpecifierToken
                | at
                | OpenSquareParen + SRSpecifierToken + CloseSquareParen;

            StructuredReferenceColumn.Rule =
                  SRColumnToken
                | OpenSquareParen + SRColumnToken + CloseSquareParen;

            StructuredReferenceExpression.Rule =
                  StructuredReferenceColumn
                | StructuredReferenceColumn + colon + StructuredReferenceColumn
                | at + StructuredReferenceColumn
                | at + StructuredReferenceColumn + colon + StructuredReferenceColumn
                | StructuredReferenceSpecifier
                | StructuredReferenceSpecifier + comma + StructuredReferenceColumn
                | StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
                | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier
                | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn
                | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
                ;

            StructuredReference.Rule =
                  OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
                | StructuredReferenceQualifier + OpenSquareParen + CloseSquareParen
                | StructuredReferenceQualifier + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
                ;

            #endregion

            #region Arrays

            ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;

            ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
            ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);

            ArrayConstant.Rule =
                  Constant
                | PrefixOp + Number
                | RefError;

            #endregion

            #endregion

            #region 5-Operator Precedence

            // Some of these operators are neutral associative instead of left associative,
            // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
            // structure of the parse tree, we like consistency.
            RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
            RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
            RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
            RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
            RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
            RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
            RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at);
            RegisterOperators(Precedence.Union, Associativity.Left, comma);
            RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
            RegisterOperators(Precedence.Range, Associativity.Left, colon);

            #endregion
        }

        #region Precedence and Priority constants

        // Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
        // Could also be an enum, but this way you don't need int casts
        private static class Precedence
        {
            // Don't use priority 0, Irony seems to view it as no priority set
            public const int Comparison = 1;
            public const int Concatenation = 2;
            public const int Addition = 3;
            public const int Multiplication = 4;
            public const int Exponentiation = 5;
            public const int UnaryPostFix = 6;
            public const int UnaryPreFix = 7;
            //public const int Reference = 8;
            public const int Union = 9;
            public const int Intersection = 10;
            public const int Range = 11;
        }

        // Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
        // E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
        // E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
        private static class TerminalPriority
        {
            // Irony Low value
            //public const int Low = -1000;

            public const int Name = -800;
            public const int ReservedName = -700;

            public const int StructuredReference = -500;

            public const int FileName = -500;
            public const int FileNamePath = -800;

            public const int SingleQuotedString = -100;

            // Irony Normal value, default value
            //public const int Normal = 0;
            public const int Bool = 0;

            public const int MultipleSheetsToken = 100;

            // Irony High value
            //public const int High = 1000;

            public const int CellToken = 1000;

            public const int NamedRangeCombination = 1100;

            public const int UDF = 1150;

            public const int ExcelFunction = 1200;
            public const int ExcelRefFunction = 1200;
            public const int FileNameNumericToken = 1200;
            public const int SheetToken = 1200;
            public const int SheetQuotedToken = 1200;
        }

        #endregion

        // Names of the built-in Excel functions; re-read from the embedded resource on every access
        private static string[] excelFunctionList => GetExcelFunctionList();

        /// <summary>
        /// Splits the embedded v1.7.3 built-in function list into one entry per non-empty line.
        /// </summary>
        private static string[] GetExcelFunctionList()
        {
            var resource = Properties.Resources.ExcelBuiltinFunctionList_v173;
            // Accept both \n and \r\n line endings; empty entries are dropped
            return resource.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
        }
    }

    #region Names
    /// <summary>
    /// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
    /// </summary>
    /// <remarks>
    /// Using these is strongly recommended, as these will change when breaking changes occur.
    /// It also allows you to see which code works on what grammar constructs.
    /// </remarks>
    // Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
{
    #region Non-Terminals

    // Names given to the grammar's non-terminals; each value is the node type reported in the parse tree.
    public const string Argument = "Argument";
    public const string Arguments = "Arguments";
    public const string ArrayColumns = "ArrayColumns";
    public const string ArrayConstant = "ArrayConstant";
    public const string ArrayFormula = "ArrayFormula";
    public const string ArrayRows = "ArrayRows";
    public const string Bool = "Bool";
    public const string Cell = "Cell";
    public const string Constant = "Constant";
    public const string ConstantArray = "ConstantArray";
    public const string DynamicDataExchange = "DynamicDataExchange";
    public const string EmptyArgument = "EmptyArgument";
    public const string Error = "Error";
    public const string ExcelFunction = "ExcelFunction";
    public const string File = "File";
    public const string Formula = "Formula";
    public const string FormulaWithEq = "FormulaWithEq";
    public const string FunctionCall = "FunctionCall";
    public const string FunctionName = "FunctionName";
    public const string HorizontalRange = "HRange";
    public const string MultiRangeFormula = "MultiRangeFormula";
    public const string NamedRange = "NamedRange";
    public const string Number = "Number";
    public const string Prefix = "Prefix";
    public const string QuotedFileSheet = "QuotedFileSheet";
    public const string Range = "Range";
    public const string Reference = "Reference";
    public const string ReferenceFunctionCall = "ReferenceFunctionCall";
    public const string RefError = "RefError";
    public const string RefFunctionName = "RefFunctionName";
    public const string ReservedName = "ReservedName";
    public const string Sheet = "Sheet";
    public const string StructuredReference = "StructuredReference";
    public const string StructuredReferenceColumn = "StructuredReferenceColumn";
    public const string StructuredReferenceExpression = "StructuredReferenceExpression";
    public const string StructuredReferenceSpecifier = "StructuredReferenceSpecifier";
    public const string StructuredReferenceQualifier = "StructuredReferenceQualifier";
    public const string Text = "Text";
    public const string UDFName = "UDFName";
    public const string UDFunctionCall = "UDFunctionCall";
    public const string Union = "Union";
    public const string VerticalRange = "VRange";

    #endregion

    #region Transient Non-Terminals

    // Names of the non-terminals that carry the "Transient" prefix in this class.
    public const string TransientStart = "Start";
    public const string TransientInfixOp = "InfixOp";
    public const string TransientPostfixOp = "PostfixOp";
    public const string TransientPrefixOp = "PrefixOp";
    public const string TransientReferenceItem = "ReferenceItem";

    #endregion

    #region Terminals

    // Names of the tokens (terminals).
    public const string TokenBool = "BoolToken";
    public const string TokenCell = "CellToken";
    public const string TokenEmptyArgument = "EmptyArgumentToken";
    public const string TokenError = "ErrorToken";
    public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
    public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
    public const string TokenFilePath = "FilePathToken";
    public const string TokenFileName = "FileNameToken";
    public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
    public const string TokenFileNameNumeric = "FileNameNumericToken";
    public const string TokenHRange = "HRangeToken";
    public const string TokenIntersect = "INTERSECT";
    public const string TokenMultipleSheets = "MultipleSheetsToken";
    public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
    public const string TokenName = "NameToken";
    public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
    public const string TokenNumber = "NumberToken";
    public const string TokenRefError = "RefErrorToken";
    public const string TokenReservedName = "ReservedNameToken";
    public const string TokenSingleQuotedString = "SingleQuotedString";
    public const string TokenSheet = "SheetNameToken";
    public const string TokenSheetQuoted = "SheetNameQuotedToken";
    public const string TokenSRColumn = "SRColumnToken";
    public const string TokenSRSpecifier = "SRSpecifierToken";
    public const string TokenText = "TextToken";
    public const string TokenUDF = "UDFToken";
    public const string TokenUnionOperator = ",";
    public const string TokenVRange = "VRangeToken";

    #endregion
}
#endregion
}
================================================ FILE: app/XLParser.Web/XLParserVersions/v173/ExcelFormulaParser.cs ================================================
using System;
using System.Collections.Generic;
using System.Linq;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v173
{
/// /// Excel formula parser
/// Contains parser and utilities that operate directly on the parse tree, or makes working with the parse tree easier. ///
public static class ExcelFormulaParser
{
    /// <summary>
    /// Thread-local singleton parser instance
    /// </summary>
    [ThreadStatic] private static Parser _p;

    /// <summary>
    /// Thread-safe parser: lazily creates one parser per thread on first use
    /// </summary>
    private static Parser P => _p ?? (_p = new Parser(new ExcelFormulaGrammar()));

    /// <summary>
    /// Parse a formula, return the tree's root node
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">
    /// If formula could not be parsed
    /// </exception>
    /// <returns>Parse tree root node</returns>
    public static ParseTreeNode Parse(string input)
    {
        return ParseToTree(input).Root;
    }

    /// <summary>
    /// Parse a formula, return the tree
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">
    /// If formula could not be parsed
    /// </exception>
    /// <returns>Parse tree</returns>
    public static ParseTree ParseToTree(string input)
    {
        var tree = P.Parse(input);

        if (tree.HasErrors())
        {
            throw new ArgumentException("Failed parsing input <<" + input + ">>");
        }

        // Move every intersect token one character to the left and give it a length of 1
        // (presumably so that its span covers the space that acts as the intersect operator).
        var intersects = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenIntersect));
        foreach (ParseTreeNode intersect in intersects)
        {
            var newLocation = new SourceLocation(
                intersect.Span.Location.Position - 1,
                intersect.Span.Location.Line,
                intersect.Span.Location.Column - 1);
            intersect.Span = new SourceSpan(newLocation, 1);
        }

        // Let PrefixInfo patch up every quoted sheet token using the original input text
        var quotedSheetNodes = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenSheetQuoted));
        foreach (ParseTreeNode quotedSheetNode in quotedSheetNodes)
        {
            PrefixInfo.FixQuotedSheetNodeForWhitespace(quotedSheetNode, input);
        }

        return tree;
    }

    /// <summary>
    /// Nodes in depth-first pre-order, with a conditional stop
    /// </summary>
    /// <param name="root">The root node</param>
    /// <param name="stopAt">Don't process the children of a node matching this predicate</param>
    // inspiration taken from https://irony.codeplex.com/discussions/213938
    public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
    {
        var stack = new Stack<ParseTreeNode>();
        stack.Push(root);
        while (stack.Count > 0)
        {
            var node = stack.Pop();
            yield return node;

            // Check if we don't want to process the children of this node
            if (stopAt != null && stopAt(node)) continue;

            var children = node.ChildNodes;
            // Push children on in reverse order so that they will
            // be evaluated left -> right when popped.
            for (int i = children.Count - 1; i >= 0; i--)
            {
                stack.Push(children[i]);
            }
        }
    }

    /// <summary>
    /// All nodes in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
    {
        return AllNodesConditional(root);
    }

    /// <summary>
    /// All nodes of a certain type in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
    {
        return AllNodes(root.AllNodes(), type);
    }

    internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
    {
        return allNodes.Where(node => node.Is(type));
    }

    /// <summary>
    /// Get the parent node of a node
    /// </summary>
    /// <remarks>
    /// This is an expensive operation, as the whole tree will be searched through
    /// </remarks>
    /// <exception cref="ArgumentException">If <paramref name="child"/> is not found below <paramref name="treeRoot"/></exception>
    public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
    {
        var parent = treeRoot.AllNodes()
            .FirstOrDefault(node => node.ChildNodes.Any(c => c == child));
        if (parent == null) throw new ArgumentException("Child is not part of the tree", nameof(child));
        return parent;
    }

    /// <summary>
    /// The node type/name
    /// </summary>
    public static string Type(this ParseTreeNode node)
    {
        return node.Term.Name;
    }

    /// <summary>
    /// Check if a node is of a particular type
    /// </summary>
    public static bool Is(this ParseTreeNode pt, string type)
    {
        return pt.Type() == type;
    }

    /// <summary>
    /// Checks whether this node is a function
    /// </summary>
    public static Boolean IsFunction(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.FunctionCall)
               || input.Is(GrammarNames.ReferenceFunctionCall)
               || input.Is(GrammarNames.UDFunctionCall)
               // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
               || (input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction())
            ;
    }

    /// <summary>
    /// Whether or not this node represents parentheses "(_)"
    /// </summary>
    public static bool IsParentheses(this ParseTreeNode input)
    {
        switch (input.Type())
        {
            case GrammarNames.Formula:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Formula);
            case GrammarNames.Reference:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Reference);
            default:
                return false;
        }
    }

    public static bool IsBinaryOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 3
               && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);
    }

    public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
    }

    public static bool IsUnaryOperation(this ParseTreeNode input)
    {
        return IsUnaryPrefixOperation(input) || IsUnaryPostfixOperation(input);
    }

    public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 2
               && input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 2
               && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    // Drops the last character; used below to strip the trailing "(" that function-name tokens carry
    private static string RemoveFinalSymbol(string input)
    {
        input = input.Substring(0, input.Length - 1);
        return input;
    }

    /// <summary>
    /// Get the function or operator name of this function call
    /// </summary>
    /// <exception cref="ArgumentException">If the node is not a function call or operation</exception>
    public static string GetFunction(this ParseTreeNode input)
    {
        if (input.IsIntersection())
        {
            return GrammarNames.TokenIntersect;
        }
        if (input.IsUnion())
        {
            return GrammarNames.TokenUnionOperator;
        }
        if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
        {
            return input.ChildNodes[1].Print();
        }
        if (input.IsUnaryPrefixOperation())
        {
            return input.ChildNodes[0].Print();
        }
        if (input.IsNamedFunction())
        {
            // Invariant upper-casing: function names must not depend on the current culture
            // (e.g. under tr-TR, ToUpper() turns "if" into "İF", which no longer equals "IF").
            return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
        }
        if (input.IsExternalUDFunction())
        {
            return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
        }

        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Check if this node is a specific function
    /// </summary>
    public static bool MatchFunction(this ParseTreeNode input, string functionName)
    {
        return IsFunction(input) && GetFunction(input) == functionName;
    }

    /// <summary>
    /// Get all the arguments of a function or operation
    /// </summary>
    /// <exception cref="ArgumentException">If the node is not a function call or operation</exception>
    public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
    {
        if (input.IsNamedFunction())
        {
            return input
                .ChildNodes[1] // "Arguments" non-terminal
                .ChildNodes    // "Argument" non-terminals
                .Select(node => node.ChildNodes[0])
                ;
        }
        if (input.IsBinaryOperation())
        {
            return new[] { input.ChildNodes[0], input.ChildNodes[2] };
        }
        if (input.IsUnaryPrefixOperation())
        {
            return new[] { input.ChildNodes[1] };
        }
        if (input.IsUnaryPostfixOperation())
        {
            return new[] { input.ChildNodes[0] };
        }
        if (input.IsUnion())
        {
            return input.ChildNodes[0].ChildNodes;
        }
        if (input.IsExternalUDFunction())
        {
            return input       // Reference
                .ChildNodes[1] // UDFunctionCall
                .ChildNodes[1] // Arguments
                .ChildNodes    // Argument non-terminals
                .Select(node => node.ChildNodes[0])
                ;
        }
        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Checks whether this node is a built-in excel function
    /// </summary>
    public static bool IsBuiltinFunction(this ParseTreeNode node)
    {
        return node.IsFunction()
               && (node.ChildNodes[0].Is(GrammarNames.FunctionName) || node.ChildNodes[0].Is(GrammarNames.RefFunctionName));
    }

    /// <summary>
    /// Whether or not this node represents an intersection
    /// </summary>
    public static bool IsIntersection(this ParseTreeNode input)
    {
        return IsBinaryOperation(input)
               && input.ChildNodes[1].Token.Terminal.Name == GrammarNames.TokenIntersect;
    }

    /// <summary>
    /// Whether or not this node represents an union
    /// </summary>
    public static bool IsUnion(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.ReferenceFunctionCall)
               && input.ChildNodes.Count == 1
               && input.ChildNodes[0].Is(GrammarNames.Union);
    }

    /// <summary>
    /// Checks whether this node is a function call with name, and not just a unary or binary operation
    /// </summary>
public static bool IsNamedFunction(this ParseTreeNode input) { return (input.Is(GrammarNames.FunctionCall) && input.ChildNodes[0].Is(GrammarNames.FunctionName)) || (input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes[0].Is(GrammarNames.RefFunctionName)) || input.Is(GrammarNames.UDFunctionCall); } public static bool IsOperation(this ParseTreeNode input) { return input.IsBinaryOperation() || input.IsUnaryOperation(); } public static bool IsExternalUDFunction(this ParseTreeNode input) { return input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsNamedFunction(); } /// /// True if this node presents a number constant with a sign /// public static bool IsNumberWithSign(this ParseTreeNode input) { return IsUnaryPrefixOperation(input) && input.ChildNodes[1].ChildNodes[0].Is(GrammarNames.Constant) && input.ChildNodes[1].ChildNodes[0].ChildNodes[0].Is(GrammarNames.Number); } /// /// Extract all of the information from a Prefix non-terminal /// public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix) => PrefixInfo.From(prefix); /// /// Go to the first non-formula child node /// public static ParseTreeNode SkipFormula(this ParseTreeNode input) { while (input.Is(GrammarNames.Formula)) { input = input.ChildNodes.First(); } return input; } /// /// Get all child nodes that are references and aren't part of another reference expression /// public static IEnumerable GetReferenceNodes(this ParseTreeNode input) { return input.AllNodesConditional(node => node.Is(GrammarNames.Reference)) .Where(node => node.Is(GrammarNames.Reference)) .Select(node => node.SkipToRelevant()) ; } /// /// Gets the ParserReferences from the input parse tree node and its children /// /// /// 5 cases: /// 1. ReferenceItem node: convert to ParserReference /// 2. 
Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node) /// (to include the references in the arguments of external UDFs) /// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange /// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences /// 5. Other cases (RefFunctionCall, Union, Arguments):recursive call on the nodes returned from GetReferenceNodes(node) /// public static IEnumerable GetParserReferences(this ParseTreeNode node) { if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1) node = node.ChildNodes[0]; var list = new List(); switch (node.Type()) { case GrammarNames.Cell: case GrammarNames.NamedRange: case GrammarNames.HorizontalRange: case GrammarNames.VerticalRange: case GrammarNames.StructuredReference: case GrammarNames.RefError: list.Add(new ParserReference(node)); break; case GrammarNames.Reference: list.Add(new ParserReference(node)); list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences())); break; default: if (node.IsRange()) { var rangeStart = GetParserReferences(node.ChildNodes[0]).FirstOrDefault(); var rangeEnd = GetParserReferences(node.ChildNodes[2]).FirstOrDefault(); if (rangeStart?.ReferenceType == ReferenceType.Cell && rangeEnd?.ReferenceType == ReferenceType.Cell) { ParserReference range = rangeStart; range.MaxLocation = rangeEnd.MinLocation; range.ReferenceType = ReferenceType.CellRange; range.ReferenceNode = node; range.LocationString = node.Print(); list.Add(range); break; } if (rangeStart?.ReferenceType == ReferenceType.Table && rangeEnd?.ReferenceType == ReferenceType.Table && rangeStart.Name == rangeEnd.Name && rangeStart.TableColumns.Length == 1 && rangeEnd.TableColumns.Length == 1) { ParserReference range = rangeStart; range.TableColumns = rangeStart.TableColumns.Concat(rangeEnd.TableColumns).ToArray(); 
range.TableSpecifiers = rangeStart.TableSpecifiers.SequenceEqual(rangeEnd.TableSpecifiers) ? range.TableSpecifiers : new string[0]; range.ReferenceNode = node; range.LocationString = node.Print(); list.Add(range); break; } } list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences())); break; } return list; } /// /// Whether or not this node represents a range /// public static bool IsRange(this ParseTreeNode input) { return input.IsBinaryReferenceOperation() && input.ChildNodes[1].Is(":") && input.ChildNodes[0].ChildNodes.Last().Type() == input.ChildNodes[2].ChildNodes.Last().Type(); } /// /// Go to the first "relevant" child node, i.e. skips wrapper nodes /// /// The input parse tree node /// If true, skip all reference nodes without a prefix instead of only parentheses /// /// Skips: /// * FormulaWithEq and ArrayFormula nodes /// * Formula nodes /// * Parentheses /// * Reference nodes which are just wrappers /// public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false) { while (true) { switch (input.Type()) { case GrammarNames.FormulaWithEq: case GrammarNames.ArrayFormula: input = input.ChildNodes[1]; break; case GrammarNames.Argument: case GrammarNames.Formula: if (input.ChildNodes.Count == 1) { input = input.ChildNodes[0]; } else { return input; } break; case GrammarNames.Reference: // Skip references which are parentheses // Skip references without a prefix (=> they only have one child node) if the option is set if ((skipReferencesWithoutPrefix && input.ChildNodes.Count == 1) || input.IsParentheses()) { input = input.ChildNodes[0]; } else { return input; } break; default: return input; } } } /// /// Pretty-print a parse tree to a string /// public static string Print(this ParseTreeNode input) { // For terminals, just print the token text if (input.Term is Terminal) { return input.Token.Text; } // (Lazy) enumerable for printed children var children = input.ChildNodes.Select(Print); // 
Concrete list when needed List childrenList; // Switch on non-terminals switch (input.Term.Name) { case GrammarNames.Formula: // Check if these are brackets, otherwise print first child return IsParentheses(input) ? $"({children.First()})" : children.First(); case GrammarNames.FunctionCall: case GrammarNames.ReferenceFunctionCall: case GrammarNames.UDFunctionCall: childrenList = children.ToList(); if (input.IsNamedFunction()) { return string.Join("", childrenList) + ")"; } if (input.IsBinaryOperation()) { // format string for "normal" binary operation string format = "{0}{1}{2}"; if (input.IsIntersection()) { format = "{0} {2}"; } return string.Format(format, childrenList[0], childrenList[1], childrenList[2]); } if (input.IsUnion()) { return $"({string.Join(",", childrenList)})"; } if (input.IsUnaryOperation()) { return string.Join("", childrenList); } throw new ArgumentException("Unknown function type."); case GrammarNames.Reference: return IsParentheses(input) ? $"({children.First()})" : string.Concat(children); case GrammarNames.Prefix: var ret = string.Join("", children); // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File)) { ret += "!"; } return ret; case GrammarNames.ArrayFormula: return "{=" + children.ElementAt(1) + "}"; // Terms for which to print all child nodes concatenated case GrammarNames.ArrayConstant: case GrammarNames.DynamicDataExchange: case GrammarNames.FormulaWithEq: case GrammarNames.File: case GrammarNames.MultiRangeFormula: case GrammarNames.StructuredReference: case GrammarNames.StructuredReferenceColumn: case GrammarNames.StructuredReferenceExpression: case GrammarNames.StructuredReferenceSpecifier: return string.Join("", children); // Terms for which we print the children comma-separated case GrammarNames.Arguments: case GrammarNames.ArrayRows: case GrammarNames.Union: return string.Join(",", children); 
case GrammarNames.ArrayColumns: return string.Join(";", children); case GrammarNames.ConstantArray: return $"{{{children.First()}}}"; default: // If it is not defined above and the number of children is exactly one, we want to just print the first child if (input.ChildNodes.Count == 1) { return children.First(); } throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine + "This probably means the Excel grammar was modified without the print function being modified"); } } } }
================================================ FILE: app/XLParser.Web/XLParserVersions/v173/ParserReference.cs ================================================
using System.Linq;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v173
{
    /// <summary>
    /// The kinds of reference a <see cref="ParserReference"/> can describe.
    /// </summary>
    public enum ReferenceType
    {
        Cell,
        CellRange,
        UserDefinedName,
        HorizontalRange,
        VerticalRange,
        RefError,
        Table,
        UserDefinedFunction
    }

    /// <summary>
    /// Flattened description of a single reference found in a parse tree.
    /// </summary>
    public class ParserReference
    {
        public ReferenceType ReferenceType { get; set; }
        public ParseTreeNode ReferenceNode { get; set; }
        public string LocationString { get; set; }
        public string Worksheet { get; set; }
        public string LastWorksheet { get; set; }
        public string FilePath { get; set; }
        public string FileName { get; set; }
        public string Name { get; set; }
        public string MinLocation { get; set; }
        public string MaxLocation { get; set; }
        public string[] TableSpecifiers { get; set; }
        public string[] TableColumns { get; set; }

        public ParserReference(ParseTreeNode node)
        {
            InitializeReference(node);
        }

        /// <summary>
        /// Initializes the current object based on the input ParseTreeNode
        /// </summary>
        /// <remarks>
        /// For Reference nodes (Prefix ReferenceItem), it initialize the values derived from the Prefix node and
        /// is re-invoked for the ReferenceItem node. After the recursive call returns, ReferenceNode and
        /// LocationString are overwritten so that they describe the outer Reference node (prefix included).
        /// </remarks>
        public void InitializeReference(ParseTreeNode node)
        {
            switch (node.Type())
            {
                case GrammarNames.Reference:
                    PrefixInfo prefix = node.ChildNodes[0].GetPrefixInfo();
                    Worksheet = prefix.HasSheet ? prefix.Sheet.Replace("''", "'") : "(Undefined sheet)";

                    if (prefix.HasMultipleSheets)
                    {
                        string[] sheets = prefix.MultipleSheets.Split(':');
                        Worksheet = sheets[0];
                        LastWorksheet = sheets[1];
                    }

                    if (prefix.HasFilePath)
                    {
                        FilePath = prefix.FilePath.Replace("''", "'");
                    }

                    if (prefix.HasFileNumber)
                    {
                        FileName = prefix.FileNumber.ToString();
                    }
                    else if (prefix.HasFileName)
                    {
                        FileName = prefix.FileName;
                    }

                    InitializeReference(node.ChildNodes[1]);
                    break;
                case GrammarNames.Cell:
                    ReferenceType = ReferenceType.Cell;
                    MinLocation = node.ChildNodes[0].Token.ValueString;
                    MaxLocation = MinLocation;
                    break;
                case GrammarNames.NamedRange:
                    ReferenceType = ReferenceType.UserDefinedName;
                    Name = node.ChildNodes[0].Token.ValueString;
                    break;
                case GrammarNames.StructuredReference:
                    ReferenceType = ReferenceType.Table;
                    Name = node.ChildNodes.FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceQualifier)?.ChildNodes[0].Token.ValueString;
                    TableSpecifiers = node.AllNodes().Where(x => x.Is(GrammarNames.TokenSRSpecifier) || x.Is("@")).Select(x => UnEscape(x.Token.ValueString, "'")).ToArray();
                    TableColumns = node.AllNodes().Where(x => x.Is(GrammarNames.TokenSRColumn)).Select(x => UnEscape(x.Token.ValueString, "'")).ToArray();
                    break;
                case GrammarNames.HorizontalRange:
                    string[] horizontalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                    ReferenceType = ReferenceType.HorizontalRange;
                    MinLocation = horizontalLimits[0];
                    MaxLocation = horizontalLimits[1];
                    break;
                case GrammarNames.VerticalRange:
                    string[] verticalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                    ReferenceType = ReferenceType.VerticalRange;
                    MinLocation = verticalLimits[0];
                    MaxLocation = verticalLimits[1];
                    break;
                case GrammarNames.RefError:
                    ReferenceType = ReferenceType.RefError;
                    break;
                case GrammarNames.UDFunctionCall:
                    ReferenceType = ReferenceType.UserDefinedFunction;
                    Name = node.ChildNodes[0].ChildNodes[0].Token.ValueString.TrimEnd('(');
                    break;
            }

            ReferenceNode = node;
            LocationString = node.Print();

            if (ReferenceType == ReferenceType.UserDefinedFunction && Name != null)
            {
                // Drop the argument list so that only "<prefix><function name>" remains.
                // The function name is printed right after the optional prefix, so its offset is known;
                // searching for the name instead breaks when an argument repeats it
                // (e.g. foo(foo(1)) or foo("foo")).
                int nameStart = node.Type() == GrammarNames.Reference ? node.ChildNodes[0].Print().Length : 0;
                int searchFrom = System.Math.Min(nameStart + Name.Length, LocationString.Length);
                int argumentsStart = LocationString.IndexOf('(', searchFrom);
                if (argumentsStart >= 0)
                {
                    LocationString = LocationString.Substring(0, argumentsStart);
                }
            }
        }

        /// <summary>
        /// Removes every occurrence of <paramref name="escapeCharacter"/> that is not directly followed by
        /// another <paramref name="escapeCharacter"/>.
        /// </summary>
        private string UnEscape(string value, string escapeCharacter)
        {
            // Escape the character so that regex metacharacters are treated literally
            string escaped = System.Text.RegularExpressions.Regex.Escape(escapeCharacter);
            return System.Text.RegularExpressions.Regex.Replace(value, $"{escaped}(?!{escaped})", "");
        }

        public override string ToString()
        {
            return LocationString;
        }
    }
}
================================================ FILE: app/XLParser.Web/XLParserVersions/v173/PrefixInfo.cs ================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v173
{
    // TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23
    /// <summary>
    /// Simple data class that holds information about a Prefix.
    /// </summary>
    public class PrefixInfo : IEquatable<PrefixInfo>
    {
        public string FilePath { get; }
        public bool HasFilePath => FilePath != null;

        private readonly int? _fileNumber;
        public int FileNumber => _fileNumber.GetValueOrDefault();
        public bool HasFileNumber => _fileNumber.HasValue;

        public string FileName { get; }
        public bool HasFileName => FileName != null;

        public bool HasFile => HasFileName || HasFileNumber;

        public string Sheet { get; }
        public bool HasSheet => Sheet != null;

        public string MultipleSheets { get; }
        public bool HasMultipleSheets => MultipleSheets != null;

        public bool IsQuoted { get; }

        public PrefixInfo(string sheet = null, int?
fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false) { Sheet = sheet; _fileNumber = fileNumber; FileName = fileName; FilePath = filePath; MultipleSheets = multipleSheets; IsQuoted = isQuoted; }

        /// <summary>
        /// Create a PrefixInfo class from a parse tree node
        /// </summary>
        /// <param name="prefix">A node of type Prefix</param>
        /// <exception cref="ArgumentException">The node is not a Prefix node</exception>
        internal static PrefixInfo From(ParseTreeNode prefix)
        {
            if (prefix.Type() != GrammarNames.Prefix)
            {
                throw new ArgumentException("Not a prefix", nameof(prefix));
            }

            string filePath = null;
            int? fileNumber = null;
            string fileName = null;
            string sheetName = null;
            string multipleSheets = null;

            // Token number we're processing
            var cur = 0;

            // Check for quotes
            var isQuoted = prefix.ChildNodes[cur].Is("'");
            if (isQuoted)
            {
                cur++;
            }

            // Check and process file
            if (prefix.ChildNodes[cur].Is(GrammarNames.File))
            {
                ParseTreeNode file = prefix.ChildNodes[cur];

                if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
                {
                    // Numeric filename
                    fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
                }
                else
                {
                    // String filename
                    var iCur = 0;
                    // Check if it includes a path
                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                    {
                        filePath = file.ChildNodes[iCur].Print();
                        iCur++;
                    }

                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                    {
                        fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                    }
                    else
                    {
                        fileName = file.ChildNodes[iCur].Print();
                    }
                }

                cur++;
            }

            // A prefix can consist of a file only; in that case there is no further child node
            // to inspect and indexing ChildNodes[cur] would be out of range.
            if (cur >= prefix.ChildNodes.Count)
            {
                return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
            }

            // Check for a non-quoted sheet
            if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheet))
            {
                sheetName = Substr(prefix.ChildNodes[cur].Print(), 1);
            }
            // Check for a quoted sheet
            else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheetQuoted))
            {
                // remove quote and !
                sheetName = Substr(prefix.ChildNodes[cur].Print(), 2);

                if (sheetName == "")
                {
                    // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                    // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                    sheetName = " ";
                }
            }
            // Check if multiple sheets
            else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenMultipleSheets))
            {
                multipleSheets = Substr(prefix.ChildNodes[cur].Print(), 1);
            }

            return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
        }

        /// <summary>
        /// Extends the span and token text of a quoted sheet node backwards over the whitespace that
        /// directly precedes it in <paramref name="sourceText"/>.
        /// </summary>
        internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
        {
            var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
            SourceLocation currentLocation = quotedSheetNode.Span.Location;
            if (newPosition == currentLocation.Position)
            {
                return;
            }

            // The start moves backwards by (Position - newPosition) characters, so the column has to
            // shrink by the same amount. Clamped at 0 in case the skipped whitespace contains a line break.
            var newColumn = Math.Max(0, currentLocation.Column - (currentLocation.Position - newPosition));
            var newLocation = new SourceLocation(newPosition, currentLocation.Line, newColumn);
            quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

            // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
            typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
                ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
        }

        /// <summary>
        /// Walks backwards from the start of the node over any whitespace and returns the resulting position.
        /// </summary>
        private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
        {
            var startIndex = nodeSheetQuoted.Span.Location.Position;
            while (startIndex > 0)
            {
                if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
                {
                    break;
                }

                startIndex--;
            }

            return startIndex;
        }

        /// <summary>
        /// Returns <paramref name="s"/> without its first <paramref name="removeFirst"/> and
        /// last <paramref name="removeLast"/> characters.
        /// </summary>
        private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
        {
            return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
        }

        public override bool Equals(object other) => Equals(other as PrefixInfo);

        /// <summary>
        /// Case-insensitive comparison of file, sheet and multiple-sheet information. IsQuoted is not compared.
        /// </summary>
        public bool Equals(PrefixInfo other)
        {
            if (ReferenceEquals(null, other)) return false;
            if (ReferenceEquals(this, other)) return true;
            return _fileNumber == other._fileNumber
                   && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
        }

        public override int GetHashCode()
        {
            unchecked
            {
                var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
                hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
                hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
                hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
                hashCode = (hashCode*397) ^ (MultipleSheets != null ?
StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0); return hashCode; } }

        public static bool operator ==(PrefixInfo left, PrefixInfo right)
        {
            return Equals(left, right);
        }

        public static bool operator !=(PrefixInfo left, PrefixInfo right)
        {
            return !Equals(left, right);
        }

        public override string ToString()
        {
            var res = new StringBuilder();
            if (IsQuoted) res.Append("'");
            if (HasFilePath) res.Append(FilePath);
            if (HasFileNumber) res.Append($"[{FileNumber}]");
            if (HasFileName) res.Append($"[{FileName}]");
            if (HasSheet) res.Append(Sheet);
            if (HasMultipleSheets) res.Append(MultipleSheets);
            if (IsQuoted) res.Append("'");
            res.Append("!");
            return res.ToString();
        }
    }
}
================================================ FILE: app/XLParser.Web/XLParserVersions/v173/WordsTerminal.cs ================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;

namespace XLParser.Web.XLParserVersions.v173
{
    /// <summary>
    /// Terminal that can determine, if there the input contains a one of expected words.
    /// </summary>
    /// <remarks>
    /// The words are stored in a character tree. Children of each node are represented as an array to
    /// allow direct indexation. Do not use for words that have a large difference between low and high
    /// character of a token.
    /// </remarks>
    public class WordsTerminal : Terminal
    {
        private readonly Node _rootNode;
        private readonly List<string> _words;
        private bool _caseSensitive;

        public WordsTerminal(string name, IEnumerable<string> words) : base(name)
        {
            _rootNode = new Node(0);
            _words = new List<string>(words);
        }

        public override void Init(GrammarData grammarData)
        {
            base.Init(grammarData);
            _caseSensitive = Grammar.CaseSensitive;

            // For case-insensitive grammars the tree only holds upper-case words; TryMatch upper-cases the input
            foreach (var word in _words)
            {
                AddWordToTree(_caseSensitive ? word : word.ToUpperInvariant());
            }

            if (EditorInfo == null)
            {
                EditorInfo = new TokenEditorInfo(TokenType.Unknown, TokenColor.Text, TokenTriggers.None);
            }
        }

        public override IList<string> GetFirsts() => _words;

        /// <summary>
        /// Tries to match the longest known word that starts at the current preview position.
        /// </summary>
        /// <returns>A token for the matched word, or null if no word starts at this position</returns>
        public override Token TryMatch(ParsingContext context, ISourceStream source)
        {
            var node = _rootNode;
            // Deepest node seen so far that completes a word. Tracking it means a word that is a proper
            // prefix of a longer, ultimately failing path still matches.
            Node lastTerminal = _rootNode.IsTerminal ? _rootNode : null;
            var input = source.Text;
            for (var i = source.PreviewPosition; i < input.Length; ++i)
            {
                var c = _caseSensitive ? input[i] : char.ToUpperInvariant(input[i]);
                var nextNode = node[c];
                if (nextNode is null)
                {
                    break;
                }

                node = nextNode;
                if (node.IsTerminal)
                {
                    lastTerminal = node;
                }
            }

            if (lastTerminal is null)
            {
                return null;
            }

            // Node.Length is the depth in the tree, i.e. the number of characters of the matched word
            source.PreviewPosition += lastTerminal.Length;
            return source.CreateToken(OutputTerminal);
        }

        private void AddWordToTree(string word)
        {
            var node = _rootNode;
            foreach (var c in word)
            {
                node = node.GetOrAddChild(c);
            }

            node.IsTerminal = true;
        }

        /// <summary>
        /// A node of the character tree. Children are stored in an array that covers the
        /// character range [_lowChar, _highChar]; unused slots are null.
        /// </summary>
        private class Node
        {
            private char _lowChar = '\0';
            private char _highChar = '\0';
            private Node[] _children;

            public Node(int length)
            {
                Length = length;
            }

            /// <summary>True when the path from the root to this node spells a complete word</summary>
            public bool IsTerminal { get; set; }

            /// <summary>Depth of this node, i.e. the number of characters on the path from the root</summary>
            public int Length { get; }

            /// <summary>Returns the child for character <paramref name="c"/>, or null if there is none</summary>
            public Node this[char c]
            {
                get
                {
                    if (_children is null)
                    {
                        return null;
                    }

                    if (c < _lowChar || c > _highChar)
                    {
                        return null;
                    }

                    return _children[c - _lowChar];
                }
            }

            /// <summary>
            /// Returns the child for <paramref name="c"/>, creating it (and growing the child array) when needed.
            /// </summary>
            internal Node GetOrAddChild(char c)
            {
                if (_children is null)
                {
                    var node = new Node(Length + 1);
                    _children = new[] { node };
                    _lowChar = c;
                    _highChar = c;
                    return node;
                }

                var newLowChar = (char)Math.Min(_lowChar, c);
                if (newLowChar != _lowChar)
                {
                    // Grow at the front: resize, shift the existing children right and clear the freed slots
                    var newChildrenCount = _highChar - newLowChar + 1;
                    Array.Resize(ref _children, newChildrenCount);
                    var ofs = _lowChar - newLowChar;
                    Array.Copy(_children, 0, _children, ofs, newChildrenCount - ofs);
                    Array.Clear(_children, 0, ofs);
                    _lowChar = newLowChar;
                    return _children[0] = new Node(Length + 1);
                }

                var newHighChar = (char)Math.Max(_highChar, c);
                if (newHighChar != _highChar)
                {
                    // Grow at the end: the new character takes the last slot
                    var newChildrenCount = newHighChar - _lowChar + 1;
                    Array.Resize(ref _children, newChildrenCount);
                    _highChar = newHighChar;
                    return _children[newChildrenCount - 1] = new Node(Length + 1);
                }

                var charIdx = c - _lowChar;
                var child = _children[charIdx];
                if (child is null)
                {
                    return _children[charIdx] = new Node(Length + 1);
                }

                return child;
            }
        }
    }
}
================================================ FILE: app/XLParser.Web/XLParserVersions/v174/ExcelFormulaGrammar.cs ================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;

namespace XLParser.Web.XLParserVersions.v174
{
    /// <summary>
    /// Contains the XLParser grammar
    /// </summary>
    [Language("Excel Formulas", "1.7.4", "Grammar for Excel Formulas")]
    public class ExcelFormulaGrammar : Grammar
    {
        #region 1-Terminals

        #region Symbols and operators

        public Terminal at => ToTerm("@");
        public Terminal comma => ToTerm(",");
        public Terminal colon => ToTerm(":");
        public Terminal hash => ToTerm("#");
        public Terminal semicolon => ToTerm(";");
        public Terminal OpenParen => ToTerm("(");
        public Terminal CloseParen => ToTerm(")");
        public Terminal CloseSquareParen => ToTerm("]");
        public Terminal OpenSquareParen => ToTerm("[");
        public Terminal exclamationMark => ToTerm("!");
        public Terminal CloseCurlyParen => ToTerm("}");
        public Terminal OpenCurlyParen => ToTerm("{");
        public Terminal QuoteS => ToTerm("'");

        public Terminal mulop => ToTerm("*");
        public Terminal plusop => ToTerm("+");
        public Terminal divop => ToTerm("/");
        public Terminal minop => ToTerm("-");
        public Terminal concatop => ToTerm("&");
        public Terminal expop => ToTerm("^");

        // Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
        // Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
        public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);

        public Terminal percentop => ToTerm("%");

        public Terminal gtop => ToTerm(">");
        public Terminal eqop => ToTerm("=");
        public Terminal ltop => ToTerm("<");
        public Terminal neqop
=> ToTerm("<>"); public Terminal gteop => ToTerm(">="); public Terminal lteop => ToTerm("<="); #endregion #region Literals public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE", "T", "F") { Priority = TerminalPriority.Bool }; public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.AllowStartEndDot) { DefaultIntTypes = new[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt } }; public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes); public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes) { Priority = TerminalPriority.SingleQuotedString }; public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!", "#"); public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError); #endregion #region Functions private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers private const string AllUdfChars = SpecialUdfChars + @"\\.\w"; private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)"; // The following regex uses the rather exotic feature Character Class Subtraction // https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\("; public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) { Priority = TerminalPriority.UDF }; public Terminal 
ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(", "I", "O") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(", "I", "C") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelFunction { get; } = new WordsTerminal(GrammarNames.ExcelFunction, excelFunctionList.Select(f => f + '(')) { Priority = TerminalPriority.ExcelFunction }; // Using this instead of Empty allows a more accurate tree public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument); #endregion #region References and names private const string ColumnPattern = @"(?:[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D]|[A-Z]{1,2})"; private const string RowPattern = @"(?:104857[0-6]|10485[0-6][0-9]|1048[0-4][0-9]{2}|104[0-7][0-9]{3}|10[0-3][0-9]{4}|[1-9][0-9]{1,5}|[1-9])"; private static readonly string[] ColumnPrefix = Enumerable.Range('A', 'Z' - 'A' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray(); private static readonly string[] RowPrefix = Enumerable.Range('1', '9' - '1' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray(); public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern, ColumnPrefix); public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?" + RowPattern + ":[$]?" + RowPattern, RowPrefix); private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?" 
+ RowPattern;
        public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex, ColumnPrefix) { Priority = TerminalPriority.CellToken };

        private static readonly HashSet<UnicodeCategory> UnicodeLetterCategories = new HashSet<UnicodeCategory>
        {
            UnicodeCategory.UppercaseLetter,
            UnicodeCategory.LowercaseLetter,
            UnicodeCategory.TitlecaseLetter,
            UnicodeCategory.ModifierLetter,
            UnicodeCategory.OtherLetter
        };

        // 48718 letters, but it allows parser to from tokens starting with digits, parentheses, operators...
        private static readonly string[] UnicodeLetters = Enumerable.Range(0, ushort.MaxValue).Where(codePoints => UnicodeLetterCategories.Contains(CharUnicodeInfo.GetUnicodeCategory((char)codePoints))).Select(codePoint => char.ToString((char)codePoint)).ToArray();
        private static readonly string[] NameStartCharPrefix = UnicodeLetters.Concat(new[] { @"\", "_" }).ToArray();

        // Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark
        private const string NameStartCharRegex = @"[\p{L}\\_]";
        private const string NameValidCharacterRegex = @"[\w\\_\.\?€]";
        public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*", NameStartCharPrefix) { Priority = TerminalPriority.Name };

        // Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference.
        // If we ever parse R1C1 references, make sure to include them here
        // TODO: Add all function names here
        private const string NamedRangeCombinationRegex =
            "((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
            // \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
            + "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
            // allow large cell references (e.g. A1048577) as named range
            // The last alternative is \d{8,} (any row number of 8 or more digits); without the backslash it
            // would match literal 'd' characters and names such as A10000000 would match no alternative.
            + "|(" + ColumnPattern + @"(104857[7-9]|10485[89][0-9]|1048[6-9][0-9]{2}|1049[0-9]{3}|10[5-9][0-9]{4}|1[1-9][0-9]{5}|[2-9][0-9]{6}|\d{8,})" + NameValidCharacterRegex + "*)"
            ;

        // To prevent e.g. "A1A1" being parsed as 2 cell tokens
        public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NamedRangeCombinationRegex, ColumnPrefix.Concat(new[] { "T", "F" }).ToArray()) { Priority = TerminalPriority.NamedRangeCombination };

        public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+", "_") { Priority = TerminalPriority.ReservedName };

        #region Structured References

        private const string SRSpecifierRegex = @"#(All|Data|Headers|Totals|This Row)";
        public Terminal SRSpecifierToken = new RegexBasedTerminal(GrammarNames.TokenSRSpecifier, SRSpecifierRegex, "#") { Priority = TerminalPriority.StructuredReference };

        private const string SRColumnRegex = @"(?:[^\[\]'#@]|(?:'['\[\]#@]))+";
        public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, SRColumnRegex) { Priority = TerminalPriority.StructuredReference };

        #endregion

        #region Prefixes

        private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
        private const string notSheetNameChars = @"'*\[\]\\:/?";
        //const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?"
+ "\\\""; //const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!"; private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+"; private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*"; //private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!"; public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!") { Priority = TerminalPriority.SheetToken }; public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!") { Priority = TerminalPriority.SheetQuotedToken }; private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!"; private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!"; public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex) { Priority = TerminalPriority.MultipleSheetsToken }; public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex) { Priority = TerminalPriority.MultipleSheetsToken }; private const string fileNameNumericRegex = @"\[[0-9]+\](?!,)(?=.*!)"; public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex, "[") { Priority = TerminalPriority.FileNameNumericToken }; private static readonly string fileNameInBracketsRegex = @"\[[^\[\]]+\]" + $"(?={normalSheetName}|{quotedSheetName}'|!)"; public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex, "[") { Priority = TerminalPriority.FileName }; // Source: https://stackoverflow.com/a/14632579 private const string fileNameRegex = @"[^\.\\\[\]]+\.[a-zA-z]{1,4}"; public Terminal FileName { get; } = new RegexBasedTerminal(GrammarNames.TokenFileName, 
fileNameRegex) { Priority = TerminalPriority.FileName }; // Source: http://stackoverflow.com/a/6416209/572635 private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\-.$ @~]+)\\(([^<>\"" /\|?*\\']|( |''))*\\)*"; private const string urlPathRegex = @"https?\:(//|\\\\)[\p{L}\p{N}\-_.]+(:[0-9]+)?(/|\\)([\p{L}\p{N}\-_.?,'+&%\$# ()~]*(/|\\))*"; private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")"; public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex) { Priority = TerminalPriority.FileNamePath }; #endregion #endregion #endregion #region 2-NonTerminals // Most non-terminals are first defined here, so they can be used anywhere in the rules // Otherwise you can only use non-terminals that have been defined previously public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument); public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments); public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns); public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant); public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula); public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows); public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool); public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell); public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant); public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray); public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange); public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument); public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error); public NonTerminal File { get; } = new NonTerminal(GrammarNames.File); public NonTerminal Formula{ get; } = 
new NonTerminal(GrammarNames.Formula);
        public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq);
        public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall);
        public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName);
        public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange);
        public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp);
        public NonTerminal MultiRangeFormula{ get; } = new NonTerminal(GrammarNames.MultiRangeFormula);
        public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange);
        public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number);
        public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp);
        public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix);
        public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp);
        // NOTE(review): QuotedFileSheet and Sheet are declared but no rule in the constructor below assigns or uses them
        public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet);
        public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference);
        public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem);
        public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall);
        public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError);
        public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName);
        public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName);
        public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet);
        public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart);
        public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference);
        public NonTerminal StructuredReferenceColumn { get; } = new NonTerminal(GrammarNames.StructuredReferenceColumn);
        public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression);
        public NonTerminal StructuredReferenceSpecifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceSpecifier);
        public NonTerminal StructuredReferenceQualifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceQualifier);
        public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text);
        public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName);
        public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall);
        public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union);
        public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange);

        #endregion

        /// <summary>
        /// Builds the grammar: marks punctuation, assigns the production rule of every
        /// non-terminal declared above and registers operator precedence and associativity.
        /// </summary>
        public ExcelFormulaGrammar()
        {
            #region Punctuation
            // Parentheses and curly braces are marked as punctuation
            MarkPunctuation(OpenParen, CloseParen);
            MarkPunctuation(OpenCurlyParen, CloseCurlyParen);
            #endregion

            #region Rules

            #region Base rules
            Root = Start;

            // A parse starts with one of: "=formula", a bare formula, "{=formula}" or "=union"
            Start.Rule =
                  FormulaWithEq
                | Formula
                | ArrayFormula
                | MultiRangeFormula
                ;
            MarkTransient(Start);

            ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen;

            MultiRangeFormula.Rule = eqop + Union;

            FormulaWithEq.Rule = eqop + Formula;

            Formula.Rule =
                  Reference + ReduceHere()
                | Constant
                | FunctionCall
                | ConstantArray
                | OpenParen + Formula + CloseParen
                | ReservedName
                ;

            ReservedName.Rule = ReservedNameToken;

            Constant.Rule =
                  Number
                | Text
                | Bool
                | Error
                ;

            // Thin wrappers: one non-terminal per constant token
            Text.Rule = TextToken;
            Number.Rule = NumberToken;
            Bool.Rule = BoolToken;
            Error.Rule = ErrorToken;
            RefError.Rule = RefErrorToken;
            #endregion

            #region Functions

            // Named function calls and prefix/postfix/infix operations all produce a FunctionCall node.
            // The opening parenthesis is not a separate symbol in the named-call alternative.
            FunctionCall.Rule =
                  FunctionName + Arguments + CloseParen
                | PrefixOp + Formula
                | Formula + PostfixOp
                | Formula + InfixOp + Formula
                ;

            FunctionName.Rule = ExcelFunction;

            // Zero or more comma-separated arguments
            Arguments.Rule = MakeStarRule(Arguments, comma, Argument);

            EmptyArgument.Rule = EmptyArgumentToken;
            Argument.Rule = Formula | EmptyArgument;

            PrefixOp.Rule =
                  ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop
                | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop
                | ImplyPrecedenceHere(Precedence.UnaryPreFix) + at;
            MarkTransient(PrefixOp);

            InfixOp.Rule =
                  expop
                | mulop
                | divop
                | plusop
                | minop
                | concatop
                | gtop
                | eqop
                | ltop
                | neqop
                | gteop
                | lteop;
            MarkTransient(InfixOp);

            // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action
            PostfixOp.Rule = PreferShiftHere() + percentop;
            MarkTransient(PostfixOp);
            #endregion

            #region References

            Reference.Rule =
                  ReferenceItem
                | ReferenceFunctionCall
                | OpenParen + Reference + PreferShiftHere() + CloseParen
                | Prefix + ReferenceItem
                | DynamicDataExchange
                ;

            // Operations that yield a reference: range (:), intersection, parenthesised union,
            // reference-returning functions, postfix "#" and prefix "!"
            ReferenceFunctionCall.Rule =
                  Reference + colon + Reference
                | Reference + intersectop + Reference
                | OpenParen + Union + CloseParen
                | RefFunctionName + Arguments + CloseParen
                | Reference + hash
                | exclamationMark + Reference
                ;

            RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken;

            // One or more comma-separated references
            Union.Rule = MakePlusRule(Union, comma, Reference);

            ReferenceItem.Rule =
                  Cell
                | NamedRange
                | VRange
                | HRange
                | RefError
                | UDFunctionCall
                | StructuredReference
                ;
            MarkTransient(ReferenceItem);

            UDFunctionCall.Rule = UDFName + Arguments + CloseParen;
            UDFName.Rule = UDFToken;

            VRange.Rule = VRangeToken;
            HRange.Rule = HRangeToken;

            Cell.Rule = CellToken;

            File.Rule =
                  FileNameNumericToken
                | FileNameEnclosedInBracketsToken
                | FilePathToken + FileNameEnclosedInBracketsToken
                | FilePathToken + FileName
                ;

            DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken;

            NamedRange.Rule = NameToken | NamedRangeCombinationToken;

            // Everything that may precede a reference item: sheet, quoted sheet, file + sheet,
            // file only, multiple sheets (quoted or not, with or without file) or a reference error
            Prefix.Rule =
                  SheetToken
                | QuoteS + SheetQuotedToken
                | File + SheetToken
                | QuoteS + File + SheetQuotedToken
                | File + exclamationMark
                | MultipleSheetsToken
                | QuoteS + MultipleSheetsQuotedToken
                | File + MultipleSheetsToken
                | QuoteS + File + MultipleSheetsQuotedToken
                | RefErrorToken
                ;

            StructuredReferenceQualifier.Rule = NameToken;

            StructuredReferenceSpecifier.Rule =
                  SRSpecifierToken
                | at
                | OpenSquareParen + SRSpecifierToken + CloseSquareParen;

            StructuredReferenceColumn.Rule =
                  SRColumnToken
                | OpenSquareParen + SRColumnToken + CloseSquareParen;

            // Content between the outer square brackets: a column or column range (optionally preceded by "@"),
            // or one or two specifiers optionally followed by a column or column range
            StructuredReferenceExpression.Rule =
                  StructuredReferenceColumn
                | StructuredReferenceColumn + colon + StructuredReferenceColumn
                | at + StructuredReferenceColumn
                | at + StructuredReferenceColumn + colon + StructuredReferenceColumn
                | StructuredReferenceSpecifier
                | StructuredReferenceSpecifier + comma + StructuredReferenceColumn
                | StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
                | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier
                | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn
                | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn
                ;

            StructuredReference.Rule =
                  OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
                | StructuredReferenceQualifier + OpenSquareParen + CloseSquareParen
                | StructuredReferenceQualifier + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen
                ;
            #endregion

            #region Arrays
            // Array constant: rows separated by ";" and values within a row separated by ","
            ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen;

            ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows);
            ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant);

            ArrayConstant.Rule = Constant | PrefixOp + Number | RefError;
            #endregion

            #endregion

            #region 5-Operator Precedence
            // Some of these operators are neutral associative instead of left associative,
            // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific
            // structure of the parse tree, we like consistency.
            RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop);
            RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop);
            RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop);
            RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop);
            RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop);
            RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash);
            RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at);
            RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, exclamationMark);
            RegisterOperators(Precedence.Union, Associativity.Left, comma);
            RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop);
            RegisterOperators(Precedence.Range, Associativity.Left, colon);
            #endregion
        }

        #region Precedence and Priority constants
        // Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a
        // Could also be an enum, but this way you don't need int casts
        // Operator precedence levels passed to RegisterOperators / ImplyPrecedenceHere in the constructor
        private static class Precedence
        {
            // Don't use priority 0, Irony seems to view it as no priority set
            public const int Comparison = 1;
            public const int Concatenation = 2;
            public const int Addition = 3;
            public const int Multiplication = 4;
            public const int Exponentiation = 5;
            public const int UnaryPostFix = 6;
            public const int UnaryPreFix = 7;
            //public const int Reference = 8;
            public const int Union = 9;
            public const int Intersection = 10;
            public const int Range = 11;
        }

        // Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match
        // E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority
        // E.g. "A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination
        private static class TerminalPriority
        {
            // Irony Low value
            //public const int Low = -1000;

            public const int Name = -800;
            public const int ReservedName = -700;

            public const int StructuredReference = -500;

            public const int FileName = -500;
            public const int FileNamePath = -800;

            public const int SingleQuotedString = -100;

            // Irony Normal value, default value
            //public const int Normal = 0;
            public const int Bool = 0;

            public const int MultipleSheetsToken = 100;

            // Irony High value
            //public const int High = 1000;

            public const int CellToken = 1000;

            public const int NamedRangeCombination = 1100;

            public const int UDF = 1150;

            public const int ExcelFunction = 1200;
            public const int ExcelRefFunction = 1200;
            public const int FileNameNumericToken = 1200;
            public const int SheetToken = 1200;
            public const int SheetQuotedToken = 1200;
        }
        #endregion

        // Expression-bodied property: the embedded list is re-read and re-split on every access
        private static string[] excelFunctionList => GetExcelFunctionList();

        // Reads the embedded v174 built-in function list resource and splits it into
        // non-empty lines on '\n' and '\r'
        private static string[] GetExcelFunctionList()
        {
            var resource = Properties.Resources.ExcelBuiltinFunctionList_v174;
            using (var sr = new StringReader(resource))
                return sr.ReadToEnd().Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
        }
    }

    #region Names
    /// <summary>
    /// Collection of names used for terminals and non-terminals in the Excel Formula Grammar.
    /// </summary>
    /// <remarks>
    /// Using these is strongly recommended, as these will change when breaking changes occur.
    /// It also allows you to see which code works on what grammar constructs.
    /// </remarks>
    // Keep these constants instead of methods/properties, since that allows them to be used in switch statements.
public static class GrammarNames
    {
        #region Non-Terminals
        // Names given to the NonTerminal objects of the grammar; parse-tree nodes are
        // compared against these via node.Term.Name
        public const string Argument = "Argument";
        public const string Arguments = "Arguments";
        public const string ArrayColumns = "ArrayColumns";
        public const string ArrayConstant = "ArrayConstant";
        public const string ArrayFormula = "ArrayFormula";
        public const string ArrayRows = "ArrayRows";
        public const string Bool = "Bool";
        public const string Cell = "Cell";
        public const string Constant = "Constant";
        public const string ConstantArray = "ConstantArray";
        public const string DynamicDataExchange = "DynamicDataExchange";
        public const string EmptyArgument = "EmptyArgument";
        public const string Error = "Error";
        public const string ExcelFunction = "ExcelFunction";
        public const string File = "File";
        public const string Formula = "Formula";
        public const string FormulaWithEq = "FormulaWithEq";
        public const string FunctionCall = "FunctionCall";
        public const string FunctionName = "FunctionName";
        // Note: the constant is called HorizontalRange, the node name is "HRange"
        public const string HorizontalRange = "HRange";
        public const string MultiRangeFormula = "MultiRangeFormula";
        public const string NamedRange = "NamedRange";
        public const string Number = "Number";
        public const string Prefix = "Prefix";
        public const string QuotedFileSheet = "QuotedFileSheet";
        public const string Range = "Range";
        public const string Reference = "Reference";
        public const string ReferenceFunctionCall = "ReferenceFunctionCall";
        public const string RefError = "RefError";
        public const string RefFunctionName = "RefFunctionName";
        public const string ReservedName = "ReservedName";
        public const string Sheet = "Sheet";
        public const string StructuredReference = "StructuredReference";
        public const string StructuredReferenceColumn = "StructuredReferenceColumn";
        public const string StructuredReferenceExpression = "StructuredReferenceExpression";
        public const string StructuredReferenceSpecifier = "StructuredReferenceSpecifier";
        public const string StructuredReferenceQualifier = "StructuredReferenceQualifier";
        public const string Text = "Text";
        public const string UDFName = "UDFName";
        public const string UDFunctionCall = "UDFunctionCall";
        public const string Union = "Union";
        // Note: the constant is called VerticalRange, the node name is "VRange"
        public const string VerticalRange = "VRange";
        #endregion

        #region Transient Non-Terminals
        // Names of the non-terminals the grammar marks as transient
        public const string TransientStart = "Start";
        public const string TransientInfixOp = "InfixOp";
        public const string TransientPostfixOp = "PostfixOp";
        public const string TransientPrefixOp = "PrefixOp";
        public const string TransientReferenceItem = "ReferenceItem";
        #endregion

        #region Terminals
        // Names given to the lexer terminals
        public const string TokenBool = "BoolToken";
        public const string TokenCell = "CellToken";
        public const string TokenEmptyArgument = "EmptyArgumentToken";
        public const string TokenError = "ErrorToken";
        public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
        public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
        public const string TokenFilePath = "FilePathToken";
        public const string TokenFileName = "FileNameToken";
        public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
        public const string TokenFileNameNumeric = "FileNameNumericToken";
        public const string TokenHRange = "HRangeToken";
        public const string TokenIntersect = "INTERSECT";
        public const string TokenMultipleSheets = "MultipleSheetsToken";
        public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
        public const string TokenName = "NameToken";
        public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
        public const string TokenNumber = "NumberToken";
        public const string TokenRefError = "RefErrorToken";
        public const string TokenReservedName = "ReservedNameToken";
        public const string TokenSingleQuotedString = "SingleQuotedString";
        public const string TokenSheet = "SheetNameToken";
        public const string TokenSheetQuoted = "SheetNameQuotedToken";
        public const string TokenSRColumn = "SRColumnToken";
        public const string TokenSRSpecifier = "SRSpecifierToken";
        public const string TokenText = "TextToken";
        public const string TokenUDF = "UDFToken";
        public const string TokenUnionOperator = ",";
        public const string TokenVRange = "VRangeToken";
        #endregion

    }
    #endregion
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v174/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v174
{
    ///
    /// Excel formula parser
    /// Contains the parser and utilities that operate directly on the parse tree or make working with the parse tree easier.
    ///
public static class ExcelFormulaParser { /// /// Thread-local singleton parser instance /// [ThreadStatic] private static Parser _p; /// /// Thread-safe parser /// private static Parser P => _p ?? (_p = new Parser(new ExcelFormulaGrammar())); /// /// Parse a formula, return the the tree's root node /// /// The formula to be parsed. /// /// If formula could not be parsed /// /// Parse tree root node public static ParseTreeNode Parse(string input) { return ParseToTree(input).Root; } /// /// Parse a formula, return the the tree /// /// The formula to be parsed. /// /// If formula could not be parsed /// /// Parse tree public static ParseTree ParseToTree(string input) { var tree = P.Parse(input); if (tree.HasErrors()) { throw new ArgumentException("Failed parsing input <<" + input + ">>"); } var intersects = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenIntersect)); foreach (ParseTreeNode intersect in intersects) { var newLocation = new SourceLocation(intersect.Span.Location.Position - 1, intersect.Span.Location.Line, intersect.Span.Location.Column - 1); intersect.Span = new SourceSpan(newLocation, 1); } var quotedSheetNodes = tree.Root.AllNodes().Where(node => node.Is(GrammarNames.TokenSheetQuoted)); foreach (ParseTreeNode quotedSheetNode in quotedSheetNodes) { PrefixInfo.FixQuotedSheetNodeForWhitespace(quotedSheetNode, input); } return tree; } /// /// Non-terminal nodes in depth-first pre-order, with a conditional stop /// /// The root node /// Don't process the children of a node matching this predicate // inspiration taken from https://irony.codeplex.com/discussions/213938 public static IEnumerable AllNodesConditional(this ParseTreeNode root, Predicate stopAt = null) { var stack = new Stack(); stack.Push(root); while (stack.Count > 0) { var node = stack.Pop(); yield return node; // Check if we don't want to process the children of this node if (stopAt != null && stopAt(node)) continue; var children = node.ChildNodes; // Push children on in reverse 
order so that they will // be evaluated left -> right when popped. for (int i = children.Count - 1; i >= 0; i--) { stack.Push(children[i]); } } } /// /// All non-terminal nodes in depth-first pre-order /// public static IEnumerable AllNodes(this ParseTreeNode root) { return AllNodesConditional(root); } /// /// All non-terminal nodes of a certain type in depth-first pre-order /// public static IEnumerable AllNodes(this ParseTreeNode root, string type) { return AllNodes(root.AllNodes(), type); } internal static IEnumerable AllNodes(IEnumerable allNodes, string type) { return allNodes.Where(node => node.Is(type)); } /// /// Get the parent node of a node /// /// /// This is an expensive operation, as the whole tree will be searched through /// public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot) { var parent = treeRoot.AllNodes() .FirstOrDefault(node => node.ChildNodes.Any(c => c == child)); if(parent == null) throw new ArgumentException("Child is not part of the tree", nameof(child)); return parent; } /// /// The node type/name /// public static string Type(this ParseTreeNode node) { return node.Term.Name; } /// /// Check if a node is of a particular type /// public static bool Is(this ParseTreeNode pt, string type) { return pt.Type() == type; } /// /// Checks whether this node is a function /// public static Boolean IsFunction(this ParseTreeNode input) { return input.Is(GrammarNames.FunctionCall) || input.Is(GrammarNames.ReferenceFunctionCall) || input.Is(GrammarNames.UDFunctionCall) // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable || (input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction()) ; } /// /// Whether or not this node represents parentheses "(_)" /// public static bool IsParentheses(this ParseTreeNode input) { switch (input.Type()) { case GrammarNames.Formula: return input.ChildNodes.Count == 1 && 
input.ChildNodes[0].Is(GrammarNames.Formula); case GrammarNames.Reference: return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Reference); default: return false; } } public static bool IsBinaryOperation(this ParseTreeNode input) { return input.IsFunction() && input.ChildNodes.Count == 3 && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator); } public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input) { return input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall); } public static bool IsBinaryReferenceOperation(this ParseTreeNode input) { return input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall); } public static bool IsUnaryOperation(this ParseTreeNode input) { return IsUnaryPrefixOperation(input) || IsUnaryPostfixOperation(input); } public static bool IsUnaryPrefixOperation(this ParseTreeNode input) { return input.IsFunction() && input.ChildNodes.Count == 2 && input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator); } public static bool IsUnaryPostfixOperation(this ParseTreeNode input) { return input.IsFunction() && input.ChildNodes.Count == 2 && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator); } private static string RemoveFinalSymbol(string input) { input = input.Substring(0, input.Length - 1); return input; } /// /// Get the function or operator name of this function call /// public static string GetFunction(this ParseTreeNode input) { if (input.IsIntersection()) { return GrammarNames.TokenIntersect; } if (input.IsUnion()) { return GrammarNames.TokenUnionOperator; } if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation()) { return input.ChildNodes[1].Print(); } if (input.IsUnaryPrefixOperation()) { return input.ChildNodes[0].Print(); } if (input.IsNamedFunction()) { return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpper(); } if (input.IsExternalUDFunction()) { return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}"; } throw new 
ArgumentException("Not a function call", nameof(input)); } /// /// Check if this node is a specific function /// public static bool MatchFunction(this ParseTreeNode input, string functionName) { return IsFunction(input) && GetFunction(input) == functionName; } /// /// Get all the arguments of a function or operation /// public static IEnumerable GetFunctionArguments(this ParseTreeNode input) { if (input.IsNamedFunction()) { return input .ChildNodes[1] // "Arguments" non-terminal .ChildNodes // "Argument" non-terminals .Select(node => node.ChildNodes[0]) ; } if (input.IsBinaryOperation()) { return new[] {input.ChildNodes[0], input.ChildNodes[2]}; } if (input.IsUnaryPrefixOperation()) { return new[] {input.ChildNodes[1]}; } if (input.IsUnaryPostfixOperation()) { return new[] {input.ChildNodes[0]}; } if (input.IsUnion()) { return input.ChildNodes[0].ChildNodes; } if (input.IsExternalUDFunction()) { return input // Reference .ChildNodes[1] // UDFunctionCall .ChildNodes[1] // Arguments .ChildNodes // Argument non-terminals .Select(node => node.ChildNodes[0]) ; } throw new ArgumentException("Not a function call", nameof(input)); } /// /// Checks whether this node is a built-in excel function /// public static bool IsBuiltinFunction(this ParseTreeNode node) { return node.IsFunction() && (node.ChildNodes[0].Is(GrammarNames.FunctionName) || node.ChildNodes[0].Is(GrammarNames.RefFunctionName)); } /// /// Whether or not this node represents an intersection /// public static bool IsIntersection(this ParseTreeNode input) { return IsBinaryOperation(input) && input.ChildNodes[1].Token.Terminal.Name == GrammarNames.TokenIntersect; } /// /// Whether or not this node represents an union /// public static bool IsUnion(this ParseTreeNode input) { return input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Union); } /// /// Checks whether this node is a function call with name, and not just a unary or binary operation /// 
public static bool IsNamedFunction(this ParseTreeNode input) { return (input.Is(GrammarNames.FunctionCall) && input.ChildNodes[0].Is(GrammarNames.FunctionName)) || (input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes[0].Is(GrammarNames.RefFunctionName)) || input.Is(GrammarNames.UDFunctionCall); } public static bool IsOperation(this ParseTreeNode input) { return input.IsBinaryOperation() || input.IsUnaryOperation(); } public static bool IsExternalUDFunction(this ParseTreeNode input) { return input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsNamedFunction(); } /// /// True if this node presents a number constant with a sign /// public static bool IsNumberWithSign(this ParseTreeNode input) { return IsUnaryPrefixOperation(input) && input.ChildNodes[1].ChildNodes[0].Is(GrammarNames.Constant) && input.ChildNodes[1].ChildNodes[0].ChildNodes[0].Is(GrammarNames.Number); } /// /// Extract all of the information from a Prefix non-terminal /// public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix) => PrefixInfo.From(prefix); /// /// Go to the first non-formula child node /// public static ParseTreeNode SkipFormula(this ParseTreeNode input) { while (input.Is(GrammarNames.Formula)) { input = input.ChildNodes.First(); } return input; } /// /// Get all child nodes that are references and aren't part of another reference expression /// public static IEnumerable GetReferenceNodes(this ParseTreeNode input) { return input.AllNodesConditional(node => node.Is(GrammarNames.Reference)) .Where(node => node.Is(GrammarNames.Reference)) .Select(node => node.SkipToRelevant()) ; } /// /// Gets the ParserReferences from the input parse tree node and its children /// /// /// 5 cases: /// 1. ReferenceItem node: convert to ParserReference /// 2. 
Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node) /// (to include the references in the arguments of external UDFs) /// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange /// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences /// 5. Other cases (RefFunctionCall, Union, Arguments):recursive call on the nodes returned from GetReferenceNodes(node) /// public static IEnumerable GetParserReferences(this ParseTreeNode node) { if (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1) node = node.ChildNodes[0]; var list = new List(); switch (node.Type()) { case GrammarNames.Cell: case GrammarNames.NamedRange: case GrammarNames.HorizontalRange: case GrammarNames.VerticalRange: case GrammarNames.StructuredReference: case GrammarNames.RefError: list.Add(new ParserReference(node)); break; case GrammarNames.Reference: list.Add(new ParserReference(node)); list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences())); break; default: if (node.IsRange()) { var rangeStart = GetParserReferences(node.ChildNodes[0]).FirstOrDefault(); var rangeEnd = GetParserReferences(node.ChildNodes[2]).FirstOrDefault(); if (rangeStart?.ReferenceType == ReferenceType.Cell && rangeEnd?.ReferenceType == ReferenceType.Cell) { ParserReference range = rangeStart; range.MaxLocation = rangeEnd.MinLocation; range.ReferenceType = ReferenceType.CellRange; range.ReferenceNode = node; range.LocationString = node.Print(); list.Add(range); break; } if (rangeStart?.ReferenceType == ReferenceType.Table && rangeEnd?.ReferenceType == ReferenceType.Table && rangeStart.Name == rangeEnd.Name && rangeStart.TableColumns.Length == 1 && rangeEnd.TableColumns.Length == 1) { ParserReference range = rangeStart; range.TableColumns = rangeStart.TableColumns.Concat(rangeEnd.TableColumns).ToArray(); 
range.TableSpecifiers = rangeStart.TableSpecifiers.SequenceEqual(rangeEnd.TableSpecifiers) ? range.TableSpecifiers : new string[0]; range.ReferenceNode = node; range.LocationString = node.Print(); list.Add(range); break; } } list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences())); break; } return list; } /// /// Whether or not this node represents a range /// public static bool IsRange(this ParseTreeNode input) { return input.IsBinaryReferenceOperation() && input.ChildNodes[1].Is(":") && input.ChildNodes[0].ChildNodes.Last().Type() == input.ChildNodes[2].ChildNodes.Last().Type(); } /// /// Go to the first "relevant" child node, i.e. skips wrapper nodes /// /// The input parse tree node /// If true, skip all reference nodes without a prefix instead of only parentheses /// /// Skips: /// * FormulaWithEq and ArrayFormula nodes /// * Formula nodes /// * Parentheses /// * Reference nodes which are just wrappers /// public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false) { while (true) { switch (input.Type()) { case GrammarNames.FormulaWithEq: case GrammarNames.ArrayFormula: input = input.ChildNodes[1]; break; case GrammarNames.Argument: case GrammarNames.Formula: if (input.ChildNodes.Count == 1) { input = input.ChildNodes[0]; } else { return input; } break; case GrammarNames.Reference: // Skip references which are parentheses // Skip references without a prefix (=> they only have one child node) if the option is set if ((skipReferencesWithoutPrefix && input.ChildNodes.Count == 1) || input.IsParentheses()) { input = input.ChildNodes[0]; } else { return input; } break; default: return input; } } } /// /// Pretty-print a parse tree to a string /// public static string Print(this ParseTreeNode input) { // For terminals, just print the token text if (input.Term is Terminal) { return input.Token.Text; } // (Lazy) enumerable for printed children var children = input.ChildNodes.Select(Print); // 
Concrete list when needed List childrenList; // Switch on non-terminals switch (input.Term.Name) { case GrammarNames.Formula: // Check if these are brackets, otherwise print first child return IsParentheses(input) ? $"({children.First()})" : children.First(); case GrammarNames.FunctionCall: case GrammarNames.ReferenceFunctionCall: case GrammarNames.UDFunctionCall: childrenList = children.ToList(); if (input.IsNamedFunction()) { return string.Join("", childrenList) + ")"; } if (input.IsBinaryOperation()) { // format string for "normal" binary operation string format = "{0}{1}{2}"; if (input.IsIntersection()) { format = "{0} {2}"; } return string.Format(format, childrenList[0], childrenList[1], childrenList[2]); } if (input.IsUnion()) { return $"({string.Join(",", childrenList)})"; } if (input.IsUnaryOperation()) { return string.Join("", childrenList); } throw new ArgumentException("Unknown function type."); case GrammarNames.Reference: return IsParentheses(input) ? $"({children.First()})" : string.Concat(children); case GrammarNames.Prefix: var ret = string.Join("", children); // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File)) { ret += "!"; } return ret; case GrammarNames.ArrayFormula: return "{=" + children.ElementAt(1) + "}"; // Terms for which to print all child nodes concatenated case GrammarNames.ArrayConstant: case GrammarNames.DynamicDataExchange: case GrammarNames.FormulaWithEq: case GrammarNames.File: case GrammarNames.MultiRangeFormula: case GrammarNames.StructuredReference: case GrammarNames.StructuredReferenceColumn: case GrammarNames.StructuredReferenceExpression: case GrammarNames.StructuredReferenceSpecifier: return string.Join("", children); // Terms for which we print the children comma-separated case GrammarNames.Arguments: case GrammarNames.ArrayRows: case GrammarNames.Union: return string.Join(",", children); 
case GrammarNames.ArrayColumns:
                    return string.Join(";", children);

                case GrammarNames.ConstantArray:
                    return $"{{{children.First()}}}";

                default:
                    // If it is not defined above and the number of children is exactly one, we want to just print the first child
                    if (input.ChildNodes.Count == 1)
                    {
                        return children.First();
                    }

                    throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine + "This probably means the Excel grammar was modified without the print function being modified");
            }
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v174/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v174
{
    /// <summary>
    /// The kinds of reference a <see cref="ParserReference"/> can describe
    /// </summary>
    public enum ReferenceType
    {
        Cell,
        CellRange,
        UserDefinedName,
        HorizontalRange,
        VerticalRange,
        RefError,
        Table,
        UserDefinedFunction
    }

    /// <summary>
    /// Flat description of one reference found in a parse tree: its type, the node it came from,
    /// its printed form, and the file/sheet/location parts taken from the prefix and reference item.
    /// </summary>
    public class ParserReference
    {
        public ReferenceType ReferenceType { get; set; }
        public ParseTreeNode ReferenceNode { get; set; }
        public string LocationString { get; set; }
        public string Worksheet { get; set; }
        public string LastWorksheet { get; set; }
        public string FilePath { get; set; }
        public string FileName { get; set; }
        public string Name { get; set; }
        public string MinLocation { get; set; }
        public string MaxLocation { get; set; }
        public string[] TableSpecifiers { get; set; }
        public string[] TableColumns { get; set; }

        public ParserReference(ParseTreeNode node)
        {
            InitializeReference(node);
        }

        /// <summary>
        /// Initializes the current object based on the input ParseTreeNode
        /// </summary>
        /// <remarks>
        /// For Reference nodes (Prefix ReferenceItem), it initializes the values derived from the Prefix node and
        /// is re-invoked for the ReferenceItem node. Afterwards ReferenceNode and LocationString are
        /// overwritten with the outer Reference node, so the printed location includes the prefix.
        /// </remarks>
        public void InitializeReference(ParseTreeNode node)
        {
            switch (node.Type())
            {
                case GrammarNames.Reference:
                    PrefixInfo prefix = node.ChildNodes[0].GetPrefixInfo();
                    // Doubled single quotes are the escaped form of one quote
                    Worksheet = prefix.HasSheet ? prefix.Sheet.Replace("''", "'") : "(Undefined sheet)";

                    if (prefix.HasMultipleSheets)
                    {
                        string[] sheets = prefix.MultipleSheets.Split(':');
                        Worksheet = sheets[0];
                        LastWorksheet = sheets[1];
                    }

                    if (prefix.HasFilePath)
                    {
                        FilePath = prefix.FilePath.Replace("''", "'");
                    }

                    if (prefix.HasFileNumber)
                    {
                        FileName = prefix.FileNumber.ToString();
                    }
                    else if (prefix.HasFileName)
                    {
                        FileName = prefix.FileName;
                    }

                    InitializeReference(node.ChildNodes[1]);
                    break;
                case GrammarNames.Cell:
                    ReferenceType = ReferenceType.Cell;
                    MinLocation = node.ChildNodes[0].Token.ValueString;
                    MaxLocation = MinLocation;
                    break;
                case GrammarNames.NamedRange:
                    ReferenceType = ReferenceType.UserDefinedName;
                    Name = node.ChildNodes[0].Token.ValueString;
                    break;
                case GrammarNames.StructuredReference:
                    ReferenceType = ReferenceType.Table;
                    // The qualifier is optional, in which case Name stays null
                    Name = node.ChildNodes.FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceQualifier)?.ChildNodes[0].Token.ValueString;
                    TableSpecifiers = node.AllNodes().Where(x => x.Is(GrammarNames.TokenSRSpecifier) || x.Is("@")).Select(x => UnEscape(x.Token.ValueString, "'")).ToArray();
                    TableColumns = node.AllNodes().Where(x => x.Is(GrammarNames.TokenSRColumn)).Select(x => UnEscape(x.Token.ValueString, "'")).ToArray();
                    break;
                case GrammarNames.HorizontalRange:
                    string[] horizontalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                    ReferenceType = ReferenceType.HorizontalRange;
                    MinLocation = horizontalLimits[0];
                    MaxLocation = horizontalLimits[1];
                    break;
                case GrammarNames.VerticalRange:
                    string[] verticalLimits = node.ChildNodes[0].Token.ValueString.Split(':');
                    ReferenceType = ReferenceType.VerticalRange;
                    MinLocation = verticalLimits[0];
                    MaxLocation = verticalLimits[1];
                    break;
                case GrammarNames.RefError:
                    ReferenceType = ReferenceType.RefError;
                    break;
                case GrammarNames.UDFunctionCall:
                    ReferenceType = ReferenceType.UserDefinedFunction;
                    // The function token includes the opening parenthesis
                    Name = node.ChildNodes[0].ChildNodes[0].Token.ValueString.TrimEnd('(');
                    break;
            }

            ReferenceNode = node;
            LocationString = node.Print();
            if (ReferenceType == ReferenceType.UserDefinedFunction && Name != null)
            {
                // Keep everything up to the parenthesis that follows the last occurrence of the function name
                LocationString = LocationString.Substring(0, LocationString.IndexOf('(', LocationString.LastIndexOf(Name, System.StringComparison.Ordinal)));
            }
        }

        /// <summary>
        /// Removes every occurrence of <paramref name="escapeCharacter"/> that is not directly followed
        /// by another one, so a doubled escape character collapses to a single one.
        /// </summary>
        /// <param name="value">The text to un-escape</param>
        /// <param name="escapeCharacter">The escape character, treated as literal text</param>
        private string UnEscape(string value, string escapeCharacter)
        {
            // Escape the argument so regex metacharacters in it are matched literally
            string literal = System.Text.RegularExpressions.Regex.Escape(escapeCharacter);
            return System.Text.RegularExpressions.Regex.Replace(value, $"{literal}(?!{literal})", "");
        }

        /// <summary>
        /// Returns the printed location of the reference
        /// </summary>
        public override string ToString()
        {
            return LocationString;
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v174/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v174
{
    // TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23

    /// <summary>
    /// Simple data class that holds information about a Prefix.
    /// </summary>
    public class PrefixInfo : IEquatable<PrefixInfo>
    {
        public string FilePath { get; }
        public bool HasFilePath => FilePath != null;

        private readonly int? _fileNumber;
        // 0 when no file number is set; check HasFileNumber first
        public int FileNumber => _fileNumber.GetValueOrDefault();
        public bool HasFileNumber => _fileNumber.HasValue;

        public string FileName { get; }
        public bool HasFileName => FileName != null;

        public bool HasFile => HasFileName || HasFileNumber;

        public string Sheet { get; }
        public bool HasSheet => Sheet != null;

        public string MultipleSheets { get; }
        public bool HasMultipleSheets => MultipleSheets != null;

        public bool IsQuoted { get; }

        public PrefixInfo(string sheet = null, int?
fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
        {
            Sheet = sheet;
            _fileNumber = fileNumber;
            FileName = fileName;
            FilePath = filePath;
            MultipleSheets = multipleSheets;
            IsQuoted = isQuoted;
        }

        /// <summary>
        /// Create a PrefixInfo class from a parse tree node
        /// </summary>
        /// <param name="prefix">A parse tree node of type Prefix</param>
        /// <exception cref="ArgumentException">The node is not a Prefix node</exception>
        internal static PrefixInfo From(ParseTreeNode prefix)
        {
            if (prefix.Type() != GrammarNames.Prefix)
            {
                throw new ArgumentException("Not a prefix", nameof(prefix));
            }

            string filePath = null;
            int? fileNumber = null;
            string fileName = null;
            string sheetName = null;
            string multipleSheets = null;

            // Token number we're processing
            var cur = 0;

            // Check for quotes
            var isQuoted = prefix.ChildNodes[cur].Is("'");
            if (isQuoted)
            {
                cur++;
            }

            // Check and process file
            if (prefix.ChildNodes[cur].Is(GrammarNames.File))
            {
                ParseTreeNode file = prefix.ChildNodes[cur];

                if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
                {
                    // Numeric filename: strip the surrounding brackets; stays null if it does not fit an int
                    fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
                }
                else
                {
                    // String filename
                    var iCur = 0;

                    // Check if it includes a path
                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                    {
                        filePath = file.ChildNodes[iCur].Print();
                        iCur++;
                    }

                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                    {
                        // Strip the surrounding brackets
                        fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                    }
                    else
                    {
                        fileName = file.ChildNodes[iCur].Print();
                    }
                }

                cur++;
            }

            // Check for a non-quoted sheet
            if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheet))
            {
                // remove !
                sheetName = Substr(prefix.ChildNodes[cur].Print(), 1);
            }
            // Check for a quoted sheet
            else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheetQuoted))
            {
                // remove quote and !
                sheetName = Substr(prefix.ChildNodes[cur].Print(), 2);

                if (sheetName == "")
                {
                    // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                    // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                    sheetName = " ";
                }
            }
            // Check if multiple sheets
            else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenMultipleSheets))
            {
                // remove !
                multipleSheets = Substr(prefix.ChildNodes[cur].Print(), 1);
            }

            return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
        }

        /// <summary>
        /// Extends a quoted sheet token backwards so that it includes the whitespace directly in front of it.
        /// </summary>
        /// <param name="quotedSheetNode">The quoted sheet node whose span and token text are adjusted in place</param>
        /// <param name="sourceText">The text the node's span positions refer to</param>
        internal static void FixQuotedSheetNodeForWhitespace(ParseTreeNode quotedSheetNode, string sourceText)
        {
            var newPosition = GetSheetNamePositionFromSourceText(quotedSheetNode, sourceText);
            SourceLocation currentLocation = quotedSheetNode.Span.Location;
            if (newPosition == currentLocation.Position)
            {
                return;
            }

            // The start moves towards the beginning of the text, so the column moves left by the same
            // number of characters. Clamped at 0 in case the whitespace contains a line break.
            var shift = currentLocation.Position - newPosition;
            var newColumn = Math.Max(0, currentLocation.Column - shift);
            var newLocation = new SourceLocation(newPosition, currentLocation.Line, newColumn);

            // The end position is unchanged, so the span grows by the shift
            quotedSheetNode.Span = new SourceSpan(newLocation, quotedSheetNode.Span.EndPosition - newPosition);

            // Cannot directly assign to quotedSheetNode.Token.Text; it is read-only. Falling back on reflection.
            typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
                ?.SetValue(quotedSheetNode.Token, sourceText.Substring(newPosition, quotedSheetNode.Span.Length));
        }

        /// <summary>
        /// Walks backwards from the node's start position over any directly preceding whitespace
        /// and returns the resulting position.
        /// </summary>
        private static int GetSheetNamePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
        {
            var startIndex = nodeSheetQuoted.Span.Location.Position;
            while (startIndex > 0 && char.IsWhiteSpace(sourceText[startIndex - 1]))
            {
                startIndex--;
            }

            return startIndex;
        }

        /// <summary>
        /// Returns <paramref name="s"/> without its first <paramref name="removeFirst"/> and
        /// last <paramref name="removeLast"/> characters.
        /// </summary>
        private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
        {
            return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
        }

        public override bool Equals(object other) => Equals(other as PrefixInfo);

        /// <summary>
        /// Two prefixes are equal when file number, path, file name, sheet and multiple sheets match,
        /// comparing strings case-insensitively. IsQuoted is not part of the comparison.
        /// </summary>
        public bool Equals(PrefixInfo other)
        {
            if (ReferenceEquals(null, other)) return false;
            if (ReferenceEquals(this, other)) return true;
            return _fileNumber == other._fileNumber
                   && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
        }

        public override int GetHashCode()
        {
            unchecked
            {
                var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
                hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
                hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
                hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
                hashCode = (hashCode*397) ^ (MultipleSheets != null ?
StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
                return hashCode;
            }
        }

        public static bool operator ==(PrefixInfo left, PrefixInfo right)
        {
            return Equals(left, right);
        }

        public static bool operator !=(PrefixInfo left, PrefixInfo right)
        {
            return !Equals(left, right);
        }

        /// <summary>
        /// Rebuilds the prefix text: optional quote, path, bracketed file number and/or name,
        /// sheet or sheet range, optional closing quote, and a trailing exclamation mark.
        /// </summary>
        public override string ToString()
        {
            var res = new StringBuilder();
            if (IsQuoted) res.Append("'");
            if (HasFilePath) res.Append(FilePath);
            if (HasFileNumber) res.Append($"[{FileNumber}]");
            if (HasFileName) res.Append($"[{FileName}]");
            if (HasSheet) res.Append(Sheet);
            if (HasMultipleSheets) res.Append(MultipleSheets);
            if (IsQuoted) res.Append("'");
            res.Append("!");
            return res.ToString();
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v174/WordsTerminal.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;

namespace XLParser.Web.XLParserVersions.v174
{
    /// <summary>
    /// Terminal that matches when the input at the current position starts with one of the expected words.
    /// </summary>
    /// <remarks>
    /// The words are stored in a character tree. Children of each node are represented as an array
    /// to allow direct indexation. Do not use for words that have a large difference between low
    /// and high character of a token.
    /// </remarks>
    public class WordsTerminal : Terminal
    {
        private readonly Node _rootNode;
        private readonly List<string> _words;
        private bool _caseSensitive;

        public WordsTerminal(string name, IEnumerable<string> words) : base(name)
        {
            _rootNode = new Node(0);
            _words = new List<string>(words);
        }

        /// <summary>
        /// Builds the word tree; words are upper-cased first when the grammar is not case sensitive.
        /// </summary>
        public override void Init(GrammarData grammarData)
        {
            base.Init(grammarData);
            _caseSensitive = Grammar.CaseSensitive;
            foreach (var word in _words)
            {
                AddWordToTree(_caseSensitive ? word : word.ToUpperInvariant());
            }

            if (EditorInfo == null)
            {
                EditorInfo = new TokenEditorInfo(TokenType.Unknown, TokenColor.Text, TokenTriggers.None);
            }
        }

        public override IList<string> GetFirsts() => _words;

        /// <summary>
        /// Matches the longest word that the input starts with at the preview position.
        /// </summary>
        /// <returns>A token covering the matched word, or null when no word matches</returns>
        public override Token TryMatch(ParsingContext context, ISourceStream source)
        {
            var node = _rootNode;
            // Deepest node on the walked path that ends a word. Tracking it keeps a shorter word
            // matchable when the input continues into a longer word it does not complete.
            var lastTerminal = _rootNode.IsTerminal ? _rootNode : null;
            var input = source.Text;
            for (var i = source.PreviewPosition; i < input.Length; ++i)
            {
                var c = _caseSensitive ? input[i] : char.ToUpperInvariant(input[i]);
                node = node[c];
                if (node is null)
                {
                    break;
                }

                if (node.IsTerminal)
                {
                    lastTerminal = node;
                }
            }

            if (lastTerminal is null)
            {
                return null;
            }

            // A node's Length is its depth in the tree, i.e. the number of matched characters
            source.PreviewPosition += lastTerminal.Length;
            return source.CreateToken(OutputTerminal);
        }

        private void AddWordToTree(string word)
        {
            var node = _rootNode;
            foreach (var c in word)
            {
                node = node.GetOrAddChild(c);
            }

            node.IsTerminal = true;
        }

        /// <summary>
        /// One node of the word tree. Children are held in an array covering the character
        /// range from _lowChar to _highChar.
        /// </summary>
        private class Node
        {
            private char _lowChar = '\0';
            private char _highChar = '\0';
            private Node[] _children;

            public Node(int length)
            {
                Length = length;
            }

            public bool IsTerminal { get; set; }

            public int Length { get; }

            /// <summary>
            /// The child for character <paramref name="c"/>, or null when there is none.
            /// </summary>
            public Node this[char c]
            {
                get
                {
                    if (_children is null)
                    {
                        return null;
                    }

                    if (c < _lowChar || c > _highChar)
                    {
                        return null;
                    }

                    return _children[c - _lowChar];
                }
            }

            internal Node GetOrAddChild(char c)
            {
                if (_children is null)
                {
                    var node = new Node(Length + 1);
                    _children = new[] { node };
                    _lowChar = c;
                    _highChar = c;
                    return node;
                }

                var newLowChar = (char)Math.Min(_lowChar, c);
                if (newLowChar != _lowChar)
                {
                    // Grow downwards: shift the existing children up and clear the freed slots
                    var newChildrenCount = _highChar - newLowChar + 1;
                    Array.Resize(ref _children, newChildrenCount);
                    var ofs = _lowChar - newLowChar;
                    Array.Copy(_children, 0, _children, ofs, newChildrenCount - ofs);
                    Array.Clear(_children, 0, ofs);
                    _lowChar = newLowChar;
                    return _children[0] = new Node(Length + 1);
                }

                var newHighChar = (char)Math.Max(_highChar, c);
                if (newHighChar != _highChar)
                {
                    // Grow upwards: the new child takes the last slot
                    var newChildrenCount = newHighChar - _lowChar + 1;
                    Array.Resize(ref _children, newChildrenCount);
                    _highChar = newHighChar;
                    return _children[newChildrenCount - 1] = new Node(Length + 1);
                }

                var charIdx = c - _lowChar;
                var child = _children[charIdx];
                if (child is null)
                {
                    return _children[charIdx] = new Node(Length + 1);
                }

                return child;
            }
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v175/ExcelFormulaGrammar.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;

namespace XLParser.Web.XLParserVersions.v175
{
    /// <summary>
    /// Contains the XLParser grammar
    /// </summary>
    [Language("Excel Formulas", "1.7.5", "Grammar for Excel Formulas")]
    public class ExcelFormulaGrammar : Grammar
    {
        #region 1-Terminals

        #region Symbols and operators

        public Terminal at => ToTerm("@");
        public Terminal comma => ToTerm(",");
        public Terminal colon => ToTerm(":");
        public Terminal hash => ToTerm("#");
        public Terminal semicolon => ToTerm(";");
        public Terminal OpenParen => ToTerm("(");
        public Terminal CloseParen => ToTerm(")");
        public Terminal CloseSquareParen => ToTerm("]");
        public Terminal OpenSquareParen => ToTerm("[");
        public Terminal exclamationMark => ToTerm("!");
        public Terminal CloseCurlyParen => ToTerm("}");
        public Terminal OpenCurlyParen => ToTerm("{");
        public Terminal QuoteS => ToTerm("'");

        public Terminal mulop => ToTerm("*");
        public Terminal plusop => ToTerm("+");
        public Terminal divop => ToTerm("/");
        public Terminal minop => ToTerm("-");
        public Terminal concatop => ToTerm("&");
        public Terminal expop => ToTerm("^");

        // Intersect op is a single space, which cannot be parsed normally so we need an ImpliedSymbolTerminal
        // Attention: ImpliedSymbolTerminal seems to break if you assign it a priority, and its default priority is low
        public Terminal intersectop { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenIntersect);

        public Terminal percentop => ToTerm("%");

        public Terminal gtop => ToTerm(">");
        public Terminal eqop => ToTerm("=");
        public Terminal ltop => ToTerm("<");
        public Terminal neqop
=> ToTerm("<>"); public Terminal gteop => ToTerm(">="); public Terminal lteop => ToTerm("<="); #endregion #region Literals public Terminal BoolToken { get; } = new RegexBasedTerminal(GrammarNames.TokenBool, "TRUE|FALSE", "T", "F") { Priority = TerminalPriority.Bool }; public Terminal NumberToken { get; } = new NumberLiteral(GrammarNames.TokenNumber, NumberOptions.AllowStartEndDot) { DefaultIntTypes = new[] { TypeCode.Int32, TypeCode.Int64, NumberLiteral.TypeCodeBigInt } }; public Terminal TextToken { get; } = new StringLiteral(GrammarNames.TokenText, "\"", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes); public Terminal SingleQuotedStringToken { get; } = new StringLiteral(GrammarNames.TokenSingleQuotedString, "'", StringOptions.AllowsDoubledQuote | StringOptions.AllowsLineBreak | StringOptions.NoEscapes) { Priority = TerminalPriority.SingleQuotedString }; public Terminal ErrorToken { get; } = new RegexBasedTerminal(GrammarNames.TokenError, "#NULL!|#DIV/0!|#VALUE!|#NAME\\?|#NUM!|#N/A|#GETTING_DATA|#SPILL!", "#"); public Terminal RefErrorToken => ToTerm("#REF!", GrammarNames.TokenRefError); #endregion #region Functions private const string SpecialUdfChars = "¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×÷"; // Non-word characters from ISO 8859-1 that are allowed in VBA identifiers private const string AllUdfChars = SpecialUdfChars + @"\\.\w"; private const string UdfPrefixRegex = @"('[^<>""/\|?*]+\.xla'!|_xll\.)"; // The following regex uses the rather exotic feature Character Class Subtraction // https://docs.microsoft.com/en-us/dotnet/standard/base-types/character-classes-in-regular-expressions#CharacterClassSubtraction private static readonly string UdfTokenRegex = $@"([{AllUdfChars}-[CcRr]]|{UdfPrefixRegex}[{AllUdfChars}]|{UdfPrefixRegex}?[{AllUdfChars}]{{2,1023}})\("; public Terminal UDFToken { get; } = new RegexBasedTerminal(GrammarNames.TokenUDF, UdfTokenRegex) { Priority = TerminalPriority.UDF }; public Terminal 
ExcelRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelRefFunction, "(INDEX|OFFSET|INDIRECT)\\(", "I", "O") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelConditionalRefFunctionToken { get; } = new RegexBasedTerminal(GrammarNames.TokenExcelConditionalRefFunction, "(IF|CHOOSE)\\(", "I", "C") { Priority = TerminalPriority.ExcelRefFunction }; public Terminal ExcelFunction { get; } = new WordsTerminal(GrammarNames.ExcelFunction, excelFunctionList.Select(f => f + '(')) { Priority = TerminalPriority.ExcelFunction }; // Using this instead of Empty allows a more accurate tree public Terminal EmptyArgumentToken { get; } = new ImpliedSymbolTerminal(GrammarNames.TokenEmptyArgument); #endregion #region References and names private const string ColumnPattern = @"(?:[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D]|[A-Z]{1,2})"; private const string RowPattern = @"(?:104857[0-6]|10485[0-6][0-9]|1048[0-4][0-9]{2}|104[0-7][0-9]{3}|10[0-3][0-9]{4}|[1-9][0-9]{1,5}|[1-9])"; private static readonly string[] ColumnPrefix = Enumerable.Range('A', 'Z' - 'A' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray(); private static readonly string[] RowPrefix = Enumerable.Range('1', '9' - '1' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray(); public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern, ColumnPrefix); public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?" + RowPattern + ":[$]?" + RowPattern, RowPrefix); private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?" 
+ RowPattern;
        public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex, ColumnPrefix)
        {
            Priority = TerminalPriority.CellToken
        };

        private static readonly HashSet<UnicodeCategory> UnicodeLetterCategories = new HashSet<UnicodeCategory>
        {
            UnicodeCategory.UppercaseLetter,
            UnicodeCategory.LowercaseLetter,
            UnicodeCategory.TitlecaseLetter,
            UnicodeCategory.ModifierLetter,
            UnicodeCategory.OtherLetter
        };

        // 48718 letters, but it allows parser to from tokens starting with digits, parentheses, operators...
        private static readonly string[] UnicodeLetters = Enumerable.Range(0, ushort.MaxValue).Where(codePoints => UnicodeLetterCategories.Contains(CharUnicodeInfo.GetUnicodeCategory((char)codePoints))).Select(codePoint => char.ToString((char)codePoint)).ToArray();
        private static readonly string[] NameStartCharPrefix = UnicodeLetters.Concat(new[] { @"\", "_" }).ToArray();

        // Start with a letter or underscore, continue with word character (letters, numbers and underscore), dot or question mark
        private const string NameStartCharRegex = @"[\p{L}\\_]";
        private const string NameValidCharacterRegex = @"[\w\\_\.\?€]";

        public Terminal NameToken { get; } = new RegexBasedTerminal(GrammarNames.TokenName, NameStartCharRegex + NameValidCharacterRegex + "*", NameStartCharPrefix)
        {
            Priority = TerminalPriority.Name
        };

        // Words that are valid names, but are disallowed by Excel. E.g. "A1" is a valid name, but it is not because it is also a cell reference.
        // If we ever parse R1C1 references, make sure to include them here
        // TODO: Add all function names here
        private const string NamedRangeCombinationRegex =
            "((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
            // \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
            + "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
            // allow large cell references (e.g. A1048577) as named range
            // The alternatives cover row numbers 1048577..9999999; the last one covers any number of
            // eight or more digits. It must be a digit class: a bare "d{8,}" only matches the letter d.
            + "|(" + ColumnPattern + @"(104857[7-9]|10485[89][0-9]|1048[6-9][0-9]{2}|1049[0-9]{3}|10[5-9][0-9]{4}|1[1-9][0-9]{5}|[2-9][0-9]{6}|[0-9]{8,})" + NameValidCharacterRegex + "*)"
            ;

        // To prevent e.g. "A1A1" being parsed as 2 cell tokens
        public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NamedRangeCombinationRegex, ColumnPrefix.Concat(new[] { "T", "F" }).ToArray())
        {
            Priority = TerminalPriority.NamedRangeCombination
        };

        public Terminal ReservedNameToken = new RegexBasedTerminal(GrammarNames.TokenReservedName, @"_xlnm\.[a-zA-Z_]+", "_")
        {
            Priority = TerminalPriority.ReservedName
        };

        #region Structured References

        private const string SRSpecifierRegex = @"#(All|Data|Headers|Totals|This Row)";
        public Terminal SRSpecifierToken = new RegexBasedTerminal(GrammarNames.TokenSRSpecifier, SRSpecifierRegex, "#")
        {
            Priority = TerminalPriority.StructuredReference
        };

        private const string SRColumnRegex = @"(?:[^\[\]'#@]|(?:'['\[\]#@]))+";
        public Terminal SRColumnToken = new RegexBasedTerminal(GrammarNames.TokenSRColumn, SRColumnRegex)
        {
            Priority = TerminalPriority.StructuredReference
        };

        #endregion

        #region Prefixes

        private const string mustBeQuotedInSheetName = @"\(\);{}#""=<>&+\-*/\^%, ";
        private const string notSheetNameChars = @"'*\[\]\\:/?";

        //const string singleQuotedContent = @"\w !@#$%^&*()\-\+={}|:;<>,\./\?"
+ "\\\""; //const string sheetRegEx = @"(([\w\.]+)|('([" + singleQuotedContent + @"]|'')+'))!"; private static readonly string normalSheetName = $"[^{notSheetNameChars}{mustBeQuotedInSheetName}]+"; private static readonly string quotedSheetName = $"([^{notSheetNameChars}]|'')*"; //private static readonly string sheetRegEx = $"(({normalSheetName})|('{quotedSheetName}'))!"; public Terminal SheetToken = new RegexBasedTerminal(GrammarNames.TokenSheet, $"{normalSheetName}!") { Priority = TerminalPriority.SheetToken }; public Terminal SheetQuotedToken = new RegexBasedTerminal(GrammarNames.TokenSheetQuoted, $"{quotedSheetName}'!") { Priority = TerminalPriority.SheetQuotedToken }; private static readonly string multiSheetRegex = $"{normalSheetName}:{normalSheetName}!"; private static readonly string multiSheetQuotedRegex = $"{quotedSheetName}:{quotedSheetName}'!"; public Terminal MultipleSheetsToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheets, multiSheetRegex) { Priority = TerminalPriority.MultipleSheetsToken }; public Terminal MultipleSheetsQuotedToken = new RegexBasedTerminal(GrammarNames.TokenMultipleSheetsQuoted, multiSheetQuotedRegex) { Priority = TerminalPriority.MultipleSheetsToken }; private const string fileNameNumericRegex = @"\[[0-9]+\](?!,)(?=.*!)"; public Terminal FileNameNumericToken = new RegexBasedTerminal(GrammarNames.TokenFileNameNumeric, fileNameNumericRegex, "[") { Priority = TerminalPriority.FileNameNumericToken }; private static readonly string fileNameInBracketsRegex = @"\[[^\[\]]+\]" + $"(?={normalSheetName}!|{quotedSheetName}'!|{multiSheetRegex}|{multiSheetQuotedRegex}|!)"; public Terminal FileNameEnclosedInBracketsToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFileNameEnclosedInBrackets, fileNameInBracketsRegex, "[") { Priority = TerminalPriority.FileName }; // Source: https://stackoverflow.com/a/14632579 private const string fileNameRegex = @"[^\.\\\[\]]+\.[a-zA-z]{1,4}"; public Terminal FileName { get; } = new 
RegexBasedTerminal(GrammarNames.TokenFileName, fileNameRegex) { Priority = TerminalPriority.FileName }; // Source: http://stackoverflow.com/a/6416209/572635 private const string windowsFilePathRegex = @"(?:[a-zA-Z]:|\\?\\?[\w\-.$ @~]+)\\(([^<>\"" /\|?*\\']|( |''))*\\)*"; private const string urlPathRegex = @"https?\:(//|\\\\)[\p{L}\p{N}\-_.]+(:[0-9]+)?(/|\\)([\p{L}\p{N}\-_.?,'+&%\$# ()~]*(/|\\))*"; private const string filePathRegex = @"(" + windowsFilePathRegex + @"|" + urlPathRegex + @")"; public Terminal FilePathToken { get; } = new RegexBasedTerminal(GrammarNames.TokenFilePath, filePathRegex) { Priority = TerminalPriority.FileNamePath }; #endregion #endregion #endregion #region 2-NonTerminals // Most non-terminals are first defined here, so they can be used anywhere in the rules // Otherwise you can only use non-terminals that have been defined previously public NonTerminal Argument{ get; } = new NonTerminal(GrammarNames.Argument); public NonTerminal Arguments{ get; } = new NonTerminal(GrammarNames.Arguments); public NonTerminal ArrayColumns{ get; } = new NonTerminal(GrammarNames.ArrayColumns); public NonTerminal ArrayConstant{ get; } = new NonTerminal(GrammarNames.ArrayConstant); public NonTerminal ArrayFormula{ get; } = new NonTerminal(GrammarNames.ArrayFormula); public NonTerminal ArrayRows{ get; } = new NonTerminal(GrammarNames.ArrayRows); public NonTerminal Bool{ get; } = new NonTerminal(GrammarNames.Bool); public NonTerminal Cell{ get; } = new NonTerminal(GrammarNames.Cell); public NonTerminal Constant{ get; } = new NonTerminal(GrammarNames.Constant); public NonTerminal ConstantArray{ get; } = new NonTerminal(GrammarNames.ConstantArray); public NonTerminal DynamicDataExchange{ get; } = new NonTerminal(GrammarNames.DynamicDataExchange); public NonTerminal EmptyArgument{ get; } = new NonTerminal(GrammarNames.EmptyArgument); public NonTerminal Error{ get; } = new NonTerminal(GrammarNames.Error); public NonTerminal File { get; } = new 
NonTerminal(GrammarNames.File); public NonTerminal Formula{ get; } = new NonTerminal(GrammarNames.Formula); public NonTerminal FormulaWithEq{ get; } = new NonTerminal(GrammarNames.FormulaWithEq); public NonTerminal FunctionCall{ get; } = new NonTerminal(GrammarNames.FunctionCall); public NonTerminal FunctionName{ get; } = new NonTerminal(GrammarNames.FunctionName); public NonTerminal HRange{ get; } = new NonTerminal(GrammarNames.HorizontalRange); public NonTerminal InfixOp{ get; } = new NonTerminal(GrammarNames.TransientInfixOp); public NonTerminal MultiRangeFormula{ get; } = new NonTerminal(GrammarNames.MultiRangeFormula); public NonTerminal NamedRange{ get; } = new NonTerminal(GrammarNames.NamedRange); public NonTerminal Number{ get; } = new NonTerminal(GrammarNames.Number); public NonTerminal PostfixOp{ get; } = new NonTerminal(GrammarNames.TransientPostfixOp); public NonTerminal Prefix{ get; } = new NonTerminal(GrammarNames.Prefix); public NonTerminal PrefixOp{ get; } = new NonTerminal(GrammarNames.TransientPrefixOp); public NonTerminal QuotedFileSheet{ get; } = new NonTerminal(GrammarNames.QuotedFileSheet); public NonTerminal Reference{ get; } = new NonTerminal(GrammarNames.Reference); public NonTerminal ReferenceItem{ get; } = new NonTerminal(GrammarNames.TransientReferenceItem); public NonTerminal ReferenceFunctionCall{ get; } = new NonTerminal(GrammarNames.ReferenceFunctionCall); public NonTerminal RefError{ get; } = new NonTerminal(GrammarNames.RefError); public NonTerminal RefFunctionName{ get; } = new NonTerminal(GrammarNames.RefFunctionName); public NonTerminal ReservedName{ get; } = new NonTerminal(GrammarNames.ReservedName); public NonTerminal Sheet{ get; } = new NonTerminal(GrammarNames.Sheet); public NonTerminal Start{ get; } = new NonTerminal(GrammarNames.TransientStart); public NonTerminal StructuredReference { get; } = new NonTerminal(GrammarNames.StructuredReference); public NonTerminal StructuredReferenceColumn { get; } = new 
NonTerminal(GrammarNames.StructuredReferenceColumn); public NonTerminal StructuredReferenceExpression { get; } = new NonTerminal(GrammarNames.StructuredReferenceExpression); public NonTerminal StructuredReferenceSpecifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceSpecifier); public NonTerminal StructuredReferenceQualifier { get; } = new NonTerminal(GrammarNames.StructuredReferenceQualifier); public NonTerminal Text{ get; } = new NonTerminal(GrammarNames.Text); public NonTerminal UDFName{ get; } = new NonTerminal(GrammarNames.UDFName); public NonTerminal UDFunctionCall{ get; } = new NonTerminal(GrammarNames.UDFunctionCall); public NonTerminal Union{ get; } = new NonTerminal(GrammarNames.Union); public NonTerminal VRange{ get; } = new NonTerminal(GrammarNames.VerticalRange); #endregion public ExcelFormulaGrammar() { #region Punctuation MarkPunctuation(OpenParen, CloseParen); MarkPunctuation(OpenCurlyParen, CloseCurlyParen); #endregion #region Rules #region Base rules Root = Start; Start.Rule = FormulaWithEq | Formula | ArrayFormula | MultiRangeFormula ; MarkTransient(Start); ArrayFormula.Rule = OpenCurlyParen + eqop + Formula + CloseCurlyParen; MultiRangeFormula.Rule = eqop + Union; FormulaWithEq.Rule = eqop + Formula; Formula.Rule = Reference + ReduceHere() | Constant | FunctionCall | ConstantArray | OpenParen + Formula + CloseParen | ReservedName ; ReservedName.Rule = ReservedNameToken; Constant.Rule = Number | Text | Bool | Error ; Text.Rule = TextToken; Number.Rule = NumberToken; Bool.Rule = BoolToken; Error.Rule = ErrorToken; RefError.Rule = RefErrorToken; #endregion #region Functions FunctionCall.Rule = FunctionName + Arguments + CloseParen | PrefixOp + Formula | Formula + PostfixOp | Formula + InfixOp + Formula ; FunctionName.Rule = ExcelFunction; Arguments.Rule = MakeStarRule(Arguments, comma, Argument); EmptyArgument.Rule = EmptyArgumentToken; Argument.Rule = Formula | EmptyArgument; PrefixOp.Rule = 
ImplyPrecedenceHere(Precedence.UnaryPreFix) + plusop | ImplyPrecedenceHere(Precedence.UnaryPreFix) + minop | ImplyPrecedenceHere(Precedence.UnaryPreFix) + at; MarkTransient(PrefixOp); InfixOp.Rule = expop | mulop | divop | plusop | minop | concatop | gtop | eqop | ltop | neqop | gteop | lteop; MarkTransient(InfixOp); // ImplyPrecedenceHere doesn't seem to work for this rule, but postfix has such a high priority shift will nearly always be the correct action PostfixOp.Rule = PreferShiftHere() + percentop; MarkTransient(PostfixOp); #endregion #region References Reference.Rule = ReferenceItem | ReferenceFunctionCall | OpenParen + Reference + PreferShiftHere() + CloseParen | Prefix + ReferenceItem | DynamicDataExchange ; ReferenceFunctionCall.Rule = Reference + colon + Reference | Reference + intersectop + Reference | OpenParen + Union + CloseParen | RefFunctionName + Arguments + CloseParen | Reference + hash | exclamationMark + Reference ; RefFunctionName.Rule = ExcelRefFunctionToken | ExcelConditionalRefFunctionToken; Union.Rule = MakePlusRule(Union, comma, Reference); ReferenceItem.Rule = Cell | NamedRange | VRange | HRange | RefError | UDFunctionCall | StructuredReference ; MarkTransient(ReferenceItem); UDFunctionCall.Rule = UDFName + Arguments + CloseParen; UDFName.Rule = UDFToken; VRange.Rule = VRangeToken; HRange.Rule = HRangeToken; Cell.Rule = CellToken; File.Rule = FileNameNumericToken | FileNameEnclosedInBracketsToken | FilePathToken + FileNameEnclosedInBracketsToken | FilePathToken + FileName ; DynamicDataExchange.Rule = File + exclamationMark + SingleQuotedStringToken; NamedRange.Rule = NameToken | NamedRangeCombinationToken; Prefix.Rule = SheetToken | QuoteS + SheetQuotedToken | File + SheetToken | QuoteS + File + SheetQuotedToken | File + exclamationMark | MultipleSheetsToken | QuoteS + MultipleSheetsQuotedToken | File + MultipleSheetsToken | QuoteS + File + MultipleSheetsQuotedToken | RefErrorToken ; StructuredReferenceQualifier.Rule = NameToken; 
StructuredReferenceSpecifier.Rule = SRSpecifierToken | at | OpenSquareParen + SRSpecifierToken + CloseSquareParen; StructuredReferenceColumn.Rule = SRColumnToken | OpenSquareParen + SRColumnToken + CloseSquareParen; StructuredReferenceExpression.Rule = StructuredReferenceColumn | StructuredReferenceColumn + colon + StructuredReferenceColumn | at + StructuredReferenceColumn | at + StructuredReferenceColumn + colon + StructuredReferenceColumn | StructuredReferenceSpecifier | StructuredReferenceSpecifier + comma + StructuredReferenceColumn | StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn | StructuredReferenceSpecifier + comma + StructuredReferenceSpecifier + comma + StructuredReferenceColumn + colon + StructuredReferenceColumn ; StructuredReference.Rule = OpenSquareParen + StructuredReferenceExpression + CloseSquareParen | StructuredReferenceQualifier + OpenSquareParen + CloseSquareParen | StructuredReferenceQualifier + OpenSquareParen + StructuredReferenceExpression + CloseSquareParen ; #endregion #region Arrays ConstantArray.Rule = OpenCurlyParen + ArrayColumns + CloseCurlyParen; ArrayColumns.Rule = MakePlusRule(ArrayColumns, semicolon, ArrayRows); ArrayRows.Rule = MakePlusRule(ArrayRows, comma, ArrayConstant); ArrayConstant.Rule = Constant | PrefixOp + Number | RefError; #endregion #endregion #region 5-Operator Precedence // Some of these operators are neutral associative instead of left associative, // but this ensures a consistent parse tree. As a lot of code is "hardcoded" onto the specific // structure of the parse tree, we like consistency. 
RegisterOperators(Precedence.Comparison, Associativity.Left, eqop, ltop, gtop, lteop, gteop, neqop); RegisterOperators(Precedence.Concatenation, Associativity.Left, concatop); RegisterOperators(Precedence.Addition, Associativity.Left, plusop, minop); RegisterOperators(Precedence.Multiplication, Associativity.Left, mulop, divop); RegisterOperators(Precedence.Exponentiation, Associativity.Left, expop); RegisterOperators(Precedence.UnaryPostFix, Associativity.Left, percentop, hash); RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, at); RegisterOperators(Precedence.UnaryPreFix, Associativity.Left, exclamationMark); RegisterOperators(Precedence.Union, Associativity.Left, comma); RegisterOperators(Precedence.Intersection, Associativity.Left, intersectop); RegisterOperators(Precedence.Range, Associativity.Left, colon); #endregion } #region Precedence and Priority constants // Source: https://support.office.com/en-us/article/Calculation-operators-and-precedence-48be406d-4975-4d31-b2b8-7af9e0e2878a // Could also be an enum, but this way you don't need int casts private static class Precedence { // Don't use priority 0, Irony seems to view it as no priority set public const int Comparison = 1; public const int Concatenation = 2; public const int Addition = 3; public const int Multiplication = 4; public const int Exponentiation = 5; public const int UnaryPostFix = 6; public const int UnaryPreFix = 7; //public const int Reference = 8; public const int Union = 9; public const int Intersection = 10; public const int Range = 11; } // Terminal priorities, indicates to lexer which token it should pick when multiple tokens can match // E.g. "A1" is both a CellToken and NamedRange, pick cell token because it has a higher priority // E.g. 
"A1Blah" Is Both a CellToken + NamedRange, NamedRange and NamedRangeCombination, pick NamedRangeCombination private static class TerminalPriority { // Irony Low value //public const int Low = -1000; public const int Name = -800; public const int ReservedName = -700; public const int StructuredReference = -500; public const int FileName = -500; public const int FileNamePath = -800; public const int SingleQuotedString = -100; // Irony Normal value, default value //public const int Normal = 0; public const int Bool = 0; public const int MultipleSheetsToken = 100; // Irony High value //public const int High = 1000; public const int CellToken = 1000; public const int NamedRangeCombination = 1100; public const int UDF = 1150; public const int ExcelFunction = 1200; public const int ExcelRefFunction = 1200; public const int FileNameNumericToken = 1200; public const int SheetToken = 1200; public const int SheetQuotedToken = 1200; } #endregion private static string[] excelFunctionList => GetExcelFunctionList(); private static string[] GetExcelFunctionList() { var resource = Properties.Resources.ExcelBuiltinFunctionList_v175; using (var sr = new StringReader(resource)) return sr.ReadToEnd().Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries); } } #region Names /// /// Collection of names used for terminals and non-terminals in the Excel Formula Grammar. /// /// /// Using these is strongly recommended, as these will change when breaking changes occur. /// It also allows you to see which code works on what grammar constructs. /// // Keep these constants instead of methods/properties, since that allows them to be used in switch statements. 
public static class GrammarNames
{
    // Names of the grammar's non-terminals. The grammar builds its NonTerminal instances from these
    // constants (e.g. new NonTerminal(GrammarNames.Text)) and the parser helpers compare node types against them.
    // Note that HorizontalRange and VerticalRange map to the shorter names "HRange" and "VRange".
    #region Non-Terminals
    public const string Argument = "Argument";
    public const string Arguments = "Arguments";
    public const string ArrayColumns = "ArrayColumns";
    public const string ArrayConstant = "ArrayConstant";
    public const string ArrayFormula = "ArrayFormula";
    public const string ArrayRows = "ArrayRows";
    public const string Bool = "Bool";
    public const string Cell = "Cell";
    public const string Constant = "Constant";
    public const string ConstantArray = "ConstantArray";
    public const string DynamicDataExchange = "DynamicDataExchange";
    public const string EmptyArgument = "EmptyArgument";
    public const string Error = "Error";
    public const string ExcelFunction = "ExcelFunction";
    public const string File = "File";
    public const string Formula = "Formula";
    public const string FormulaWithEq = "FormulaWithEq";
    public const string FunctionCall = "FunctionCall";
    public const string FunctionName = "FunctionName";
    public const string HorizontalRange = "HRange";
    public const string MultiRangeFormula = "MultiRangeFormula";
    public const string NamedRange = "NamedRange";
    public const string Number = "Number";
    public const string Prefix = "Prefix";
    public const string QuotedFileSheet = "QuotedFileSheet";
    public const string Range = "Range";
    public const string Reference = "Reference";
    public const string ReferenceFunctionCall = "ReferenceFunctionCall";
    public const string RefError = "RefError";
    public const string RefFunctionName = "RefFunctionName";
    public const string ReservedName = "ReservedName";
    public const string Sheet = "Sheet";
    public const string StructuredReference = "StructuredReference";
    public const string StructuredReferenceColumn = "StructuredReferenceColumn";
    public const string StructuredReferenceExpression = "StructuredReferenceExpression";
    public const string StructuredReferenceSpecifier = "StructuredReferenceSpecifier";
    public const string StructuredReferenceQualifier = "StructuredReferenceQualifier";
    public const string Text = "Text";
    public const string UDFName = "UDFName";
    public const string UDFunctionCall = "UDFunctionCall";
    public const string Union = "Union";
    public const string VerticalRange = "VRange";
    #endregion

    // Names of the non-terminals the grammar constructor passes to MarkTransient
    // (Start, InfixOp, PostfixOp, PrefixOp, ReferenceItem).
    #region Transient Non-Terminals
    public const string TransientStart = "Start";
    public const string TransientInfixOp = "InfixOp";
    public const string TransientPostfixOp = "PostfixOp";
    public const string TransientPrefixOp = "PrefixOp";
    public const string TransientReferenceItem = "ReferenceItem";
    #endregion

    // Names of the terminals (tokens).
    // TokenIntersect is the node type ParseToTree searches for when it corrects intersection spans;
    // TokenUnionOperator is the operator name GetFunction reports for union nodes.
    #region Terminals
    public const string TokenBool = "BoolToken";
    public const string TokenCell = "CellToken";
    public const string TokenEmptyArgument = "EmptyArgumentToken";
    public const string TokenError = "ErrorToken";
    public const string TokenExcelRefFunction = "ExcelRefFunctionToken";
    public const string TokenExcelConditionalRefFunction = "ExcelConditionalRefFunctionToken";
    public const string TokenFilePath = "FilePathToken";
    public const string TokenFileName = "FileNameToken";
    public const string TokenFileNameEnclosedInBrackets = "FileNameEnclosedInBracketsToken";
    public const string TokenFileNameNumeric = "FileNameNumericToken";
    public const string TokenHRange = "HRangeToken";
    public const string TokenIntersect = "INTERSECT";
    public const string TokenMultipleSheets = "MultipleSheetsToken";
    public const string TokenMultipleSheetsQuoted = "MultipleSheetsQuotedToken";
    public const string TokenName = "NameToken";
    public const string TokenNamedRangeCombination = "NamedRangeCombinationToken";
    public const string TokenNumber = "NumberToken";
    public const string TokenRefError = "RefErrorToken";
    public const string TokenReservedName = "ReservedNameToken";
    public const string TokenSingleQuotedString = "SingleQuotedString";
    public const string TokenSheet = "SheetNameToken";
    public const string TokenSheetQuoted = "SheetNameQuotedToken";
    public const string TokenSRColumn = "SRColumnToken";
    public const string TokenSRSpecifier = "SRSpecifierToken";
    public const string TokenText = "TextToken";
    public const string TokenUDF = "UDFToken";
    public const string TokenUnionOperator = ",";
    public const string TokenVRange = "VRangeToken";
    #endregion
}
#endregion
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v175/ExcelFormulaParser.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v175
{
/// /// Excel formula parser
/// Contains the parser and utilities that operate directly on the parse tree, or make working with the parse tree easier. ///
public static class ExcelFormulaParser
{
    /// <summary>
    /// Thread-local singleton parser instance
    /// </summary>
    [ThreadStatic] private static Parser _p;

    /// <summary>
    /// Thread-safe parser: the backing field is [ThreadStatic], so each thread lazily creates and then reuses its own instance.
    /// </summary>
    private static Parser P => _p ?? (_p = new Parser(new ExcelFormulaGrammar()));

    /// <summary>
    /// Parse a formula, return the tree's root node
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">
    /// If formula could not be parsed
    /// </exception>
    /// <returns>Parse tree root node</returns>
    public static ParseTreeNode Parse(string input)
    {
        return ParseToTree(input).Root;
    }

    /// <summary>
    /// Parse a formula, return the tree
    /// </summary>
    /// <param name="input">The formula to be parsed.</param>
    /// <exception cref="ArgumentException">
    /// If formula could not be parsed
    /// </exception>
    /// <returns>Parse tree</returns>
    public static ParseTree ParseToTree(string input)
    {
        var tree = P.Parse(input);

        if (tree.HasErrors())
        {
            throw new ArgumentException("Failed parsing input <<" + input + ">>");
        }

        // Walk the tree once. The two fix-up passes below only rewrite spans/token text,
        // never the shape of the tree or the node types used for filtering.
        var allNodes = tree.Root.AllNodes().ToList();

        // Intersection tokens: move the span one position (and one column) back and make it one character wide.
        foreach (ParseTreeNode intersect in allNodes.Where(node => node.Is(GrammarNames.TokenIntersect)))
        {
            var newLocation = new SourceLocation(intersect.Span.Location.Position - 1, intersect.Span.Location.Line, intersect.Span.Location.Column - 1);
            intersect.Span = new SourceSpan(newLocation, 1);
        }

        //Quoted sheets and SR Columns require the preceding whitespaces skipped by Irony as they are needed for unique sheet and column names
        var precedingWhiteSpaceNodes = allNodes.Where(node => node.Is(GrammarNames.TokenSheetQuoted) || node.Is(GrammarNames.TokenSRColumn));
        foreach (ParseTreeNode precedingWhiteSpaceNode in precedingWhiteSpaceNodes)
        {
            PrefixInfo.FixPrecedingWhiteSpaces(precedingWhiteSpaceNode, input);
        }

        return tree;
    }

    /// <summary>
    /// All nodes (terminals included) in depth-first pre-order, with a conditional stop
    /// </summary>
    /// <param name="root">The root node</param>
    /// <param name="stopAt">Don't process the children of a node matching this predicate</param>
    // inspiration taken from https://irony.codeplex.com/discussions/213938
    public static IEnumerable<ParseTreeNode> AllNodesConditional(this ParseTreeNode root, Predicate<ParseTreeNode> stopAt = null)
    {
        var stack = new Stack<ParseTreeNode>();
        stack.Push(root);
        while (stack.Count > 0)
        {
            var node = stack.Pop();
            yield return node;

            // Check if we don't want to process the children of this node
            if (stopAt != null && stopAt(node)) continue;

            var children = node.ChildNodes;
            // Push children on in reverse order so that they will
            // be evaluated left -> right when popped.
            for (int i = children.Count - 1; i >= 0; i--)
            {
                stack.Push(children[i]);
            }
        }
    }

    /// <summary>
    /// All nodes in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root)
    {
        return AllNodesConditional(root);
    }

    /// <summary>
    /// All nodes of a certain type in depth-first pre-order
    /// </summary>
    public static IEnumerable<ParseTreeNode> AllNodes(this ParseTreeNode root, string type)
    {
        return AllNodes(root.AllNodes(), type);
    }

    /// <summary>
    /// Filters an already enumerated node sequence down to the nodes of the given type
    /// </summary>
    internal static IEnumerable<ParseTreeNode> AllNodes(IEnumerable<ParseTreeNode> allNodes, string type)
    {
        return allNodes.Where(node => node.Is(type));
    }

    /// <summary>
    /// Get the parent node of a node
    /// </summary>
    /// <remarks>
    /// This is an expensive operation, as the whole tree will be searched through
    /// </remarks>
    /// <exception cref="ArgumentException">If no node below <paramref name="treeRoot"/> has the child as a direct child</exception>
    public static ParseTreeNode Parent(this ParseTreeNode child, ParseTreeNode treeRoot)
    {
        var parent = treeRoot.AllNodes()
            .FirstOrDefault(node => node.ChildNodes.Any(c => c == child));
        if (parent == null) throw new ArgumentException("Child is not part of the tree", nameof(child));
        return parent;
    }

    /// <summary>
    /// The node type/name
    /// </summary>
    public static string Type(this ParseTreeNode node)
    {
        return node.Term.Name;
    }

    /// <summary>
    /// Check if a node is of a particular type
    /// </summary>
    public static bool Is(this ParseTreeNode pt, string type)
    {
        return pt.Type() == type;
    }

    /// <summary>
    /// Checks whether this node is a function
    /// </summary>
    public static Boolean IsFunction(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.FunctionCall)
               || input.Is(GrammarNames.ReferenceFunctionCall)
               || input.Is(GrammarNames.UDFunctionCall)
               // This gives potential problems/duplication on external UDFs, but they are so rare that I think this is acceptable
               || (input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsFunction())
            ;
    }

    ///
    /// Whether or not this
/// node represents parentheses "(_)"
    ///
    public static bool IsParentheses(this ParseTreeNode input)
    {
        switch (input.Type())
        {
            case GrammarNames.Formula:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Formula);
            case GrammarNames.Reference:
                return input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.Reference);
            default:
                return false;
        }
    }

    /// <summary>
    /// True for function nodes with three children whose middle child is an operator term
    /// </summary>
    public static bool IsBinaryOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 3
               && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// True for binary operations that are FunctionCall nodes
    /// </summary>
    public static bool IsBinaryNonReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.FunctionCall);
    }

    /// <summary>
    /// True for binary operations that are ReferenceFunctionCall nodes
    /// </summary>
    public static bool IsBinaryReferenceOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() && input.Is(GrammarNames.ReferenceFunctionCall);
    }

    /// <summary>
    /// True for unary prefix as well as unary postfix operations
    /// </summary>
    public static bool IsUnaryOperation(this ParseTreeNode input)
    {
        return IsUnaryPrefixOperation(input) || IsUnaryPostfixOperation(input);
    }

    /// <summary>
    /// True for function nodes with two children whose first child is an operator term
    /// </summary>
    public static bool IsUnaryPrefixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 2
               && input.ChildNodes[0].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    /// <summary>
    /// True for function nodes with two children whose second child is an operator term
    /// </summary>
    public static bool IsUnaryPostfixOperation(this ParseTreeNode input)
    {
        return input.IsFunction()
               && input.ChildNodes.Count == 2
               && input.ChildNodes[1].Term.Flags.HasFlag(TermFlags.IsOperator);
    }

    // Drops the last character; GetFunction applies it to the printed function name node.
    private static string RemoveFinalSymbol(string input)
    {
        input = input.Substring(0, input.Length - 1);
        return input;
    }

    /// <summary>
    /// Get the function or operator name of this function call
    /// </summary>
    /// <exception cref="ArgumentException">If the node is not a function call or operation</exception>
    public static string GetFunction(this ParseTreeNode input)
    {
        if (input.IsIntersection())
        {
            return GrammarNames.TokenIntersect;
        }

        if (input.IsUnion())
        {
            return GrammarNames.TokenUnionOperator;
        }

        if (input.IsBinaryOperation() || input.IsUnaryPostfixOperation())
        {
            return input.ChildNodes[1].Print();
        }

        if (input.IsUnaryPrefixOperation())
        {
            return input.ChildNodes[0].Print();
        }

        if (input.IsNamedFunction())
        {
            // Function names are identifiers, not display text: upper-case them independently of the
            // current culture (a culture-sensitive ToUpper turns "if" into "İF" under Turkish casing rules).
            return RemoveFinalSymbol(input.ChildNodes[0].Print()).ToUpperInvariant();
        }

        if (input.IsExternalUDFunction())
        {
            return $"{input.ChildNodes[0].Print()}{GetFunction(input.ChildNodes[1])}";
        }

        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Check if this node is a specific function
    /// </summary>
    public static bool MatchFunction(this ParseTreeNode input, string functionName)
    {
        return IsFunction(input) && GetFunction(input) == functionName;
    }

    /// <summary>
    /// Get all the arguments of a function or operation
    /// </summary>
    /// <exception cref="ArgumentException">If the node is not a function call or operation</exception>
    public static IEnumerable<ParseTreeNode> GetFunctionArguments(this ParseTreeNode input)
    {
        if (input.IsNamedFunction())
        {
            return input
                .ChildNodes[1] // "Arguments" non-terminal
                .ChildNodes    // "Argument" non-terminals
                .Select(node => node.ChildNodes[0])
                ;
        }

        if (input.IsBinaryOperation())
        {
            return new[] {input.ChildNodes[0], input.ChildNodes[2]};
        }

        if (input.IsUnaryPrefixOperation())
        {
            return new[] {input.ChildNodes[1]};
        }

        if (input.IsUnaryPostfixOperation())
        {
            return new[] {input.ChildNodes[0]};
        }

        if (input.IsUnion())
        {
            return input.ChildNodes[0].ChildNodes;
        }

        if (input.IsExternalUDFunction())
        {
            return input       // Reference
                .ChildNodes[1] // UDFunctionCall
                .ChildNodes[1] // Arguments
                .ChildNodes    // Argument non-terminals
                .Select(node => node.ChildNodes[0])
                ;
        }

        throw new ArgumentException("Not a function call", nameof(input));
    }

    /// <summary>
    /// Checks whether this node is a built-in excel function
    /// </summary>
    public static bool IsBuiltinFunction(this ParseTreeNode node)
    {
        return node.IsFunction()
               && (node.ChildNodes[0].Is(GrammarNames.FunctionName) || node.ChildNodes[0].Is(GrammarNames.RefFunctionName));
    }

    /// <summary>
    /// Whether or not this node represents an intersection
    /// </summary>
    public static bool IsIntersection(this ParseTreeNode input)
    {
        return IsBinaryOperation(input)
               && input.ChildNodes[1].Token.Terminal.Name == GrammarNames.TokenIntersect;
    }

    /// <summary>
    /// Whether or not this node represents an union
    /// </summary>
    public static bool IsUnion(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.ReferenceFunctionCall)
               && input.ChildNodes.Count == 1
               && input.ChildNodes[0].Is(GrammarNames.Union);
    }

    /// <summary>
    /// Checks whether this node is a function call with name, and not just a unary or binary operation
    /// </summary>
    public static bool IsNamedFunction(this ParseTreeNode input)
    {
        return (input.Is(GrammarNames.FunctionCall) && input.ChildNodes[0].Is(GrammarNames.FunctionName))
               || (input.Is(GrammarNames.ReferenceFunctionCall) && input.ChildNodes[0].Is(GrammarNames.RefFunctionName))
               || input.Is(GrammarNames.UDFunctionCall);
    }

    /// <summary>
    /// True for binary and unary operations
    /// </summary>
    public static bool IsOperation(this ParseTreeNode input)
    {
        return input.IsBinaryOperation() || input.IsUnaryOperation();
    }

    /// <summary>
    /// True for Reference nodes with two children whose second child is a named function
    /// </summary>
    public static bool IsExternalUDFunction(this ParseTreeNode input)
    {
        return input.Is(GrammarNames.Reference) && input.ChildNodes.Count == 2 && input.ChildNodes[1].IsNamedFunction();
    }

    /// <summary>
    /// True if this node presents a number constant with a sign
    /// </summary>
    public static bool IsNumberWithSign(this ParseTreeNode input)
    {
        return IsUnaryPrefixOperation(input)
               && input.ChildNodes[1].ChildNodes[0].Is(GrammarNames.Constant)
               && input.ChildNodes[1].ChildNodes[0].ChildNodes[0].Is(GrammarNames.Number);
    }

    /// <summary>
    /// Extract all of the information from a Prefix non-terminal
    /// </summary>
    public static PrefixInfo GetPrefixInfo(this ParseTreeNode prefix) => PrefixInfo.From(prefix);

    /// <summary>
    /// Go to the first non-formula child node
    /// </summary>
    public static ParseTreeNode SkipFormula(this ParseTreeNode input)
    {
        while (input.Is(GrammarNames.Formula))
        {
            input = input.ChildNodes.First();
        }

        return input;
    }

    /// <summary>
    /// Get all child nodes that are references and aren't part of another reference expression
    /// </summary>
    public static IEnumerable<ParseTreeNode> GetReferenceNodes(this ParseTreeNode input)
    {
        // The traversal does not descend below a Reference node, so only outermost references are returned.
        return input.AllNodesConditional(node => node.Is(GrammarNames.Reference))
            .Where(node => node.Is(GrammarNames.Reference))
            .Select(node => node.SkipToRelevant())
            ;
    }

    ///
    /// Gets the ParserReferences from the input parse tree node and its children
    ///
    ///
    /// 5 cases:
    /// 1. ReferenceItem node: convert to ParserReference
    /// 2.
/// Reference node (Prefix ReferenceItem): convert to ParserReference, recursive call on the nodes returned from GetReferenceNodes(node)
    /// (to include the references in the arguments of external UDFs)
    /// 3. Range node (Cell:Cell): recursive call to retrieve the 2 limits, create ParserReference of type CellRange
    /// 4. Range node with complex limits: recursive call to retrieve limits as 2 ParserReferences
    /// 5. Other cases (RefFunctionCall, Union, Arguments):recursive call on the nodes returned from GetReferenceNodes(node)
    /// Reference nodes with a single child are unwrapped first, however deeply they are nested.
    ///
    public static IEnumerable<ParserReference> GetParserReferences(this ParseTreeNode node)
    {
        // Unwrap every single-child Reference wrapper. Unwrapping only one level leaves a nested
        // parenthesised reference as a Reference node without a Prefix, which ParserReference cannot initialise.
        while (node.Type() == GrammarNames.Reference && node.ChildNodes.Count == 1)
        {
            node = node.ChildNodes[0];
        }

        var list = new List<ParserReference>();

        switch (node.Type())
        {
            case GrammarNames.Cell:
            case GrammarNames.NamedRange:
            case GrammarNames.HorizontalRange:
            case GrammarNames.VerticalRange:
            case GrammarNames.StructuredReference:
            case GrammarNames.RefError:
                list.Add(new ParserReference(node));
                break;
            case GrammarNames.Reference:
                list.Add(new ParserReference(node));
                list.AddRange(node.ChildNodes[1].GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
                break;
            default:
                if (node.IsRange())
                {
                    var rangeStart = GetParserReferences(node.ChildNodes[0]).FirstOrDefault();
                    var rangeEnd = GetParserReferences(node.ChildNodes[2]).FirstOrDefault();
                    if (rangeStart?.ReferenceType == ReferenceType.Cell && rangeEnd?.ReferenceType == ReferenceType.Cell)
                    {
                        // Cell:Cell is reported as one CellRange reference, reusing the start reference object
                        ParserReference range = rangeStart;
                        range.MaxLocation = rangeEnd.MinLocation;
                        range.ReferenceType = ReferenceType.CellRange;
                        range.ReferenceNode = node;
                        range.LocationString = node.Print();
                        list.Add(range);
                        break;
                    }

                    if (rangeStart?.ReferenceType == ReferenceType.Table && rangeEnd?.ReferenceType == ReferenceType.Table
                        && rangeStart.Name == rangeEnd.Name
                        && rangeStart.TableColumns.Length == 1 && rangeEnd.TableColumns.Length == 1)
                    {
                        // Two single-column references to the same table are merged into one table reference
                        ParserReference range = rangeStart;
                        range.TableColumns = rangeStart.TableColumns.Concat(rangeEnd.TableColumns).ToArray();
                        range.TableSpecifiers = rangeStart.TableSpecifiers.SequenceEqual(rangeEnd.TableSpecifiers) ? range.TableSpecifiers : new string[0];
                        range.ReferenceNode = node;
                        range.LocationString = node.Print();
                        list.Add(range);
                        break;
                    }
                }

                list.AddRange(node.GetReferenceNodes().SelectMany(x => x.GetParserReferences()));
                break;
        }

        return list;
    }

    /// <summary>
    /// Whether or not this node represents a range
    /// </summary>
    public static bool IsRange(this ParseTreeNode input)
    {
        return input.IsBinaryReferenceOperation()
               && input.ChildNodes[1].Is(":")
               // both limits must end in the same kind of node
               && input.ChildNodes[0].ChildNodes.Last().Type() == input.ChildNodes[2].ChildNodes.Last().Type();
    }

    /// <summary>
    /// Go to the first "relevant" child node, i.e. skips wrapper nodes
    /// </summary>
    /// <param name="input">The input parse tree node</param>
    /// <param name="skipReferencesWithoutPrefix">If true, skip all reference nodes without a prefix instead of only parentheses</param>
    /// <remarks>
    /// Skips:
    /// * FormulaWithEq and ArrayFormula nodes
    /// * Formula nodes
    /// * Parentheses
    /// * Reference nodes which are just wrappers
    /// </remarks>
    public static ParseTreeNode SkipToRelevant(this ParseTreeNode input, bool skipReferencesWithoutPrefix = false)
    {
        while (true)
        {
            switch (input.Type())
            {
                case GrammarNames.FormulaWithEq:
                case GrammarNames.ArrayFormula:
                    input = input.ChildNodes[1];
                    break;
                case GrammarNames.Argument:
                case GrammarNames.Formula:
                    if (input.ChildNodes.Count == 1)
                    {
                        input = input.ChildNodes[0];
                    }
                    else
                    {
                        return input;
                    }
                    break;
                case GrammarNames.Reference:
                    // Skip references which are parentheses
                    // Skip references without a prefix (=> they only have one child node) if the option is set
                    if ((skipReferencesWithoutPrefix && input.ChildNodes.Count == 1) || input.IsParentheses())
                    {
                        input = input.ChildNodes[0];
                    }
                    else
                    {
                        return input;
                    }
                    break;
                default:
                    return input;
            }
        }
    }

    /// <summary>
    /// Pretty-print a parse tree to a string
    /// </summary>
    /// <exception cref="ArgumentException">If a node type is encountered that this method cannot print</exception>
    public static string Print(this ParseTreeNode input)
    {
        // For terminals, just print the token text
        if (input.Term is Terminal)
        {
            return input.Token.Text;
        }

        // (Lazy) enumerable for printed children
        var children = input.ChildNodes.Select(Print);
        // Concrete list when needed
        List<string> childrenList;

        // Switch on non-terminals
        switch (input.Term.Name)
        {
            case GrammarNames.Formula:
                // Check if these are brackets, otherwise print first child
                return IsParentheses(input) ? $"({children.First()})" : children.First();

            case GrammarNames.FunctionCall:
            case GrammarNames.ReferenceFunctionCall:
            case GrammarNames.UDFunctionCall:
                childrenList = children.ToList();

                if (input.IsNamedFunction())
                {
                    return string.Join("", childrenList) + ")";
                }

                if (input.IsBinaryOperation())
                {
                    // format string for "normal" binary operation
                    string format = "{0}{1}{2}";
                    if (input.IsIntersection())
                    {
                        format = "{0} {2}";
                    }

                    return string.Format(format, childrenList[0], childrenList[1], childrenList[2]);
                }

                if (input.IsUnion())
                {
                    return $"({string.Join(",", childrenList)})";
                }

                if (input.IsUnaryOperation())
                {
                    return string.Join("", childrenList);
                }

                throw new ArgumentException("Unknown function type.");

            case GrammarNames.Reference:
                return IsParentheses(input) ? $"({children.First()})" : string.Concat(children);

            case GrammarNames.Prefix:
                var ret = string.Join("", children);
                // The exclamation mark token is not included in the parse tree, so we have to add that if it's a single file
                if (input.ChildNodes.Count == 1 && input.ChildNodes[0].Is(GrammarNames.File))
                {
                    ret += "!";
                }
                return ret;

            case GrammarNames.ArrayFormula:
                return "{=" + children.ElementAt(1) + "}";

            // Terms for which to print all child nodes concatenated
            case GrammarNames.ArrayConstant:
            case GrammarNames.DynamicDataExchange:
            case GrammarNames.FormulaWithEq:
            case GrammarNames.File:
            case GrammarNames.MultiRangeFormula:
            case GrammarNames.StructuredReference:
            case GrammarNames.StructuredReferenceColumn:
            case GrammarNames.StructuredReferenceExpression:
            case GrammarNames.StructuredReferenceSpecifier:
                return string.Join("", children);

            // Terms for which we print the children comma-separated
            case GrammarNames.Arguments:
            case GrammarNames.ArrayRows:
            case GrammarNames.Union:
                return string.Join(",", children);

            case GrammarNames.ArrayColumns:
                return string.Join(";", children);

            case GrammarNames.ConstantArray:
                return $"{{{children.First()}}}";

            default:
                // If it is not defined above and the number of children is exactly one, we want to just print the first child
                if (input.ChildNodes.Count == 1)
                {
                    return children.First();
                }
                throw new ArgumentException($"Could not print node of type '{input.Term.Name}'." + Environment.NewLine +
                                            "This probably means the Excel grammar was modified without the print function being modified");
        }
    }
}
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v175/ParserReference.cs
================================================
using System.Linq;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v175
{
    /// <summary>
    /// Kind of reference a <see cref="ParserReference"/> describes
    /// </summary>
    public enum ReferenceType
    {
        Cell,
        CellRange,
        UserDefinedName,
        HorizontalRange,
        VerticalRange,
        RefError,
        Table,
        UserDefinedFunction
    }

    /// <summary>
    /// Data extracted from a single reference in a parse tree
    /// </summary>
    public class ParserReference
    {
        public ReferenceType ReferenceType { get; set; }
        public ParseTreeNode ReferenceNode { get; set; }
        public string LocationString { get; set; }
        public string Worksheet { get; set; }
        public string LastWorksheet { get; set; }
        public string FilePath { get; set; }
        public string FileName { get; set; }
        public string Name { get; set; }
        public string MinLocation { get; set; }
        public string MaxLocation { get; set; }
        public string[] TableSpecifiers { get; set; }
        public string[] TableColumns { get; set; }

        public ParserReference(ParseTreeNode node)
        {
            InitializeReference(node);
        }

        /// <summary>
        /// Initializes the current object based on the input ParseTreeNode
        /// </summary>
        /// <remarks>
        /// For Reference nodes (Prefix ReferenceItem), it initializes the values derived from the Prefix node and
        /// is re-invoked for the ReferenceItem node.
        /// </remarks>
        public void InitializeReference(ParseTreeNode node)
        {
            switch (node.Type())
            {
                case GrammarNames.Reference:
                    PrefixInfo prefix = node.ChildNodes[0].GetPrefixInfo();
                    Worksheet = prefix.HasSheet ? prefix.Sheet.Replace("''", "'") : "(Undefined sheet)";

                    if (prefix.HasMultipleSheets)
                    {
                        string[] sheets = prefix.MultipleSheets.Split(':');
                        Worksheet = sheets[0];
                        LastWorksheet = sheets[1];
                    }

                    if (prefix.HasFilePath)
                    {
                        FilePath = prefix.FilePath.Replace("''", "'");
                    }

                    if (prefix.HasFileNumber)
                    {
                        FileName = prefix.FileNumber.ToString();
                    }
                    else if (prefix.HasFileName)
                    {
                        FileName = prefix.FileName;
                    }

                    InitializeReference(node.ChildNodes[1]);
                    break;
                case GrammarNames.Cell:
                    ReferenceType = ReferenceType.Cell;
                    MinLocation = node.ChildNodes[0].Token.Text;
                    MaxLocation = MinLocation;
                    break;
                case GrammarNames.NamedRange:
                    ReferenceType = ReferenceType.UserDefinedName;
                    Name = node.ChildNodes[0].Token.Text;
                    break;
                case GrammarNames.StructuredReference:
                    ReferenceType = ReferenceType.Table;
                    Name = node.ChildNodes.FirstOrDefault(x => x.Type() == GrammarNames.StructuredReferenceQualifier)?.ChildNodes[0].Token.Text;
                    TableSpecifiers = node.AllNodes().Where(x => x.Is(GrammarNames.TokenSRSpecifier) || x.Is("@")).Select(x => UnEscape(x.Token.Text, "'")).ToArray();
                    TableColumns = node.AllNodes().Where(x => x.Is(GrammarNames.TokenSRColumn)).Select(x => UnEscape(x.Token.Text, "'")).ToArray();
                    break;
                case GrammarNames.HorizontalRange:
                    string[] horizontalLimits = node.ChildNodes[0].Token.Text.Split(':');
                    ReferenceType = ReferenceType.HorizontalRange;
                    MinLocation = horizontalLimits[0];
                    MaxLocation = horizontalLimits[1];
                    break;
                case GrammarNames.VerticalRange:
                    string[] verticalLimits = node.ChildNodes[0].Token.Text.Split(':');
                    ReferenceType = ReferenceType.VerticalRange;
                    MinLocation = verticalLimits[0];
                    MaxLocation = verticalLimits[1];
                    break;
                case GrammarNames.RefError:
                    ReferenceType = ReferenceType.RefError;
                    break;
                case GrammarNames.UDFunctionCall:
                    ReferenceType = ReferenceType.UserDefinedFunction;
                    Name = node.ChildNodes[0].ChildNodes[0].Token.Text.TrimEnd('(');
                    break;
            }

            ReferenceNode = node;
            LocationString = node.Print();
            if (ReferenceType == ReferenceType.UserDefinedFunction && Name != null)
            {
                // Keep only "<prefix><function name>". The printed node is the printed prefix (if any) directly
                // followed by the function token, so the cut position is known exactly. Searching the text for the
                // name instead goes wrong as soon as the arguments contain the name too (e.g. "tax(tax_rate)").
                int nameStart = node.Is(GrammarNames.Reference) ? node.ChildNodes[0].Print().Length : 0;
                int nameEnd = nameStart + Name.Length;
                if (nameEnd <= LocationString.Length)
                {
                    LocationString = LocationString.Substring(0, nameEnd);
                }
            }
        }

        // Removes every occurrence of the escape character that is not itself followed by the escape character.
        private string UnEscape(string value, string escapeCharacter)
        {
            // Escape the character so that regex metacharacters are matched literally
            string escaped = System.Text.RegularExpressions.Regex.Escape(escapeCharacter);
            return System.Text.RegularExpressions.Regex.Replace(value, $"{escaped}(?!{escaped})", "");
        }

        public override string ToString()
        {
            return LocationString;
        }
    }
}
================================================
FILE: app/XLParser.Web/XLParserVersions/v175/PrefixInfo.cs
================================================
using System;
using System.Reflection;
using System.Text;
using Irony.Parsing;

namespace XLParser.Web.XLParserVersions.v175
{
    // TODO: This class is a good example of why an AST is a good idea, for the prefixes the parse trees are too complicated to work with. See #23

    /// <summary>
    /// Simple data class that holds information about a Prefix.
    /// </summary>
    ///
    public class PrefixInfo : IEquatable<PrefixInfo>
    {
        public string FilePath { get; }
        public bool HasFilePath => FilePath != null;

        private readonly int? _fileNumber;
        public int FileNumber => _fileNumber.GetValueOrDefault();
        public bool HasFileNumber => _fileNumber.HasValue;

        public string FileName { get; }
        public bool HasFileName => FileName != null;

        public bool HasFile => HasFileName || HasFileNumber;

        public string Sheet { get; }
        public bool HasSheet => Sheet != null;

        public string MultipleSheets { get; }
        public bool HasMultipleSheets => MultipleSheets != null;

        public bool IsQuoted { get; }

        public PrefixInfo(string sheet = null, int?
fileNumber = null, string fileName = null, string filePath = null, string multipleSheets = null, bool isQuoted = false)
        {
            Sheet = sheet;
            _fileNumber = fileNumber;
            FileName = fileName;
            FilePath = filePath;
            MultipleSheets = multipleSheets;
            IsQuoted = isQuoted;
        }

        /// <summary>
        /// Create a PrefixInfo class from a parse tree node
        /// </summary>
        /// <param name="prefix">Parse tree node of type <c>GrammarNames.Prefix</c>.</param>
        /// <exception cref="ArgumentException">The node is not a prefix node.</exception>
        internal static PrefixInfo From(ParseTreeNode prefix)
        {
            if (prefix.Type() != GrammarNames.Prefix)
            {
                throw new ArgumentException("Not a prefix", nameof(prefix));
            }

            string filePath = null;
            int? fileNumber = null;
            string fileName = null;
            string sheetName = null;
            string multipleSheets = null;

            // Token number we're processing
            var cur = 0;

            // Check for quotes
            var isQuoted = prefix.ChildNodes[cur].Is("'");
            if (isQuoted)
            {
                cur++;
            }

            // Check and process file
            if (prefix.ChildNodes[cur].Is(GrammarNames.File))
            {
                ParseTreeNode file = prefix.ChildNodes[cur];

                if (file.ChildNodes[0].Is(GrammarNames.TokenFileNameNumeric))
                {
                    // Numeric filename: strip the first and last character before parsing the number
                    fileNumber = int.TryParse(Substr(file.ChildNodes[0].Print(), 1, 1), out var n) ? n : default(int?);
                }
                else
                {
                    // String filename
                    var iCur = 0;

                    // Check if it includes a path
                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFilePath))
                    {
                        filePath = file.ChildNodes[iCur].Print();
                        iCur++;
                    }

                    if (file.ChildNodes[iCur].Is(GrammarNames.TokenFileNameEnclosedInBrackets))
                    {
                        // Strip the first and last character (the enclosing brackets)
                        fileName = Substr(file.ChildNodes[iCur].Print(), 1, 1);
                    }
                    else
                    {
                        fileName = file.ChildNodes[iCur].Print();
                    }
                }

                cur++;
            }

            // Check for a non-quoted sheet
            if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheet))
            {
                sheetName = Substr(prefix.ChildNodes[cur].Print(), 1);
            }
            // Check for a quoted sheet
            else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenSheetQuoted))
            {
                // remove quote and !
                sheetName = Substr(prefix.ChildNodes[cur].Print(), 2);

                if (sheetName == "")
                {
                    // The sheet name consists solely of whitespace (see https://github.com/spreadsheetlab/XLParser/issues/37)
                    // We can not identify the sheet name in the case, and return all whitespace-only sheet names as if they were a single-space sheet name.
                    sheetName = " ";
                }
            }
            // Check for multiple sheets
            else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenMultipleSheets))
            {
                multipleSheets = Substr(prefix.ChildNodes[cur].Print(), 1);
            }
            // Check for multiple sheets (quoted)
            else if (prefix.ChildNodes[cur].Is(GrammarNames.TokenMultipleSheetsQuoted))
            {
                // remove quote and !
                multipleSheets = Substr(prefix.ChildNodes[cur].Print(), 2);
            }

            return new PrefixInfo(sheetName, fileNumber, fileName, filePath, multipleSheets, isQuoted);
        }

        /// <summary>
        /// Extends the span (and token text) of <paramref name="parseTreeNode"/> backwards so that it
        /// also covers the whitespace that directly precedes it in <paramref name="sourceText"/>.
        /// Does nothing when there is no preceding whitespace.
        /// </summary>
        internal static void FixPrecedingWhiteSpaces(ParseTreeNode parseTreeNode, string sourceText)
        {
            var newPosition = GetNodePositionFromSourceText(parseTreeNode, sourceText);
            SourceLocation currentLocation = parseTreeNode.Span.Location;
            if (newPosition == currentLocation.Position)
            {
                return;
            }

            // The start moves backwards by `shift` characters, so the column has to shrink by the same
            // amount (it was previously increased by the shift). The line number is kept as-is, so if the
            // absorbed whitespace contains a line break the column is only clamped to 0, not recomputed.
            var shift = currentLocation.Position - newPosition;
            var newColumn = Math.Max(0, currentLocation.Column - shift);
            var newLocation = new SourceLocation(newPosition, currentLocation.Line, newColumn);
            parseTreeNode.Span = new SourceSpan(newLocation, parseTreeNode.Span.EndPosition - newPosition);

            // Cannot directly assign to parseTreeNode.Token.Text; it is read-only. Falling back on reflection.
            typeof(Token).GetField("Text", BindingFlags.Instance | BindingFlags.Public)
                ?.SetValue(parseTreeNode.Token, sourceText.Substring(newPosition, parseTreeNode.Span.Length));
        }

        /// <summary>
        /// Returns the start position of the node after walking backwards over all whitespace
        /// characters that directly precede it in <paramref name="sourceText"/>.
        /// </summary>
        private static int GetNodePositionFromSourceText(ParseTreeNode nodeSheetQuoted, string sourceText)
        {
            var startIndex = nodeSheetQuoted.Span.Location.Position;
            while (startIndex > 0)
            {
                if (!char.IsWhiteSpace(sourceText[startIndex - 1]))
                {
                    break;
                }

                startIndex--;
            }

            return startIndex;
        }

        /// <summary>
        /// Returns <paramref name="s"/> without its first <paramref name="removeFirst"/> and last
        /// <paramref name="removeLast"/> characters.
        /// </summary>
        private static string Substr(string s, int removeLast = 0, int removeFirst = 0)
        {
            return s.Substring(removeFirst, s.Length - removeLast - removeFirst);
        }

        public override bool Equals(object other) => Equals(other as PrefixInfo);

        /// <summary>
        /// Two prefixes are equal when their file number matches and their path, file name, sheet and
        /// multiple-sheets parts match case-insensitively. <see cref="IsQuoted"/> is not compared.
        /// </summary>
        public bool Equals(PrefixInfo other)
        {
            if (ReferenceEquals(null, other)) return false;
            if (ReferenceEquals(this, other)) return true;
            return _fileNumber == other._fileNumber
                   && string.Equals(FilePath, other.FilePath, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(FileName, other.FileName, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(Sheet, other.Sheet, StringComparison.OrdinalIgnoreCase)
                   && string.Equals(MultipleSheets, other.MultipleSheets, StringComparison.OrdinalIgnoreCase);
        }

        public override int GetHashCode()
        {
            // Case-insensitive hashes, matching the case-insensitive comparisons in Equals
            unchecked
            {
                var hashCode = StringComparer.OrdinalIgnoreCase.GetHashCode(Sheet ?? "");
                hashCode = (hashCode*397) ^ (FilePath != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FilePath) : 0);
                hashCode = (hashCode*397) ^ (FileName != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(FileName) : 0);
                hashCode = (hashCode*397) ^ (_fileNumber?.GetHashCode() ?? 0);
                hashCode = (hashCode*397) ^ (MultipleSheets != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(MultipleSheets) : 0);
                return hashCode;
            }
        }

        public static bool operator ==(PrefixInfo left, PrefixInfo right)
        {
            return Equals(left, right);
        }

        public static bool operator !=(PrefixInfo left, PrefixInfo right)
        {
            return !Equals(left, right);
        }

        /// <summary>
        /// Rebuilds the textual form of the prefix from its parts, ending with "!".
        /// </summary>
        public override string ToString()
        {
            var res = new StringBuilder();
            if (IsQuoted) res.Append("'");
            if (HasFilePath) res.Append(FilePath);
            if (HasFileNumber) res.Append($"[{FileNumber}]");
            if (HasFileName) res.Append($"[{FileName}]");
            if (HasSheet) res.Append(Sheet);
            if (HasMultipleSheets) res.Append(MultipleSheets);
            if (IsQuoted) res.Append("'");
            res.Append("!");
            return res.ToString();
        }
    }
}

================================================
FILE: app/XLParser.Web/XLParserVersions/v175/WordsTerminal.cs
================================================
using Irony.Parsing;
using System;
using System.Collections.Generic;

namespace XLParser.Web.XLParserVersions.v175
{
    /// <summary>
    /// Terminal that determines whether the input contains one of the expected words.
    /// </summary>
    /// <remarks>
    /// Children of each node are represented as an array to allow direct indexation. Do not use
    /// for words that have a large difference between low and high character of a token.
    /// </remarks>
    public class WordsTerminal : Terminal
    {
        private readonly Node _rootNode;
        private readonly List<string> _words;
        private bool _caseSensitive;

        public WordsTerminal(string name, IEnumerable<string> words) : base(name)
        {
            _rootNode = new Node(0);
            _words = new List<string>(words);
        }

        /// <summary>
        /// Builds the word tree; words are upper-cased first when the grammar is case-insensitive.
        /// </summary>
        public override void Init(GrammarData grammarData)
        {
            base.Init(grammarData);
            _caseSensitive = Grammar.CaseSensitive;
            foreach (var word in _words)
            {
                AddWordToTree(_caseSensitive ?
word : word.ToUpperInvariant());
            }

            if (EditorInfo == null)
            {
                EditorInfo = new TokenEditorInfo(TokenType.Unknown, TokenColor.Text, TokenTriggers.None);
            }
        }

        public override IList<string> GetFirsts() => _words;

        /// <summary>
        /// Tries to match the longest registered word that starts at the current preview position.
        /// </summary>
        /// <returns>A token for the matched word, or null when no registered word starts here.</returns>
        public override Token TryMatch(ParsingContext context, ISourceStream source)
        {
            var node = _rootNode;

            // Remember the deepest node that completes a word. Walking on past it must not discard the
            // match: with the words "AB" and "ABCD" the input "ABCX" has to yield "AB", whereas testing
            // only the final node reached ("ABC", not a word) reported no match at all.
            Node lastTerminal = node.IsTerminal ? node : null;

            var input = source.Text;
            for (var i = source.PreviewPosition; i < input.Length; ++i)
            {
                var c = _caseSensitive ? input[i] : char.ToUpperInvariant(input[i]);
                var nextNode = node[c];
                if (nextNode is null)
                {
                    break;
                }

                node = nextNode;
                if (node.IsTerminal)
                {
                    lastTerminal = node;
                }
            }

            if (lastTerminal is null)
            {
                return null;
            }

            // Node.Length is the depth in the tree, i.e. the number of characters of the matched word
            source.PreviewPosition += lastTerminal.Length;
            return source.CreateToken(OutputTerminal);
        }

        /// <summary>
        /// Adds the word to the tree, one node per character, and marks the last node as a word end.
        /// </summary>
        private void AddWordToTree(string word)
        {
            var node = _rootNode;
            foreach (var c in word)
            {
                node = node.GetOrAddChild(c);
            }

            node.IsTerminal = true;
        }

        /// <summary>
        /// Tree node whose children are stored in an array indexed by (character - _lowChar).
        /// </summary>
        private class Node
        {
            private char _lowChar = '\0';
            private char _highChar = '\0';
            private Node[] _children;

            public Node(int length)
            {
                Length = length;
            }

            /// <summary>True when the path from the root to this node spells a complete word.</summary>
            public bool IsTerminal { get; set; }

            /// <summary>Depth of the node in the tree (the root has length 0).</summary>
            public int Length { get; }

            /// <summary>Returns the child for character c, or null when there is none.</summary>
            public Node this[char c]
            {
                get
                {
                    if (_children is null)
                    {
                        return null;
                    }

                    if (c < _lowChar || c > _highChar)
                    {
                        return null;
                    }

                    return _children[c - _lowChar];
                }
            }

            /// <summary>
            /// Returns the child for character c, creating it (and growing the child array towards
            /// lower or higher characters) when it does not exist yet.
            /// </summary>
            internal Node GetOrAddChild(char c)
            {
                if (_children is null)
                {
                    var node = new Node(Length + 1);
                    _children = new[] { node };
                    _lowChar = c;
                    _highChar = c;
                    return node;
                }

                var newLowChar = (char)Math.Min(_lowChar, c);
                if (newLowChar != _lowChar)
                {
                    // Grow at the front: resize, shift the existing children up by the offset and
                    // clear the freed slots before storing the new child at index 0
                    var newChildrenCount = _highChar - newLowChar + 1;
                    Array.Resize(ref _children, newChildrenCount);
                    var ofs = _lowChar - newLowChar;
                    Array.Copy(_children, 0, _children, ofs, newChildrenCount - ofs);
                    Array.Clear(_children, 0, ofs);
                    _lowChar = newLowChar;
                    return _children[0] = new Node(Length + 1);
                }

                var newHighChar = (char)Math.Max(_highChar, c);
                if (newHighChar != _highChar)
                {
                    // Grow at the end: the new child takes the last slot
                    var newChildrenCount = newHighChar - _lowChar + 1;
                    Array.Resize(ref _children, newChildrenCount);
                    _highChar = newHighChar;
                    return _children[newChildrenCount - 1] = new Node(Length + 1);
                }

                var charIdx = c - _lowChar;
                var child = _children[charIdx];
                if (child is null)
                {
                    return _children[charIdx] = new Node(Length + 1);
                }

                return child;
            }
        }
    }
}

================================================
FILE: app/XLParser.Web/d3vizsvg.js
================================================
var default_formula = "SUM(B5,2)";
var latestVersion = "139";

var margin = { top: 20, right: 20, bottom: 20, left: 20 };
var width = Math.max(document.getElementById("d3viz").clientWidth - 10, 500) - margin.right - margin.left;
var height = 600 - margin.top - margin.bottom;

var i;
var tree;
var root;
var diagonal = d3.svg.diagonal()
    .projection(function(d) { return [d.x, d.y]; });
var vis;

// Replace the existing parse tree image with a new one
function newTree(formula, version) {
    var encodedFormula = encodeURIComponent(formula);
    var url = "Parse.json?version=" + version + "&formula=" + encodedFormula;

    d3.select("#d3viz").html("");

    // Request the JSON parse tree
    d3.json(url, function(request, json) {
        //console.log(json)
        //console.log(request)
        if (json !== undefined) {
            // Calculate the required width and height for the image
            var tw = treeWidth(json);
            var th = treeHeight(json);
            //console.log("W: " + tw + " H: " + th);
            //console.log(json);
            var w = Math.max(tw * 75, width);
            var h = Math.max(10 + th * 60, height);
            var imgW = w + margin.right + margin.left;
            var imgH = h + margin.top + margin.bottom;

            // create a tree and its container
            tree = d3.layout.tree().size([w, h]);
            i = 0;
            var svg = d3.select("#d3viz")
                .append("svg")
                .attr("id", "dynamicSVGParsetree")
                .attr("version", 1.1)
                .attr("xmlns", "http://www.w3.org/2000/svg")
                .attr("width", imgW)
                .attr("height", imgH);
            svg.append("style").attr("type", "text/css").text(svgCss);
            vis = svg.append("g").attr("transform", "translate(" + margin.left + "," + margin.top + ")");

            // Create the tree nodes
            root = json;
            update(root);

            // Create images that can be downloaded
            generateImageData(imgW, imgH);
        } else {
            json = JSON.parse(request.response);
            var msg =
"" + json.error; if (json.message !== undefined) { msg += " at line " + json.message.line + " column " + json.message.column; } msg += "

"; // Convert to entities to prevent XSS msg += "Input:

"; if (json.message !== undefined) { msg += ""; } d3.select("#d3viz").html(msg); } }); } // Set the parse tree image to the default formula and enter it in the formula input field newTree(default_formula, latestVersion); d3.select("#formulainput").text(default_formula); // Create nodes in the parse tree function update(source) { // Compute the new tree layout. var nodes = tree.nodes(root).reverse(); var links = tree.links(nodes); // Normalize for fixed-depth. nodes.forEach(function(d) { d.y = 10 + d.depth * 60; }); // Update the nodes… var node = vis.selectAll("g.node") .data(nodes, function(d) { return d.id || (d.id = ++i); }); // Enter any new nodes at the parent's previous position. var nodeEnter = node.enter().append("g") .attr("class", "node") .attr("transform", function(d) { return "translate(" + d.x + "," + d.y + ")"; }); nodeEnter.append("circle").attr("r", 8).style("fill", "#fff"); nodeEnter.append("text") .attr("y", function(d) { // Put nodes without children (terminals) below, nodes with (non-terminals) above return d.children || d._children ? -20 : 20; }) .attr("dy", ".31em") .attr("text-anchor", "middle") .text(function(d) { return d.name.replace("\n", "\\n"); }) .style("fill-opacity", 1); // Declare the links... var link = vis.selectAll("path.link") .data(links, function(d) { return d.target.id; }); // Enter the links link.enter().insert("path", "g") .attr("class", "link") .attr("d", diagonal); // Transition nodes to their new position. 
} // Get the approximate width of the tree, for the purpose of the image function treeWidth(node) { if (node.children == undefined) return 1; var sum = 0; // Add the width of all children for (var i = 0; i < node.children.length; i++) { sum += treeWidth(node.children[i]); } return sum; } // Get the maximum depth of the tree function treeHeight(node) { if (node.children == undefined) return 1; var max = 0; for (var i = 0; i < node.children.length; i++) { max = Math.max(max, treeHeight(node.children[i])); } return max + 1; } // Create a downloadable SVG and PNG image from the dynamic SVG parse tree image // From: http://techslides.com/save-svg-as-an-image function generateImageData(imgw, imgh) { var html = d3.select("#dynamicSVGParsetree") .node().parentNode.innerHTML; //console.log(html); // Encode the SVG data as base64 and put it in a data: link var svgSrc = "data:image/svg+xml;base64," + btoa(html); var imgDataSvg = $("#imgdatasvg"); imgDataSvg.attr("crossOrigin", "anonymous"); imgDataSvg.attr("href", svgSrc); imgDataSvg.attr("download", "parsetree.svg"); // Create a new image object from the SVG var image = new Image; image.src = svgSrc; image.onload = function() { // Once the image is loaded var imgDataPng = $("#imgdatapng"); try { // Create a canvas element and fill it with the SVG image var canvas = document.createElement("canvas"); canvas.width = imgw; canvas.height = imgh; canvas.style.backgroundColor = "white"; var canvasctx = canvas.getContext("2d"); canvasctx.drawImage(image, 0, 0); // Get the base64 encoded data URL for a PNG image from the canvas var pngsrc = canvas.toDataURL("image/png"); // Put it in a link imgDataPng.attr("href", pngsrc); imgDataPng.attr("download", "parsetree.png"); } catch (e) { imgDataPng.attr("href", "javascript: void(0)"); imgDataPng.off("click"); imgDataPng.on("click", function() { alert("An error occured while creating PNG.\n\n" + "If you are using a modern browser? 
This page might not have enough privileges to allow PNG creation. Increase trust level for this page.\n\n" + "Are you using an older browser? If so try a newer one.\n\n" + "Confirmed to work in Firefox 39 and Chrome 44."); return false; }); } }; }; var svgCss = ".node circle {\n" + " fill: #fff;\n" + " stroke: steelblue;\n" + " stroke-width: 1.5px;\n" + "}\n" + ".node text {\n" + " font-family: 'Helvetica Neue', Helvetica, sans-serif;" + " font-size: 14px;\n" + "}\n" + "path.link {\n" + " fill: none;\n" + " stroke: #cfcfcf;\n" + " stroke-width: 1.5px;\n" + "}"; ================================================ FILE: app/XLParser.Web/packages.config ================================================  ================================================ FILE: app/XLParser.Web/xlparser-web.css ================================================ #borderwrapper { border: 12px solid #217346; border-bottom-width: 6px; border-top-width: 14px; display: flex; flex: 0 1 auto; flex-direction: row; flex-wrap: nowrap; } #borderwrapper2 { border: 12px solid #217346; border-bottom-width: 14px; border-top-width: 6px; clear: both; flex: 1 1 auto; padding: 10px 75px 35px; } #errorformulainput { background-color: transparent; color: #000; margin-bottom: 10px; margin-top: 10px; } #errormessages { background-color: transparent; border: 1px solid #999; color: #000; font: 12px "Courier New",monospace; height: 15em; padding: 5px; width: 65em; } #formulainput, #errorformulainput { border: 1px solid #999; min-height: 19px; min-width: 25em; resize: both; } #formulainput, #errorformulainput, #errormessages { padding-bottom: 2px; padding-left: 5px; padding-top: 2px; } #leftoflogos { align-items: stretch; border-right: 12px solid #217346; flex-grow: 1; padding: 35px 75px 25px; } #logos { flex-grow: 0; height: 100%; overflow: hidden; padding-left: 50px; padding-right: 50px; padding-top: 35px; } #logos img { margin-bottom: 20px; width: 150px; } #parsebutton { background-color: #fff; border: 1px solid #999; 
border-radius: 4px; cursor: pointer; height: 24px; vertical-align: middle; width: 60px; } #parsebutton:hover { background-color: #ccc; } #parserversionselected { background-color: #fff; border: 1px solid #999; height: 24px; width: 75px; } .clear { clear: both; } body { -webkit-flex-flow: column; background-color: #f1f1f1; display: flex; flex-flow: column; font: 14px "Helvetica Neue",Helvetica,sans-serif; } h2 a { text-decoration: none; } html, body { height: 100%; margin: 0; } table { width: 100%; } textarea { width: 95%; } td, th { text-align: left; padding: 3px 0; vertical-align: top; } ================================================ FILE: app/lib/.gitignore ================================================ ================================================ FILE: doc/README.md ================================================ # XLParser Documentation For a full overview of XLParser, read the original paper: [A Grammar for Spreadsheet Formulas Evaluated on Two Large Datasets](https://drive.google.com/file/d/0B79P2Uym3JjvMjlaWWtnTWRLQmc/view?usp=sharing). ### API documentation You can generate API documentation using [Doxygen](https://www.doxygen.org) and the Doxyfile in api/Doxyfile. ### Grammar Used Read [this paper](https://drive.google.com/file/d/0B79P2Uym3JjvMjlaWWtnTWRLQmc/view?usp=sharing) (v1.1.2) or this thesis (v1.2.1) for full details. The grammar can also be inferred from [the source code](../src/XLParser/ExcelFormulaGrammar.cs). Also available are the EBNF grammar ([PDF](ebnf.pdf), [PNG](ebnf.png), updated v1.2.1) and token list ([PDF](tokens.pdf), [PNG](tokens.png), updated v1.2.1). ================================================ FILE: doc/api/Doxyfile ================================================ # Doxyfile 1.8.17 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. # # All text after a double hash (##) is considered a comment and is placed in # front of the TAG it is preceding. 
# # All text after a single hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the configuration # file that follow. The default is UTF-8 which is also the encoding used for all # text before the first occurrence of this tag. Doxygen uses libiconv (or the # iconv built into libc) for the transcoding. See # https://www.gnu.org/software/libiconv/ for the list of possible encodings. # The default value is: UTF-8. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded by # double-quotes, unless you are using Doxywizard) that should identify the # project for which the documentation is generated. This name is used in the # title of most generated pages and in a few other places. # The default value is: My Project. PROJECT_NAME = XLParser # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. PROJECT_NUMBER = 1.7.5 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = # With the PROJECT_LOGO tag one can specify a logo or an icon that is included # in the documentation. The maximum height of the logo should not exceed 55 # pixels and the maximum width should not exceed 200 pixels. Doxygen will copy # the logo to the output directory. 
PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. OUTPUT_DIRECTORY = ./doc/api # If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and # will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise causes # performance problems for the file system. # The default value is: NO. CREATE_SUBDIRS = NO # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode # U+3044. # The default value is: NO. ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, # Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), # Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, # Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), # Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, # Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, # Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, # Ukrainian and Vietnamese. # The default value is: English. 
OUTPUT_LANGUAGE = English # The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all generated output in the proper direction. # Possible values are: None, LTR, RTL and Context. # The default value is: None. OUTPUT_TEXT_DIRECTION = None # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. # The default value is: YES. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief # description of a member or function before the detailed description # # Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. # The default value is: YES. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator that is # used to form the text in various listings. Each string in this list, if found # as the leading text of the brief description, will be stripped from the text # and the result, after processing the whole list, is used as the annotated # text. Otherwise, the brief description is used as-is. If left blank, the # following values are used ($name is automatically replaced with the name of # the entity):The $name class, The $name widget, The $name file, is, provides, # specifies, contains, represents, a, an and the. ABBREVIATE_BRIEF = "The $name class" \ "The $name widget" \ "The $name file" \ is \ provides \ specifies \ contains \ represents \ a \ an \ the # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # doxygen will generate a detailed section even if there is only a brief # description. # The default value is: NO. 
ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. # The default value is: NO. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path # before files name in the file list and in the header files. If set to NO the # shortest path that makes the file name unique will be used # The default value is: YES. FULL_PATH_NAMES = YES # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. # Stripping is only done if one of the specified strings matches the left-hand # part of the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the path to # strip. # # Note that you can specify absolute paths here, but also relative paths, which # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. STRIP_FROM_PATH = # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which # header file to include in order to use a class. If left blank only the name of # the header file containing the class definition is used. Otherwise one should # specify the list of include paths that are normally passed to the compiler # using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful is your file systems doesn't # support long names like on DOS, Mac, or CD-ROM. # The default value is: NO. 
SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the # first line (until the first dot) of a Javadoc-style comment as the brief # description. If set to NO, the Javadoc-style will behave just like regular Qt- # style comments (thus requiring an explicit @brief command for a brief # description.) # The default value is: NO. JAVADOC_AUTOBRIEF = NO # If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line # such as # /*************** # as being the beginning of a Javadoc-style comment "banner". If set to NO, the # Javadoc-style will behave just like regular comments and it will not be # interpreted by doxygen. # The default value is: NO. JAVADOC_BANNER = NO # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. If # set to NO, the Qt-style will behave just like regular Qt-style comments (thus # requiring an explicit \brief command for a brief description.) # The default value is: NO. QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a # multi-line C++ special comment block (i.e. a block of //! or /// comments) as # a brief description. This used to be the default behavior. The new default is # to treat a multi-line C++ comment block as a detailed description. Set this # tag to YES if you prefer the old behavior instead. # # Note that setting this tag to YES also means that rational rose comments are # not recognized any more. # The default value is: NO. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new # page for each member. 
If set to NO, the documentation of a member will be part # of the file/class/namespace that contains it. # The default value is: NO. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen # uses this value to replace tabs by spaces in code fragments. # Minimum value: 1, maximum value: 16, default value: 4. TAB_SIZE = 4 # This tag can be used to specify a number of aliases that act as commands in # the documentation. An alias has the form: # name=value # For example adding # "sideeffect=@par Side Effects:\n" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines (in the resulting output). You can put ^^ in the value part of an # alias to insert a newline as if a physical newline was in the original file. # When you need a literal { or } or , in the value part of an alias you have to # escape them by means of a backslash (\), this can lead to conflicts with the # commands \{ and \} for these it is advised to use the version @{ and @} or use # a double escape (\\{ and \\}) ALIASES = # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding "class=itcl::class" # will allow you to use the command class in the itcl::class meaning. TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all # members will be omitted, etc. # The default value is: NO. OPTIMIZE_OUTPUT_FOR_C = NO # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or # Python sources only. Doxygen will then generate output that is more tailored # for that language. 
For instance, namespaces will be presented as packages, # qualified scopes will look different, etc. # The default value is: NO. OPTIMIZE_OUTPUT_JAVA = YES # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources. Doxygen will then generate output that is tailored for Fortran. # The default value is: NO. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for VHDL. # The default value is: NO. OPTIMIZE_OUTPUT_VHDL = NO # Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice # sources only. Doxygen will then generate output that is more tailored for that # language. For instance, namespaces will be presented as modules, types will be # separated into more groups, etc. # The default value is: NO. OPTIMIZE_OUTPUT_SLICE = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, JavaScript, # Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, # Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: # FortranFree, unknown formatted Fortran: Fortran. In the later case the parser # tries to guess whether the code is fixed or free formatted code, this is the # default for Fortran type files), VHDL, tcl. For instance to make doxygen treat # .inc files as Fortran files (default is PHP), and .f files as C (default is # Fortran), use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise # the files are not read by doxygen. 
EXTENSION_MAPPING = # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable # documentation. See https://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibilities issues. # The default value is: YES. MARKDOWN_SUPPORT = YES # When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up # to that level are automatically included in the table of contents, even if # they do not have an id attribute. # Note: This feature currently applies only to Markdown headings. # Minimum value: 0, maximum value: 99, default value: 5. # This tag requires that the tag MARKDOWN_SUPPORT is set to YES. TOC_INCLUDE_HEADINGS = 5 # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word or # globally by setting AUTOLINK_SUPPORT to NO. # The default value is: YES. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should set this # tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); # versus func(std::string) {}). This also make the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. # The default value is: NO. BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. # The default value is: NO. 
CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: # https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES will make # doxygen to replace the get and set methods by a property in the documentation. # This will only work if the methods are indeed getting or setting a simple # type. If this is not the case, or you want to show the methods anyway, you # should set this option to NO. # The default value is: YES. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. # The default value is: NO. DISTRIBUTE_GROUP_DOC = NO # If one adds a struct or class to a group and this option is enabled, then also # any nested class or struct is added to the same group. By default this option # is disabled and one has to add nested compounds explicitly via \ingroup. # The default value is: NO. GROUP_NESTED_COMPOUNDS = NO # Set the SUBGROUPING tag to YES to allow class member groups of the same type # (for instance a group of public functions) to be put as a subgroup of that # type (e.g. under the Public Functions section). Set it to NO to prevent # subgrouping. Alternatively, this can be done per class using the # \nosubgrouping command. # The default value is: YES. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions # are shown inside the group in which they are included (e.g. 
using \ingroup) # instead of on a separate page (for HTML and Man pages) or section (for LaTeX # and RTF). # # Note that this feature does not work in combination with # SEPARATE_MEMBER_PAGES. # The default value is: NO. INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions # with only public data fields or simple typedef fields will be shown inline in # the documentation of the scope in which they are defined (i.e. file, # namespace, or group documentation), provided this scope is documented. If set # to NO, structs, classes, and unions are shown on a separate page (for HTML and # Man pages) or section (for LaTeX and RTF). # The default value is: NO. INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or # enum is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically be # useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. # The default value is: NO. TYPEDEF_HIDES_STRUCT = NO # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This # cache is used to resolve symbols given their name and scope. Since this can be # an expensive process and often the same symbol appears multiple times in the # code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small # doxygen will become slower. If the cache is too large, memory is wasted. The # cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range # is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 # symbols. 
At the end of a run doxygen will report the cache usage and suggest # the optimal cache size from a speed point of view. # Minimum value: 0, maximum value: 9, default value: 0. LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in # documentation are documented, even if no documentation was available. Private # class members and static file members will be hidden unless the # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. # Note: This will also disable the warnings about undocumented members that are # normally produced when WARNINGS is set to YES. # The default value is: NO. EXTRACT_ALL = NO # If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will # be included in the documentation. # The default value is: NO. EXTRACT_PRIVATE = NO # If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual # methods of a class will be included in the documentation. # The default value is: NO. EXTRACT_PRIV_VIRTUAL = NO # If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal # scope will be included in the documentation. # The default value is: NO. EXTRACT_PACKAGE = NO # If the EXTRACT_STATIC tag is set to YES, all static members of a file will be # included in the documentation. # The default value is: NO. EXTRACT_STATIC = YES # If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined # locally in source files will be included in the documentation. If set to NO, # only classes defined in header files are included. Does not have any effect # for Java sources. # The default value is: YES. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. 
If set to YES, local methods, # which are defined in the implementation section but not in the interface are # included in the documentation. If set to NO, only methods in the interface are # included. # The default value is: NO. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base name of # the file that contains the anonymous namespace. By default anonymous namespace # are hidden. # The default value is: NO. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation # section is generated. This option has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO, these classes will be included in the various overviews. This option # has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend # declarations. If set to NO, these declarations will be included in the # documentation. # The default value is: NO. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any # documentation blocks found inside the body of a function. If set to NO, these # blocks will be appended to the function's detailed documentation block. # The default value is: NO. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation that is typed after a # \internal command is included. If the tag is set to NO then the documentation # will be excluded. 
Set it to YES to include the internal documentation. # The default value is: NO. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file # names in lower-case letters. If set to YES, upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # (including Cygwin) and Mac users are advised to set this option to NO. # The default value is: system dependent. CASE_SENSE_NAMES = NO # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with # their full class and namespace scopes in the documentation. If set to YES, the # scope will be hidden. # The default value is: NO. HIDE_SCOPE_NAMES = NO # If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will # append additional text to a page's title, such as Class Reference. If set to # YES the compound reference will be hidden. # The default value is: NO. HIDE_COMPOUND_REFERENCE= NO # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. SHOW_INCLUDE_FILES = YES # If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each # grouped member an include statement to the documentation, telling the reader # which file to include in order to use the member. # The default value is: NO. SHOW_GROUPED_MEMB_INC = NO # If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include # files with double quotes in the documentation rather than with sharp brackets. # The default value is: NO. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the # documentation for inline members. # The default value is: YES. 
INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the # (detailed) documentation of file and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. # The default value is: YES. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief # descriptions of file, namespace and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. Note that # this will also influence the order of the classes in the class list. # The default value is: NO. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the # (brief and detailed) documentation of class members so that constructors and # destructors are listed first. If set to NO the constructors will appear in the # respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. # Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief # member documentation. # Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting # detailed member documentation. # The default value is: NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy # of group names into alphabetical order. If set to NO the group names will # appear in their defined order. # The default value is: NO. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by # fully-qualified names, including namespaces. If set to NO, the class list will # be sorted only by class name, not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the alphabetical # list. # The default value is: NO. 
SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper # type resolution of all parameters of a function it will reject a match between # the prototype and the implementation of a member function even if there is # only one candidate or it is obvious which candidate to choose by doing a # simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still # accept a match between prototype and implementation in such cases. # The default value is: NO. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo # list. This list is created by putting \todo commands in the documentation. # The default value is: YES. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test # list. This list is created by putting \test commands in the documentation. # The default value is: YES. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug # list. This list is created by putting \bug commands in the documentation. # The default value is: YES. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) # the deprecated list. This list is created by putting \deprecated commands in # the documentation. # The default value is: YES. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional documentation # sections, marked by \if ... \endif and \cond # ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the # documentation. If the initializer consists of more lines than specified here # it will be hidden. Use a value of 0 to hide initializers completely. 
The # appearance of the value of individual variables and macros / defines can be # controlled using \showinitializer or \hideinitializer command in the # documentation regardless of this setting. # Minimum value: 0, maximum value: 10000, default value: 30. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at # the bottom of the documentation of classes and structs. If set to YES, the # list will mention the files that were used to generate the documentation. # The default value is: YES. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. This # will remove the Files entry from the Quick Index and from the Folder Tree View # (if specified). # The default value is: YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces # page. This will remove the Namespaces entry from the Quick Index and from the # Folder Tree View (if specified). # The default value is: YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen. Whatever the program writes to standard output is used as the file # version. For an example see the documentation. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. 
You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml # will be used as the name of the layout file. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE # tag is left empty. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib # extension is automatically appended if omitted. This requires the bibtex tool # to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info. # For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. See also \cite for info how to create references. CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated to # standard output by doxygen. If QUIET is set to YES this implies that the # messages are off. # The default value is: NO. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated to standard error (stderr) by doxygen. If WARNINGS is set to YES # this implies that the warnings are on. # # Tip: Turn warnings on while writing the documentation. # The default value is: YES. WARNINGS = YES # If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag # will automatically be disabled. # The default value is: YES. 
WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some parameters # in a documented function, or documenting parameters that don't exist or using # markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return # value. If set to NO, doxygen will only warn about wrong or incomplete # parameter documentation, but not about the absence of documentation. If # EXTRACT_ALL is set to YES then this flag will automatically be disabled. # The default value is: NO. WARN_NO_PARAMDOC = NO # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when # a warning is encountered. # The default value is: NO. WARN_AS_ERROR = NO # The WARN_FORMAT tag determines the format of the warning messages that doxygen # can produce. The string should contain the $file, $line, and $text tags, which # will be replaced by the file and line number from which the warning originated # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard # error (stderr). WARN_LOGFILE = #--------------------------------------------------------------------------- # Configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag is used to specify the files and/or directories that contain # documented source files. 
You may enter file names like myfile.cpp or # directories like /usr/src/myproject. Separate the files or directories with # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. INPUT = ./src/XLParser # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: https://www.gnu.org/software/libiconv/) for the list of # possible encodings. # The default value is: UTF-8. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # read by doxygen. # # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, # *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, # *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment), # *.doc (to be provided as doxygen C comment), *.txt (to be provided as doxygen # C comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f, *.for, *.tcl, *.vhd, # *.vhdl, *.ucf, *.qsf and *.ice. 
FILE_PATTERNS = *.c \ *.cc \ *.cxx \ *.cpp \ *.c++ \ *.java \ *.ii \ *.ixx \ *.ipp \ *.i++ \ *.inl \ *.idl \ *.ddl \ *.odl \ *.h \ *.hh \ *.hxx \ *.hpp \ *.h++ \ *.cs \ *.d \ *.php \ *.php4 \ *.php5 \ *.phtml \ *.inc \ *.m \ *.markdown \ *.md \ *.mm \ *.dox \ *.py \ *.f90 \ *.f \ *.for \ *.tcl \ *.vhd \ *.vhdl \ *.ucf \ *.qsf \ *.as \ *.js # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. # The default value is: NO. RECURSIVE = NO # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. # The default value is: NO. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. 
Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include # command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and # *.h) to filter out the source-files in the directories. If left blank all # files are included. EXAMPLE_PATTERNS = * # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude commands # irrespective of the value of the RECURSIVE tag. # The default value is: NO. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the # \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command: # # <filter> <input-file> # # where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the # name of an input file. Doxygen will then use the output that the filter # program writes to standard output. If FILTER_PATTERNS is specified, this tag # will be ignored. # # Note that the filter must not add or remove lines; it is applied before the # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. 
INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: pattern=filter # (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how # filters are used. If the FILTER_PATTERNS tag is empty or if none of the # patterns match the file name, INPUT_FILTER is applied. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will also be used to filter the input files that are used for # producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). # The default value is: NO. FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern. A pattern will override the setting for FILTER_PATTERN (if any) and # it is also possible to disable source filtering for a specific pattern using # *.ext= (so without naming a filter). # This tag requires that the tag FILTER_SOURCE_FILES is set to YES. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will be # generated. 
Documented entities will be cross-referenced with these sources. # # Note: To get rid of all source code in the generated output, make sure that # also VERBATIM_HEADERS is set to NO. # The default value is: NO. SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body of functions, # classes and enums directly into the documentation. # The default value is: NO. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any # special comment blocks from generated source code fragments. Normal C, C++ and # Fortran comments will always remain visible. # The default value is: YES. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # entity all documented functions referencing it will be listed. # The default value is: NO. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES then for each documented function # all documented entities called/used by that function will be listed. # The default value is: NO. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES then the hyperlinks from functions in REFERENCES_RELATION and # REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will # link to the documentation. # The default value is: YES. REFERENCES_LINK_SOURCE = YES # If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the # source code will show a tooltip with additional information such as prototype, # brief description and links to the definition and documentation. Since this # will make the HTML file larger and loading of large files a bit slower, you # can opt to disable this feature. # The default value is: YES. # This tag requires that the tag SOURCE_BROWSER is set to YES. 
SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen built-in # source browser. The htags tool is part of GNU's global source tagging system # (see https://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. # # To use it do the following: # - Install the latest version of global # - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # # Doxygen will invoke htags (and that will in turn invoke gtags), so these # tools must be available from the command line (i.e. in the search path). # # The result: instead of the source browser generated by doxygen, the links to # source code will now point to the output of htags. # The default value is: NO. # This tag requires that the tag SOURCE_BROWSER is set to YES. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a # verbatim copy of the header file for each class for which an include is # specified. Set to NO to disable this. # See also: Section \class. # The default value is: YES. VERBATIM_HEADERS = YES # If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the # clang parser (see: http://clang.llvm.org/) for more accurate parsing at the # cost of reduced performance. This can be particularly helpful with template # rich C++ code for which doxygen's built-in parser lacks the necessary type # information. # Note: The availability of this option depends on whether or not doxygen was # generated with the -Duse_libclang=ON option for CMake. # The default value is: NO. CLANG_ASSISTED_PARSING = NO # If clang assisted parsing is enabled you can provide the compiler with command # line options that you would normally use when invoking the compiler. 
Note that # the include paths will already be set by doxygen for the files and directories # specified with INPUT and INCLUDE_PATH. # This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. CLANG_OPTIONS = # If clang assisted parsing is enabled you can provide the clang parser with the # path to the compilation database (see: # http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) used when the files # were built. This is equivalent to specifying the "-p" option to a clang tool, # such as clang-check. These options will then be passed to the parser. # Note: The availability of this option depends on whether or not doxygen was # generated with the -Duse_libclang=ON option for CMake. CLANG_DATABASE_PATH = #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all # compounds will be generated. Enable this if the project contains a lot of # classes, structs, unions or interfaces. # The default value is: YES. ALPHABETICAL_INDEX = YES # The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in # which the alphabetical index list will be split. # Minimum value: 1, maximum value: 20, default value: 5. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored # while generating the index headers. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. 
IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output # The default value is: YES. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of # it. # The default directory is: html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for each # generated HTML page (for example: .htm, .php, .asp). # The default value is: .html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a user-defined HTML header file for # each generated HTML page. If the tag is left blank doxygen will generate a # standard header. # # To get valid HTML, the header file must include any scripts and style sheets # that doxygen needs, which depend on the configuration options used (e.g. # the setting GENERATE_TREEVIEW). It is highly recommended to start with a # default header using # doxygen -w html new_header.html new_footer.html new_stylesheet.css # YourConfigFile # and then modify the file new_header.html. See also section "Doxygen usage" # for information on how to generate the default header that doxygen normally # uses. # Note: The header is subject to change so you typically have to regenerate the # default header when upgrading to a newer version of doxygen. For a description # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. 
If the tag is left blank doxygen will generate a standard # footer. See HTML_HEADER for more information on how to generate a default # footer and what special commands can be used inside the footer. See also # section "Doxygen usage" for information on how to generate the default footer # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. It can be used to fine-tune the look of # the HTML output. If left blank doxygen will generate a default style sheet. # See also section "Doxygen usage" for information on how to generate the style # sheet that doxygen normally uses. # Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as # it is more robust and this tag (HTML_STYLESHEET) will in the future become # obsolete. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined # cascading style sheets that are included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the # standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet files to the output directory. # Note: The order of the extra style sheet files is of importance (e.g. the last # style sheet in the list overrules the setting of the previous ones in the # list). For an example see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. 
Use the # $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that the # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to # this color. Hue is specified as an angle on a colorwheel, see # https://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 is # purple, and 360 is red again. # Minimum value: 0, maximum value: 359, default value: 220. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors # in the HTML output. For a value of 0 the output will use grayscales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the # luminance component of the colors in the HTML output. Values below 100 # gradually make the output lighter, whereas values above 100 make the output # darker. The value divided by 100 is the actual gamma applied, so 80 represents # a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not # change the gamma. # Minimum value: 40, maximum value: 240, default value: 80. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated.
Setting this # to YES can help to show when doxygen was last run and thus if the # documentation is up to date. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_TIMESTAMP = NO # If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML # documentation will contain a main index with vertical navigation menus that # are dynamically created via JavaScript. If disabled, the navigation index will # consist of multiple levels of tabs that are statically embedded in every HTML # page. Disable this option to support browsers that do not have JavaScript, # like the Qt help browser. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_MENUS = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_SECTIONS = NO # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries # shown in the various tree structured indices initially; the user can expand # and collapse entries dynamically later on. Doxygen will expand the tree to # such a level that at most the specified number of entries are visible (unless # a fully collapsed tree already exceeds this amount). So setting the number of # entries to 1 will produce a fully collapsed tree by default. 0 is a special value # representing an infinite number of entries and will result in a fully expanded # tree by default. # Minimum value: 0, maximum value: 9999, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES.
HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development # environment (see: https://developer.apple.com/xcode/), introduced with OSX # 10.5 (Leopard). To create a documentation set, doxygen will generate a # Makefile in the HTML output directory. Running make will produce the docset in # that directory and running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at # startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy # genXcode/_index.html for more information. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_DOCSET = NO # This tag determines the name of the docset feed. A documentation feed provides # an umbrella under which multiple documentation sets from a single provider # (such as a company or product suite) can be grouped. # The default value is: Doxygen generated docs. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_FEEDNAME = "Doxygen generated docs" # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_BUNDLE_ID = org.doxygen.Project # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify # the documentation publisher. This should be a reverse domain-name style # string, e.g. com.mycompany.MyDocSet.documentation. # The default value is: org.doxygen.Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. 
# The default value is: Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop # (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on # Windows. # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML # files are now used as the Windows 98 help format, and will replace the old # Windows help format (.hlp) on all Windows platforms in the future. Compressed # HTML files also contain an index, a table of contents, and you can search for # words in the documentation. The HTML workshop also contains a viewer for # compressed HTML files. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_HTMLHELP = NO # The CHM_FILE tag can be used to specify the file name of the resulting .chm # file. You can add a path in front of the file if the result should not be # written to the html output directory. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_FILE = # The HHC_LOCATION tag can be used to specify the location (absolute path # including file name) of the HTML help compiler (hhc.exe). If non-empty, # doxygen will try to run the HTML help compiler on the generated index.hhp. # The file has to be specified with full path. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated # (YES) or that it should be included in the master .chm file (NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
GENERATE_CHI = NO # The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) # and project file content. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_INDEX_ENCODING = # The BINARY_TOC flag controls whether a binary table of contents is generated # (YES) or a normal table of contents (NO) in the .chm file. Furthermore it # enables the Previous and Next buttons. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members to # the table of contents of the HTML help documentation and to the tree view. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that # can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help # (.qch) of the generated HTML documentation. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify # the file name of the resulting .qch file. The path specified is relative to # the HTML output folder. # This tag requires that the tag GENERATE_QHP is set to YES. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. For more information please see Qt Help Project / Namespace # (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt # Help Project output. 
For more information please see Qt Help Project / Virtual # Folders (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual- # folders). # The default value is: doc. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_VIRTUAL_FOLDER = doc # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom # filter to add. For more information please see Qt Help Project / Custom # Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom- # filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see Qt Help Project / Custom # Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom- # filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's filter section matches. Qt Help Project / Filter Attributes (see: # https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_SECT_FILTER_ATTRS = # The QHG_LOCATION tag can be used to specify the location of Qt's # qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the # generated .qhp file. # This tag requires that the tag GENERATE_QHP is set to YES. QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be # generated, together with the HTML files, they form an Eclipse help plugin. To # install this plugin and make it available under the help contents menu in # Eclipse, the contents of the directory containing the HTML and XML files needs # to be copied into the plugins directory of eclipse. The name of the directory # within the plugins directory should be the same as the ECLIPSE_DOC_ID value. 
# After copying Eclipse needs to be restarted before the help appears. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_ECLIPSEHELP = NO # A unique identifier for the Eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have this # name. Each documentation set should have its own identifier. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. ECLIPSE_DOC_ID = org.doxygen.Project # If you want full control over the layout of the generated HTML pages it might # be necessary to disable the index and replace it with your own. The # DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top # of each HTML page. A value of NO enables the index and the value YES disables # it. Since the tabs in the index contain the same information as the navigation # tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. DISABLE_INDEX = NO # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. If the tag # value is set to YES, a side panel will be generated containing a tree-like # index structure (just like the one that is generated for HTML Help). For this # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can # further fine-tune the look of the index. As an example, the default style # sheet generated by doxygen has an example that shows how to put an image at # the root of the tree instead of the PROJECT_NAME. 
Since the tree basically has # the same information as the tab index, you could consider setting # DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_TREEVIEW = YES # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # # Note that a value of 0 will completely suppress the enum values from appearing # in the overview section. # Minimum value: 0, maximum value: 20, default value: 4. # This tag requires that the tag GENERATE_HTML is set to YES. ENUM_VALUES_PER_LINE = 4 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used # to set the initial width (in pixels) of the frame in which the tree is shown. # Minimum value: 0, maximum value: 1500, default value: 250. # This tag requires that the tag GENERATE_HTML is set to YES. TREEVIEW_WIDTH = 250 # If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to # external symbols imported via tag files in a separate window. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. EXT_LINKS_IN_WINDOW = NO # Use this tag to change the font size of LaTeX formulas included as images in # the HTML documentation. When you change the font size after a successful # doxygen run you need to manually remove any form_*.png images from the HTML # output directory to force them to be regenerated. # Minimum value: 8, maximum value: 50, default value: 10. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_FONTSIZE = 10 # Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are not # supported properly for IE 6.0, but are supported on all modern browsers. 
# # Note that when changing this option you need to delete any form_*.png files in # the HTML output directory before the changes take effect. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_TRANSPARENT = YES # The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands # to create new LaTeX commands to be used in formulas as building blocks. See # the section "Including formulas" for details. FORMULA_MACROFILE = # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see # https://www.mathjax.org) which uses client side JavaScript for the rendering # instead of using pre-rendered bitmaps. Use this if you do not have LaTeX # installed or if you want formulas to look prettier in the HTML output. When # enabled you may also need to install MathJax separately and configure the path # to it using the MATHJAX_RELPATH option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. USE_MATHJAX = NO # When MathJax is enabled you can set the default output format to be used for # the MathJax output. See the MathJax site (see: # http://docs.mathjax.org/en/latest/output.html) for more details. # Possible values are: HTML-CSS (which is slower, but has the best # compatibility), NativeMML (i.e. MathML) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the HTML # output directory using the MATHJAX_RELPATH option. The destination directory # should contain the MathJax.js script. For instance, if the mathjax directory # is located at the same level as the HTML output directory, then # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax.
However, it is strongly recommended to install a local copy of # MathJax from https://www.mathjax.org before deployment. # The default value is: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. For example # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site # (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_CODEFILE = # When the SEARCHENGINE tag is enabled doxygen will generate a search box for # the HTML output. The underlying search engine uses javascript and DHTML and # should work on any modern browser. Note that when using HTML help # (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) # there is already a search function so this one should typically be disabled. # For large projects the javascript based search engine can be slow, then # enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to # search using the keyboard; to jump to the search box use <access key> + S # (what the <access key> is depends on the OS and browser, but it is typically # <CTRL>, <ALT>/