Repository: rjrodger/jsonic Branch: main Commit: 38693d01a0a6 Files: 296 Total size: 2.7 MB Directory structure: gitextract_o0u0amii/ ├── .github/ │ └── workflows/ │ └── build.yml ├── .gitignore ├── CNAME ├── LICENSE ├── Makefile ├── README.md ├── TODO.md ├── bin/ │ ├── jsonic │ └── jsonic-bnf ├── dist/ │ ├── bnf.d.ts │ ├── bnf.js │ ├── debug.d.ts │ ├── debug.js │ ├── defaults.d.ts │ ├── defaults.js │ ├── error.d.ts │ ├── error.js │ ├── grammar.d.ts │ ├── grammar.js │ ├── jsonic-bnf-cli.d.ts │ ├── jsonic-bnf-cli.js │ ├── jsonic-cli.d.ts │ ├── jsonic-cli.js │ ├── jsonic.d.ts │ ├── jsonic.js │ ├── lexer.d.ts │ ├── lexer.js │ ├── parser.d.ts │ ├── parser.js │ ├── rules.d.ts │ ├── rules.js │ ├── self.d.ts │ ├── self.js │ ├── tsconfig.tsbuildinfo │ ├── types.d.ts │ ├── types.js │ ├── utility.d.ts │ └── utility.js ├── dist-test/ │ ├── prep.js │ └── tsconfig.tsbuildinfo ├── doc/ │ ├── api.md │ ├── bnf-to-jsonic-feasibility.md │ ├── lsp-feasibility.md │ ├── options.md │ ├── plugins.md │ └── syntax.md ├── docs/ │ ├── 404.html │ ├── CNAME │ ├── assets/ │ │ ├── css/ │ │ │ └── 0.styles.610e9dca.css │ │ └── js/ │ │ ├── 10.88d9a57b.js │ │ ├── 11.7988a5fa.js │ │ ├── 12.d9f3d941.js │ │ ├── 13.775f91ca.js │ │ ├── 14.f56c2700.js │ │ ├── 15.00058088.js │ │ ├── 16.70a6eea0.js │ │ ├── 17.0d1f36b1.js │ │ ├── 18.0f184e32.js │ │ ├── 19.57ad231b.js │ │ ├── 2.34930047.js │ │ ├── 20.58c8b075.js │ │ ├── 21.0c46ffa9.js │ │ ├── 22.965cbc0d.js │ │ ├── 23.18c270f0.js │ │ ├── 24.5895d76a.js │ │ ├── 25.a347f1d6.js │ │ ├── 26.c7b8345d.js │ │ ├── 27.4e6f90b7.js │ │ ├── 28.6e0fa7bc.js │ │ ├── 29.77b8e3b6.js │ │ ├── 3.6ce1235a.js │ │ ├── 30.69b1b865.js │ │ ├── 31.c68f976d.js │ │ ├── 32.0a08a140.js │ │ ├── 4.064b2ac3.js │ │ ├── 5.46815478.js │ │ ├── 6.f03d59ff.js │ │ ├── 7.6be3acca.js │ │ ├── 8.0a081a71.js │ │ ├── 9.9b6e31d5.js │ │ └── app.0c621e62.js │ ├── guide/ │ │ ├── alternatives.html │ │ ├── custom-parsers.html │ │ ├── getting-started.html │ │ ├── index.html │ │ ├── install.html │ │ ├── 
syntax-introduction.html │ │ └── tutorials.html │ ├── index.html │ ├── jsonic.js │ ├── plugin/ │ │ ├── csv.html │ │ ├── dynamic.html │ │ ├── hoover.html │ │ ├── index.html │ │ ├── json.html │ │ ├── multifile.html │ │ └── native.html │ ├── railroad-diagrams.css │ ├── ref/ │ │ ├── api.html │ │ ├── index.html │ │ ├── options.html │ │ └── syntax.html │ └── tutorial/ │ ├── index.html │ ├── parsing-csv.html │ ├── write-a-parser.html │ └── write-a-plugin.html ├── go/ │ ├── README.md │ ├── alignment_test.go │ ├── both_ref_test.go │ ├── color_test.go │ ├── comment_suffix_test.go │ ├── coverage.html │ ├── csv_grammar_test.go │ ├── debug.go │ ├── directive_grammar_test.go │ ├── doc/ │ │ ├── api.md │ │ ├── differences.md │ │ ├── options.md │ │ ├── plugins.md │ │ └── syntax.md │ ├── feature_tsv_test.go │ ├── fnref_identity_test.go │ ├── fnref_reinstall_test.go │ ├── go.mod │ ├── grammar.go │ ├── grammar_decl_test.go │ ├── grammar_setting_test.go │ ├── grammarspec.go │ ├── jsonic.go │ ├── jsonic_nontsv_test.go │ ├── jsonic_test.go │ ├── lexer.go │ ├── listchild_test.go │ ├── listref_test.go │ ├── mapref_test.go │ ├── nlookahead_test.go │ ├── options.go │ ├── options_parity_test.go │ ├── parser.go │ ├── plugin.go │ ├── plugin_test.go │ ├── readme_test.go │ ├── rule.go │ ├── rule_include_test.go │ ├── safe_test.go │ ├── text.go │ ├── textinfo_test.go │ ├── token.go │ ├── token_test.go │ ├── util.go │ ├── util_test.go │ ├── utility.go │ └── variant_test.go ├── package.json ├── src/ │ ├── bnf.ts │ ├── debug.ts │ ├── defaults.ts │ ├── error.ts │ ├── grammar.ts │ ├── jsonic-bnf-cli.ts │ ├── jsonic-cli.ts │ ├── jsonic.ts │ ├── lexer.ts │ ├── mfix.js │ ├── parser.ts │ ├── rules.ts │ ├── self.ts │ ├── tsconfig.json │ ├── types.ts │ └── utility.ts └── test/ ├── aa-wildcard.test.js ├── alignment.test.js ├── also-bad-plugin.js ├── angle.js ├── api.test.js ├── bad-plugin.js ├── bar.jsonic ├── bnf.test.js ├── cli.test.js ├── comma.test.js ├── comment.test.js ├── csv-grammar.test.js ├── 
custom.test.js ├── debug.test.js ├── directive-grammar.test.js ├── dive.js ├── doc.test.js ├── error.test.js ├── exhaust.js ├── feature.test.js ├── first-version-perf.js ├── first-version.test.js ├── foo.jsonic ├── grammar/ │ ├── arith-leftrec.bnf │ ├── arith.bnf │ ├── greet.bnf │ ├── json-subset.bnf │ ├── pair.bnf │ └── rfc3986-uri.abnf ├── grammar.test.js ├── happy.js ├── json-standard.js ├── jsonic.test.js ├── justjson.js ├── large.js ├── lex.test.js ├── long.js ├── module.mjs ├── multifile-remove/ │ ├── again.jsonic │ ├── blue01.js │ ├── func.js │ ├── green01.json │ ├── main01.jsonic │ └── trunk/ │ ├── branch.jsonic │ └── twig/ │ └── leaf1.jsonic ├── nlookahead.test.js ├── p0.js ├── p1.js ├── p2.js ├── pa-qa.js ├── perf.js ├── plugin-default.js ├── plugin-name.js ├── plugin.test.js ├── plugins-parity.sh ├── prep.ts ├── probe.test.js ├── quick.js ├── readme.test.js ├── require.js ├── rewind.test.js ├── rfc3986.test.js ├── safe.test.js ├── smoke.js ├── spec/ │ ├── alignment-empty.tsv │ ├── alignment-errors.tsv │ ├── alignment-map-merge.tsv │ ├── alignment-number-text.tsv │ ├── alignment-safe-key.tsv │ ├── alignment-structure.tsv │ ├── alignment-values.tsv │ ├── comma-implicit-comma.tsv │ ├── comma-optional-comma.tsv │ ├── exclude-comma-errors.tsv │ ├── exclude-comma.tsv │ ├── exclude-strict-json-errors.tsv │ ├── exclude-strict-json.tsv │ ├── feature-comment-suffix-block.tsv │ ├── feature-comment-suffix-line.tsv │ ├── feature-debug-cases.tsv │ ├── feature-implicit-map.tsv │ ├── feature-implicit-object.tsv │ ├── feature-list-child-deep.tsv │ ├── feature-list-child-pair-deep.tsv │ ├── feature-list-child-pair.tsv │ ├── feature-list-child.tsv │ ├── feature-list-pair.tsv │ ├── feature-map-child-deep.tsv │ ├── feature-map-child.tsv │ ├── feature-nested-space-pairs.tsv │ ├── fv-arrays.tsv │ ├── fv-comma.tsv │ ├── fv-deep.tsv │ ├── fv-drop-outs.tsv │ ├── fv-numbers.tsv │ ├── fv-subobj.tsv │ ├── fv-types.tsv │ ├── fv-works.tsv │ ├── happy.tsv │ ├── include-json-errors.tsv 
│ ├── include-json.tsv │ ├── jsonic-basic-array-tree.tsv │ ├── jsonic-basic-json.tsv │ ├── jsonic-basic-mixed-tree.tsv │ ├── jsonic-basic-object-tree.tsv │ ├── jsonic-funky-keys.tsv │ ├── jsonic-process-array.tsv │ ├── jsonic-process-implicit-object.tsv │ ├── jsonic-process-mixed-nodes.tsv │ ├── jsonic-process-object-tree.tsv │ ├── jsonic-process-scalars.tsv │ ├── jsonic-process-text.tsv │ ├── jsonic-process-whitespace.tsv │ ├── lex-errors.tsv │ ├── utility-deep.tsv │ ├── utility-modlist.tsv │ ├── utility-str.tsv │ └── utility-strinject.tsv ├── spec.test.js ├── syntax-error.js ├── test-plugins.sh ├── tsconfig.json ├── utility.js ├── utility.test.js ├── variant.test.js └── web-all.js ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/build.yml ================================================ name: build on: push: branches: [ main ] pull_request: branches: [ main ] jobs: build: strategy: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] node-version: [24.x] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 - name: Use Node.js ${{ matrix.node-version }} uses: actions/setup-node@v4 with: node-version: ${{ matrix.node-version }} - run: npm i - run: npm run build --if-present - run: npm test build-go: strategy: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 - name: Set up Go uses: actions/setup-go@v5 with: go-version: '1.24' - name: Build run: go build ./... working-directory: ./go - name: Test run: go test ./... 
working-directory: ./go ================================================ FILE: .gitignore ================================================ lib-cov *.seed *.log *.csv *.dat *.out *.pid *.gz pids logs results npm-debug.log *~ node_modules .idea/ trial test/coverage.html coverage package-lock.json yarn.lock ================================================ FILE: CNAME ================================================ jsonic.richardrodger.com ================================================ FILE: LICENSE ================================================ The MIT License (MIT) Copyright (c) 2013-2020 Richard Rodger Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: Makefile ================================================ .PHONY: all build test clean build-ts build-go test-ts test-go clean-ts clean-go publish-go tags-go reset all: build test build: build-ts build-go test: test-ts test-go clean: clean-ts clean-go # TypeScript build-ts: npm run build test-ts: npm test clean-ts: rm -rf dist dist-test # Go build-go: cd go && go build ./... test-go: cd go && go test -v ./... clean-go: cd go && go clean # Publish Go module: make publish-go V=0.1.7 publish-go: test-go @test -n "$(V)" || (echo "Usage: make publish-go V=x.y.z" && exit 1) sed -i '' 's/^const Version = ".*"/const Version = "$(V)"/' go/jsonic.go sed -i '' 's/^Version: .*/Version: $(V)/' go/README.md git add go/jsonic.go go/README.md git commit -m "go: v$(V)" git tag go/v$(V) git push origin main go/v$(V) if command -v gh >/dev/null 2>&1; then gh release create go/v$(V) --title "go/v$(V)" --notes "Go module release v$(V)"; fi tags-go: git tag -l 'go/v*' --sort=-version:refname reset: npm run reset cd go && go clean -cache cd go && go build ./... cd go && go test -v ./... ================================================ FILE: README.md ================================================ # jsonic JSON is great. JSON parsers are not. They punish you for every missing quote and misplaced comma. You're a professional -- you know what you meant. jsonic knows too. ``` a:1,foo:bar → {"a": 1, "foo": "bar"} ``` It's a JSON parser that isn't strict. And it's very, very extensible. Available for [TypeScript/JavaScript](#install) and [Go](go/). ## Install ```bash npm install jsonic ``` ## Quick Example ```js const { Jsonic } = require('jsonic') // Relaxed syntax, just works Jsonic('a:1, b:2') // {"a": 1, "b": 2} Jsonic('x, y, z') // ["x", "y", "z"] Jsonic('{a: {b: 1, c: 2}}') // {"a": {"b": 1, "c": 2}} ``` ```ts import { Jsonic } from 'jsonic' Jsonic('a:1, b:2') // {"a": 1, "b": 2} ``` ## What Syntax Does jsonic Accept? 
More than you'd expect. All of the following parse to `{"a": 1, "b": "B"}`: ``` a:1,b:B ``` ``` a:1 b:B ``` ``` a:1 // a:2 # a:3 /* b wants * to B */ b:B ``` ``` { "a": 100e-2, '\u0062':`\x42`, } ``` That last one mixes double quotes, single quotes, backticks, unicode escapes, hex escapes, and scientific notation. It doesn't matter. jsonic handles it. Here's the full set of relaxations: - **Unquoted keys and values**: `a:1` → `{"a": 1}` - **Implicit top-level object**: `a:1,b:2` → `{"a": 1, "b": 2}` - **Implicit top-level array**: `a,b` → `["a", "b"]` - **Trailing commas**: `{a:1,b:2,}` → `{"a": 1, "b": 2}` - **Single-quoted strings**: `'hello'` works like `"hello"` - **Backtick strings**: `` `hello` `` works like `"hello"` - **Multiline strings**: backtick strings preserve newlines - **Indent-adjusted strings**: `'''...\n'''` trims leading indent - **Comments**: `//`, `#` (line), `/* */` (block) - **Object merging**: `a:{b:1},a:{c:2}` → `{"a": {"b": 1, "c": 2}}` - **Path diving**: `a:b:1,a:c:2` → `{"a": {"b": 1, "c": 2}}` - **All number formats**: `1e1 === 0xa === 0o12 === 0b1010`, plus `1_000` separators - **Auto-close at EOF**: unclosed `{` or `[` close automatically For the full syntax reference, see [doc/syntax.md](doc/syntax.md). ## Customization You might be tempted to think a lenient parser is a simple thing. It isn't. jsonic is built around a rule-based parser and a matcher-based lexer. Both are fully customizable through options and plugins. You can change almost anything about how parsing works -- and you don't have to understand the internals to do it. ### Options Let's start simple. Create a configured instance with `Jsonic.make()`: ```js const lenient = Jsonic.make({ comment: { lex: false }, // disable comments number: { hex: false }, // disable hex numbers value: { def: { yes: { val: true }, no: { val: false } } } }) lenient('yes') // true ``` Options compose. You turn things off, you turn things on, you define new value tokens. That's it. 
See [doc/options.md](doc/options.md) for the full options reference. ### Plugins When options aren't enough, plugins let you reach deeper. They can modify the grammar, add matchers, or hook into parse events: ```js function myPlugin(jsonic, options) { // Register a custom fixed token jsonic.options({ fixed: { token: { '#TL': '~' } } }) const T_TILDE = jsonic.token('#TL') // Modify grammar rules jsonic.rule('val', (rs) => { rs.open([{ s: [T_TILDE], a: (rule) => { rule.node = options.tildeValue ?? null } }]) }) } const j = Jsonic.make() j.use(myPlugin, { tildeValue: 42 }) j('~') // 42 ``` Consider what just happened: we invented a new syntax element (`~`), told the parser what to do when it encounters one, and wired it up with a configurable value. The parser itself doesn't care what symbols you use. It only cares about rules. See [doc/plugins.md](doc/plugins.md) for the plugin authoring guide. ## API Reference See [doc/api.md](doc/api.md) for the full API. The essentials: | Function / Property | Description | |---|---| | `Jsonic(src)` | Parse a string with default settings | | `Jsonic.make(options?)` | Create a configured parser instance | | `instance.use(plugin, opts?)` | Register a plugin | | `instance.rule(name, definer)` | Modify a grammar rule | | `instance.token(ref)` | Get or create a token type | | `instance.sub({lex?, rule?})` | Subscribe to parse events | | `instance.options` | Current options | ## Go Version There's a Go port with the same core parsing behavior. Same syntax, same relaxations, same results. See the [Go documentation](go/) for installation and usage. ```go import "github.com/jsonicjs/jsonic/go" result, err := jsonic.Parse("a:1, b:2") ``` ## License MIT. Copyright (c) Richard Rodger. ================================================ FILE: TODO.md ================================================ # TODO * P1; exception inside matcher needs own error code - too easy to miss! * P1; remove console colors in browser? 
option * P2; quotes are value enders - x:a"a" is an err! not 'a"a"', option? * P2: fix type chaining with jsonic.rule * P3; Consider: option to control comma null insertion * P3; YAML quoted strings: https://yaml-multiline.info/ - via options * provide in yaml plugin * P3; cli - less ambiguous merging at top level * P3; consistent use of clean on options to allow null to mean 'remove property' * P3; data file to diff exhaust changes * P3; define explicitly: p in close, r in open, behaviour * r in open means a run of opens with one close - see TOML - needs unit test * p in close means ??? - needs unit test * P3; docs: nice tree diagram of rules (generate?) * P3; docs: ref https://wiki.alopex.li/OnParsers * P3; document standard g names: open, close, step, start, end, imp, top, val, map, etc * P3; error if fixed tokens clash * P3; http://seriot.ch/projects/parsing_json.html * implement as tests * P3; if token recognized, error needs to be about token, not characters * P3; implicit lists in pair values: "a:1,2 b:3" -> {a:[1,2], b:3} - pair key terminates (A) * P3; internal errors - e.g. adding a null rulespec * P3; is s:[] needed? different from s:undefined ? * P3; line continuation ("\" at end) should be a feature of standard JSONIC text * P3; option for sparse arrays: https://dmitripavlutin.com/javascript-sparse-dense-arrays/ * P3; perhaps remove the # prefix from token names? * P3; rename tokens to be user friendly - maybe? * P3; specific error if rule name not found when parsing * P3; string format for rule def: s:'ST,NR' -> s:[ST,NR], also "s:ST,NR,p:foo,..." - needs (A) - can only be used post standard definition (thus not in grammar.ts) * P3; support BigInt numbers: 123n * P3; unit test for custom alt error: eg. { e: (r: Rule) => r.close[0] } ??? bug: r.close empty! 
================================================ FILE: bin/jsonic ================================================ #!/usr/bin/env node require('../dist/jsonic-cli').run(process.argv, console).catch((e) => console.error(e.message)) ================================================ FILE: bin/jsonic-bnf ================================================ #!/usr/bin/env node require('../dist/jsonic-bnf-cli').run(process.argv, console).catch((e) => console.error(e.message)) ================================================ FILE: dist/bnf.d.ts ================================================ import type { BnfConvertOptions, GrammarSpec, Rule } from './types'; type BnfElement = { kind: 'term'; literal: string; caseSensitive?: boolean; } | { kind: 'ref'; name: string; } | { kind: 'regex'; pattern: string; flags: string; } | { kind: 'opt'; inner: BnfElement; } | { kind: 'star'; inner: BnfElement; } | { kind: 'plus'; inner: BnfElement; } | { kind: 'rep'; min: number; max: number; inner: BnfElement; } | { kind: 'group'; alts: BnfSequence[]; }; type BnfSequence = BnfElement[]; type BnfProduction = { name: string; alts: BnfSequence[]; incremental?: boolean; probeDispatch?: ProbeDispatchSpec; probeHelper?: { vocabElements: BnfElement[]; }; nodeKind?: 'user' | 'core' | 'helper'; }; type ProbeDispatchSpec = { probeRule: string; disambiguator: BnfElement; withBranch: string; noBranch: string; }; type BnfGrammar = { productions: BnfProduction[]; ambiguities?: AmbiguityReport[]; }; type AmbiguityReport = { rule: string; altIdx: number; optIdx: number; reason: string; resolved: boolean; }; declare const bnfRules: Record<string, { bo?: (r: Rule) => void; bc?: (r: Rule) => void; open?: any[]; close?: any[]; }>; declare function eliminateLeftRecursion(grammar: BnfGrammar): BnfGrammar; declare class BnfParseError extends Error { readonly line?: number; readonly column?: number; readonly cause?: unknown; constructor(message: string, location?: { line?: number; column?: number; }, cause?: unknown); } declare function 
parseBnf(src: string): BnfGrammar; declare function emitGrammarSpec(grammar: BnfGrammar, opts?: BnfConvertOptions): GrammarSpec; declare function bnf(src: string, opts?: BnfConvertOptions): GrammarSpec; export { bnf, parseBnf, emitGrammarSpec, eliminateLeftRecursion, bnfRules, BnfParseError, }; ================================================ FILE: dist/bnf.js ================================================ "use strict"; /* Copyright (c) 2025 Richard Rodger and other contributors, MIT License */ Object.defineProperty(exports, "__esModule", { value: true }); exports.BnfParseError = exports.bnfRules = void 0; exports.bnf = bnf; exports.parseBnf = parseBnf; exports.emitGrammarSpec = emitGrammarSpec; exports.eliminateLeftRecursion = eliminateLeftRecursion; // Declarative definition of the BNF grammar itself, expressed as // jsonic rules. Each rule names its `open`/`close` alt list and, where // necessary, a `bo`/`bc` state hook for AST assembly. // // Stage 8: incremental alternatives via `name =/ alt` now fold // into the earlier production with the same name. Quoted strings // default to case-insensitive (ABNF semantics), `%s` / `%i` force // sensitivity explicitly, numeric values and repetition prefixes // work as in previous stages. 
// // Token vocabulary: // #DEF `=` (rule-definition operator) // #DEFA `=/` (incremental-alternatives operator) // #ALT `/` (alternation) // #STAR `*` (repetition separator) // #NUM decimal repetition count (matched via match.token) // #NV `%[xdb]NN[(-NN|(.NN)*)]` numeric value (match.token) // #SS `%s` (case-sensitive string prefix) // #SI `%i` (case-insensitive string prefix — same as default) // #LP `(` // #RP `)` // #OB `[` (optional-group open) // #CB `]` (optional-group close) // #TX bare identifier (jsonic default text token) // #ST quoted string literal (jsonic default string token) // #ZZ end-of-source // // Grammar: // bnf = production* // production = IDENT ('=' / '=/') alts // alts = seq ('/' seq)* // seq = element* // element = repetition? atom // repetition = NUM '*' NUM / NUM '*' / '*' NUM / '*' / NUM // atom = IDENT | STRING | ['%s' | '%i'] STRING | NUMVAL // | '(' alts ')' | '[' alts ']' // numval = '%' ('x' / 'd' / 'b') DIGITS [ '-' DIGITS | ('.' DIGITS)* ] const bnfRules = { // Top-level: accumulates productions into r.node. bnf: { bo: (r) => { r.node = []; }, open: [ { s: '#ZZ', g: 'empty' }, { p: 'prod' }, ], close: [{ s: '#ZZ' }], }, // One production per invocation; tail-recurses (r:'prod') for the // next. Inherits its parent's node (the productions array) and // appends to it in `bc` once its `alts` child has returned. // Production header is `IDENT =` — a bareword rule name followed // by the `=` definition operator. prod: { open: [ // Standalone definition: name = alts { s: '#TX #DEF', a: (r) => { r.u.name = r.o[0].val; r.u.incremental = false; }, p: 'alts', }, // Incremental alternatives: name =/ alts { s: '#TX #DEFA', a: (r) => { r.u.name = r.o[0].val; r.u.incremental = true; }, p: 'alts', }, ], close: [ // A TX followed by `=` or `=/` means the next production has // begun — back up 2 tokens so a fresh `prod` invocation sees // them. 
{ s: '#TX #DEF', b: 2, r: 'prod' }, { s: '#TX #DEFA', b: 2, r: 'prod' }, { b: 1 }, ], bc: (r) => { if (r.child && r.child.node !== undefined) { const prod = { name: r.u.name, alts: r.child.node }; if (r.u.incremental) prod.incremental = true; r.node.push(prod); } }, }, // A list of alternative sequences separated by `/` (ABNF // alternation). Owns its own array (`bo` resets it) and pushes // each seq result in `bc`. alts: { bo: (r) => { r.node = []; }, open: [{ p: 'seq' }], close: [ { s: '#ALT', p: 'seq' }, { b: 1 }, ], bc: (r) => { if (r.child && r.child.node !== undefined) { r.node.push(r.child.node); } }, }, // A (possibly empty) sequence of elements. The 2-token lookahead // `#TX #DEF` detects a following production boundary and bails // out without consuming the tokens; a plain `#TX` at the leading // position (tried later so the longer alt wins) is a rule // reference inside the current sequence. seq: { bo: (r) => { r.node = []; }, open: [ { s: '#TX #DEF', b: 2, g: 'end' }, { s: '#TX #DEFA', b: 2, g: 'end' }, { s: '#ALT', b: 1, g: 'end' }, { s: '#ZZ', b: 1, g: 'end' }, { s: '#RP', b: 1, g: 'end' }, { s: '#CB', b: 1, g: 'end' }, // Listing element-starter tokens in `s:` here ensures the // tcol-driven matcher considers each one when lexing. 
{ s: '#ST', b: 1, p: 'elem' }, { s: '#NV', b: 1, p: 'elem' }, { s: '#SS', b: 1, p: 'elem' }, { s: '#SI', b: 1, p: 'elem' }, { s: '#TX', b: 1, p: 'elem' }, { s: '#LP', b: 1, p: 'elem' }, { s: '#OB', b: 1, p: 'elem' }, { s: '#STAR', b: 1, p: 'elem' }, { s: '#NUM', b: 1, p: 'elem' }, { p: 'elem' }, ], close: [ { s: '#TX #DEF', b: 2, g: 'end' }, { s: '#TX #DEFA', b: 2, g: 'end' }, { s: '#ALT', b: 1, g: 'end' }, { s: '#ZZ', b: 1, g: 'end' }, { s: '#RP', b: 1, g: 'end' }, { s: '#CB', b: 1, g: 'end' }, { s: '#ST', b: 1, p: 'elem' }, { s: '#NV', b: 1, p: 'elem' }, { s: '#SS', b: 1, p: 'elem' }, { s: '#SI', b: 1, p: 'elem' }, { s: '#TX', b: 1, p: 'elem' }, { s: '#LP', b: 1, p: 'elem' }, { s: '#OB', b: 1, p: 'elem' }, { s: '#STAR', b: 1, p: 'elem' }, { s: '#NUM', b: 1, p: 'elem' }, { b: 1 }, ], }, // One element: an optional ABNF repetition prefix (`*A`, `1*A`, // `m*nA`, `*nA`, `m*A`, `nA`) followed by an atom. The prefix is // matched up front, stored on `r.u.min`/`r.u.max`; then `atom` is // pushed to parse the actual element body, whose result is wrapped // into an AST node and appended to the parent seq's array in close. elem: { bo: (r) => { r.u.min = 1; r.u.max = 1; }, open: [ // NUM '*' NUM — bounded repetition, followed by the atom // itself (listed via the ATOM tokenset so every atom-starter // tin — including `#NV` — is in tcol for this position). { s: '#NUM #STAR #NUM #ATOM', b: 1, a: (r) => { r.u.min = parseInt(r.o[0].src, 10); r.u.max = parseInt(r.o[2].src, 10); }, p: 'atom', }, // NUM '*' — at-least-NUM repetition followed by an atom. { s: '#NUM #STAR #ATOM', b: 1, a: (r) => { r.u.min = parseInt(r.o[0].src, 10); r.u.max = Infinity; }, p: 'atom', }, // '*' NUM — at-most-NUM repetition. { s: '#STAR #NUM #ATOM', b: 1, a: (r) => { r.u.min = 0; r.u.max = parseInt(r.o[1].src, 10); }, p: 'atom', }, // '*' — zero-or-more. { s: '#STAR #ATOM', b: 1, a: (r) => { r.u.min = 0; r.u.max = Infinity; }, p: 'atom', }, // NUM — exact repetition count. 
{ s: '#NUM #ATOM', b: 1, a: (r) => { const n = parseInt(r.o[0].src, 10); r.u.min = n; r.u.max = n; }, p: 'atom', }, // No prefix — push atom directly (min = max = 1). { p: 'atom' }, ], close: [{ // Wrap the returned atom (r.child.node) based on r.u.min/max // and append to the parent seq's array. a: (r) => { const item = r.child.node; const { min, max } = r.u; if (min === 1 && max === 1) { r.node.push(item); } else if (min === 0 && max === Infinity) { r.node.push({ kind: 'star', inner: item }); } else if (min === 1 && max === Infinity) { r.node.push({ kind: 'plus', inner: item }); } else if (min === 0 && max === 1) { r.node.push({ kind: 'opt', inner: item }); } else { r.node.push({ kind: 'rep', min, max, inner: item }); } }, }], }, // The atom body — a bareword ref, quoted-string terminal, // parenthesised group, or bracketed optional. Sets its OWN r.node // to the AST element so the enclosing `elem` rule can read it // from `r.child.node` in its close state. atom: { bo: (r) => { r.node = undefined; }, open: [ // Case-sensitive string: %s"foo" { s: '#SS #ST', a: (r) => { r.node = { kind: 'term', literal: r.o[1].val, caseSensitive: true, }; }, }, // Case-insensitive string: %i"foo" (same as bare "foo" below, // but spelled explicitly). { s: '#SI #ST', a: (r) => { r.node = { kind: 'term', literal: r.o[1].val }; }, }, // Bare quoted string — case-insensitive per ABNF default. 
{ s: '#ST', a: (r) => { r.node = { kind: 'term', literal: r.o[0].val }; }, }, { s: '#NV', a: (r) => { r.node = parseNumericValue(r.o[0].src); }, }, { s: '#TX', a: (r) => { r.node = { kind: 'ref', name: r.o[0].val }; }, }, { s: '#LP', a: (r) => { r.u.groupKind = 'group'; }, p: 'alts', }, { s: '#OB', a: (r) => { r.u.groupKind = 'opt'; }, p: 'alts', }, ], close: [ { s: '#RP', c: (r) => r.u.groupKind === 'group', a: (r) => { r.node = { kind: 'group', alts: r.child.node }; }, }, { s: '#CB', c: (r) => r.u.groupKind === 'opt', a: (r) => { r.node = { kind: 'opt', inner: { kind: 'group', alts: r.child.node }, }; }, }, // For simple atoms (string/ref), r.node is already set by // open; we want to pop without consuming the next token. // List every token that can legitimately follow an atom so // the lexer's tcol-driven match-matcher emits #NUM, #STAR, // and friends as their proper types here — otherwise the // default number-matcher would lex `1` as #NR and the // enclosing seq.close wouldn't recognise the digit as the // start of a repetition prefix. { s: '#TX', b: 1 }, { s: '#ST', b: 1 }, { s: '#NV', b: 1 }, { s: '#SS', b: 1 }, { s: '#SI', b: 1 }, { s: '#NUM', b: 1 }, { s: '#STAR', b: 1 }, { s: '#LP', b: 1 }, { s: '#OB', b: 1 }, { s: '#RP', b: 1 }, { s: '#CB', b: 1 }, { s: '#ALT', b: 1 }, { s: '#DEF', b: 1 }, { s: '#ZZ', b: 1 }, { b: 1 }, ], }, }; exports.bnfRules = bnfRules; // Lazily built jsonic instance that parses BNF source. Deferred // construction avoids a circular-import failure at module load time. let _bnfParser = null; function getBnfParser() { if (_bnfParser) return _bnfParser; const { Jsonic } = require('./jsonic'); const j = Jsonic.make({ rule: { start: 'bnf' }, fixed: { token: { // Clear JSON-oriented defaults we're not using so `:`, `,` // and `{` have no special meaning inside BNF source. '#OS': null, '#CS': null, '#CL': null, '#CA': null, // Re-map `#OB` / `#CB` from JSON's `{` / `}` to ABNF's // `[` / `]` optional-group brackets. 
'#OB': '[', '#CB': ']', '#DEF': '=', // `=/` — ABNF's incremental-alternatives operator. Longer // than `=`, so jsonic's longest-match-wins fixed matcher // tries it first. '#DEFA': '=/', '#ALT': '/', '#STAR': '*', '#LP': '(', '#RP': ')', }, }, match: { token: { // ABNF repetition counts: decimal integers. '#NUM': /^[0-9]+/, // ABNF numeric value notation: // %xNN single hex code point // %dNN single decimal code point // %bNN single binary code point // %xNN-NN hex range // %xNN.NN.NN concatenated hex code points (= string) // Digits are permissive (hex covers the decimal / binary // subsets); `parseNumericValue` re-validates against the // actual base. '#NV': /^%[xdbXDB][0-9a-fA-F]+(?:[-.][0-9a-fA-F]+)*/, // `%s` / `%i` prefixes on a quoted string. The lookahead // requires `"` so they don't steal the `%` of `%xNN`. '#SS': /^%[sS](?=")/, '#SI': /^%[iI](?=")/, }, }, tokenSet: { // Tokens that can legitimately open an atom. Declaring this // as a set lets elem.open use `#ATOM` inside its `s:` patterns // — that way the tcol at the atom-starter position includes // every matcher tin (notably #NV), so the lexer doesn't fall // through to #TX when the actual atom is `%xNN`. ATOM: ['#ST', '#NV', '#TX', '#LP', '#OB', '#SS', '#SI'], }, comment: { // ABNF uses `;` to start a line comment. Override jsonic's // default `hash` definition (which used `#`) and disable the // other comment styles so `//` and `/* */` aren't confused // with the alternation operator. def: { hash: { line: true, start: ';', lex: true, eatline: false }, slash: null, multi: null, }, }, }); // Drop the default JSON rules — they would otherwise compete with // ours for the starting token set. 
const existing = j.rule(); for (const name of Object.keys(existing)) { j.rule(name, null); } for (const name of Object.keys(bnfRules)) { const spec = bnfRules[name]; j.rule(name, (rs) => { if (spec.bo) rs.bo(spec.bo); if (spec.bc) rs.bc(spec.bc); if (spec.open) rs.open(spec.open); if (spec.close) rs.close(spec.close); }); } _bnfParser = (src) => j(src); return _bnfParser; } // Rewrite a grammar so that the only element kinds remaining are // `term` and `ref`. Each `X?`, `X*`, `X+` occurrence is replaced by a // reference to a newly-generated helper production that expresses the // same language in plain BNF. // Eliminate left recursion — both direct (P → P α) and indirect // (P → Q α, Q → P β) — via Paull's algorithm. // // Order the productions, and for each A_i walk back over A_1..A_{i-1} // inlining any leading reference into A_i's alternatives. Once the // only remaining leading self-reference on A_i is direct, rewrite to // the iterative form // P → (β_1 | … | β_m) (α_1 | … | α_n)* // which jsonic's push-down parser can execute without re-entering P // at the same source position. // // The substitution step can duplicate alternatives, so pathological // grammars will enlarge — caller is expected to keep the grammar // reasonably small (this is a first-step converter, not a full // toolchain). function eliminateLeftRecursion(grammar) { const originalOrder = grammar.productions.map((p) => p.name); // Order productions so that rules referenced at a leading position // are processed before the rules that reference them. Paull's // substitution inlines A_j's alts into A_i for j < i, so putting // dependencies first is what makes nullable-prefixed hidden left // recursion reachable by the substitution step. // // Note: substitution here always runs, even for cycle-free // grammars. 
The reason is pragmatic rather than theoretical — // populating tcol from multi-token altPrefixes (needed so the // lexer's regex matchers fire with the right tin in nested // contexts) requires the full inlined shape. A future refactor // could compute tcol from the un-substituted grammar and only // apply Paull's to the cyclic SCCs, which would preserve more // named-rule structure in the emitted AST. let prods = topoOrderForPaull(grammar.productions.map((p) => ({ name: p.name, alts: p.alts.map((a) => a.slice()), nodeKind: p.nodeKind, }))); for (let i = 0; i < prods.length; i++) { // For each earlier production A_j, inline any alternative of // A_i whose leading element is a reference to A_j. for (let j = 0; j < i; j++) { prods[i] = substituteLeadingRef(prods[i], prods[j]); } prods[i] = eliminateDirectLeftRec(prods[i]); } // Restore the caller's declared order, so the start rule still // ends up first (and the user sees their rule names in a // recognisable order when inspecting the spec). const byName = new Map(prods.map((p) => [p.name, p])); const ordered = []; for (const name of originalOrder) { const p = byName.get(name); if (p) { ordered.push(p); byName.delete(name); } } // Any generated productions created during substitution (none in // the current implementation) would fall through here. for (const p of byName.values()) ordered.push(p); return { productions: ordered }; } // Tarjan-flavoured SCC scan over the leading-reference graph: // returns the names of productions that participate in at least one // cycle (self-loop or longer). Used to scope Paull's substitution to // only the rules that actually need it. function findLeadingRefCycleMembers(prods) { const byName = new Map(prods.map((p) => [p.name, p])); const leadingRefs = (p) => { const out = []; for (const alt of p.alts) { if (alt.length === 0) continue; const first = alt[0]; if (first.kind === 'ref' && byName.has(first.name)) out.push(first.name); } return out; }; // Tarjan's SCC algorithm. 
let index = 0; const stack = []; const onStack = new Set(); const indices = new Map(); const lowlinks = new Map(); const cyclic = new Set(); function strongConnect(name) { indices.set(name, index); lowlinks.set(name, index); index++; stack.push(name); onStack.add(name); const prod = byName.get(name); if (prod) { for (const target of leadingRefs(prod)) { if (!indices.has(target)) { strongConnect(target); lowlinks.set(name, Math.min(lowlinks.get(name), lowlinks.get(target))); } else if (onStack.has(target)) { lowlinks.set(name, Math.min(lowlinks.get(name), indices.get(target))); } } } if (lowlinks.get(name) === indices.get(name)) { // Pop the SCC. If it has more than one member, or it's a // single member with a self-loop, mark as cyclic. const scc = []; let w; do { w = stack.pop(); onStack.delete(w); scc.push(w); } while (w !== name); const isCycle = scc.length > 1 || (scc.length === 1 && leadingRefs(byName.get(scc[0])).includes(scc[0])); if (isCycle) for (const n of scc) cyclic.add(n); } } for (const p of prods) { if (!indices.has(p.name)) strongConnect(p.name); } return cyclic; } // Topological order over the "leading-position reference" graph: // an edge A → B exists when A has at least one alternative whose // first element is a reference to B. Cycles are preserved as-is // (Paull's handles them via the substitution + direct-LR rewrite). function topoOrderForPaull(prods) { const byName = new Map(prods.map((p) => [p.name, p])); const colour = new Map(); // 0 unseen, 1 in-progress, 2 done const order = []; function visit(name) { const c = colour.get(name) ?? 
0; if (c !== 0) return; // already seen or on the current path colour.set(name, 1); const p = byName.get(name); if (p) { for (const alt of p.alts) { if (alt.length > 0 && alt[0].kind === 'ref' && byName.has(alt[0].name)) { visit(alt[0].name); } } colour.set(name, 2); order.push(p); } else { colour.set(name, 2); } } for (const p of prods) visit(p.name); return order; } // For every alternative of `target` that begins with a ref to // `source`, replace that alt with |source.alts| copies — each one // with the leading source-ref expanded to one of source's alts. function substituteLeadingRef(target, source) { const newAlts = []; for (const alt of target.alts) { if (alt.length > 0 && alt[0].kind === 'ref' && alt[0].name === source.name) { const tail = alt.slice(1); for (const srcAlt of source.alts) { newAlts.push([...srcAlt, ...tail]); } } else { newAlts.push(alt); } } return { name: target.name, alts: newAlts, nodeKind: target.nodeKind }; } // Rewrite a single production's direct left recursion to its // iterative equivalent. Equivalent to the previous version of // `eliminateLeftRecursion` but scoped to one production. function eliminateDirectLeftRec(prod) { const recursive = []; const seeds = []; for (const alt of prod.alts) { if (alt.length > 0 && alt[0].kind === 'ref' && alt[0].name === prod.name) { recursive.push(alt.slice(1)); } else { seeds.push(alt); } } // A trivial recursive alt `[P]` (P ::= P, nothing else) would // derive P from P with no progress — semantically a no-op. Drop // them silently, since nullable-prefix expansion in Paull's can // legitimately produce them and erroring would hide a legal // grammar. const nonTrivialRecursive = recursive.filter((t) => t.length > 0); if (nonTrivialRecursive.length === 0) { // Either no recursion at all, or only trivial self-refs — keep // just the seeds. 
return { name: prod.name, alts: seeds, nodeKind: prod.nodeKind }; } if (seeds.length === 0) { throw new Error(`bnf: rule '${prod.name}' is purely left-recursive ` + `(no seed alternative); cannot eliminate`); } const seedElement = seeds.length === 1 && seeds[0].length === 1 ? seeds[0][0] : { kind: 'group', alts: seeds }; const tailInner = nonTrivialRecursive.length === 1 && nonTrivialRecursive[0].length === 1 ? nonTrivialRecursive[0][0] : { kind: 'group', alts: nonTrivialRecursive }; return { name: prod.name, alts: [[seedElement, { kind: 'star', inner: tailInner }]], nodeKind: prod.nodeKind, }; } function desugar(grammar) { const extra = []; const used = new Set(grammar.productions.map((p) => p.name)); function freshName(hint) { // Collision-avoiding name like `_gen1`, `_gen2`, … let i = extra.length; let name; do { i++; name = `_gen${i}_${hint}`; } while (used.has(name)); used.add(name); return name; } function desugarAlt(alt) { return alt.map(desugarElement); } function desugarElement(el) { if (el.kind === 'term' || el.kind === 'ref' || el.kind === 'regex') { return el; } if (el.kind === 'group') { // Recurse into the group's alts so nested sugar is flattened, // then emit a helper production whose body is those alts. const innerAlts = el.alts.map((a) => desugarAlt(a)); const name = freshName('group'); extra.push({ name, alts: innerAlts, nodeKind: 'helper' }); return { kind: 'ref', name }; } // `opt`, `star`, `plus` all wrap a single inner element. const inner = desugarElement(el.inner); const hint = inner.kind === 'ref' ? inner.name : inner.kind === 'term' ? 
'term' : 'x'; if (el.kind === 'opt') { // H ::= inner | (empty) const name = freshName('opt_' + hint); extra.push({ name, alts: [[inner], []], nodeKind: 'helper' }); return { kind: 'ref', name }; } if (el.kind === 'star') { // H = inner H / (empty) const name = freshName('star_' + hint); const selfRef = { kind: 'ref', name }; extra.push({ name, alts: [[inner, selfRef], []], nodeKind: 'helper' }); return { kind: 'ref', name }; } if (el.kind === 'plus') { // H = inner Tail where Tail = inner Tail / (empty) const tailName = freshName('star_' + hint); const plusName = freshName('plus_' + hint); const tailRef = { kind: 'ref', name: tailName }; extra.push({ name: tailName, alts: [[inner, tailRef], []], nodeKind: 'helper', }); extra.push({ name: plusName, alts: [[inner, tailRef]], nodeKind: 'helper', }); return { kind: 'ref', name: plusName }; } // ABNF m*n bounded repetition. Desugars to a concatenation of // `min` mandatory copies of the inner element followed by a // tail that accepts up to `(max - min)` more. // m*n A => A{m} [A[A[A...[A]]]] (nested optionals) // m* A => A{m} *A (mandatory prefix + star) // *n A => [A [A ... [A]]] (n nested optionals) // The helper's single alt has `min` repetitions of inner, then // either a star-helper for (min, ∞) or `max - min` nested // optionals for a finite range. const { min, max } = el; const repName = freshName('rep_' + hint); const repAlt = []; for (let i = 0; i < min; i++) repAlt.push(inner); if (max === Infinity) { // Tail: unbounded star of inner. const tailStarName = freshName('star_' + hint); const tailStarRef = { kind: 'ref', name: tailStarName }; extra.push({ name: tailStarName, alts: [[inner, tailStarRef], []], nodeKind: 'helper', }); repAlt.push(tailStarRef); } else { // Nest (max - min) optionals: [A [A [A ...]]]. let nested = []; for (let i = 0; i < max - min; i++) { // Wrap current `nested` into an optional and prepend `inner`. 
if (nested.length === 0) { nested = [{ kind: 'opt', inner: { kind: 'group', alts: [[inner]] } }]; } else { nested = [{ kind: 'opt', inner: { kind: 'group', alts: [[inner, ...nested]] }, }]; } } repAlt.push(...nested); } extra.push({ name: repName, alts: [desugarAlt(repAlt)], nodeKind: 'helper' }); return { kind: 'ref', name: repName }; } const rewritten = grammar.productions.map((p) => { const out = { name: p.name, alts: p.alts.map(desugarAlt), nodeKind: p.nodeKind, }; // Probe-dispatch flags survive desugar unchanged — the emitter // routes around the standard alt-compilation path for these. if (p.probeDispatch) out.probeDispatch = p.probeDispatch; if (p.probeHelper) out.probeHelper = p.probeHelper; return out; }); return { productions: [...rewritten, ...extra] }; } // Error raised when the BNF source itself can't be parsed. Surfaces // line and column from the underlying jsonic error so the caller can // report them directly. The original error is kept on `.cause`. class BnfParseError extends Error { constructor(message, location, cause) { super(message); this.name = 'BnfParseError'; this.line = location?.line; this.column = location?.column; this.cause = cause; } } exports.BnfParseError = BnfParseError; // Parse BNF source into a grammar AST via the jsonic-based parser. function parseBnf(src) { const parser = getBnfParser(); let productions; try { productions = parser(src) ?? []; } catch (e) { // JsonicError carries `lineNumber` / `columnNumber`; fall back to // ad-hoc extraction from the error message otherwise. const line = e?.lineNumber ?? e?.row; const column = e?.columnNumber ?? e?.col; const loc = (line != null && column != null) ? ` at line ${line}, column ${column}` : ''; const raw = e?.message ? 
String(e.message).split('\n')[0] : String(e); throw new BnfParseError(`bnf: parse error${loc}: ${raw}`, { line, column }, e); } if (!Array.isArray(productions) || productions.length === 0) { throw new BnfParseError('bnf: no productions found'); } const merged = mergeIncrementals(productions); return { productions: withCoreRules(merged) }; } // RFC 5234 Appendix B.1 core rules. Parsed lazily on first use // and spliced into any user grammar that references them but // doesn't define them locally. const CORE_RULES_ABNF = ` ALPHA = %x41-5A / %x61-7A BIT = "0" / "1" CHAR = %x01-7F CR = %x0D LF = %x0A CRLF = CR LF CTL = %x00-1F / %x7F DIGIT = %x30-39 DQUOTE = %x22 HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" HTAB = %x09 OCTET = %x00-FF SP = %x20 VCHAR = %x21-7E WSP = SP / HTAB `; let _coreRules = null; function getCoreRules() { if (_coreRules) return _coreRules; const parser = getBnfParser(); const raw = parser(CORE_RULES_ABNF); // Core rules flatten to `src` in the output AST — they're // character-class bricks, not structural nodes users want to see // one-per-matched-character. for (const p of raw) p.nodeKind = 'core'; _coreRules = new Map(raw.map((p) => [p.name, p])); return _coreRules; } function refsIn(alt, out) { for (const el of alt) { if (el.kind === 'ref') out.add(el.name); else if (el.kind === 'opt' || el.kind === 'star' || el.kind === 'plus' || el.kind === 'rep') { refsIn([el.inner], out); } else if (el.kind === 'group') { for (const a of el.alts) refsIn(a, out); } } } // Add each RFC 5234 core rule that the user's grammar references // but doesn't define locally. Resolution is transitive: if the // user mentions HEXDIG, DIGIT is pulled in too. User definitions // always win — a local `DIGIT = …` is left untouched. 
// `user` is the array of productions parsed from the caller's grammar.
// Returns a NEW array: the user's productions in their original order,
// followed by whichever core productions had to be pulled in. `user`
// itself is not modified.
function withCoreRules(user) {
    const core = getCoreRules();
    const defined = new Set(user.map((p) => p.name));
    const needed = new Set();
    // Record every rule name referenced anywhere in `prods`.
    const scan = (prods) => {
        for (const p of prods) {
            for (const alt of p.alts)
                refsIn(alt, needed);
        }
    };
    scan(user);
    const out = [];
    // Transitively add core rules, in declaration order. Each added
    // core rule is scanned in turn, so the loop repeats until a full
    // pass adds nothing new.
    let added = true;
    while (added) {
        added = false;
        for (const [name, prod] of core) {
            if (defined.has(name))
                continue;
            if (!needed.has(name))
                continue;
            defined.add(name);
            out.push(prod);
            scan([prod]);
            added = true;
        }
    }
    return [...user, ...out];
}
// Fold every `name =/ alt` production into the earlier production
// with the same name by appending its alternatives. Throws if an
// incremental references a name that hasn't been defined yet — ABNF
// requires the base production to appear first.
//
// `prods` is the production list in source order. Returns a new list
// holding one fresh `{ name, alts, nodeKind? }` object per base
// production. The input productions are left untouched: each merged
// production gets its own copy of the `alts` array, so appending the
// incremental alternatives never grows the caller's arrays.
function mergeIncrementals(prods) {
    const out = [];
    const byName = new Map();
    for (const p of prods) {
        if (p.incremental) {
            const base = byName.get(p.name);
            if (!base) {
                throw new BnfParseError(`bnf: '${p.name} =/ …' has no earlier '${p.name} = …' to extend`);
            }
            // `base` is always one of our own copies (see below), so
            // this push cannot leak back into `prods`.
            base.alts.push(...p.alts);
            continue;
        }
        // Strip the (absent) flag on a cleanly-written production so
        // downstream code never sees it. The alts array is copied
        // (shallowly) so later `=/` merges stay local to the result.
        const clean = { name: p.name, alts: [...p.alts] };
        if (p.nodeKind)
            clean.nodeKind = p.nodeKind;
        out.push(clean);
        byName.set(p.name, clean);
    }
    return out;
}
// -- Probe-dispatch analyser + rewriter -----------------------------
//
// ABNF has a large family of grammars that aren't LL(k) for any
// bounded k. The canonical example is RFC 3986's `authority`:
//
//   authority = [ userinfo "@" ] host [ ":" port ]
//   userinfo  = *( unreserved / pct-encoded / sub-delims / ":" )
//   host      = IP-literal / IPv4address / reg-name
//   reg-name  = *( unreserved / pct-encoded / sub-delims )
//
// `userinfo` and `reg-name` share a character vocabulary, so a
// FIRST-set dispatcher can't decide which branch the optional
// `[ userinfo "@" ]` belongs to — the disambiguating `@` can be
// arbitrarily far from the start.
//
// For the common pattern `[X D] Y` — an optional group whose body
// ends with a terminal D, followed by a sequence Y whose leading
// terminals overlap with X's — we handle the ambiguity by rewriting
// the rule to a probe+phase-retry dispatcher:
//
//   1. On first entry (phase 0), mark the token position and push a
//      failure-proof probe rule that greedily consumes every token
//      in the joint vocabulary of X and Y.
//   2. When the probe returns, peek ctx.t[0]:
//        D seen   → phase = 1 (take the `X D Y` branch)
//        D absent → phase = 2 (take the `Y` branch)
//      Rewind to the mark and `r:` back into the dispatcher.
//   3. The dispatcher's open has a `c:`-guarded alt for each phase
//      that pushes the corresponding committed branch.
//
// The primitives used (`r:`, `k:`, `c:`, `ctx.mark`, `ctx.rewind`,
// `ctx.t`) are the same building blocks rules/parser already exposes
// — no new jsonic machinery is needed.
// Predicate: element is `[ X D ]` where X is one or more elements
// and D is a terminal literal or a regex terminal.
// Returns `{ xSeq, disambiguator }` when `el` is an `opt` wrapping a
// single-alternative group of at least two elements whose last
// element is a `term` or `regex`; returns null otherwise. `xSeq` is
// everything before that trailing terminal.
function isProbeableOpt(el) {
    if (el.kind !== 'opt')
        return null;
    const { inner } = el;
    if (inner.kind !== 'group' || inner.alts.length !== 1)
        return null;
    const [seq] = inner.alts;
    if (seq.length < 2)
        return null;
    const xSeq = seq.slice(0, -1);
    const disambiguator = seq[seq.length - 1];
    const endsInTerminal = disambiguator.kind === 'term' || disambiguator.kind === 'regex';
    return endsInTerminal ? { xSeq, disambiguator } : null;
}
// Union of every terminal reachable by walking an element's subtree,
// following refs transitively. Cycles are broken by the visited set.
// Returns terminals as BnfElements so the caller isn't tied to the
// emitter's token-allocation step.
//
// `out` is a Map (terminal key -> first element seen with that key)
// and `visited` a Set of rule names already expanded; both are
// mutated in place.
function collectTerminalVocabElements(el, grammar, out, visited) {
    // First element seen for a key wins; later duplicates are ignored.
    const remember = (key) => {
        if (!out.has(key))
            out.set(key, el);
    };
    const walkAlts = (alts) => {
        for (const alt of alts) {
            for (const sub of alt) {
                collectTerminalVocabElements(sub, grammar, out, visited);
            }
        }
    };
    switch (el.kind) {
        case 'term':
            remember(termKey(el));
            break;
        case 'regex':
            remember(regexKey(el));
            break;
        case 'ref': {
            if (visited.has(el.name))
                break;
            visited.add(el.name);
            // Unknown rule names contribute nothing.
            const target = grammar.productions.find((p) => p.name === el.name);
            if (target)
                walkAlts(target.alts);
            break;
        }
        case 'opt':
        case 'star':
        case 'plus':
        case 'rep':
            collectTerminalVocabElements(el.inner, grammar, out, visited);
            break;
        case 'group':
            walkAlts(el.alts);
            break;
        default:
            break;
    }
}
// Terminal vocabulary of a whole element sequence: one shared result
// map and one shared visited set across all elements of `seq`.
function collectSeqVocabElements(seq, grammar) {
    const vocab = new Map();
    const seenRules = new Set();
    seq.forEach((el) => collectTerminalVocabElements(el, grammar, vocab, seenRules));
    return vocab;
}
// True when the two maps have at least one key in common.
function mapsOverlap(a, b) {
    return Array.from(a.keys()).some((key) => b.has(key));
}
// Rewrite every ambiguous `[X D] Y` subsequence in `grammar` into a
// probe-dispatch pattern.
// The grammar at this point still has `opt`, `group`, `star`, `plus`,
// `rep` sugar — intentionally, since that's where the pattern is easy
// to recognise. Runs BEFORE token allocation; probe metadata stores
// BnfElements, and the emitter resolves them to token names at emit
// time.
//
// Returns `{ productions, ambiguities }`: the (possibly rewritten)
// input productions followed by every synthesised helper, plus the
// ambiguity report list (the input's `ambiguities` array when present,
// extended in place).
function rewriteProbeDispatches(grammar) {
    const reports = grammar.ambiguities ?? [];
    const helpers = [];
    const takenNames = new Set(grammar.productions.map((p) => p.name));
    // Reserve `hint` itself, or `hint1`, `hint2`, … if already taken.
    const freshName = (hint) => {
        let candidate = hint;
        for (let n = 1; takenNames.has(candidate); n++) {
            candidate = hint + n;
        }
        takenNames.add(candidate);
        return candidate;
    };
    // Synthesise the probe / with / no / dispatcher helpers for one
    // `[X D] Y` occurrence and return the dispatcher's name.
    const synthesise = (prod, optIdx, info, ySeq, vocab) => {
        const dispatchName = freshName(`${prod.name}$pd${optIdx}`);
        const probeName = freshName(`${dispatchName}$probe`);
        const withName = freshName(`${dispatchName}$with`);
        const noName = freshName(`${dispatchName}$no`);
        // The probe helper.
        helpers.push({
            name: probeName,
            alts: [],
            probeHelper: { vocabElements: [...vocab.values()] },
            nodeKind: 'helper',
        });
        // The committed branches. `with` = X D Y, `no` = Y.
        helpers.push({
            name: withName,
            alts: [[...info.xSeq, info.disambiguator, ...ySeq]],
            nodeKind: 'helper',
        });
        helpers.push({
            name: noName,
            alts: [ySeq],
            nodeKind: 'helper',
        });
        // The dispatcher. Its `alts` list is a "virtual" spec — two
        // ref-only alts — that exists solely to feed computeFirstSets
        // the right FIRST/nullable answers (FIRST = FIRST(with) ∪
        // FIRST(no)). The emitter checks `probeDispatch` first and
        // emits the phase-retry body instead of compiling `alts`.
        helpers.push({
            name: dispatchName,
            alts: [
                [{ kind: 'ref', name: withName }],
                [{ kind: 'ref', name: noName }],
            ],
            probeDispatch: {
                probeRule: probeName,
                disambiguator: info.disambiguator,
                withBranch: withName,
                noBranch: noName,
            },
            nodeKind: 'helper',
        });
        return dispatchName;
    };
    const productions = grammar.productions.map((prod) => {
        let touched = false;
        const newAlts = prod.alts.map((alt, altIdx) => {
            const resultAlt = [];
            for (let i = 0; i < alt.length; i++) {
                const el = alt[i];
                const info = isProbeableOpt(el);
                if (!info) {
                    resultAlt.push(el);
                    continue;
                }
                const ySeq = alt.slice(i + 1);
                if (ySeq.length === 0) {
                    // `[X D]` is the last thing in the alt — nothing
                    // follows, so there's nothing to disambiguate
                    // against. Standard emit.
                    resultAlt.push(el);
                    continue;
                }
                const xVocab = collectSeqVocabElements(info.xSeq, grammar);
                const yVocab = collectSeqVocabElements(ySeq, grammar);
                if (!mapsOverlap(xVocab, yVocab)) {
                    // No shared terminals between the optional and the
                    // tail, so the normal FIRST-based dispatcher can
                    // decide. No rewrite needed.
                    resultAlt.push(el);
                    continue;
                }
                // Joint vocab: union of everything the probe might
                // need to consume, minus the disambiguator so the
                // probe stops on it and the peek works.
                const vocab = new Map([...xVocab, ...yVocab]);
                const d = info.disambiguator;
                let dKey = null;
                if (d.kind === 'term') {
                    dKey = termKey(d);
                }
                else if (d.kind === 'regex') {
                    dKey = regexKey(d);
                }
                if (dKey)
                    vocab.delete(dKey);
                const dispatchName = synthesise(prod, i, info, ySeq, vocab);
                reports.push({
                    rule: prod.name,
                    altIdx,
                    optIdx: i,
                    reason: `optional prefix shares vocabulary with tail`,
                    resolved: true,
                });
                resultAlt.push({ kind: 'ref', name: dispatchName });
                touched = true;
                // Everything that followed the opt now lives inside
                // the dispatcher (withBranch / noBranch), so the rest
                // of the alt is dropped.
                break;
            }
            return resultAlt;
        });
        // Untouched productions are passed through by identity.
        return touched
            ? { name: prod.name, alts: newAlts, nodeKind: prod.nodeKind }
            : prod;
    });
    return {
        productions: [...productions, ...helpers],
        ambiguities: reports,
    };
}
// Emit a probe helper production.
// A self-looping rule that matches any one of the vocab tokens and
// restarts; a final empty-alt fallback ensures the rule NEVER fails —
// if the current lookahead isn't in the vocab (or we're at #ZZ), the
// rule pops cleanly. This is the failure-proof property the probe
// pattern relies on.
//
// Writes `ruleSpec[prod.name]`. Vocab elements that have no allocated
// token in `literals` / `regexTokens` are skipped.
function emitProbeHelper(prod, tag, ruleSpec, literals, regexTokens) {
    const elems = prod.probeHelper.vocabElements;
    const opens = [];
    for (const el of elems) {
        const tok = el.kind === 'term' ? literals.get(termKey(el))
            : el.kind === 'regex' ? regexTokens.get(regexKey(el))
                : undefined;
        if (tok)
            opens.push({ s: tok, r: prod.name, g: tag });
    }
    // Empty fallback — pops without consuming anything. Must be last.
    opens.push({ g: tag });
    ruleSpec[prod.name] = { open: opens };
}
// Emit a probe-dispatch production. Encodes the three-phase retry
// pattern; uses only standard jsonic primitives (r:, p:, c:, k:,
// ctx.mark/rewind/t).
//
// Writes `ruleSpec[prod.name]` and registers its condition / action
// functions in `refs`. Throws if the disambiguator has no allocated
// token.
function emitProbeDispatch(prod, tag, ruleSpec, refs, literals, regexTokens) {
    const { probeRule, disambiguator, withBranch, noBranch } = prod.probeDispatch;
    const disambiguatorToken = disambiguator.kind === 'term' ? literals.get(termKey(disambiguator))
        : disambiguator.kind === 'regex' ? regexTokens.get(regexKey(disambiguator))
            : undefined;
    if (!disambiguatorToken) {
        throw new Error(`bnf: probe-dispatch rule '${prod.name}' has unresolvable ` +
            `disambiguator (kind=${disambiguator.kind})`);
    }
    const initMark = refs.register((r, ctx) => {
        r.k.pd_phase = 0;
        r.k.pd_mark = ctx.mark();
    });
    const decide = refs.register((r, ctx) => {
        // ctx.t[0] is the first token the probe didn't consume. The probe
        // never fails, so this always reflects a real position.
        const peek = ctx.t[0];
        ctx.rewind(r.k.pd_mark);
        const matched = peek && peek.name === disambiguatorToken;
        r.k.pd_phase = matched ? 1 : 2;
    });
    const bubble = refs.register((r) => {
        if (r.child && r.child.node !== undefined)
            r.node = r.child.node;
    });
    ruleSpec[prod.name] = {
        open: [
            // Phase 0 — first pass: mark and probe. `!pd_phase` is true
            // both before initialisation (undefined) and at 0.
            {
                c: refs.register((r) => !r.k.pd_phase),
                a: initMark,
                p: probeRule,
                g: tag,
            },
            // Phase 1 — disambiguator was seen: commit to X D Y.
            {
                c: refs.register((r) => r.k.pd_phase === 1),
                p: withBranch,
                g: tag,
            },
            // Phase 2 — disambiguator was not seen: commit to Y alone.
            {
                c: refs.register((r) => r.k.pd_phase === 2),
                p: noBranch,
                g: tag,
            },
        ],
        close: [
            // Phase 0 close: decide phase based on peek, rewind, retry self.
            {
                c: refs.register((r) => r.k.pd_phase === 0),
                a: decide,
                r: prod.name,
                g: tag,
            },
            // Phase 1 / 2 close: lift the committed child's node up.
            { a: bubble, g: tag },
        ],
    };
}
// Convert a BNF grammar AST into a jsonic GrammarSpec.
//
// `opts.start` names the start rule (default: the first production);
// `opts.tag` is the group tag put on every emitted alt (default
// 'bnf'). Returns `{ ref, options, rule }`. Throws when the grammar
// has no productions and no explicit start rule was supplied.
function emitGrammarSpec(grammar, opts) {
    // Fail with a clear message instead of a TypeError from reading
    // `.name` of a missing first production.
    if (opts?.start == null && grammar.productions.length === 0) {
        throw new Error('bnf: cannot emit a grammar spec — the grammar has no productions ' +
            'and no start rule was given');
    }
    const start = opts?.start ?? grammar.productions[0].name;
    const tag = opts?.tag ?? 'bnf';
    // Eliminate left recursion (direct and indirect) by rewriting to
    // the iterative form P → β (α)*, then detect ambiguous `[X D] Y`
    // optional-prefix patterns and rewrite them into probe-dispatch
    // helpers; finally flatten any EBNF sugar (`?`, `*`, `+`,
    // grouping) into plain BNF.
    grammar = eliminateLeftRecursion(grammar);
    grammar = rewriteProbeDispatches(grammar);
    grammar = desugar(grammar);
    // Allocate a fixed token for each unique literal, and a match
    // token for each unique regex terminal. Literals are keyed by
    // (literal, effective-case-sensitivity) so a `%s"foo"` (sensitive)
    // and a bare `"foo"` (insensitive) produce distinct tokens.
    const literals = new Map(); // literal-key -> token name
    const regexTokens = new Map(); // regex key -> token name
    const usedNames = new Set();
    const fixedTokens = {};
    const matchTokens = {};
    // Allocate (once per key) the token for a terminal element.
    // Non-terminal elements are ignored.
    const allocTerminalToken = (el) => {
        if (el.kind === 'term') {
            const key = termKey(el);
            if (literals.has(key))
                return;
            const name = allocTokenName(el.literal, usedNames);
            literals.set(key, name);
            if (isEffectivelyCaseSensitive(el)) {
                fixedTokens[name] = el.literal;
            }
            else {
                // Insensitive literal with at least one letter — emit
                // as an anchored regex with the `i` flag. Mark the
                // matcher `eager$` so jsonic's lexer fires it even
                // when the current rule's tcol doesn't list its tin.
                const re = new RegExp('^' + escapeRegExp(el.literal), 'i');
                re.eager$ = true;
                matchTokens[name] = re;
            }
        }
        else if (el.kind === 'regex') {
            const key = regexKey(el);
            if (regexTokens.has(key))
                return;
            const name = allocTokenName('rx_' + el.pattern, usedNames);
            regexTokens.set(key, name);
            matchTokens[name] = new RegExp('^' + el.pattern, el.flags);
        }
    };
    for (const prod of grammar.productions) {
        for (const alt of prod.alts) {
            for (const el of alt)
                allocTerminalToken(el);
        }
        // Probe-helper productions store their vocab as BnfElements —
        // walk those too so the required tokens get allocated.
        if (prod.probeHelper) {
            for (const el of prod.probeHelper.vocabElements)
                allocTerminalToken(el);
        }
    }
    const knownRules = new Set(grammar.productions.map((p) => p.name));
    const { firstSets, nullable } = computeFirstSets(grammar, literals, regexTokens);
    const refs = new RefRegistry();
    const ruleSpec = {};
    for (const prod of grammar.productions) {
        if (prod.probeHelper) {
            emitProbeHelper(prod, tag, ruleSpec, literals, regexTokens);
            continue;
        }
        if (prod.probeDispatch) {
            emitProbeDispatch(prod, tag, ruleSpec, refs, literals, regexTokens);
            continue;
        }
        // Standard path: a (possibly single-segment) set of alternatives
        // compiled to jsonic alts. Simple alts collapse into `open` alts
        // directly; multi-segment alts emit a chain of aux rules.
        emitProduction(prod, grammar, literals, regexTokens, knownRules, tag, ruleSpec, firstSets, nullable, refs);
    }
    // Wrap the user-visible start rule in a synthetic rule that
    // explicitly consumes #ZZ. Without this, a user rule that pops
    // without matching the end-of-source token lets trailing content
    // slip past jsonic's post-loop endtkn check (the lookahead buffer
    // outlives the parse loop).
    const startWrapper = '__start__';
    ruleSpec[startWrapper] = {
        open: [{
                p: start,
                g: tag,
            }],
        close: [{
                s: '#ZZ',
                // Return the start rule's AST node directly — the `__start__`
                // wrapper exists only to ensure end-of-source gets consumed.
                // The caller of `jsonic(src)` receives the tagged user-rule
                // node (e.g. `{rule: 'URI', src, kids: [...]}`) unadorned.
                a: refs.register((r) => {
                    if (r.child && r.child.node !== undefined) {
                        r.node = r.child.node;
                    }
                }),
                g: tag,
            }],
    };
    const options = {
        fixed: { token: fixedTokens },
        rule: { start: startWrapper },
    };
    if (Object.keys(matchTokens).length > 0) {
        options.match = { token: matchTokens };
    }
    const spec = {
        ref: refs.map,
        options,
        rule: ruleSpec,
    };
    return spec;
}
// Break an alternative into segments. Each segment is a (possibly
// empty) run of terminal tokens followed by at most one rule
// reference. A single-segment alt has at most one ref, located at the
// very end; everything else has two or more segments.
//
// Returns an array of `{ terms: string[], ref: string | null }`. An
// empty alt yields one empty segment. Throws on any element kind other
// than `term`, `regex` or `ref`.
function segmentize(alt, literals, regexTokens) {
    const segs = [];
    let current = { terms: [], ref: null };
    for (const el of alt) {
        if (el.kind === 'term') {
            current.terms.push(literals.get(termKey(el)));
        }
        else if (el.kind === 'regex') {
            const key = regexKey(el);
            current.terms.push(regexTokens.get(key));
        }
        else if (el.kind === 'ref') {
            // A ref closes the current segment.
            current.ref = el.name;
            segs.push(current);
            current = { terms: [], ref: null };
        }
        else {
            // `opt`, `star`, `plus`, `group` must have been desugared
            // before reaching the emitter.
            throw new Error(`bnf: internal — unexpected element kind '${el.kind}' in emitter`);
        }
    }
    // Keep a trailing terminal-only run, and never return zero segments.
    if (current.terms.length > 0 || segs.length === 0) {
        segs.push(current);
    }
    return segs;
}
// Map key identifying a regex terminal by pattern and flags.
function regexKey(el) {
    return `/${el.pattern}/${el.flags}`;
}
// True when `alt` consists of zero or more terminals followed by at
// most one ref (nothing after the ref). Any other element kind makes
// the alt non-simple.
function isSingleSegment(alt) {
    let sawRef = false;
    for (const el of alt) {
        if (el.kind === 'ref') {
            if (sawRef)
                return false;
            sawRef = true;
        }
        else if (el.kind === 'term' || el.kind === 'regex') {
            if (sawRef)
                return false; // terminal after a ref — multi-segment
        }
        else {
            // Desugar should have eliminated sugar kinds.
            return false;
        }
    }
    return true;
}
// Throw if `alt` references a rule name that is not in `knownRules`.
// `ruleName` is only used for the error message.
function validateRefs(alt, knownRules, ruleName) {
    for (const el of alt) {
        if (el.kind === 'ref' && !knownRules.has(el.name)) {
            throw new Error(`bnf: rule '${ruleName}' references unknown rule '${el.name}'`);
        }
    }
}
// Registry used by the emitter to allocate unique `@`-prefixed
// FuncRef names for inline action functions. The resulting spec is
// still declarative: every function appears once, keyed by name,
// under the spec's `ref` map.
class RefRegistry {
    constructor() {
        this.refs = {};
        this.counter = 0;
    }
    // Store `fn` under the next `@bnf_a<N>` name and return that name.
    register(fn) {
        const name = `@bnf_a${this.counter++}`;
        this.refs[name] = fn;
        return name;
    }
    // The live name -> function table (not a copy).
    get map() {
        return this.refs;
    }
}
// Fresh, empty AST node. Only 'user' nodes carry the `rule` tag.
function mkAstNode(ruleName, nodeKind) {
    return nodeKind === 'user'
        ? { rule: ruleName, src: '', kids: [] }
        : { src: '', kids: [] };
}
// Build the jsonic alt for one segment: `s` from its terminals, `p`
// from its ref, and (when there are terminals or `initNode` is set) an
// action registered in `refs`.
function segmentToAlt(seg, tag, refs, initNode, ruleName, nodeKind) {
    const spec = { g: tag };
    if (seg.terms.length > 0)
        spec.s = seg.terms.join(' ');
    if (seg.ref)
        spec.p = seg.ref;
    // Default tree-building: accumulate each matched terminal's source
    // text into `r.node.src`. Head alts also allocate a fresh AST node
    // so the child doesn't inherit (and then mutate) its parent's.
    const nterms = seg.terms.length;
    if (nterms > 0 || initNode) {
        spec.a = refs.register((r) => {
            if (initNode)
                r.node = mkAstNode(ruleName, nodeKind);
            const n = r.node;
            for (let i = 0; i < nterms; i++)
                n.src += r.o[i].src;
        });
    }
    return spec;
}
// Close-state action: merge the just-returned child rule's AST node
// into the current rule's. Tagged children (user rules) get pushed
// verbatim into `kids`; untagged (helper / core) flatten — their
// `src` appends and their `kids` extend. Either way `src`
// concatenates so every ancestor's `.src` reflects everything it
// matched.
function captureChildRef(refs, ruleName, nodeKind) { return refs.register((r) => { if (r.node == null) r.node = mkAstNode(ruleName, nodeKind); const n = r.node; const c = r.child && r.child.node; if (c == null) return; if (typeof c !== 'object' || !('src' in c)) { // Legacy shape — wrap as a leaf kid. n.kids.push(c); return; } // Defensive: if the child somehow shares this rule's node // object, skip the merge rather than push a self-reference. (A // properly-emitted grammar always allocates fresh child nodes.) if (c === n) return; n.src += c.src; if (c.rule) n.kids.push(c); else if (Array.isArray(c.kids)) n.kids.push(...c.kids); }); } function emitProduction(prod, grammar, literals, regexTokens, knownRules, tag, ruleSpec, firstSets, nullable, refs) { for (const alt of prod.alts) { validateRefs(alt, knownRules, prod.name); } const allSimple = prod.alts.every(isSingleSegment); if (allSimple) { // Every alternative collapses to one jsonic alt — emit them // directly into the production's open state. This is a head // rule, so each alt initialises its own node array. Empty alts // are sorted to the end so jsonic's first-match-wins doesn't let // them short-circuit non-empty alternatives. const ordered = [ ...prod.alts.filter((alt) => alt.length > 0), ...prod.alts.filter((alt) => alt.length === 0), ]; // Ref-only alternatives have no terminal to discriminate on, so // jsonic's first-match-wins would silently let them shadow any // later alternative. Guard them with FIRST-set peeks when the // production has more than one alt. const needsPeek = ordered.length > 1; const opens = []; for (const alt of ordered) { const segs = segmentize(alt, literals, regexTokens); const seg = segs[0]; const isRefOnly = alt.length >= 1 && alt.every((el) => el.kind === 'ref') && seg.terms.length === 0 && seg.ref != null; const prodKind = prod.nodeKind ?? 
'user'; if (needsPeek && isRefOnly) { const firstTokens = firstOfAlt(alt, literals, regexTokens, firstSets, nullable); if (firstTokens) { for (const tok of firstTokens) { opens.push({ s: tok, b: 1, p: seg.ref, a: refs.register((r) => { r.node = mkAstNode(prod.name, prodKind); }), g: tag, }); } continue; } } opens.push(segmentToAlt(seg, tag, refs, true, prod.name, prodKind)); } const rs = { open: opens }; // If any alt has a push, the close state must capture the // returned child. Add a universal fallback close alt whose // action is a no-op when there was no push. if (prod.alts.some((alt) => alt.some((el) => el.kind === 'ref'))) { rs.close = [{ a: captureChildRef(refs, prod.name, prod.nodeKind ?? 'user'), g: tag, }]; } ruleSpec[prod.name] = rs; return; } if (prod.alts.length === 1) { // Single-alt, multi-segment: chain rules directly on the // production. emitChain(prod.name, prod.alts[0], literals, regexTokens, tag, ruleSpec, refs, prod.nodeKind ?? 'user'); return; } // Multi-alt with at least one multi-segment alternative: emit a // dispatcher. Each alt becomes its own chained impl rule // (`$alt`); the main rule's open peeks the first token // and pushes the matching impl rule. Using `p:` (not `r:`) keeps // the parent's `child` pointer valid so the parent can read the // impl's node in its close-state action. const dispatchOpen = []; let emptyAltSeen = false; for (let i = 0; i < prod.alts.length; i++) { const alt = prod.alts[i]; const implName = `${prod.name}$alt${i}`; if (alt.length === 0) { // Empty alt acts as fallback — handled after the loop. emptyAltSeen = true; continue; } emitChain(implName, alt, literals, regexTokens, tag, ruleSpec, refs, 'helper'); // Fan out this alt into one dispatch entry per concrete token // sequence it can start with. 
Up to LOOKAHEAD_K tokens per // prefix is enough for the grammars this converter targets; a // ref with multiple alts produces one prefix per sub-alt so // overlapping FIRST sets between competing alts can still be // separated by their second (or later) token. // The dispatcher itself is a user (or helper) rule — it must // allocate its own AST node on every dispatch alt, otherwise the // node inherited from the parent via makeRule(ctx, rule.node) // would be shared and the dispatcher's captureChildRef would // mutate the parent's tree. const dispatchKind = prod.nodeKind ?? 'user'; const initDispatchNode = refs.register((r) => { r.node = mkAstNode(prod.name, dispatchKind); }); const LOOKAHEAD_K = 4; const prefixes = altPrefixes(alt, grammar, literals, regexTokens, LOOKAHEAD_K); const usable = prefixes.filter((p) => p.length > 0); if (usable.length > 0) { for (const p of usable) { dispatchOpen.push({ s: p.join(' '), b: p.length, p: implName, a: initDispatchNode, g: tag, }); } } else { const firstTokens = firstOfAlt(alt, literals, regexTokens, firstSets, nullable); if (firstTokens === null) { throw new Error(`bnf: rule '${prod.name}' alternative ${i} is nullable ` + `but is not the only empty alt; FIRST set is ambiguous`); } for (const tok of firstTokens) { dispatchOpen.push({ s: tok, b: 1, p: implName, a: initDispatchNode, g: tag, }); } } } if (emptyAltSeen) { // Fallback: matches any token (or none), pops immediately with // an empty tree. Tagged with the user rule name so a consumer // walking the tree still gets a placeholder node for the empty // alternative. const fallbackKind = prod.nodeKind ?? 
'user'; dispatchOpen.push({ a: refs.register((r) => { r.node = mkAstNode(prod.name, fallbackKind); }), g: tag, }); } ruleSpec[prod.name] = { open: dispatchOpen, close: [{ // Merge the chosen impl's result up into the dispatcher's node, // tagged with the user rule name (so the enclosing rule sees a // `{rule, src, kids}` child, not the impl chain's transparent // `{src, kids}`). a: captureChildRef(refs, prod.name, prod.nodeKind ?? 'user'), g: tag, }], }; } // Emit a (possibly single-step) chain of rules for one alt under the // given head rule name. Segment 0 goes into `headName`; later // segments get synthetic `$stepN` continuations. // // `headKind` controls the head rule's AST node shape: 'user' tags // the head's node with the rule name; 'helper' leaves it untagged // (transparent to the enclosing user rule). Step rules are always // helpers — they inherit and accumulate into the head's node via // `r:` replacement. function emitChain(headName, alt, literals, regexTokens, tag, ruleSpec, refs, headKind = 'helper') { const segs = segmentize(alt, literals, regexTokens); const chainName = (i) => i === 0 ? headName : `${headName}$step${i}`; for (let i = 0; i < segs.length; i++) { const name = chainName(i); const seg = segs[i]; const kind = i === 0 ? headKind : 'helper'; // Only the head of the chain initialises the node object; later // steps inherit and continue to accumulate into it via `r:`. const open = [segmentToAlt(seg, tag, refs, i === 0, name, kind)]; const rs = { open }; const isLast = i === segs.length - 1; if (!isLast) { // Non-last step: after the push returns, capture the child's // node and replace with the next step rule. rs.close = [{ r: chainName(i + 1), a: captureChildRef(refs, name, kind), g: tag, }]; } else if (seg.ref) { // Last step, but it had a push — we still need to capture the // final child before popping. 
rs.close = [{ a: captureChildRef(refs, name, kind), g: tag }]; } ruleSpec[name] = rs; } } // Compute FIRST(ref) for every production, plus which productions // are nullable (can derive the empty string). Iterates to a fixed // point. Terminals in FIRST sets are represented by their allocated // token names (e.g. `#X`). function computeFirstSets(grammar, literals, regexTokens) { const firstSets = new Map(); const nullable = new Set(); for (const p of grammar.productions) firstSets.set(p.name, new Set()); let changed = true; while (changed) { changed = false; for (const prod of grammar.productions) { const first = firstSets.get(prod.name); for (const alt of prod.alts) { // Walk the alt, accumulating FIRST until a non-nullable // position is hit. let altNullable = true; for (const el of alt) { if (el.kind === 'term' || el.kind === 'regex') { const tok = el.kind === 'term' ? literals.get(termKey(el)) : regexTokens.get(regexKey(el)); if (!first.has(tok)) { first.add(tok); changed = true; } altNullable = false; break; } if (el.kind === 'ref') { const refFirst = firstSets.get(el.name) ?? new Set(); for (const tok of refFirst) { if (!first.has(tok)) { first.add(tok); changed = true; } } if (!nullable.has(el.name)) { altNullable = false; break; } continue; } // Desugar should have eliminated other kinds. throw new Error(`bnf: internal — unexpected kind in FIRST: ${el.kind}`); } if (altNullable && !nullable.has(prod.name)) { nullable.add(prod.name); changed = true; } } } } return { firstSets, nullable }; } // FIRST set for a specific alternative (not the whole production). // Returns null if the alt is nullable — the caller must treat that // case separately (typically as a fallback empty alt). function firstOfAlt(alt, literals, regexTokens, firstSets, nullable) { const out = new Set(); for (const el of alt) { if (el.kind === 'term' || el.kind === 'regex') { const tok = el.kind === 'term' ? 
literals.get(termKey(el)) : regexTokens.get(regexKey(el)); out.add(tok); return out; } if (el.kind === 'ref') { const rf = firstSets.get(el.name) ?? new Set(); for (const tok of rf) out.add(tok); if (!nullable.has(el.name)) return out; // else keep walking into the next element continue; } throw new Error(`bnf: internal — unexpected kind in firstOfAlt: ${el.kind}`); } // Alt is nullable — no non-empty prefix. return null; } // Longest deterministic terminal prefix of a rule — the longest // sequence of tokens that every alternative of the rule starts // with. Refs are followed into their target rule, with a `visited` // set guarding cycles. An empty array means there's no confident // prefix (the rule either has divergent alts, starts with a multi- // alt ref, or hits a cycle), so the caller should fall back to a // single-token FIRST-set lookahead instead. function ruleLiteralPrefix(name, grammar, literals, regexTokens, visited) { if (visited.has(name)) return []; const next = new Set(visited); next.add(name); const prod = grammar.productions.find((p) => p.name === name); if (!prod || prod.alts.length === 0) return []; const prefixes = prod.alts.map((alt) => altLiteralPrefix(alt, grammar, literals, regexTokens, next)); if (prefixes.some((p) => p.length === 0)) return []; const minLen = Math.min(...prefixes.map((p) => p.length)); const common = []; for (let i = 0; i < minLen; i++) { const tok = prefixes[0][i]; if (prefixes.every((p) => p[i] === tok)) common.push(tok); else break; } return common; } function altLiteralPrefix(alt, grammar, literals, regexTokens, visited) { const out = []; for (const el of alt) { if (el.kind === 'term') { out.push(literals.get(termKey(el))); } else if (el.kind === 'regex') { out.push(regexTokens.get(regexKey(el))); } else if (el.kind === 'ref') { const sub = ruleLiteralPrefix(el.name, grammar, literals, regexTokens, visited); // Take the ref's literal prefix and stop — we can't see past // the ref without more expensive analysis. 
out.push(...sub); return out; } else { return out; } } return out; } // Enumerate concrete token-sequence prefixes an alternative can // start with, each at most `maxK` tokens long. Refs with multiple // alternatives fan out into one prefix per sub-alternative so the // caller can emit a dedicated dispatch alt for each path. When a // ref cycles back or exhausts depth, the path is *terminated* at // the tokens accumulated so far — the `done` flag is propagated // out of nested calls so a truncated sub-prefix is never extended // with tokens from elements the outer alt happens to list after the // cycled ref. function altPrefixesRaw(alt, grammar, literals, regexTokens, maxK, visited = new Set()) { let paths = [{ tokens: [], done: false }]; for (const el of alt) { const next = []; for (const p of paths) { if (p.done || p.tokens.length >= maxK) { next.push(p); continue; } if (el.kind === 'term') { next.push({ tokens: [...p.tokens, literals.get(termKey(el))], done: false, }); } else if (el.kind === 'regex') { next.push({ tokens: [...p.tokens, regexTokens.get(regexKey(el))], done: false, }); } else if (el.kind === 'ref') { if (visited.has(el.name)) { next.push({ tokens: p.tokens, done: true }); continue; } const childVisited = new Set(visited); childVisited.add(el.name); const target = grammar.productions.find((pr) => pr.name === el.name); if (!target || target.alts.length === 0) { next.push({ tokens: p.tokens, done: true }); continue; } for (const sub of target.alts) { const subPaths = altPrefixesRaw(sub, grammar, literals, regexTokens, maxK - p.tokens.length, childVisited); for (const sp of subPaths) { next.push({ tokens: [...p.tokens, ...sp.tokens], // Propagate `done` so the outer loop won't extend a // cycle-truncated sub-prefix. done: sp.done, }); } } } else { // Desugar should have eliminated group/star/etc. at this point. 
next.push({ tokens: p.tokens, done: true }); } } paths = next; if (paths.every((p) => p.done || p.tokens.length >= maxK)) break; } return paths; } function altPrefixes(alt, grammar, literals, regexTokens, maxK) { const raw = altPrefixesRaw(alt, grammar, literals, regexTokens, maxK); const seen = new Set(); const out = []; for (const p of raw) { const key = p.tokens.join(' '); if (!seen.has(key)) { seen.add(key); out.push(p.tokens); } } return out; } // A quoted-string literal is effectively case-sensitive either // when the user explicitly wrote `%s"…"` or when it contains no // ASCII letters (there's nothing to fold — `"+"` matches `+` in // any "case"). function isEffectivelyCaseSensitive(el) { if (el.caseSensitive === true) return true; return !/[A-Za-z]/.test(el.literal); } // Map a term element to the key used to look up (or allocate) its // emitted token. The key folds together the literal and its // effective case-sensitivity so a sensitive and an insensitive // occurrence of the same string are distinct tokens. function termKey(el) { return (isEffectivelyCaseSensitive(el) ? 'cs:' : 'ci:') + el.literal; } function escapeRegExp(s) { return s.replace(/[\\^$.*+?()[\]{}|]/g, '\\$&'); } // Decode an ABNF numeric value (`%xNN`, `%dNN`, `%bNN`, or one of // the range/concatenation forms) into a `BnfElement`. // // %x61 => single-char term "a" // %x66.6f.6f => concatenated term "foo" // %x30-39 => regex character class [\u0030-\u0039] // // Hex is case-insensitive; decimal and binary accept only digits // in their respective ranges. Range endpoints must be the same // base as the prefix (RFC 5234 doesn't allow mixing). function parseNumericValue(src) { const base = src[1].toLowerCase(); const radix = base === 'x' ? 16 : base === 'd' ? 
10 : 2; const body = src.slice(2); if (body.includes('-')) { const [loStr, hiStr] = body.split('-'); const lo = parseInt(loStr, radix); const hi = parseInt(hiStr, radix); if (lo === hi) { return { kind: 'term', literal: String.fromCharCode(lo) }; } const toEsc = (n) => '\\u' + n.toString(16).padStart(4, '0'); return { kind: 'regex', pattern: '[' + toEsc(lo) + '-' + toEsc(hi) + ']', flags: '', }; } const parts = body.split('.'); const chars = parts.map((n) => String.fromCharCode(parseInt(n, radix))); return { kind: 'term', literal: chars.join('') }; } function allocTokenName(literal, used) { const base = literal .replace(/[^A-Za-z0-9]/g, '_') .toUpperCase() .replace(/^_+|_+$/g, ''); const candidate = base.length > 0 ? '#' + base : '#T'; if (!used.has(candidate)) { used.add(candidate); return candidate; } let i = 1; while (used.has(candidate + i)) i++; const chosen = candidate + i; used.add(chosen); return chosen; } // Public entry point: take BNF source and return a jsonic GrammarSpec. function bnf(src, opts) { const grammar = parseBnf(src); return emitGrammarSpec(grammar, opts); } //# sourceMappingURL=bnf.js.map ================================================ FILE: dist/debug.d.ts ================================================ import type { Plugin } from './jsonic'; declare const Debug: Plugin; export { Debug }; ================================================ FILE: dist/debug.js ================================================ "use strict"; /* Copyright (c) 2021-2023 Richard Rodger, MIT License */ Object.defineProperty(exports, "__esModule", { value: true }); exports.Debug = void 0; const jsonic_1 = require("./jsonic"); const DEFAULTS = { print: true, trace: { step: true, rule: true, lex: true, parse: true, node: true, stack: true, }, }; const { entries, tokenize } = jsonic_1.util; const Debug = (jsonic, options) => { options.trace = true === options.trace ? 
{ ...DEFAULTS.trace } : options.trace; const { keys, values, entries } = jsonic.util; jsonic.debug = { describe: function () { let cfg = jsonic.internal().config; let match = cfg.lex.match; let rules = jsonic.rule(); return [ '========= TOKENS ========', Object.entries(cfg.t) .filter((te) => 'string' === typeof te[1]) .map((te) => { return (' ' + te[0] + '\t' + te[1] + '\t' + ((s) => (s ? '"' + s + '"' : ''))(cfg.fixed.ref[te[0]] || '')); }) .join('\n'), '\n', Object.entries(cfg.tokenSet) .map((te) => { return (' ' + te[0] + '\t' + Object.keys(cfg.tokenSetTins[te[0]] ?? [])); }) .join('\n'), '\n', , '========= RULES =========', ruleTree(jsonic, keys(rules), rules), '\n', '========= ALTS =========', values(rules) .map((rs) => ' ' + rs.name + ':\n' + descAlt(jsonic, rs, 'open') + descAlt(jsonic, rs, 'close')) .join('\n\n'), '\n', '========= LEXER =========', ' ' + ((match && match.map((m) => m.order + ': ' + m.matcher + ' (' + m.make.name + ')')) || []).join('\n '), '\n', '\n', '========= PLUGIN =========', ' ' + jsonic .internal() .plugins.map((p) => p.name + (p.options ? entries(p.options).reduce((s, e) => (s += '\n ' + e[0] + ': ' + JSON.stringify(e[1])), '') : '')) .join('\n '), '\n', ].join('\n'); }, }; const origUse = jsonic.use.bind(jsonic); jsonic.use = (...args) => { let self = origUse(...args); if (options.print) { self .internal() .config.debug.get_console() .log('USE:', args[0].name, '\n\n', self.debug.describe()); } return self; }; if (options.trace) { jsonic.options({ parse: { prepare: { debug: (_jsonic, ctx, _meta) => { const console_log = ctx.cfg.debug.get_console().log; console_log('\n========= TRACE =========='); ctx.log = ctx.log || ((kind, ...rest) => { if (LOGKIND[kind] && options.trace[kind]) { console_log(LOGKIND[kind](...rest) .filter((item) => 'object' != typeof item) .map((item) => 'function' == typeof item ? 
item.name : item) .join(' ')); } }); }, }, }, }); } }; exports.Debug = Debug; function descAlt(jsonic, rs, kind) { const { entries } = jsonic.util; return 0 === rs.def[kind].length ? '' : ' ' + kind.toUpperCase() + ':\n' + rs.def[kind] .map((a, i) => ' ' + ('' + i).padStart(5, ' ') + ' ' + ('[' + (a.s || []) .map((tin) => null == tin ? '***INVALID***' : 'number' === typeof tin ? jsonic.token[tin] : Array.isArray(tin) ? '[' + tin.map((t) => jsonic.token[t]) + ']' : ('' + tin)) .join(' ') + '] ').padEnd(32, ' ') + (a.r ? ' r=' + ('string' === typeof a.r ? a.r : '') : '') + (a.p ? ' p=' + ('string' === typeof a.p ? a.p : '') : '') + (!a.r && !a.p ? '\t' : '') + '\t' + (null == a.b ? '' : 'b=' + a.b) + '\t' + (null == a.n ? '' : 'n=' + entries(a.n).map(([k, v]) => k + ':' + v)) + '\t' + (null == a.a ? '' : 'A') + (null == a.c ? '' : 'C') + (null == a.h ? '' : 'H') + '\t' + (null == a.c?.n ? '\t' : ' CN=' + entries(a.c.n).map(([k, v]) => k + ':' + v)) + (null == a.c?.d ? '' : ' CD=' + a.c.d) + (a.g ? '\tg=' + a.g : '')) .join('\n') + '\n'; } function ruleTree(jsonic, rn, rsm) { const { values, omap } = jsonic.util; return rn.reduce((a, n) => ((a += ' ' + n + ':\n ' + values(omap({ op: ruleTreeStep(rsm, n, 'open', 'p'), or: ruleTreeStep(rsm, n, 'open', 'r'), cp: ruleTreeStep(rsm, n, 'close', 'p'), cr: ruleTreeStep(rsm, n, 'close', 'r'), }, ([n, d]) => [ 1 < d.length ? n : undefined, n + ': ' + d, ])).join('\n ') + '\n'), a), ''); } function ruleTreeStep(rsm, name, state, step) { return [ ...new Set(rsm[name].def[state] .filter((alt) => alt[step]) .map((alt) => alt[step]) .map((step) => ('string' === typeof step ? step : ''))), ].join(' '); } function descTokenState(ctx) { return ('[' + (ctx.NOTOKEN === ctx.t0 ? '' : ctx.F(ctx.t0.src)) + (ctx.NOTOKEN === ctx.t1 ? '' : ' ' + ctx.F(ctx.t1.src)) + ']~[' + (ctx.NOTOKEN === ctx.t0 ? '' : tokenize(ctx.t0.tin, ctx.cfg)) + (ctx.NOTOKEN === ctx.t1 ? 
'' : ' ' + tokenize(ctx.t1.tin, ctx.cfg)) + ']'); } function descParseState(ctx, rule, lex) { return (ctx.F(ctx.src().substring(lex.pnt.sI, lex.pnt.sI + 16)).padEnd(18, ' ') + ' ' + descTokenState(ctx).padEnd(34, ' ') + ' ' + ('' + rule.d).padStart(4, ' ')); } function descRuleState(ctx, rule) { let en = entries(rule.n); let eu = entries(rule.u); let ek = entries(rule.k); return ('' + (0 === en.length ? '' : ' N<' + en .filter((n) => n[1]) .map((n) => n[0] + '=' + n[1]) .join(';') + '>') + (0 === eu.length ? '' : ' U<' + eu.map((u) => u[0] + '=' + ctx.F(u[1])).join(';') + '>') + (0 === ek.length ? '' : ' K<' + ek.map((k) => k[0] + '=' + ctx.F(k[1])).join(';') + '>')); } function descAltSeq(alt, cfg) { return ('[' + (alt.s || []) .map((tin) => 'number' === typeof tin ? tokenize(tin, cfg) : Array.isArray(tin) ? '[' + tin.map((t) => tokenize(t, cfg)) + ']' : '') .join(' ') + '] '); } const LOG = { RuleState: { o: jsonic_1.S.open.toUpperCase(), c: jsonic_1.S.close.toUpperCase(), }, }; const LOGKIND = { step: (...rest) => rest, stack: (ctx, rule, lex) => [ jsonic_1.S.logindent + jsonic_1.S.stack, descParseState(ctx, rule, lex), // S.indent.repeat(Math.max(rule.d + ('o' === rule.state ? 
-1 : 1), 0)) + jsonic_1.S.indent.repeat(rule.d) + '/' + ctx.rs // .slice(0, ctx.rsI) .slice(0, rule.d) .map((r) => r.name + '~' + r.i) .join('/'), '~', '/' + ctx.rs // .slice(0, ctx.rsI) .slice(0, rule.d) .map((r) => ctx.F(r.node)) .join('/'), // 'd=' + rule.d, //'rsI=' + ctx.rsI, ctx, rule, lex, ], rule: (ctx, rule, lex) => [ rule, ctx, lex, jsonic_1.S.logindent + jsonic_1.S.rule + jsonic_1.S.space, descParseState(ctx, rule, lex), jsonic_1.S.indent.repeat(rule.d) + (rule.name + '~' + rule.i + jsonic_1.S.colon + LOG.RuleState[rule.state]).padEnd(16), ('prev=' + rule.prev.i + ' parent=' + rule.parent.i + ' child=' + rule.child.i).padEnd(28), descRuleState(ctx, rule), ], node: (ctx, rule, lex, next) => [ rule, ctx, lex, next, jsonic_1.S.logindent + jsonic_1.S.node + jsonic_1.S.space, descParseState(ctx, rule, lex), jsonic_1.S.indent.repeat(rule.d) + ('why=' + next.why + jsonic_1.S.space + '<' + ctx.F(rule.node) + '>').padEnd(46), descRuleState(ctx, rule), ], parse: (ctx, rule, lex, match, cond, altI, alt, out) => { let ns = match && out.n ? entries(out.n) : null; let us = match && out.u ? entries(out.u) : null; let ks = match && out.k ? entries(out.k) : null; return [ ctx, rule, lex, jsonic_1.S.logindent + jsonic_1.S.parse, descParseState(ctx, rule, lex), jsonic_1.S.indent.repeat(rule.d) + (match ? 'alt=' + altI : 'no-alt'), match && alt ? descAltSeq(alt, ctx.cfg) : '', match && out.g ? 'g:' + out.g + ' ' : '', (match && out.p ? 'p:' + out.p + ' ' : '') + (match && out.r ? 'r:' + out.r + ' ' : '') + (match && out.b ? 'b:' + out.b + ' ' : ''), alt && alt.c ? 'c:' + cond : jsonic_1.EMPTY, null == ns ? '' : 'n:' + ns.map((p) => p[0] + '=' + p[1]).join(';'), null == us ? '' : 'u:' + us.map((p) => p[0] + '=' + p[1]).join(';'), null == ks ? 
'' : 'k:' + ks.map((p) => p[0] + '=' + p[1]).join(';'), ]; }, lex: (ctx, rule, lex, pnt, sI, match, tkn, alt, altI, tI) => [ jsonic_1.S.logindent + jsonic_1.S.lex + jsonic_1.S.space + jsonic_1.S.space, descParseState(ctx, rule, lex), jsonic_1.S.indent.repeat(rule.d) + // S.indent.repeat(rule.d) + S.lex, // Log entry prefix. // Name of token from tin (token identification numer). tokenize(tkn.tin, ctx.cfg), ctx.F(tkn.src), // Format token src for log. pnt.sI, // Current source index. pnt.rI + ':' + pnt.cI, // Row and column. match?.name || '', alt ? 'on:alt=' + altI + ';' + alt.g + ';t=' + tI + ';' + descAltSeq(alt, ctx.cfg) : '', ctx.F(lex.src.substring(sI, sI + 16)), ctx, rule, lex, ], }; Debug.defaults = DEFAULTS; //# sourceMappingURL=debug.js.map ================================================ FILE: dist/defaults.d.ts ================================================ import { Options } from './jsonic'; declare const defaults: Options; export { defaults }; ================================================ FILE: dist/defaults.js ================================================ "use strict"; /* Copyright (c) 2013-2023 Richard Rodger, MIT License */ Object.defineProperty(exports, "__esModule", { value: true }); exports.defaults = void 0; // Functions that create token matching lexers. // The `make*Matcher` functions may optionally initialise // and validate Config properties specific to their lexing. const lexer_1 = require("./lexer"); const defaults = { // Prevent prototype pollution safe: { key: true, }, // Default tag - set your own! tag: '-', // Fixed token lexing. fixed: { // Recognize fixed tokens in the Lexer. lex: true, // Token names. token: { '#OB': '{', '#CB': '}', '#OS': '[', '#CS': ']', '#CL': ':', '#CA': ',', }, }, match: { lex: true, token: {}, }, // Token sets. tokenSet: { IGNORE: ['#SP', '#LN', '#CM'], VAL: ['#TX', '#NR', '#ST', '#VL'], KEY: ['#TX', '#NR', '#ST', '#VL'], }, // Recognize space characters in the lexer. 
space: { // Recognize space in the Lexer. lex: true, // Space characters are kept to a minimal set. // Add more from https://en.wikipedia.org/wiki/Whitespace_character as needed. chars: ' \t', }, // Line lexing. line: { // Recognize lines in the Lexer. lex: true, // Line characters. chars: '\r\n', // Increments row (aka line) counter. rowChars: '\n', // Generate separate lexer tokens for each newline. // Note: '\r\n' counts as one newline. single: false, }, // Text formats. text: { // Recognize text (non-quoted strings) in the Lexer. lex: true, }, // Control number formats. number: { // Recognize numbers in the Lexer. lex: true, // Recognize hex numbers (eg. 10 === 0x0a). hex: true, // Recognize octal numbers (eg. 10 === 0o12). oct: true, // Recognize ninary numbers (eg. 10 === 0b1010). bin: true, // All possible number chars. |+-|0|xob|0-9a-fA-F|.e|+-|0-9a-fA-F| // digital: '-1023456789._xoeEaAbBcCdDfF+', // Allow embedded separator. `null` to disable. sep: '_', // Exclude number strings matching this RegExp exclude: undefined, }, // Comment markers. // : true -> single line comments // : -> multiline comments comment: { // Recognize comments in the Lexer. lex: true, // TODO: plugin // Balance multiline comments. // balance: true, // Comment markers. def: { hash: { line: true, start: '#', lex: true, eatline: false }, slash: { line: true, start: '//', lex: true, eatline: false }, multi: { line: false, start: '/' + '*', end: '*' + '/', lex: true, eatline: false, }, }, }, // String formats. string: { // Recognize strings in the Lexer. lex: true, // Quote characters chars: '\'"`', // Multiline quote chars. multiChars: '`', // Escape character. escapeChar: '\\', // String escape chars. // Denoting char (follows escape char) => actual char. escape: { b: '\b', f: '\f', n: '\n', r: '\r', t: '\t', v: '\v', // These preserve standard escapes when allowUnknown=false. 
'"': '"', "'": "'", '`': '`', '\\': '\\', '/': '/', }, // Allow unknown escape characters - they are copied to output: '\w' -> 'w'. allowUnknown: true, // If string lexing fails, instead of error, allow other matchers to try. abandon: false, }, // Object formats. map: { // TODO: or trigger error? // Later duplicates extend earlier ones, rather than replacing them. extend: true, // Custom merge function for duplicates (optional). // TODO: needs function signature merge: undefined, // Allow bare colon `:value` in maps, stored as `child$` property. child: false, }, // Array formats. list: { // Allow arrays to have properties: `[a:9,0,1]` property: true, // Parse pairs as object elements: `[a:1]` -> `[{"a":1}]` // Takes precedence over list.property when true. pair: false, // Parse bare colon as child$ property: `[:1]` -> [] with child$=1 // Multiple child values merge. child: false, }, // Metadata info markers. When enabled, a non-enumerable marker property // is attached to parsed nodes with metadata (implicit flag, meta bag, etc.). info: { // Attach marker to map nodes. map: false, // Attach marker to list nodes. list: false, // Wrap string values as String objects with marker (quote info). text: false, // Property name for the marker. marker: '__info__', }, // Keyword values. value: { lex: true, def: { true: { val: true }, false: { val: false }, null: { val: null }, }, }, // Additional text ending characters ender: [], // Plugin custom options, (namespace by plugin name). plugin: {}, // Debug settings debug: { // Default console for logging. get_console: () => console, // Max length of parse value to print. maxlen: 99, // Print internal structures print: { // Print config built from options. config: false, // Custom string formatter for src and node values. src: undefined, }, }, // Error messages. 
error: { unknown: 'unknown error: {code}', unexpected: 'unexpected character(s): {src}', invalid_unicode: 'invalid unicode escape: {src}', invalid_ascii: 'invalid ascii escape: {src}', unprintable: 'unprintable character: {src}', unterminated_string: 'unterminated string: {src}', unterminated_comment: 'unterminated comment: {src}', unknown_rule: 'unknown rule: {rulename}', end_of_source: 'unexpected end of source', }, errmsg: { name: 'jsonic', suffix: true }, // Error hints: {error-code: hint-text}. hint: { unknown: ` Since the error is unknown, this is probably a bug inside jsonic itself, or a plugin. Please consider posting a github issue - thanks! Code: {code}, Details: {details}`, unexpected: ` The character(s) {src} were not expected at this point as they do not match the expected syntax, even under the relaxed jsonic rules. If it is not obviously wrong, the actual syntax error may be elsewhere. Try commenting out larger areas around this point until you get no errors, then remove the comments in small sections until you find the offending syntax. NOTE: Also check if any plugins you are using expect different syntax in this case.`, invalid_unicode: ` The escape sequence {src} does not encode a valid unicode code point number. You may need to validate your string data manually using test code to see how JavaScript will interpret it. Also consider that your data may have become corrupted, or the escape sequence has not been generated correctly.`, invalid_ascii: ` The escape sequence {src} does not encode a valid ASCII character. You may need to validate your string data manually using test code to see how JavaScript will interpret it. Also consider that your data may have become corrupted, or the escape sequence has not been generated correctly.`, unprintable: ` String values cannot contain unprintable characters (character codes below 32). The character {src} is unprintable. You may need to remove these characters from your source data. 
Also check that it has not become corrupted.`, unterminated_string: ` This string has no end quote.`, unterminated_comment: ` This comment is never closed.`, unknown_rule: ` No rule named $rulename is defined. This is probably an error in the grammar of a plugin.`, end_of_source: ` Unexpected end of source.`, }, // Lexer lex: { match: { match: { order: 1e6, make: lexer_1.makeMatchMatcher }, fixed: { order: 2e6, make: lexer_1.makeFixedMatcher }, space: { order: 3e6, make: lexer_1.makeSpaceMatcher }, line: { order: 4e6, make: lexer_1.makeLineMatcher }, string: { order: 5e6, make: lexer_1.makeStringMatcher }, comment: { order: 6e6, make: lexer_1.makeCommentMatcher }, number: { order: 7e6, make: lexer_1.makeNumberMatcher }, text: { order: 8e6, make: lexer_1.makeTextMatcher }, }, // Empty string is allowed and returns undefined empty: true, emptyResult: undefined, }, // Parser parse: { // Plugin custom functions to prepare parser context. prepare: {}, }, // Parser rule options. rule: { // Name of the starting rule. start: 'val', // Automatically close remaining structures at EOF. finish: true, // Multiplier to increase the maximum number of rule occurences. maxmul: 3, // Include only those alts with matching group tags (comma sep). // NOTE: applies universally, thus also for subsequent rules. include: '', // Exclude alts with matching group tags (comma sep). // NOTE: applies universally, thus also for subsequent rules. exclude: '', }, // Result value options. result: { // Fail if result matches any of these. fail: [], }, // Token-rewind options. `history` bounds how many consumed tokens // are retained on ctx.v for ctx.rewind(). The default of 64 keeps // parse-time memory bounded for large inputs; raise it if a // grammar needs to rewind further, or set to Infinity to retain // every consumed token. ctx.rewind(mark) throws if `mark` falls // outside the retained window. rewind: { history: 64, }, // Configuration options. config: { // Configuration modifiers. 
modify: {}, }, // Provide a custom parser. parser: { start: undefined, }, }; exports.defaults = defaults; //# sourceMappingURL=defaults.js.map ================================================ FILE: dist/error.d.ts ================================================ import type { Bag, Context, Rule, Token } from './types'; declare class JsonicError extends SyntaxError { constructor(code: string, details: Bag, token: Token, rule: Rule, ctx: Context); } declare function errinject(s: T, code: string, details: Bag, token: Token, rule: Rule, ctx: Context): T; declare function trimstk(err: Error): void; declare function errsite(spec: { src: string; sub?: string; msg?: string; row?: number; col?: number; pos?: number; cline?: string; }): string; declare function errmsg(spec: { code?: string; name?: string; txts?: { msg?: string; hint?: string; site?: string; }; smsg?: string; src?: string; file?: string; row?: number; col?: number; pos?: number; site?: string; sub?: string; prefix?: string | Function; suffix?: string | Function; color?: { active?: boolean; reset?: string; hi?: string; lo?: string; line?: string; }; }): string; declare function errdesc(code: string, details: Bag, token: Token, rule: Rule, ctx: Context): Bag; declare function strinject(s: T, m: Bag, f?: { indent?: string; }): T; declare function prop(obj: any, path: string, val?: any): any; export { JsonicError, errdesc, errinject, errsite, errmsg, trimstk, strinject, prop, }; ================================================ FILE: dist/error.js ================================================ "use strict"; /* Copyright (c) 2013-2024 Richard Rodger, MIT License */ Object.defineProperty(exports, "__esModule", { value: true }); exports.JsonicError = void 0; exports.errdesc = errdesc; exports.errinject = errinject; exports.errsite = errsite; exports.errmsg = errmsg; exports.trimstk = trimstk; exports.strinject = strinject; exports.prop = prop; const types_1 = require("./types"); const utility_1 = 
/**
 * Inject value text into an error message template.
 *
 * A lookup object is assembled by layering, lowest precedence first: the
 * context itself, `ctx.cfg`, `ctx.opts`, the token, the rule, `ctx.meta`
 * and `details`. The explicit `code`, `details`, `token`, `rule` and `ctx`
 * entries are written last, so they always win over same-named keys from
 * the earlier layers. The lookup object and the template are then handed
 * to `strinject`.
 *
 * Every source layer is optional: a missing `ctx`, `token`, `rule` or
 * `details` simply contributes nothing (previously a missing `ctx` threw
 * while reading `ctx.cfg`).
 *
 * @param {*} s - Template (or collection of templates) passed to strinject.
 * @param {string} code - Error code.
 * @param {object} [details] - Caller supplied error details.
 * @param {object} [token] - Token at which the error occurred.
 * @param {object} [rule] - Rule active when the error occurred.
 * @param {object} [ctx] - Parse context.
 * @returns {*} Whatever `strinject` returns for `s`.
 */
function errinject(s, code, details, token, rule, ctx) {
    const ref = {
        ...(ctx || {}),
        ...(ctx?.cfg || {}),
        ...(ctx?.opts || {}),
        ...(token || {}),
        ...(rule || {}),
        ...(ctx?.meta || {}),
        ...(details || {}),
        // Explicit entries always take precedence over the spread layers.
        code,
        details,
        token,
        rule,
        ctx,
    };
    return strinject(s, ref, { indent: ' ' });
}
/**
 * Assemble a complete, multi-line error message from its parts.
 *
 * The message is built from these sections, in order, joined by newlines:
 * optional prefix, heading (`[name/code]: msg`), optional location line
 * (`--> file:row:col`), source site extract, a blank line, hint text and
 * optional suffix. Sections that evaluate to null/undefined are dropped.
 *
 * Colour codes from `spec.color` are only applied when `spec.color.active`
 * is truthy; otherwise every colour string is empty.
 *
 * @param {object} spec - Message specification (code, name, txts, src,
 *   file, row, col, pos, sub, smsg, prefix, suffix, color).
 * @returns {string} The formatted message.
 */
function errmsg(spec) {
    const color = { active: false, reset: '', hi: '', lo: '', line: '' };
    if (spec.color?.active) {
        Object.assign(color, spec.color);
    }
    const txts = Object.assign({ msg: null, hint: null, site: null }, spec.txts || {});

    // Prefix and suffix share the same rules: absent -> dropped,
    // function -> called with (color, spec), anything else -> stringified.
    const renderAffix = (affix) => {
        if (null == affix) {
            return null;
        }
        return 'function' === typeof affix ? affix.call(spec, color, spec) : '' + affix;
    };

    const sections = [];
    sections.push(renderAffix(spec.prefix));

    // Heading: "[name/code]:" (only when a code is given) followed by the message.
    let tag = '';
    if (null != spec.code) {
        const scope = null == spec.name ? '' : spec.name + '/';
        tag = color.hi + '[' + scope + spec.code + ']:';
    }
    sections.push(tag + color.reset + ' ' + (null == txts.msg ? '' : txts.msg));

    // Location line, shown when a file or a full row/col pair is known.
    const hasRowCol = null != spec.row && null != spec.col;
    if (hasRowCol || null != spec.file) {
        const file = null == spec.file ? '' : spec.file;
        const at = hasRowCol ? ':' + spec.row + ':' + spec.col : '';
        sections.push(' ' + color.line + '-->' + color.reset + ' ' + file + at);
    }

    // Source extract is only rendered when both source text and a site
    // placeholder are available.
    let site = '';
    if (null != spec.src && null != txts.site) {
        site = errsite({
            src: spec.src,
            sub: spec.sub,
            msg: spec.smsg || spec.txts?.msg,
            cline: color.line,
            row: spec.row,
            col: spec.col,
            pos: spec.pos,
        });
    }
    sections.push(site);
    sections.push('');
    sections.push(null == txts.hint ? '' : txts.hint);
    sections.push(renderAffix(spec.suffix));

    return sections.filter((section) => null != section).join('\n');
}
/**
 * Get or set a value on `obj` using a dotted key path such as "a.b.c".
 *
 * Read (`val` is undefined): walks the path and returns the value at the
 * final key, or undefined when any step is missing. A read never modifies
 * `obj`; missing or falsy intermediate values are treated as empty objects
 * for the purpose of the walk only.
 *
 * Write (`val` is defined): missing or falsy intermediate containers are
 * created as plain objects on the way down, then the final key is assigned.
 * Because undefined selects read mode, a value cannot be set to undefined.
 *
 * Any path segment equal to "__proto__" is rejected.
 *
 * @param {object} obj - Root object to read from or write to.
 * @param {string} path - Dot separated key path.
 * @param {*} [val] - Value to set; omit (or pass undefined) to read.
 * @returns {*} The value found at, or just written to, the path.
 * @throws {Error} "Cannot get|set path ..." when the walk fails or the
 *   path contains "__proto__".
 */
function prop(obj, path, val) {
    const root = obj;
    const writing = undefined !== val;
    try {
        const parts = path.split('.');
        const last = parts.length - 1;
        let pn;
        for (let pI = 0; pI < parts.length; pI++) {
            pn = parts[pI];
            // Checked for every segment, including the final one.
            if ('__proto__' === pn) {
                throw new Error(pn);
            }
            if (pI < last) {
                const next = obj[pn] || {};
                // Only a write may materialise intermediate containers.
                if (writing) {
                    obj[pn] = next;
                }
                obj = next;
            }
        }
        if (writing) {
            obj[pn] = val;
        }
        return obj[pn];
    }
    catch (e) {
        throw new Error('Cannot ' +
            (undefined === val ? 'get' : 'set') +
            ' path ' +
            path +
            ' on object: ' +
            str(root) +
            (undefined === val ? '' : ' to value: ' + str(val, 22)));
    }
}
const defprop = Object.defineProperty;
/**
 * Attach metadata to a node under the given marker key.
 *
 * The property is defined with `writable: true` only, so it is
 * non-enumerable and non-configurable. Anything that is not a non-null
 * object (primitives, undefined, null) is silently ignored.
 *
 * @param {*} node - Candidate node to annotate.
 * @param {string|symbol} marker - Property key for the metadata.
 * @param {*} data - Metadata value to store.
 */
function mark(node, marker, data) {
    const markable = null != node && 'object' === typeof node;
    if (!markable) {
        return;
    }
    defprop(node, marker, { value: data, writable: true });
}
key_token.val // Was text : key_token.src; // Was number, use original text r.u.key = key; }, }; // Plain JSON // ---------- jsonic.grammar({ ref: { '@finish': (_rule, ctx) => { if (!ctx.cfg.rule.finish) { // TODO: pass missing end char for replacement in error message ctx.t0.err = 'end_of_source'; return ctx.t0; } }, // TODO: define a way to "export" rule actions or other functions so that // other plugins can use them. '@pairkey': (r) => { // Get key string value from first matching token of `Open` state. const key_token = r.o0; const key = ST === key_token.tin || TX === key_token.tin ? key_token.val // Was text : key_token.src; // Was number, use original text r.u.key = key; }, '@val-bo': (rule) => (rule.node = undefined), '@val-bc': (r, ctx) => { // NOTE: val can be undefined when there is no value at all // (eg. empty string, thus no matched opening token) r.node = // If there's no node, undefined === r.node ? // ... or no child node (child map or list), undefined === r.child.node ? // ... or no matched tokens, 0 === r.os ? // ... then the node has no value undefined : // .. otherwise use the token value (() => { let val = r.o0.resolveVal(r, ctx); if (ctx.cfg.info.text && typeof val === 'string' && (r.o0.tin === ctx.cfg.t.ST || r.o0.tin === ctx.cfg.t.TX)) { let quote = r.o0.tin === ctx.cfg.t.ST && r.o0.src.length > 0 ? r.o0.src[0] : ''; let sv = new String(val); mark(sv, ctx.cfg.info.marker, { quote }); val = sv; } return val; })() : r.child.node : r.node; }, '@map-bo': (r, ctx) => { // Create a new empty map. r.node = Object.create(null); if (ctx.cfg.info.map) { mark(r.node, ctx.cfg.info.marker, { implicit: false, meta: {} }); } }, '@list-bo': (r, ctx) => { // Create a new empty list. r.node = []; if (ctx.cfg.info.list) { mark(r.node, ctx.cfg.info.marker, { implicit: false, meta: {} }); } }, '@pair-bc': (r, ctx) => { if (r.u.pair) { // Drop keys that match the info marker to preserve metadata. 
if (ctx.cfg.info.map && r.u.key === ctx.cfg.info.marker) { return; } // Store previous value (if any, for extensions). r.u.prev = r.node[r.u.key]; r.node[r.u.key] = r.child.node; } }, '@elem-bc': (r) => { if (true !== r.u.done && undefined !== r.child.node) { r.node.push(r.child.node); } }, }, rule: { val: { // Opening token alternates. open: [ // A map: `{ ...` { s: '#OB', p: 'map', b: 1, g: 'map,json' }, // A list: `[ ...` { s: '#OS', p: 'list', b: 1, g: 'list,json' }, // A plain value: `x` `"x"` `1` `true` .... { s: '#VAL', g: 'val,json' }, ], // Closing token alternates. close: [ // End of input. { s: '#ZZ', g: 'end,json' }, // There's more JSON. { b: 1, g: 'more,json' }, ] }, map: { open: [ // An empty map: {}. { s: '#OB #CB', b: 1, n: { pk: 0 }, g: 'map,json' }, // Start matching map key-value pairs: a:1. // Reset counter n.pk as new map (for extensions). { s: '#OB', p: 'pair', n: { pk: 0 }, g: 'map,json,pair' }, ], close: [ // End of map. { s: '#CB', g: 'end,json' }, ], }, list: { open: [ // An empty list: []. { s: '#OS #CS', b: 1, g: 'list,json' }, // Start matching list elements: 1,2. { s: '#OS', p: 'elem', g: 'list,elem,json' }, ], close: [ // End of map. { s: '#CS', g: 'end,json' }, ] }, // sets key:val on node pair: { open: [ // Match key-colon start of pair. Marker `pair=true` allows flexibility. { s: '#KEY #CL', p: 'val', u: { pair: true }, a: '@pairkey', g: 'map,pair,key,json', }, ], close: [ // Comma means a new pair at same pair-key level. { s: '#CA', r: 'pair', g: 'map,pair,json' }, // End of map. { s: '#CB', b: 1, g: 'map,pair,json' }, ] }, // push onto node elem: { open: [ // List elements are values. { p: 'val', g: 'list,elem,val,json' }, ], close: [ // Next element. { s: '#CA', r: 'elem', g: 'list,elem,json' }, // End of list. 
{ s: '#CS', b: 1, g: 'list,elem,json' }, ], }, }, }); /* jsonic.rule('val', (rs: RuleSpec) => { rs .fnref({ '@val-bo': (rule: Rule) => (rule.node = undefined), '@val-bc': (r: Rule, ctx: Context) => { // NOTE: val can be undefined when there is no value at all // (eg. empty string, thus no matched opening token) r.node = // If there's no node, undefined === r.node ? // ... or no child node (child map or list), undefined === r.child.node ? // ... or no matched tokens, 0 === r.os ? // ... then the node has no value undefined : // .. otherwise use the token value r.o0.resolveVal(r, ctx) : r.child.node : r.node } }) // Clear the current node as this a new value. // .bo((rule: Rule) => (rule.node = undefined)) // .bo('@val-bo') // Opening token alternates. .open([ // A map: `{ ...` { s: '#OB', p: 'map', b: 1, g: 'map,json' }, // A list: `[ ...` { s: '#OS', p: 'list', b: 1, g: 'list,json' }, // A plain value: `x` `"x"` `1` `true` .... { s: '#VAL', g: 'val,json' }, ]) // Closing token alternates. .close([ // End of input. { s: '#ZZ', g: 'end,json' }, // There's more JSON. { b: 1, g: 'more,json' }, ]) // .bc('@val-bc') }) jsonic.rule('map', (rs: RuleSpec) => { rs .fnref({ '@map-bo': (r: Rule) => { // Create a new empty map. r.node = Object.create(null) } }) // .bo('@bo') .open([ // An empty map: {}. { s: '#OB #CB', b: 1, n: { pk: 0 }, g: 'map,json' }, // Start matching map key-value pairs: a:1. // Reset counter n.pk as new map (for extensions). { s: '#OB', p: 'pair', n: { pk: 0 }, g: 'map,json,pair' }, ]) .close([ // End of map. { s: '#CB', g: 'end,json' }, ]) }) jsonic.rule('list', (rs: RuleSpec) => { rs .fnref({ '@list-bo': (r: Rule) => { // Create a new empty list. r.node = [] } }) // .bo('@bo') .open([ // An empty list: []. { s: '#OS #CS', b: 1, g: 'list,json' }, // Start matching list elements: 1,2. { s: '#OS', p: 'elem', g: 'list,elem,json' }, ]) .close([ // End of map. 
// Store the value of a key:value pair on the current node.
//
// Reads the key and any previous value from `r.u`, and the new value from
// `r.child.node` (undefined becomes null). The assignment is skipped when:
//   * the pair belongs to a list (`r.u.list`), `cfg.safe.key` is set and
//     the key is `__proto__` or `constructor`;
//   * `cfg.info.map` is set and the key equals the info marker.
// When a previous value exists it is combined with the new one using
// `cfg.map.merge` if provided, else `deep` if `cfg.map.extend` is set,
// else the new value simply replaces it.
function pairval(r, ctx) {
    const { cfg } = ctx;
    const key = r.u.key;
    const prev = r.u.prev;
    const childNode = r.child.node;

    // Convert undefined to null when there was no pair value.
    const fresh = undefined === childNode ? null : childNode;

    // Do not set unsafe keys on Arrays (Objects are created without a prototype).
    const unsafeListKey = r.u.list &&
        cfg.safe.key &&
        ('__proto__' === key || 'constructor' === key);
    if (unsafeListKey) {
        return;
    }

    // Drop keys that match the info marker to preserve metadata.
    if (cfg.info.map && key === cfg.info.marker) {
        return;
    }

    let result = fresh;
    if (null != prev) {
        if (cfg.map.merge) {
            result = cfg.map.merge(prev, fresh, r, ctx);
        }
        else if (cfg.map.extend) {
            result = deep(prev, fresh);
        }
    }
    r.node[key] = result;
}
c.t0 : undefined), }, rule: { val: { open: { alts: [ // A pair key: `a: ...` // Implicit map at top level. { s: '#KEY #CL', c: { d: 0 }, p: 'map', b: 2, g: 'pair,jsonic,top', }, // A pair dive: `a:b: ...` // Increment counter n.pk to indicate pair-key depth (for extensions). // a:9 -> pk=undef, a:b:9 -> pk=1, a:b:c:9 -> pk=2, etc { s: '#KEY #CL', p: 'map', b: 2, n: { pk: 1 }, g: 'pair,jsonic', }, // A plain value: `x` `"x"` `1` `true` .... { s: '#VAL', g: 'val,json' }, // Implicit ends `{a:}` -> {"a":null}, `[a:]` -> [{"a":null}] { s: ['#CB #CS'], b: 1, c: { d: { $gt: 0 } }, g: 'val,imp,null,jsonic', }, // Implicit list at top level: a,b. { s: '#CA', c: { d: 0 }, p: 'list', b: 1, g: 'list,imp,jsonic', }, // Value is implicitly null when empty before commas. { s: '#CA', b: 1, g: 'list,val,imp,null,jsonic' }, { s: '#ZZ', g: 'jsonic' }, ], inject: { append: true, delete: [2] }, }, close: { alts: [ // Explicitly close map or list: `}`, `]` { s: ['#CB #CS'], b: 1, g: 'val,json,close', e: '@val-close-error', // (r, c) => (0 === r.d ? c.t0 : undefined), }, // Implicit list (comma sep) only allowed at top level: `1,2`. { s: '#CA', c: { 'n.dlist': { $lte: 0 }, 'n.dmap': { $lte: 0 } }, r: 'list', u: { implist: true }, g: 'list,val,imp,comma,jsonic', }, // Implicit list (space sep) only allowed at top level: `1 2`. { c: { 'n.dlist': { $lte: 0 }, 'n.dmap': { $lte: 0 } }, r: 'list', u: { implist: true }, g: 'list,val,imp,space,jsonic', b: 1, }, { s: '#ZZ', g: 'jsonic' }, ], inject: { append: true, // Move "There's more JSON" to end. move: [1, -1], } } } } }); /* jsonic.rule('val', (rs: RuleSpec) => { rs .open( [ // A pair key: `a: ...` // Implicit map at top level. { s: '#KEY #CL', c: { d: 0 }, p: 'map', b: 2, g: 'pair,jsonic,top', }, // A pair dive: `a:b: ...` // Increment counter n.pk to indicate pair-key depth (for extensions). 
// a:9 -> pk=undef, a:b:9 -> pk=1, a:b:c:9 -> pk=2, etc { s: '#KEY #CL', p: 'map', b: 2, n: { pk: 1 }, g: 'pair,jsonic', }, // A plain value: `x` `"x"` `1` `true` .... { s: [VAL], g: 'val,json' }, // Implicit ends `{a:}` -> {"a":null}, `[a:]` -> [{"a":null}] { s: ['#CB #CS'], b: 1, c: { d: { $gt: 0 } }, g: 'val,imp,null,jsonic', }, // Implicit list at top level: a,b. { s: '#CA', c: { d: 0 }, p: 'list', b: 1, g: 'list,imp,jsonic', }, // Value is implicitly null when empty before commas. { s: '#CA', b: 1, g: 'list,val,imp,null,jsonic' }, { s: '#ZZ', g: 'jsonic' }, ], { append: true, delete: [2] }, ) .close( [ // Explicitly close map or list: `}`, `]` { s: ['#CB #CS'], b: 1, g: 'val,json,close', e: (r, c) => (0 === r.d ? c.t0 : undefined), }, // Implicit list (comma sep) only allowed at top level: `1,2`. { s: '#CA', c: { 'n.dlist': { $lte: 0 }, 'n.dmap': { $lte: 0 } }, r: 'list', u: { implist: true }, g: 'list,val,imp,comma,jsonic', }, // Implicit list (space sep) only allowed at top level: `1 2`. { c: { 'n.dlist': { $lte: 0 }, 'n.dmap': { $lte: 0 } }, r: 'list', u: { implist: true }, g: 'list,val,imp,space,jsonic', b: 1, }, { s: '#ZZ', g: 'jsonic' }, ], { append: true, // Move "There's more JSON" to end. move: [1, -1], }, ) }) */ jsonic.rule('map', (rs) => { rs .fnref({ ...fnm }) .bo((r) => { // Increment depth of maps. r.n.dmap = 1 + (r.n.dmap ? r.n.dmap : 0); }) .open([ // Auto-close; fail if rule.finish option is false. { s: '#OB #ZZ', b: 1, e: '@finish', g: 'end,jsonic' }, ]) .open([ // Pair from implicit map. { s: '#KEY #CL', p: 'pair', b: 2, g: 'pair,list,val,imp,jsonic' }, ], { append: true }) .close([ // Normal end of map, no path dive. { s: '#CB', c: { 'n.pk': { $lte: 0 } }, g: 'end,json', }, // Not yet at end of path dive, keep ascending. { s: '#CB', b: 1, g: 'path,jsonic' }, // End of implicit path { s: ['#CA #CS #VAL'], b: 1, g: 'end,path,jsonic' }, // Auto-close; fail if rule.finish option is false. 
{ s: '#ZZ', e: '@finish', g: 'end,jsonic' }, ], { append: true, delete: [0] }) .bc((r, ctx) => { let m = ctx.cfg.info.marker; if (ctx.cfg.info.map && r.node?.[m]) { r.node[m].implicit = !(r.o0 && r.o0.tin === ctx.cfg.t.OB); } }); }); jsonic.rule('list', (rs) => { rs .fnref({ ...fnm, '@list-bo': (r) => { // Increment depth of lists. r.n.dlist = 1 + (r.n.dlist ? r.n.dlist : 0); if (r.prev.u.implist) { r.node.push(r.prev.node); r.prev.node = r.node; } } }) // .bo('@bo') .open({ c: { 'prev.u.implist': { $eq: true } }, p: 'elem', }) .open([ // Initial comma [, will insert null as [null, { s: '#CA', p: 'elem', b: 1, g: 'list,elem,val,imp,jsonic' }, // Another element. { p: 'elem', g: 'list,elem,jsonic' }, ], { append: true }) .close([ // Fail if rule.finish option is false. { s: '#ZZ', e: '@finish', g: 'end,jsonic' }, ], { append: true }) .bc((r, ctx) => { let m = ctx.cfg.info.marker; if (ctx.cfg.info.list && r.node?.[m]) { r.node[m].implicit = !(r.o0 && r.o0.tin === ctx.cfg.t.OS); } }); }); // sets key:val on node jsonic.rule('pair', (rs, p) => { rs .fnref({ ...fnm, '@pair-bc': (r, ctx) => { if (r.u.pair) { pairval(r, ctx); } if (true === r.u.child) { let val = r.child.node; val = undefined === val ? null : val; let prev = r.node['child$']; if (undefined === prev) { r.node['child$'] = val; } else { r.node['child$'] = ctx.cfg.map.merge ? ctx.cfg.map.merge(prev, val, r, ctx) : ctx.cfg.map.extend ? deep(prev, val) : val; } } } }) .open([ // Ignore initial comma: {,a:1. { s: '#CA', g: 'map,pair,comma,jsonic' }, // map.child: bare colon `:value` stores value on child$ property. p.cfg.map.child && { s: '#CL', p: 'val', u: { done: true, child: true }, g: 'map,pair,child,jsonic', }, ], { append: true }) // NOTE: JSON pair.bc runs first, then this bc may override value. 
// .bc('@bc') .close([ // End of map, reset implicit depth counter so that // a:b:c:1,d:2 -> {a:{b:{c:1}},d:2} { s: '#CB', c: { 'n.pk': { $lte: 0 } }, b: 1, g: 'map,pair,json', }, // Ignore trailing comma at end of map. { s: '#CA #CB', c: { 'n.pk': { $lte: 0 } }, b: 1, g: 'map,pair,comma,jsonic', }, { s: [CA, ZZ], g: 'end,jsonic' }, // Comma means a new pair at same pair-key level. { s: '#CA', c: { 'n.pk': { $lte: 0 } }, r: 'pair', g: 'map,pair,json', }, // TODO: try CA VAL ? works anywhere? // Comma means a new pair if implicit top level map. { s: '#CA', c: { 'n.dmap': { $lte: 1 } }, r: 'pair', g: 'map,pair,jsonic', }, // TODO: try VAL CL ? works anywhere? // Value means a new pair if implicit top level map. { s: '#KEY', c: { 'n.dmap': { $lte: 1 } }, r: 'pair', b: 1, g: 'map,pair,imp,jsonic', }, // End of implicit path (eg. a:b:1), keep closing until pk=0. { s: ['#CB #CA #CS #KEY'], c: { 'n.pk': { $gt: 0 } }, b: 1, g: 'map,pair,imp,path,jsonic', }, // Can't close a map with `]` { s: '#CS', e: (r) => r.c0, g: 'end,jsonic' }, // Fail if auto-close option is false. { s: '#ZZ', e: '@finish', g: 'map,pair,json' }, // Who needs commas anyway? { r: 'pair', b: 1, g: 'map,pair,imp,jsonic', }, ], { append: true, delete: [0, 1] }); }); // push onto node jsonic.rule('elem', (rs, p) => { rs .fnref({ ...fnm, '@elem-bc': (r, ctx) => { if (true === r.u.pair) { if (ctx.cfg.list.pair) { // list.pair: push pair as object element into the list let key = r.u.key; let val = r.child.node; val = undefined === val ? null : val; let pairObj = Object.create(null); pairObj[key] = val; r.node.push(pairObj); } else { r.u.prev = r.node[r.u.key]; pairval(r, ctx); } } if (true === r.u.child) { let val = r.child.node; val = undefined === val ? null : val; let prev = r.node['child$']; if (undefined === prev) { r.node['child$'] = val; } else { r.node['child$'] = ctx.cfg.map.merge ? ctx.cfg.map.merge(prev, val, r, ctx) : ctx.cfg.map.extend ? 
deep(prev, val) : val; } } } }) .open([ // Empty commas insert null elements. // Note that close consumes a comma, so b:2 works. { s: '#CA #CA', b: 2, u: { done: true }, a: (r) => r.node.push(null), g: 'list,elem,imp,null,jsonic', }, { s: '#CA', u: { done: true }, a: (r) => r.node.push(null), g: 'list,elem,imp,null,jsonic', }, { s: '#KEY #CL', e: (p.cfg.list.property || p.cfg.list.pair) ? undefined : (_r, ctx) => ctx.t0, p: 'val', n: { pk: 1, dmap: 1 }, u: { done: true, pair: true, list: true }, a: '@pairkey', g: 'elem,pair,jsonic', }, // list.child: bare colon `:value` stores value on child$ property. p.cfg.list.child && { s: '#CL', p: 'val', u: { done: true, child: true, list: true }, g: 'elem,child,jsonic', }, ]) // .bc('@bc') .close([ // Ignore trailing comma. { s: ['#CA', '#CS #ZZ'], b: 1, g: 'list,elem,comma,jsonic' }, // Next element. { s: '#CA', r: 'elem', g: 'list,elem,json' }, // End of list. { s: '#CS', b: 1, g: 'list,elem,json' }, // Fail if auto-close option is false. { s: '#ZZ', e: '@finish', g: 'list,elem,json' }, // Can't close a list with `}` { s: '#CB', e: (r) => r.c0, g: 'end,jsonic' }, // Who needs commas anyway? 
/**
 * Create a parser instance restricted to the `json` grammar group.
 *
 * A new instance is derived from `jsonic` via `jsonic.make` with the
 * options below, the grammar is installed on it, and it is returned.
 *
 * @param {*} jsonic - Instance providing `make(options)`.
 * @returns {*} The derived instance, with the grammar installed.
 */
function makeJSON(jsonic) {
    const strictOptions = {
        grammar$: false,
        // No unquoted text or comments; numbers are decimal only.
        text: { lex: false },
        number: {
            hex: false,
            oct: false,
            bin: false,
            sep: null,
            exclude: /^00+/,
        },
        // Only double-quoted strings.
        string: {
            chars: '"',
            multiChars: '',
            allowUnknown: false,
            escape: { v: null },
        },
        comment: { lex: false },
        map: { extend: false },
        // Empty source is not accepted.
        lex: { empty: false },
        // Do not auto-close structures at end of source, and keep only the
        // rule alternates tagged `json`.
        rule: { finish: false, include: 'json' },
        // Fail if the result is one of these values.
        result: { fail: [undefined, NaN] },
        tokenSet: {
            KEY: ['#ST', null, null, null],
        },
    };
    const strictParser = jsonic.make(strictOptions);
    grammar(strictParser);
    return strictParser;
}
parse: [], parseFiles: [], }; for (let aI = 2; aI < argv.length; aI++) { const arg = argv[aI]; if ('-' === arg) { args.stdin = true; } else if ('--help' === arg || '-h' === arg) { args.help = true; } else if ('--file' === arg || '-f' === arg) { args.files.push(argv[++aI]); } else if ('--start' === arg || '-s' === arg) { args.start = argv[++aI]; } else if ('--tag' === arg || '-t' === arg) { args.tag = argv[++aI]; } else if ('--compact' === arg || '-c' === arg) { args.space = 0; } else if ('--parse' === arg || '-P' === arg) { args.parse.push(argv[++aI]); } else if ('--parse-file' === arg) { args.parseFiles.push(argv[++aI]); } else if (arg && !arg.startsWith('-')) { args.inline.push(arg); } } if (args.help) { return help(console); } let src = ''; for (const fp of args.files) { if ('string' === typeof fp && '' !== fp) { src += node_fs_1.default.readFileSync(fp).toString() + '\n'; } } for (const inline of args.inline) { src += inline + '\n'; } if ('' === src.trim() || args.stdin) { src += await readStdin(console); } const spec = (0, bnf_1.bnf)(src, { start: args.start, tag: args.tag }); // Parse-mode: validate the grammar against one or more sample // inputs and print their parse trees. Exits 1 if any sample fails. 
/**
 * Read the whole of standard input as UTF-8 text.
 *
 * If `console.test$` is a string it is returned directly, without touching
 * the real stdin. If stdin is a TTY, an empty string is returned instead
 * of waiting for input.
 *
 * @param {object} console - Console-like object; may carry a `test$` string.
 * @returns {Promise<string>} The text read.
 */
async function readStdin(console) {
    const canned = console.test$;
    if ('string' === typeof canned) {
        return canned;
    }
    const { stdin } = process;
    if (stdin.isTTY) {
        return '';
    }
    stdin.setEncoding('utf8');
    let text = '';
    for await (const chunk of stdin) {
        text += chunk;
    }
    return text;
}
/**
 * Entry point of the jsonic command line tool.
 *
 * Parses the command line, builds a Jsonic instance from the collected
 * options and plugins, parses every input (files, then stdin, then inline
 * sources), merges the results with `util.deep` and prints the outcome as
 * JSON via `console.log`.
 *
 * @param {string[]} argv - Full argument vector; parsing starts at index 2.
 * @param {Console} console - Console used for all output. It is also
 *   passed to `read_stdin` and exposed to the parser through
 *   `options.debug.get_console`.
 * @returns {Promise<*>} Resolves when output has been printed; when help
 *   is requested, resolves with the result of `help(console)`.
 */
async function run(argv, console) {
    const args = {
        help: false,
        stdin: false,
        sources: [],
        files: [],
        options: [],
        meta: [],
        plugins: [],
    };
    let plugins = {};
    // Cleared by a bare `--`; every later argument is then a source text.
    let accept_args = true;
    for (let aI = 2; aI < argv.length; aI++) {
        let arg = argv[aI];
        if (accept_args && arg.startsWith('-')) {
            // NOTE(review): flags that take a value consume it with
            // argv[++aI]; when such a flag is the last argument the value
            // pushed is undefined.
            if ('-' === arg) {
                args.stdin = true;
            }
            // Stop interpreting arguments as flags.
            else if ('--' === arg) {
                accept_args = false;
            }
            //
            else if ('--file' === arg || '-f' === arg) {
                args.files.push(argv[++aI]);
            }
            //
            else if ('--option' === arg || '-o' === arg) {
                args.options.push(argv[++aI]);
            }
            //
            else if ('--meta' === arg || '-m' === arg) {
                args.meta.push(argv[++aI]);
            }
            // Debug registers the Debug plugin and adds the `log=-1` meta setting.
            else if ('--debug' === arg || '-d' === arg) {
                plugins.debug = debug_1.Debug;
                args.meta.push('log=-1');
            }
            //
            else if ('--help' === arg || '-h' === arg) {
                args.help = true;
            }
            //
            else if ('--plugin' === arg || '-p' === arg) {
                args.plugins.push(argv[++aI]);
            }
            // Shorthand for `--option JSON.space=2`.
            else if ('--nice' === arg || '-n' === arg) {
                args.options.push('JSON.space=2');
            }
            // Unrecognised dash-arguments are treated as source text.
            else {
                args.sources.push(arg);
            }
        }
        //
        else {
            args.sources.push(arg);
        }
    }
    if (args.help) {
        return help(console);
    }
    let options = handle_props(args.options);
    let meta = handle_props(args.meta);
    // Plugins loaded by name are spread after, and so override, the built-in debug entry.
    plugins = { ...plugins, ...handle_plugins(args.plugins) };
    options.debug = options.debug || {};
    options.debug.get_console = () => console;
    let jsonic = jsonic_1.Jsonic.make(options);
    for (let pn in plugins) {
        // Per-plugin options are looked up under options.plugin[<name>].
        jsonic.use(plugins[pn], options.plugin?.[pn] || {});
    }
    if (null != plugins.debug) {
        console.log(jsonic.debug.describe() + '\n=== PARSE ===');
    }
    // Results are accumulated under a single `val` key. Inputs are merged
    // in this order: files, stdin, inline sources.
    // NOTE(review): presumably later inputs override earlier ones; confirm
    // against util.deep.
    let data = { val: null };
    for (let fp of args.files) {
        if ('string' === typeof fp && '' !== fp) {
            jsonic_1.util.deep(data, { val: jsonic(node_fs_1.default.readFileSync(fp).toString(), meta) });
        }
    }
    // Stdin is read when explicitly requested with `-`, or when no inline
    // source text was given (even if files were).
    if (0 === args.sources.length || args.stdin) {
        let stdin = await read_stdin(console);
        jsonic_1.util.deep(data, { val: jsonic(stdin, meta) });
    }
    for (let src of args.sources) {
        jsonic_1.util.deep(data, { val: jsonic(src, meta) });
    }
    // Anything other than an object for options.JSON is replaced by {}.
    options.JSON =
        null == options.JSON || 'object' !== typeof options.JSON ? {} : options.JSON;
    // The replacer and space settings are passed through the default Jsonic parser.
    let replacer = (0, jsonic_1.Jsonic)(options.JSON.replacer);
    let space = (0, jsonic_1.Jsonic)(options.JSON.space);
    // JSON.stringify receives an array replacer or null: a single value is
    // wrapped in an array.
    replacer = Array.isArray(replacer)
        ? replacer
        : null == replacer
            ? null
            : [replacer];
    let json = JSON.stringify(data.val, replacer, space);
    console.log(json);
}
function handle_props(propvals) { let out = {}; for (let propval of propvals) { let pv = propval.split(/=/); if ('' !== pv[0] && '' !== pv[1]) { let val = (0, jsonic_1.Jsonic)(pv[1]); jsonic_1.util.prop(out, pv[0], val); } } return out; } function handle_plugins(plugins) { let out = {}; for (let name of plugins) { try { out[name] = require(name); } catch (e) { let err = e; // Might be @jsonic plugin if (!name.startsWith('@')) { try { out[name] = require('@jsonic/' + name); } catch (e) { throw err; // NOTE: throws original error } } else { throw err; } } // Handle some variations in the way the plugin function is exported. if ('function' !== typeof out[name]) { let refname = (name.match(/([^.\\\/]+)($|\.[^.]+$)/) || [])[1]; refname = null != refname ? refname.toLowerCase() : refname; // See test plugin test/p1.js if ('function' == typeof out[name].default) { out[name] = out[name].default; } // else if (null != refname && 'function' == typeof out[name][camel(refname)]) { out[name] = out[name][camel(refname)]; } // See test plugin test/p2.js else if (null != refname && 'function' == typeof out[name][refname]) { out[refname] = out[name][refname]; delete out[name]; } // else { throw new Error('Plugin is not a function: ' + name); } } } return out; } function camel(s) { return (s[0].toUpperCase() + s .substring(1) .replace(/-(\w)/g, (m) => m[1][0].toUpperCase() + m[1].substring(1))); } function help(console) { let s = ` A JSON parser that isn't strict. Usage: jsonic []* where is the source text to be parsed into JSON. If omitted, the source text is read from STDIN. If multiple source texts are provided, they will be merged in precedence (from highest) right to left, STDIN, . are the command arguments: - Alias for STDIN. --file Load and parse . -f --option Set option to , where -o can be a dotted path (see example below). --nice Print JSON indented over multiple lines. -n --meta Set parse meta data to , where -m can be a dotted path (see option example). 
--plugin Load a plugin, where is the plugin module -p reference (name or path). --debug Print abbreviated lex and parse logs for debugging, -d alias of \`--meta log = -1\`. --help Print this help message. -h Output: Output is generated by the built-in JSON.stringify method. The \`replacer\` and \`space\` arguments can be specified using \`-o JSON.replacer=...\` and \`-o JSON.space=...\` respectively. Plugins The built-in plugins (found in the ./plugin folder of the distribution) can be specified using the abbreviated references: directive, multisource, csv, toml, ... Plugin options can be specified using: \`-o plugin..