Repository: spencermountain/compromise Branch: master Commit: 758eee33b8c0 Files: 1062 Total size: 5.9 MB Directory structure: gitextract_c9wmq0vi/ ├── README.md ├── builds/ │ ├── compromise.js │ ├── one/ │ │ ├── compromise-one.cjs │ │ └── compromise-one.mjs │ ├── three/ │ │ ├── compromise-three.cjs │ │ └── compromise-three.mjs │ └── two/ │ ├── compromise-two.cjs │ └── compromise-two.mjs ├── changelog.md ├── data/ │ ├── README.md │ ├── lexicon/ │ │ ├── adjectives/ │ │ │ ├── adjectives.js │ │ │ └── comparables.js │ │ ├── dates/ │ │ │ ├── dates.js │ │ │ ├── durations.js │ │ │ ├── months.js │ │ │ └── weekdays.js │ │ ├── index.js │ │ ├── misc/ │ │ │ ├── adverbs.js │ │ │ ├── conjunctions.js │ │ │ ├── currencies.js │ │ │ ├── determiners.js │ │ │ ├── expressions.js │ │ │ └── prepositions.js │ │ ├── misc.js │ │ ├── nouns/ │ │ │ ├── actors.js │ │ │ ├── demonyms.js │ │ │ ├── organizations.js │ │ │ ├── possessives.js │ │ │ ├── pronouns.js │ │ │ ├── properNouns.js │ │ │ ├── relative-prounoun.js │ │ │ ├── singulars.js │ │ │ ├── sportsTeams.js │ │ │ └── uncountables.js │ │ ├── numbers/ │ │ │ ├── cardinals.js │ │ │ ├── multiples.js │ │ │ ├── ordinals.js │ │ │ └── units.js │ │ ├── people/ │ │ │ ├── femaleNames.js │ │ │ ├── firstnames.js │ │ │ ├── honorifics.js │ │ │ ├── lastnames.js │ │ │ ├── maleNames.js │ │ │ └── people.js │ │ ├── places/ │ │ │ ├── cities.js │ │ │ ├── countries.js │ │ │ ├── places.js │ │ │ └── regions.js │ │ ├── switches/ │ │ │ ├── actor-verb.js │ │ │ ├── adj-gerund.js │ │ │ ├── adj-noun.js │ │ │ ├── adj-past.js │ │ │ ├── adj-present.js │ │ │ ├── noun-gerund.js │ │ │ ├── noun-verb.js │ │ │ ├── person-adj.js │ │ │ ├── person-date.js │ │ │ ├── person-noun.js │ │ │ ├── person-place.js │ │ │ ├── person-verb.js │ │ │ └── unit-noun.js │ │ └── verbs/ │ │ ├── infinitives.js │ │ ├── modals.js │ │ ├── participles.js │ │ ├── phrasals.js │ │ └── verbs.js │ └── pairs/ │ ├── AdjToNoun.js │ ├── Comparative.js │ ├── Gerund.js │ ├── Participle.js │ ├── PastTense.js │ ├── 
PresentTense.js │ ├── Superlative.js │ └── index.js ├── demos/ │ ├── performance.html │ ├── plugin.html │ └── web-worker/ │ ├── _worker.js │ └── index.html ├── eslint.config.js ├── one/ │ └── package.json ├── package.json ├── plugins/ │ ├── _experiments/ │ │ ├── ast/ │ │ │ ├── README.md │ │ │ ├── package.json │ │ │ ├── scratch.js │ │ │ └── src/ │ │ │ ├── ast.js │ │ │ ├── compute/ │ │ │ │ └── index.js │ │ │ ├── lines.js │ │ │ └── plugin.js │ │ ├── cmd-k/ │ │ │ ├── README.md │ │ │ ├── package.json │ │ │ ├── scratch.js │ │ │ └── src/ │ │ │ ├── plugin.js │ │ │ ├── searchBang.js │ │ │ └── slashCmd.js │ │ ├── compress/ │ │ │ ├── README.md │ │ │ └── src/ │ │ │ ├── index.js │ │ │ └── lz.js │ │ ├── markdown/ │ │ │ ├── README.md │ │ │ ├── package.json │ │ │ ├── scratch.js │ │ │ └── src/ │ │ │ ├── Wrap.js │ │ │ ├── parse/ │ │ │ │ ├── crawl.js │ │ │ │ ├── index.js │ │ │ │ ├── toPlaintext.js │ │ │ │ └── uuid.js │ │ │ └── plugin.js │ │ └── sentiment/ │ │ ├── README.md │ │ ├── package.json │ │ ├── scratch.js │ │ ├── src/ │ │ │ ├── data/ │ │ │ │ ├── _pckd.js │ │ │ │ └── index.js │ │ │ ├── emoji.js │ │ │ ├── escape.js │ │ │ ├── lib.js │ │ │ └── plugin.js │ │ └── test.js │ ├── dates/ │ │ ├── README.md │ │ ├── builds/ │ │ │ ├── compromise-dates.cjs │ │ │ └── compromise-dates.mjs │ │ ├── changelog.md │ │ ├── demo/ │ │ │ └── index.html │ │ ├── index.d.cts │ │ ├── index.d.ts │ │ ├── package.json │ │ ├── scratch.js │ │ ├── scripts/ │ │ │ ├── perf.js │ │ │ └── version.js │ │ ├── src/ │ │ │ ├── _version.js │ │ │ ├── api/ │ │ │ │ ├── dates.js │ │ │ │ ├── durations/ │ │ │ │ │ ├── index.js │ │ │ │ │ └── parse.js │ │ │ │ ├── find/ │ │ │ │ │ ├── index.js │ │ │ │ │ └── split.js │ │ │ │ ├── index.js │ │ │ │ ├── normalize.js │ │ │ │ ├── parse/ │ │ │ │ │ ├── index.js │ │ │ │ │ ├── normalize.js │ │ │ │ │ ├── one/ │ │ │ │ │ │ ├── 01-tokenize/ │ │ │ │ │ │ │ ├── 01-shift.js │ │ │ │ │ │ │ ├── 02-counter.js │ │ │ │ │ │ │ ├── 03-time.js │ │ │ │ │ │ │ ├── 04-relative.js │ │ │ │ │ │ │ ├── 05-section.js │ │ 
│ │ │ │ │ ├── 06-timezone.js │ │ │ │ │ │ │ ├── 07-weekday.js │ │ │ │ │ │ │ ├── _timezones.js │ │ │ │ │ │ │ └── index.js │ │ │ │ │ │ ├── 02-parse/ │ │ │ │ │ │ │ ├── 01-today.js │ │ │ │ │ │ │ ├── 02-holidays.js │ │ │ │ │ │ │ ├── 03-next-last.js │ │ │ │ │ │ │ ├── 04-yearly.js │ │ │ │ │ │ │ ├── 05-explicit.js │ │ │ │ │ │ │ └── index.js │ │ │ │ │ │ ├── 03-transform/ │ │ │ │ │ │ │ ├── addCounter.js │ │ │ │ │ │ │ └── index.js │ │ │ │ │ │ ├── index.js │ │ │ │ │ │ └── units/ │ │ │ │ │ │ ├── Unit.js │ │ │ │ │ │ ├── _day.js │ │ │ │ │ │ ├── _time.js │ │ │ │ │ │ ├── _week.js │ │ │ │ │ │ ├── _year.js │ │ │ │ │ │ └── index.js │ │ │ │ │ └── range/ │ │ │ │ │ ├── 01-two-times.js │ │ │ │ │ ├── 02-date-range.js │ │ │ │ │ ├── 03-one-date.js │ │ │ │ │ ├── _reverse.js │ │ │ │ │ ├── combos/ │ │ │ │ │ │ └── index.js │ │ │ │ │ ├── index.js │ │ │ │ │ └── intervals/ │ │ │ │ │ └── index.js │ │ │ │ ├── times.js │ │ │ │ └── toJSON.js │ │ │ ├── compute/ │ │ │ │ ├── 00-year.js │ │ │ │ ├── 01-time-range.js │ │ │ │ ├── 02-timezone.js │ │ │ │ ├── 03-fixup.js │ │ │ │ ├── index.js │ │ │ │ └── matches.js │ │ │ ├── debug.js │ │ │ ├── model/ │ │ │ │ ├── regex.js │ │ │ │ ├── tags.js │ │ │ │ └── words/ │ │ │ │ ├── dates.js │ │ │ │ ├── durations.js │ │ │ │ ├── holidays.js │ │ │ │ ├── index.js │ │ │ │ ├── times.js │ │ │ │ └── timezones.js │ │ │ └── plugin.js │ │ └── tests/ │ │ ├── _lib.js │ │ ├── ambig-month.test.js │ │ ├── ambig-week.test.js │ │ ├── ambig-weekday.test.js │ │ ├── backlog/ │ │ │ ├── combo.ignore.js │ │ │ ├── duckling.ignore.js │ │ │ ├── interval.ignore.js │ │ │ └── units.ignore.js │ │ ├── before-after.test.js │ │ ├── chronic.test.js │ │ ├── day-start.test.js │ │ ├── dmy.test.js │ │ ├── duration-range.test.js │ │ ├── duration.test.js │ │ ├── durations.test.js │ │ ├── end.test.js │ │ ├── equals.test.js │ │ ├── false-positive.test.js │ │ ├── format.test.js │ │ ├── full-iso.test.js │ │ ├── fullDates.test.js │ │ ├── has-date.test.js │ │ ├── misc.test.js │ │ ├── phrase.test.js │ │ ├── 
startDates.test.js │ │ ├── tagger/ │ │ │ ├── ambiguous.test.js │ │ │ ├── date-chunk.test.js │ │ │ └── date_tag.test.js │ │ ├── times.test.js │ │ ├── timezone.test.js │ │ ├── to-iso.test.js │ │ ├── today.test.js │ │ ├── tokenizer.test.js │ │ └── week.test.js │ ├── paragraphs/ │ │ ├── README.md │ │ ├── builds/ │ │ │ ├── compromise-paragraphs.cjs │ │ │ └── compromise-paragraphs.mjs │ │ ├── index.d.ts │ │ ├── package.json │ │ ├── src/ │ │ │ ├── api.js │ │ │ └── plugin.js │ │ └── tests/ │ │ ├── _lib.js │ │ └── misc.test.js │ ├── payload/ │ │ ├── README.md │ │ ├── builds/ │ │ │ ├── compromise-payload.cjs │ │ │ └── compromise-payload.mjs │ │ ├── index.d.ts │ │ ├── package.json │ │ ├── scratch.js │ │ ├── src/ │ │ │ ├── debug.js │ │ │ └── plugin.js │ │ └── tests/ │ │ ├── _lib.js │ │ └── payload.test.js │ ├── speech/ │ │ ├── README.md │ │ ├── builds/ │ │ │ ├── compromise-speech.cjs │ │ │ └── compromise-speech.mjs │ │ ├── demo/ │ │ │ └── index.html │ │ ├── index.d.ts │ │ ├── package.json │ │ ├── scratch.js │ │ ├── src/ │ │ │ ├── api.js │ │ │ ├── compute/ │ │ │ │ ├── index.js │ │ │ │ ├── soundsLike/ │ │ │ │ │ ├── index.js │ │ │ │ │ ├── metaphone.js │ │ │ │ │ └── transformations.js │ │ │ │ └── syllables/ │ │ │ │ ├── index.js │ │ │ │ ├── postProcess.js │ │ │ │ └── syllables.js │ │ │ └── plugin.js │ │ └── tests/ │ │ ├── _lib.js │ │ ├── soundsLike.test.js │ │ └── syllables.test.js │ ├── speed/ │ │ ├── README.md │ │ ├── builds/ │ │ │ ├── compromise-speed.cjs │ │ │ └── compromise-speed.mjs │ │ ├── demo/ │ │ │ └── index.html │ │ ├── index.d.ts │ │ ├── package.json │ │ ├── scratch.js │ │ ├── scripts/ │ │ │ └── version.js │ │ ├── src/ │ │ │ ├── _version.js │ │ │ ├── keypress/ │ │ │ │ └── index.js │ │ │ ├── lazyParse/ │ │ │ │ ├── lazyParse.js │ │ │ │ ├── maybeMatch.js │ │ │ │ └── plugin.js │ │ │ ├── plugin.js │ │ │ ├── stream/ │ │ │ │ └── streamFile.js │ │ │ └── workerPool/ │ │ │ ├── index.js │ │ │ ├── plugin.js │ │ │ ├── pool/ │ │ │ │ ├── create.js │ │ │ │ └── worker.js │ │ │ └── 
rip.js │ │ └── tests/ │ │ ├── _lib.js │ │ ├── files/ │ │ │ └── freshPrince.txt │ │ └── stream.test.js │ ├── stats/ │ │ ├── README.md │ │ ├── builds/ │ │ │ ├── compromise-stats.cjs │ │ │ └── compromise-stats.mjs │ │ ├── demo/ │ │ │ └── index.html │ │ ├── index.d.ts │ │ ├── package.json │ │ ├── scratch.js │ │ ├── scripts/ │ │ │ ├── generate.js │ │ │ └── pack.js │ │ ├── src/ │ │ │ ├── compute.js │ │ │ ├── ngram/ │ │ │ │ ├── endGrams.js │ │ │ │ ├── getGrams.js │ │ │ │ ├── index.js │ │ │ │ ├── sort.js │ │ │ │ ├── startGrams.js │ │ │ │ └── tokenize.js │ │ │ ├── plugin.js │ │ │ └── tfidf/ │ │ │ ├── _model.js │ │ │ ├── idf.js │ │ │ ├── index.js │ │ │ ├── tf.js │ │ │ └── unpack.js │ │ └── tests/ │ │ ├── _lib.js │ │ ├── edgegram.test.js │ │ ├── misc.test.js │ │ └── ngram.test.js │ └── wikipedia/ │ ├── README.md │ ├── builds/ │ │ ├── compromise-wikipedia.cjs │ │ └── compromise-wikipedia.mjs │ ├── config.js │ ├── demo/ │ │ └── index.html │ ├── index.d.ts │ ├── package.json │ ├── scratch.js │ ├── scripts/ │ │ ├── generate/ │ │ │ ├── 01-download.js │ │ │ ├── 02-filter.js │ │ │ ├── 03-compress.js │ │ │ ├── _no-list.js │ │ │ └── index.js │ │ ├── perf.js │ │ └── stat.js │ ├── src/ │ │ ├── _model.js │ │ └── plugin.js │ └── tests/ │ ├── _lib.js │ └── misc.test.js ├── scratch.js ├── scripts/ │ ├── chunks.js │ ├── coreference/ │ │ └── index.js │ ├── debug.js │ ├── match-linter.js │ ├── match.js │ ├── pack.js │ ├── patterns/ │ │ ├── manual.js │ │ ├── patterns.js │ │ └── tester.js │ ├── perf/ │ │ ├── _fetch.js │ │ ├── flame/ │ │ │ ├── _sotu-text.js │ │ │ └── index.js │ │ ├── index.js │ │ ├── novel.js │ │ ├── package.json │ │ ├── pool/ │ │ │ ├── _lib.js │ │ │ ├── lookup-worker.js │ │ │ ├── pool.js │ │ │ └── worker.js │ │ └── versions.js │ ├── plugins.js │ ├── test/ │ │ ├── coverage.js │ │ ├── index.js │ │ ├── smoke.test.js │ │ ├── stress.js │ │ └── types.ts │ ├── typescript/ │ │ ├── one.ts │ │ ├── three.ts │ │ └── two.ts │ └── version.js ├── src/ │ ├── 1-one/ │ │ ├── cache/ │ │ │ ├── 
api.js │ │ │ ├── compute.js │ │ │ ├── methods/ │ │ │ │ ├── cacheDoc.js │ │ │ │ └── index.js │ │ │ └── plugin.js │ │ ├── change/ │ │ │ ├── api/ │ │ │ │ ├── case.js │ │ │ │ ├── concat.js │ │ │ │ ├── harden.js │ │ │ │ ├── index.js │ │ │ │ ├── insert.js │ │ │ │ ├── lib/ │ │ │ │ │ ├── _sort.js │ │ │ │ │ ├── insert.js │ │ │ │ │ └── remove.js │ │ │ │ ├── remove.js │ │ │ │ ├── replace.js │ │ │ │ ├── sort.js │ │ │ │ └── whitespace.js │ │ │ ├── compute/ │ │ │ │ ├── index.js │ │ │ │ └── uuid.js │ │ │ └── plugin.js │ │ ├── contraction-one/ │ │ │ ├── compute/ │ │ │ │ ├── contractions/ │ │ │ │ │ ├── _splice.js │ │ │ │ │ ├── apostrophe-d.js │ │ │ │ │ ├── apostrophe-t.js │ │ │ │ │ ├── french.js │ │ │ │ │ ├── index.js │ │ │ │ │ ├── number-range.js │ │ │ │ │ └── number-unit.js │ │ │ │ └── index.js │ │ │ ├── model/ │ │ │ │ ├── contractions.js │ │ │ │ ├── index.js │ │ │ │ └── number-suffix.js │ │ │ └── plugin.js │ │ ├── freeze/ │ │ │ ├── compute.js │ │ │ ├── debug.js │ │ │ └── plugin.js │ │ ├── lexicon/ │ │ │ ├── compute/ │ │ │ │ ├── index.js │ │ │ │ ├── multi-word.js │ │ │ │ └── single-word.js │ │ │ ├── lib.js │ │ │ ├── methods/ │ │ │ │ ├── expand.js │ │ │ │ └── index.js │ │ │ └── plugin.js │ │ ├── lookup/ │ │ │ ├── api/ │ │ │ │ ├── buildTrie/ │ │ │ │ │ ├── compress.js │ │ │ │ │ └── index.js │ │ │ │ ├── index.js │ │ │ │ └── scan.js │ │ │ └── plugin.js │ │ ├── match/ │ │ │ ├── api/ │ │ │ │ ├── _lib.js │ │ │ │ ├── index.js │ │ │ │ ├── join.js │ │ │ │ ├── lookaround.js │ │ │ │ ├── match.js │ │ │ │ └── split.js │ │ │ ├── lib.js │ │ │ ├── methods/ │ │ │ │ ├── index.js │ │ │ │ ├── match/ │ │ │ │ │ ├── 01-failFast.js │ │ │ │ │ ├── 02-from-here.js │ │ │ │ │ ├── 03-getGroup.js │ │ │ │ │ ├── 03-notIf.js │ │ │ │ │ ├── _lib.js │ │ │ │ │ ├── index.js │ │ │ │ │ ├── steps/ │ │ │ │ │ │ ├── and-block.js │ │ │ │ │ │ ├── astrix.js │ │ │ │ │ │ ├── contraction-skip.js │ │ │ │ │ │ ├── greedy-match.js │ │ │ │ │ │ ├── logic/ │ │ │ │ │ │ │ ├── and-or.js │ │ │ │ │ │ │ ├── greedy.js │ │ │ │ │ │ │ └── 
negative-greedy.js │ │ │ │ │ │ ├── negative.js │ │ │ │ │ │ ├── optional-match.js │ │ │ │ │ │ ├── or-block.js │ │ │ │ │ │ └── simple-match.js │ │ │ │ │ └── term/ │ │ │ │ │ ├── _fuzzy.js │ │ │ │ │ └── doesMatch.js │ │ │ │ ├── parseMatch/ │ │ │ │ │ ├── 01-parseBlocks.js │ │ │ │ │ ├── 02-parseToken.js │ │ │ │ │ ├── 03-splitHyphens.js │ │ │ │ │ ├── 04-inflect-root.js │ │ │ │ │ ├── 05-postProcess.js │ │ │ │ │ └── index.js │ │ │ │ └── termMethods.js │ │ │ └── plugin.js │ │ ├── output/ │ │ │ ├── api/ │ │ │ │ ├── _fmts.js │ │ │ │ ├── _text.js │ │ │ │ ├── debug.js │ │ │ │ ├── html.js │ │ │ │ ├── index.js │ │ │ │ ├── json.js │ │ │ │ ├── out.js │ │ │ │ ├── text.js │ │ │ │ └── wrap.js │ │ │ ├── methods/ │ │ │ │ ├── debug/ │ │ │ │ │ ├── _color.js │ │ │ │ │ ├── chunks.js │ │ │ │ │ ├── client-side.js │ │ │ │ │ ├── highlight.js │ │ │ │ │ ├── index.js │ │ │ │ │ └── tags.js │ │ │ │ ├── hash.js │ │ │ │ └── index.js │ │ │ └── plugin.js │ │ ├── pointers/ │ │ │ ├── api/ │ │ │ │ ├── index.js │ │ │ │ └── lib/ │ │ │ │ ├── _lib.js │ │ │ │ ├── difference.js │ │ │ │ ├── intersection.js │ │ │ │ ├── split.js │ │ │ │ └── union.js │ │ │ ├── methods/ │ │ │ │ ├── getDoc.js │ │ │ │ └── index.js │ │ │ └── plugin.js │ │ ├── sweep/ │ │ │ ├── api.js │ │ │ ├── lib.js │ │ │ ├── methods/ │ │ │ │ ├── buildNet/ │ │ │ │ │ ├── 01-parse.js │ │ │ │ │ └── index.js │ │ │ │ ├── index.js │ │ │ │ ├── sweep/ │ │ │ │ │ ├── 01-getHooks.js │ │ │ │ │ ├── 02-trim-down.js │ │ │ │ │ ├── 04-runMatch.js │ │ │ │ │ └── index.js │ │ │ │ └── tagger/ │ │ │ │ ├── canBe.js │ │ │ │ └── index.js │ │ │ └── plugin.js │ │ ├── tag/ │ │ │ ├── api/ │ │ │ │ ├── index.js │ │ │ │ └── tag.js │ │ │ ├── compute/ │ │ │ │ └── tagRank.js │ │ │ ├── lib.js │ │ │ ├── methods/ │ │ │ │ ├── addTags/ │ │ │ │ │ ├── 01-validate.js │ │ │ │ │ ├── 02-fmt.js │ │ │ │ │ ├── _colors.js │ │ │ │ │ └── index.js │ │ │ │ ├── canBe.js │ │ │ │ ├── index.js │ │ │ │ ├── setTag.js │ │ │ │ └── unTag.js │ │ │ └── plugin.js │ │ ├── tokenize/ │ │ │ ├── compute/ │ │ │ │ ├── 
alias.js │ │ │ │ ├── freq.js │ │ │ │ ├── index.js │ │ │ │ ├── machine.js │ │ │ │ ├── normal/ │ │ │ │ │ ├── 01-cleanup.js │ │ │ │ │ ├── 02-acronyms.js │ │ │ │ │ └── index.js │ │ │ │ ├── offset.js │ │ │ │ ├── reindex.js │ │ │ │ └── wordCount.js │ │ │ ├── methods/ │ │ │ │ ├── 01-sentences/ │ │ │ │ │ ├── 01-simple-split.js │ │ │ │ │ ├── 02-simple-merge.js │ │ │ │ │ ├── 03-smart-merge.js │ │ │ │ │ ├── 04-quote-merge.js │ │ │ │ │ ├── 05-parens-merge.js │ │ │ │ │ ├── index.js │ │ │ │ │ └── is-sentence.js │ │ │ │ ├── 02-terms/ │ │ │ │ │ ├── 01-hyphens.js │ │ │ │ │ ├── 02-slashes.js │ │ │ │ │ ├── 03-ranges.js │ │ │ │ │ └── index.js │ │ │ │ ├── 03-whitespace/ │ │ │ │ │ ├── index.js │ │ │ │ │ └── tokenize.js │ │ │ │ ├── index.js │ │ │ │ ├── parse.js │ │ │ │ └── unicode.js │ │ │ ├── model/ │ │ │ │ ├── abbreviations/ │ │ │ │ │ ├── honorifics.js │ │ │ │ │ ├── misc.js │ │ │ │ │ ├── months.js │ │ │ │ │ ├── nouns.js │ │ │ │ │ ├── organizations.js │ │ │ │ │ ├── places.js │ │ │ │ │ └── units.js │ │ │ │ ├── aliases.js │ │ │ │ ├── index.js │ │ │ │ ├── lexicon.js │ │ │ │ ├── prefixes.js │ │ │ │ ├── punctuation.js │ │ │ │ ├── suffixes.js │ │ │ │ └── unicode.js │ │ │ └── plugin.js │ │ └── typeahead/ │ │ ├── api.js │ │ ├── compute.js │ │ ├── lib/ │ │ │ ├── allPrefixes.js │ │ │ └── index.js │ │ └── plugin.js │ ├── 2-two/ │ │ ├── contraction-two/ │ │ │ ├── api/ │ │ │ │ ├── contract.js │ │ │ │ └── index.js │ │ │ ├── compute/ │ │ │ │ ├── _splice.js │ │ │ │ ├── apostrophe-d.js │ │ │ │ ├── apostrophe-s.js │ │ │ │ ├── apostrophe-t.js │ │ │ │ ├── index.js │ │ │ │ └── isPossessive.js │ │ │ └── plugin.js │ │ ├── lazy/ │ │ │ ├── lazyParse.js │ │ │ ├── maybeMatch.js │ │ │ └── plugin.js │ │ ├── postTagger/ │ │ │ ├── api.js │ │ │ ├── compute/ │ │ │ │ └── index.js │ │ │ ├── model/ │ │ │ │ ├── _misc.js │ │ │ │ ├── adjective/ │ │ │ │ │ ├── adj-adverb.js │ │ │ │ │ ├── adj-gerund.js │ │ │ │ │ ├── adj-noun.js │ │ │ │ │ ├── adj-verb.js │ │ │ │ │ └── adjective.js │ │ │ │ ├── adverb.js │ │ │ │ ├── 
conjunctions.js │ │ │ │ ├── dates/ │ │ │ │ │ ├── date-phrase.js │ │ │ │ │ └── date.js │ │ │ │ ├── expressions.js │ │ │ │ ├── index.js │ │ │ │ ├── nouns/ │ │ │ │ │ ├── nouns.js │ │ │ │ │ ├── organizations.js │ │ │ │ │ └── places.js │ │ │ │ ├── numbers/ │ │ │ │ │ ├── fractions.js │ │ │ │ │ ├── money.js │ │ │ │ │ └── numbers.js │ │ │ │ ├── person/ │ │ │ │ │ ├── ambig-name.js │ │ │ │ │ └── person-phrase.js │ │ │ │ └── verbs/ │ │ │ │ ├── adj-gerund.js │ │ │ │ ├── auxiliary.js │ │ │ │ ├── imperative.js │ │ │ │ ├── noun-gerund.js │ │ │ │ ├── passive.js │ │ │ │ ├── phrasal.js │ │ │ │ ├── verb-noun.js │ │ │ │ └── verbs.js │ │ │ └── plugin.js │ │ ├── preTagger/ │ │ │ ├── compute/ │ │ │ │ ├── index.js │ │ │ │ ├── penn.js │ │ │ │ ├── root.js │ │ │ │ └── tagger/ │ │ │ │ ├── 1st-pass/ │ │ │ │ │ ├── 01-colons.js │ │ │ │ │ └── 02-hyphens.js │ │ │ │ ├── 2nd-pass/ │ │ │ │ │ ├── 00-tagSwitch.js │ │ │ │ │ ├── 01-case.js │ │ │ │ │ ├── 02-suffix.js │ │ │ │ │ ├── 03-regex.js │ │ │ │ │ ├── 04-prefix.js │ │ │ │ │ └── 05-year.js │ │ │ │ ├── 3rd-pass/ │ │ │ │ │ ├── 01-acronym.js │ │ │ │ │ ├── 02-neighbours.js │ │ │ │ │ ├── 03-orgWords.js │ │ │ │ │ ├── 04-placeWords.js │ │ │ │ │ ├── 05-fallback.js │ │ │ │ │ ├── 06-switches.js │ │ │ │ │ ├── 07-verb-type.js │ │ │ │ │ ├── 08-imperative.js │ │ │ │ │ ├── _adhoc.js │ │ │ │ │ └── _fillTags.js │ │ │ │ ├── _fastTag.js │ │ │ │ └── index.js │ │ │ ├── methods/ │ │ │ │ ├── expand/ │ │ │ │ │ ├── byTag.js │ │ │ │ │ └── index.js │ │ │ │ ├── index.js │ │ │ │ ├── looksPlural.js │ │ │ │ ├── quickSplit.js │ │ │ │ └── transform/ │ │ │ │ ├── adjectives/ │ │ │ │ │ ├── conjugate/ │ │ │ │ │ │ ├── fromAdverb.js │ │ │ │ │ │ ├── lib.js │ │ │ │ │ │ └── toAdverb.js │ │ │ │ │ ├── index.js │ │ │ │ │ └── inflect.js │ │ │ │ ├── index.js │ │ │ │ ├── nouns/ │ │ │ │ │ ├── index.js │ │ │ │ │ ├── toPlural/ │ │ │ │ │ │ ├── _rules.js │ │ │ │ │ │ └── index.js │ │ │ │ │ └── toSingular/ │ │ │ │ │ ├── _rules.js │ │ │ │ │ └── index.js │ │ │ │ └── verbs/ │ │ │ │ ├── conjugate/ │ │ │ │ │ 
└── index.js │ │ │ │ ├── getTense/ │ │ │ │ │ ├── _guess.js │ │ │ │ │ └── index.js │ │ │ │ ├── index.js │ │ │ │ └── toInfinitive/ │ │ │ │ └── index.js │ │ │ ├── model/ │ │ │ │ ├── _expand/ │ │ │ │ │ ├── index.js │ │ │ │ │ └── irregulars.js │ │ │ │ ├── clues/ │ │ │ │ │ ├── _adj.js │ │ │ │ │ ├── _gerund.js │ │ │ │ │ ├── _noun.js │ │ │ │ │ ├── _person.js │ │ │ │ │ ├── _verb.js │ │ │ │ │ ├── actor-verb.js │ │ │ │ │ ├── adj-gerund.js │ │ │ │ │ ├── adj-noun.js │ │ │ │ │ ├── adj-past.js │ │ │ │ │ ├── adj-present.js │ │ │ │ │ ├── index.js │ │ │ │ │ ├── noun-gerund.js │ │ │ │ │ ├── noun-verb.js │ │ │ │ │ ├── person-adj.js │ │ │ │ │ ├── person-date.js │ │ │ │ │ ├── person-noun.js │ │ │ │ │ ├── person-place.js │ │ │ │ │ ├── person-verb.js │ │ │ │ │ └── unit-noun.js │ │ │ │ ├── index.js │ │ │ │ ├── irregulars/ │ │ │ │ │ └── plurals.js │ │ │ │ ├── lexicon/ │ │ │ │ │ ├── _data.js │ │ │ │ │ ├── emoticons.js │ │ │ │ │ ├── frozenLex.js │ │ │ │ │ ├── index.js │ │ │ │ │ └── misc.js │ │ │ │ ├── models/ │ │ │ │ │ ├── _data.js │ │ │ │ │ └── index.js │ │ │ │ ├── orgWords.js │ │ │ │ ├── patterns/ │ │ │ │ │ ├── endsWith.js │ │ │ │ │ ├── neighbours.js │ │ │ │ │ ├── prefixes.js │ │ │ │ │ └── suffixes.js │ │ │ │ ├── personWords.js │ │ │ │ ├── placeWords.js │ │ │ │ └── regex/ │ │ │ │ ├── regex-normal.js │ │ │ │ ├── regex-numbers.js │ │ │ │ └── regex-text.js │ │ │ ├── plugin.js │ │ │ └── tagSet/ │ │ │ ├── dates.js │ │ │ ├── index.js │ │ │ ├── misc.js │ │ │ ├── nouns.js │ │ │ ├── values.js │ │ │ └── verbs.js │ │ └── swap/ │ │ ├── api/ │ │ │ ├── swap-verb.js │ │ │ └── swap.js │ │ └── plugin.js │ ├── 3-three/ │ │ ├── adjectives/ │ │ │ └── plugin.js │ │ ├── adverbs/ │ │ │ └── plugin.js │ │ ├── chunker/ │ │ │ ├── api/ │ │ │ │ ├── api.js │ │ │ │ ├── chunks.js │ │ │ │ └── clauses.js │ │ │ ├── compute/ │ │ │ │ ├── 01-easy.js │ │ │ │ ├── 02-neighbours.js │ │ │ │ ├── 03-matcher.js │ │ │ │ ├── 04-fallback.js │ │ │ │ ├── 05-fixUp.js │ │ │ │ └── index.js │ │ │ └── plugin.js │ │ ├── coreference/ │ │ │ ├── 
api/ │ │ │ │ └── pronouns.js │ │ │ ├── compute/ │ │ │ │ ├── findIt.js │ │ │ │ ├── findPerson.js │ │ │ │ ├── findThey.js │ │ │ │ ├── index.js │ │ │ │ └── lib.js │ │ │ └── plugin.js │ │ ├── misc/ │ │ │ ├── acronyms/ │ │ │ │ └── index.js │ │ │ ├── parentheses/ │ │ │ │ ├── fns.js │ │ │ │ └── index.js │ │ │ ├── plugin.js │ │ │ ├── possessives/ │ │ │ │ └── index.js │ │ │ ├── quotations/ │ │ │ │ ├── fns.js │ │ │ │ └── index.js │ │ │ ├── selections/ │ │ │ │ └── index.js │ │ │ └── slashes/ │ │ │ └── index.js │ │ ├── normalize/ │ │ │ ├── api.js │ │ │ ├── methods.js │ │ │ └── plugin.js │ │ ├── nouns/ │ │ │ ├── api/ │ │ │ │ ├── api.js │ │ │ │ ├── hasPlural.js │ │ │ │ ├── isPlural.js │ │ │ │ ├── isSubordinate.js │ │ │ │ ├── parse.js │ │ │ │ ├── toJSON.js │ │ │ │ ├── toPlural.js │ │ │ │ └── toSingular.js │ │ │ ├── find.js │ │ │ └── plugin.js │ │ ├── numbers/ │ │ │ ├── fractions/ │ │ │ │ ├── api.js │ │ │ │ ├── convert/ │ │ │ │ │ ├── toCardinal.js │ │ │ │ │ └── toOrdinal.js │ │ │ │ ├── find.js │ │ │ │ └── parse.js │ │ │ ├── numbers/ │ │ │ │ ├── _toString.js │ │ │ │ ├── api.js │ │ │ │ ├── find.js │ │ │ │ ├── format/ │ │ │ │ │ ├── index.js │ │ │ │ │ ├── suffix.js │ │ │ │ │ ├── toOrdinal/ │ │ │ │ │ │ ├── numOrdinal.js │ │ │ │ │ │ └── textOrdinal.js │ │ │ │ │ └── toText/ │ │ │ │ │ ├── data.js │ │ │ │ │ └── index.js │ │ │ │ ├── isUnit.js │ │ │ │ └── parse/ │ │ │ │ ├── index.js │ │ │ │ └── toNumber/ │ │ │ │ ├── data.js │ │ │ │ ├── findModifiers.js │ │ │ │ ├── index.js │ │ │ │ ├── parseDecimals.js │ │ │ │ ├── parseNumeric.js │ │ │ │ └── validate.js │ │ │ └── plugin.js │ │ ├── redact/ │ │ │ └── plugin.js │ │ ├── sentences/ │ │ │ ├── api.js │ │ │ ├── conjugate/ │ │ │ │ ├── toFuture.js │ │ │ │ ├── toInfinitive.js │ │ │ │ ├── toNegative.js │ │ │ │ ├── toPast.js │ │ │ │ └── toPresent.js │ │ │ ├── parse/ │ │ │ │ ├── index.js │ │ │ │ └── mainClause.js │ │ │ ├── plugin.js │ │ │ └── questions.js │ │ ├── topics/ │ │ │ ├── orgs/ │ │ │ │ └── api.js │ │ │ ├── people/ │ │ │ │ ├── api.js │ │ │ │ ├── 
find.js │ │ │ │ ├── gender.js │ │ │ │ └── parse.js │ │ │ ├── places/ │ │ │ │ ├── api.js │ │ │ │ └── find.js │ │ │ ├── plugin.js │ │ │ └── topics.js │ │ └── verbs/ │ │ ├── api/ │ │ │ ├── api.js │ │ │ ├── conjugate/ │ │ │ │ ├── toFuture.js │ │ │ │ ├── toGerund.js │ │ │ │ ├── toInfinitive.js │ │ │ │ ├── toNegative.js │ │ │ │ ├── toParticiple.js │ │ │ │ ├── toPast.js │ │ │ │ └── toPresent.js │ │ │ ├── debug.js │ │ │ ├── lib.js │ │ │ ├── parse/ │ │ │ │ ├── adverbs.js │ │ │ │ ├── getSubject.js │ │ │ │ ├── grammar/ │ │ │ │ │ ├── forms.js │ │ │ │ │ └── index.js │ │ │ │ ├── index.js │ │ │ │ └── root.js │ │ │ └── toJSON.js │ │ ├── find.js │ │ └── plugin.js │ ├── 4-four/ │ │ ├── facts/ │ │ │ ├── api.js │ │ │ ├── parse/ │ │ │ │ ├── adjective.js │ │ │ │ ├── index.js │ │ │ │ ├── noun.js │ │ │ │ ├── pivot.js │ │ │ │ ├── postProcess.js │ │ │ │ ├── statement/ │ │ │ │ │ └── index.js │ │ │ │ └── verb.js │ │ │ └── plugin.js │ │ └── sense/ │ │ ├── api/ │ │ │ └── api.js │ │ ├── compute/ │ │ │ └── index.js │ │ ├── model/ │ │ │ ├── _data.js │ │ │ ├── index.js │ │ │ ├── more.js │ │ │ └── senses/ │ │ │ ├── adjective.js │ │ │ ├── index.js │ │ │ ├── noun.js │ │ │ └── verb.js │ │ └── plugin.js │ ├── API/ │ │ ├── View.js │ │ ├── _lib.js │ │ ├── extend.js │ │ ├── inputs.js │ │ ├── methods/ │ │ │ ├── compute.js │ │ │ ├── index.js │ │ │ ├── loops.js │ │ │ └── utils.js │ │ └── world.js │ ├── _version.js │ ├── four.js │ ├── nlp.js │ ├── one.js │ ├── three.js │ └── two.js ├── tagger.scratch.js ├── tests/ │ ├── _ignore/ │ │ ├── abbreviation.ignore.js │ │ ├── before-after.ignore.js │ │ ├── participle.ignore.js │ │ ├── punctuation.ignore.js │ │ ├── quotations.ignore.js │ │ └── toQuestion.ignore.js │ ├── bugs.md │ ├── four/ │ │ ├── _lib.js │ │ ├── facts.ignore.js │ │ ├── match.ignore.js │ │ └── misc.ignore.js │ ├── hmm.js │ ├── one/ │ │ ├── _lib.js │ │ ├── cache/ │ │ │ ├── cache.test.js │ │ │ ├── keep-cache.test.js │ │ │ └── offset.test.js │ │ ├── change/ │ │ │ ├── append.test.js │ │ │ ├── case.test.js │ 
│ │ ├── concat.test.js │ │ │ ├── fork.ignore.js │ │ │ ├── hyphenate.test.js │ │ │ ├── insert.test.js │ │ │ ├── join.test.js │ │ │ ├── loop-mutate.test.js │ │ │ ├── prepend.test.js │ │ │ ├── reindex.test.js │ │ │ ├── remove.test.js │ │ │ ├── replace-sub.test.js │ │ │ ├── split.test.js │ │ │ └── splitOn.test.js │ │ ├── lexicon/ │ │ │ └── lexicon.test.js │ │ ├── lookup/ │ │ │ ├── lookup-long.test.js │ │ │ └── lookup.test.js │ │ ├── match/ │ │ │ ├── doc-match.test.js │ │ │ ├── encoding.test.js │ │ │ ├── fuzzy.test.js │ │ │ ├── if.test.js │ │ │ ├── lookaround.test.js │ │ │ ├── match-method.test.js │ │ │ ├── named-silent.test.js │ │ │ ├── negative.test.js │ │ │ ├── punctuation-match.test.js │ │ │ ├── regex.test.js │ │ │ ├── sweep-not.test.js │ │ │ ├── sweep.test.js │ │ │ └── syntax.test.js │ │ ├── match.test.js │ │ ├── misc/ │ │ │ ├── freeze.test.js │ │ │ ├── inputs.test.js │ │ │ ├── isFull.test.js │ │ │ ├── loops.test.js │ │ │ ├── misc.test.js │ │ │ ├── pointer.test.js │ │ │ ├── random.test.js │ │ │ ├── reservedwords.test.js │ │ │ ├── safe-contractions.test.js │ │ │ ├── slash.test.js │ │ │ ├── sort.test.js │ │ │ ├── typeahead.test.js │ │ │ ├── unicode.test.js │ │ │ └── whitespace.test.js │ │ ├── miss.test.js │ │ ├── output/ │ │ │ ├── hash.test.js │ │ │ └── html.test.js │ │ ├── pointers/ │ │ │ ├── complement.test.js │ │ │ ├── difference.test.js │ │ │ ├── intersection.test.js │ │ │ └── union.test.js │ │ └── tokenize/ │ │ ├── hyphen-matrix.test.js │ │ ├── hyphens.test.js │ │ ├── punctuation.test.js │ │ ├── sentence-split.test.js │ │ └── term-split.test.js │ ├── three/ │ │ ├── _lib.js │ │ ├── acronym.test.js │ │ ├── adjectives/ │ │ │ ├── adj-adv.test.js │ │ │ ├── adj-noun.test.js │ │ │ ├── adjectives.test.js │ │ │ ├── comparative.test.js │ │ │ └── superlative.test.js │ │ ├── api.test.js │ │ ├── chunker/ │ │ │ ├── chunks.ignore.js │ │ │ └── clauses.test.js │ │ ├── clause.test.js │ │ ├── coreference/ │ │ │ ├── base-coref.ignore.js │ │ │ ├── more.ignore.js │ │ │ └── 
tricky-coref.ignore.js │ │ ├── full-api.test.js │ │ ├── fuzz.test.js │ │ ├── hashTags.test.js │ │ ├── json-three.test.js │ │ ├── match.test.js │ │ ├── misc.test.js │ │ ├── miss.test.js │ │ ├── normalize/ │ │ │ ├── normalize-custom.test.js │ │ │ ├── normalize-methods.test.js │ │ │ └── normalize-preset.test.js │ │ ├── nouns/ │ │ │ ├── adjectives.test.js │ │ │ ├── isPlural.test.js │ │ │ ├── noun-find.test.js │ │ │ ├── parse.test.js │ │ │ ├── toPlural.test.js │ │ │ └── toSingular.test.js │ │ ├── numbers/ │ │ │ ├── backlog/ │ │ │ │ ├── agreement.ignore.js │ │ │ │ ├── conversion.ignore.js │ │ │ │ ├── money.ignore.js │ │ │ │ └── overlap.ignore.js │ │ │ ├── bigNumber.test.js │ │ │ ├── fractions.test.js │ │ │ ├── misc.test.js │ │ │ ├── number-parse.test.js │ │ │ ├── percent.test.js │ │ │ ├── prefix.test.js │ │ │ ├── toCardinal.test.js │ │ │ ├── toText.test.js │ │ │ ├── units.test.js │ │ │ └── value.test.js │ │ ├── parentheses.test.js │ │ ├── people/ │ │ │ ├── gender.test.js │ │ │ ├── people-parse.test.js │ │ │ └── people.test.js │ │ ├── places.test.js │ │ ├── plugin.test.js │ │ ├── possessives.test.js │ │ ├── quotations.test.js │ │ ├── redact.test.js │ │ ├── sentences/ │ │ │ ├── debullet.test.js │ │ │ ├── isQuestion.test.js │ │ │ ├── misc-conjugate.test.js │ │ │ ├── misc.test.js │ │ │ ├── negative.test.js │ │ │ ├── sentence-participle.ignore.js │ │ │ ├── sentence.test.js │ │ │ ├── svo.test.js │ │ │ ├── tense.test.js │ │ │ ├── toFuture.test.js │ │ │ ├── toGerund.ignore.js │ │ │ ├── toPast.test.js │ │ │ └── toPresent.test.js │ │ ├── setTag.test.js │ │ ├── slashes.test.js │ │ ├── subsets.test.js │ │ ├── sweep-tag.test.js │ │ ├── text-three.test.js │ │ ├── topics.test.js │ │ └── verbs/ │ │ ├── auxiliary.test.js │ │ ├── conjugate.test.js │ │ ├── imperative.test.js │ │ ├── isplural.test.js │ │ ├── misc.test.js │ │ ├── parse.test.js │ │ ├── parts.test.js │ │ ├── phrasal.test.js │ │ ├── phrasals.test.js │ │ ├── subject.test.js │ │ ├── toFuture.test.js │ │ ├── toGerund.test.js │ │ 
├── toInfinitive.test.js │ │ ├── toNegative.test.js │ │ ├── toPast.test.js │ │ ├── toPastParticiple.test.js │ │ ├── toPresent.test.js │ │ ├── verb-find.test.js │ │ ├── verb-forms.test.js │ │ └── verb-root.test.js │ └── two/ │ ├── _backlog.js │ ├── _lib.js │ ├── contractions/ │ │ ├── contract.test.js │ │ ├── contraction-match.test.js │ │ ├── contraction.test.js │ │ ├── expand.test.js │ │ ├── had-would.test.js │ │ ├── is-has-possessive.test.js │ │ └── match-contraction.test.js │ ├── freeze/ │ │ ├── freeze.test.js │ │ ├── internal.test.js │ │ └── lex.test.js │ ├── groups/ │ │ ├── named-match.test.js │ │ └── named-multi.test.js │ ├── match/ │ │ ├── and.test.js │ │ ├── blocks.test.js │ │ ├── capture.test.js │ │ ├── fancy-match.test.js │ │ ├── greedy-capture.test.js │ │ ├── lookahead.test.js │ │ ├── match-tricky.test.js │ │ ├── match.test.js │ │ ├── min-max.test.js │ │ ├── multiword.test.js │ │ ├── not.test.js │ │ ├── or.test.js │ │ ├── root-match.test.js │ │ └── soft-match.test.js │ ├── match.test.js │ ├── misc/ │ │ ├── canBe.test.js │ │ ├── confidence.test.js │ │ ├── constructor.test.js │ │ ├── emoji.test.js │ │ ├── lazy.test.js │ │ ├── misc.test.js │ │ ├── multiTag.test.js │ │ ├── remove-more.test.js │ │ ├── root.test.js │ │ ├── smoke.test.js │ │ ├── tagRank.test.js │ │ ├── term-ids.test.js │ │ ├── unique.test.js │ │ └── wordcount.test.js │ ├── miss.test.js │ ├── output/ │ │ ├── json.test.js │ │ ├── out.test.js │ │ └── text.test.js │ ├── plugin/ │ │ ├── addTags.ignore.js │ │ └── addWords.test.js │ ├── tagger/ │ │ ├── _pennSample.js │ │ ├── actors.test.js │ │ ├── inline.test.js │ │ ├── lexicon.test.js │ │ ├── multi.test.js │ │ ├── number-match.test.js │ │ ├── penn.test.js │ │ ├── swears.test.js │ │ ├── topics.test.js │ │ └── untag.test.js │ ├── transform/ │ │ ├── clone.test.js │ │ ├── replace.test.js │ │ └── swap.test.js │ └── variables/ │ ├── gerund.test.js │ ├── org-match.test.js │ ├── past-adj.test.js │ ├── person-match.test.js │ ├── present-noun.test.js │ └── 
verb-phrase.test.js ├── three/ │ └── package.json ├── tokenize/ │ └── package.json ├── tsconfig.json ├── two/ │ └── package.json └── types/ ├── misc.d.ts ├── one.d.cts ├── one.d.ts ├── three.d.cts ├── three.d.ts ├── two.d.cts ├── two.d.ts └── view/ ├── one.d.cts ├── one.d.ts ├── three.d.cts ├── three.d.ts ├── two.d.cts └── two.d.ts ================================================ FILE CONTENTS ================================================ ================================================ FILE: README.md ================================================
npm install compromise
how easy text is to make,
↬ᔐᖜ↬
and how hard it is to actually parse and use?
compromise tries its best to turn text into data.
it makes limited and sensible decisions.
it's not as smart as you'd think.
```js
import nlp from 'compromise'
let doc = nlp('she sells seashells by the seashore.')
doc.verbs().toPastTense()
doc.text()
// 'she sold seashells by the seashore.'
```
and get data:
```js
import plg from 'compromise-speech'
nlp.extend(plg)
let doc = nlp('Milwaukee has certainly had its share of visitors..')
doc.compute('syllables')
doc.places().json()
/*
[{
"text": "Milwaukee",
"terms": [{
"normal": "milwaukee",
"syllables": ["mil", "wau", "kee"]
}]
}]
*/
```
avoid the problems of brittle parsers:
```js
let doc = nlp("we're not gonna take it..")
doc.has('gonna') // true
doc.has('going to') // true (implicit)
// transform
doc.contractions().expand()
doc.text()
// 'we are not going to take it..'
```
Use it on the client-side:
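```html
<script src="https://unpkg.com/compromise"></script>
<script>
  var doc = nlp('two bottles of beer')
  doc.numbers().minus(1)
  document.body.innerHTML = doc.text()
  // 'one bottle of beer'
</script>
```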
or likewise:
```typescript
import nlp from 'compromise'
var doc = nlp('London is calling')
doc.verbs().toNegative()
// 'London is not calling'
```
okay -
compromise/one
A tokenizer of words, sentences, and punctuation.
```js
import nlp from 'compromise/one'

let doc = nlp("Wayne's World, party time")
let data = doc.json()
/* [{
    normal:"wayne's world party time",
    terms:[{ text: "Wayne's", normal: "wayne" },
      ...
    ]
  }]
*/
```
compromise/one splits your text up, wraps it in a handy API, and does nothing else.
/one is quick - most sentences take a 10th of a millisecond.
It can do ~1 MB of text a second - or 10 Wikipedia pages.
Infinite Jest takes 3s.
compromise/two
A part-of-speech tagger, and grammar-interpreter.
```js
import nlp from 'compromise/two'

let doc = nlp("Wayne's World, party time")
let str = doc.match('#Possessive #Noun').text()
// "Wayne's World"
```
compromise has 83 tags, arranged in a handsome graph.
#FirstName → #Person → #ProperNoun → #Noun
you can see the grammar of each word by running `doc.debug()`
you can see the reasoning for each tag with `nlp.verbose('tagger')`.
if you prefer Penn tags, you can derive them with:
```js
let doc = nlp('welcome thrillho')
doc.compute('penn')
doc.json()
```
compromise/three
Phrase and sentence tooling.
```js
import nlp from 'compromise/three'

let doc = nlp("Wayne's World, party time")
let str = doc.people().normalize().text()
// "wayne"
```
compromise/three is a set of tooling to zoom into and operate on parts of a text.

`.numbers()` grabs all the numbers in a document, for example - and extends it with new methods, like `.subtract()`.

When you have a phrase, or group of words, you can see additional metadata about it with `.json()`:

```js
let doc = nlp('four out of five dentists')
console.log(doc.fractions().json())
/*[{
    text: 'four out of five',
    terms: [ [Object], [Object], [Object], [Object] ],
    fraction: { numerator: 4, denominator: 5, decimal: 0.8 }
  }
]*/
```

```js
let doc = nlp('$4.09CAD')
doc.money().json()
/*[{
    text: '$4.09CAD',
    terms: [ [Object] ],
    number: { prefix: '$', num: 4.09, suffix: 'cad'}
  }
]*/
```
## API
### Compromise/one
##### Output
- **[.text()](https://observablehq.com/@spencermountain/compromise-text)** - return the document as text
- **[.json()](https://observablehq.com/@spencermountain/compromise-json)** - return the document as data
- **[.debug()](https://observablehq.com/@spencermountain/compromise-output)** - pretty-print the interpreted document
- **[.out()](https://observablehq.com/@spencermountain/compromise-output)** - a named or custom output
- **[.html({})](https://observablehq.com/@spencermountain/compromise-html)** - output custom html tags for matches
- **[.wrap({})](https://observablehq.com/@spencermountain/compromise-output)** - produce custom output for document matches
##### Utils
- **[.found](https://observablehq.com/@spencermountain/compromise-utils)** _[getter]_ - does this document contain any matches? (false when it is empty)
- **[.docs](https://observablehq.com/@spencermountain/compromise-utils)** _[getter]_ - get term objects as json
- **[.length](https://observablehq.com/@spencermountain/compromise-utils)** _[getter]_ - count the # of matches (phrases) in the document
- **[.isView](https://observablehq.com/@spencermountain/compromise-utils)** _[getter]_ - identify a compromise object
- **[.compute()](https://observablehq.com/@spencermountain/compromise-compute)** - run a named analysis on the document
- **[.clone()](https://observablehq.com/@spencermountain/compromise-utils)** - deep-copy the document, so that no references remain
- **[.termList()](https://observablehq.com/@spencermountain/compromise-accessors)** - return a flat list of all Term objects in match
- **[.cache({})](https://observablehq.com/@spencermountain/compromise-cache)** - freeze the current state of the document, for speed-purposes
- **[.uncache()](https://observablehq.com/@spencermountain/compromise-cache)** - un-freezes the current state of the document, so it may be transformed
- **[.freeze({})](https://observablehq.com/@spencermountain/compromise-freeze)** - prevent any tags from being removed, in these terms
- **[.unfreeze({})](https://observablehq.com/@spencermountain/compromise-freeze)** - allow tags to change again, as default
##### Accessors
- **[.all()](https://observablehq.com/@spencermountain/compromise-utils)** - return the whole original document ('zoom out')
- **[.terms()](https://observablehq.com/@spencermountain/compromise-selections)** - split-up results by each individual term
- **[.first(n)](https://observablehq.com/@spencermountain/compromise-accessors)** - use only the first result(s)
- **[.last(n)](https://observablehq.com/@spencermountain/compromise-accessors)** - use only the last result(s)
- **[.slice(n,n)](https://observablehq.com/@spencermountain/compromise-accessors)** - grab a subset of the results
- **[.eq(n)](https://observablehq.com/@spencermountain/compromise-accessors)** - use only the nth result
- **[.firstTerms()](https://observablehq.com/@spencermountain/compromise-accessors)** - get the first word in each match
- **[.lastTerms()](https://observablehq.com/@spencermountain/compromise-accessors)** - get the end word in each match
- **[.fullSentences()](https://observablehq.com/@spencermountain/compromise-accessors)** - get the whole sentence for each match
- **[.groups()](https://observablehq.com/@spencermountain/compromise-accessors)** - grab any named capture-groups from a match
- **[.wordCount()](https://observablehq.com/@spencermountain/compromise-utils)** - count the # of terms in the document
- **[.confidence()](https://observablehq.com/@spencermountain/compromise-utils)** - an average score for pos tag interpretations
##### Match
_(match methods use the [match-syntax](https://docs.compromise.cool/compromise-match-syntax).)_
- **[.match('')](https://observablehq.com/@spencermountain/compromise-match)** - return a new Doc, with this one as a parent
- **[.not('')](https://observablehq.com/@spencermountain/compromise-match)** - return all results except for this
- **[.matchOne('')](https://observablehq.com/@spencermountain/compromise-match)** - return only the first match
- **[.if('')](https://observablehq.com/@spencermountain/compromise-match)** - return each current phrase, only if it contains this match ('only')
- **[.ifNo('')](https://observablehq.com/@spencermountain/compromise-match)** - Filter-out any current phrases that have this match ('notIf')
- **[.has('')](https://observablehq.com/@spencermountain/compromise-match)** - Return a boolean if this match exists
- **[.before('')](https://observablehq.com/@spencermountain/compromise-match)** - return all terms before a match, in each phrase
- **[.after('')](https://observablehq.com/@spencermountain/compromise-match)** - return all terms after a match, in each phrase
- **[.union()](https://observablehq.com/@spencermountain/compromise-set)** - return combined matches without duplicates
- **[.intersection()](https://observablehq.com/@spencermountain/compromise-set)** - return only duplicate matches
- **[.complement()](https://observablehq.com/@spencermountain/compromise-set)** - get everything not in another match
- **[.settle()](https://observablehq.com/@spencermountain/compromise-set)** - remove overlaps from matches
- **[.growRight('')](https://observablehq.com/@spencermountain/compromise-match)** - add any matching terms immediately after each match
- **[.growLeft('')](https://observablehq.com/@spencermountain/compromise-match)** - add any matching terms immediately before each match
- **[.grow('')](https://observablehq.com/@spencermountain/compromise-match)** - add any matching terms before or after each match
- **[.sweep(net)](https://observablehq.com/@spencermountain/compromise-sweep)** - apply a series of match objects to the document
- **[.splitOn('')](https://observablehq.com/@spencermountain/compromise-split)** - return a Document with three parts for every match ('splitOn')
- **[.splitBefore('')](https://observablehq.com/@spencermountain/compromise-split)** - partition a phrase before each matching segment
- **[.splitAfter('')](https://observablehq.com/@spencermountain/compromise-split)** - partition a phrase after each matching segment
- **[.join()](https://observablehq.com/@spencermountain/compromise-split)** - merge any neighbouring terms in each match
- **[.joinIf(leftMatch, rightMatch)](https://observablehq.com/@spencermountain/compromise-split)** - merge any neighbouring terms under given conditions
- **[.lookup([])](https://observablehq.com/@spencermountain/compromise-match)** - quick find for an array of string matches
- **[.autoFill()](https://observablehq.com/@spencermountain/compromise-typeahead)** - create type-ahead assumptions on the document
##### Tag
- **[.tag('')](https://observablehq.com/@spencermountain/compromise-tagger)** - Give all terms the given tag
- **[.tagSafe('')](https://observablehq.com/@spencermountain/compromise-tagger)** - Only apply tag to terms if it is consistent with current tags
- **[.unTag('')](https://observablehq.com/@spencermountain/compromise-tagger)** - Remove this tag from the given terms
- **[.canBe('')](https://observablehq.com/@spencermountain/compromise-tagger)** - return only the terms that can be this tag
##### Case
- **[.toLowerCase()](https://observablehq.com/@spencermountain/compromise-case)** - turn every letter of every term to lower-case
- **[.toUpperCase()](https://observablehq.com/@spencermountain/compromise-case)** - turn every letter of every term to upper case
- **[.toTitleCase()](https://observablehq.com/@spencermountain/compromise-case)** - upper-case the first letter of each term
- **[.toCamelCase()](https://observablehq.com/@spencermountain/compromise-case)** - remove whitespace and title-case each term
##### Whitespace
- **[.pre('')](https://observablehq.com/@spencermountain/compromise-whitespace)** - add this punctuation or whitespace before each match
- **[.post('')](https://observablehq.com/@spencermountain/compromise-whitespace)** - add this punctuation or whitespace after each match
- **[.trim()](https://observablehq.com/@spencermountain/compromise-whitespace)** - remove start and end whitespace
- **[.hyphenate()](https://observablehq.com/@spencermountain/compromise-whitespace)** - connect words with hyphen, and remove whitespace
- **[.dehyphenate()](https://observablehq.com/@spencermountain/compromise-whitespace)** - remove hyphens between words, and set whitespace
- **[.toQuotations()](https://observablehq.com/@spencermountain/compromise-whitespace)** - add quotation marks around these matches
- **[.toParentheses()](https://observablehq.com/@spencermountain/compromise-whitespace)** - add brackets around these matches
##### Loops
- **[.map(fn)](https://observablehq.com/@spencermountain/compromise-loops)** - run each phrase through a function, and create a new document
- **[.forEach(fn)](https://observablehq.com/@spencermountain/compromise-loops)** - run a function on each phrase, as an individual document
- **[.filter(fn)](https://observablehq.com/@spencermountain/compromise-loops)** - return only the phrases that return true
- **[.find(fn)](https://observablehq.com/@spencermountain/compromise-loops)** - return a document with only the first phrase that matches
- **[.some(fn)](https://observablehq.com/@spencermountain/compromise-loops)** - return true or false if there is one matching phrase
- **[.random(n)](https://observablehq.com/@spencermountain/compromise-loops)** - sample a subset of the results (n defaults to 1)
##### Insert
- **[.replace(match, replace)](https://observablehq.com/@spencermountain/compromise-insert)** - search and replace match with new content
- **[.replaceWith(replace)](https://observablehq.com/@spencermountain/compromise-insert)** - substitute-in new text
- **[.remove()](https://observablehq.com/@spencermountain/compromise-insert)** - fully remove these terms from the document
- **[.insertBefore(str)](https://observablehq.com/@spencermountain/compromise-insert)** - add these new terms to the front of each match (prepend)
- **[.insertAfter(str)](https://observablehq.com/@spencermountain/compromise-insert)** - add these new terms to the end of each match (append)
- **[.concat()](https://observablehq.com/@spencermountain/compromise-insert)** - add these new things to the end
- **[.swap(fromLemma, toLemma)](https://observablehq.com/@spencermountain/compromise-root)** - smart replace of root-words, using proper conjugation
##### Transform
- **[.sort('method')](https://observablehq.com/@spencermountain/compromise-sorting)** - re-arrange the order of the matches (in place)
- **[.reverse()](https://observablehq.com/@spencermountain/compromise-sorting)** - reverse the order of the matches, but not the words
- **[.unique()](https://observablehq.com/@spencermountain/compromise-sorting)** - remove any duplicate matches
##### Lib
_(these methods are on the main `nlp` object)_
- **[nlp.tokenize(str)](https://observablehq.com/@spencermountain/compromise-tokenization)** - parse text without running POS-tagging
- **[nlp.lazy(str, match)](https://observablehq.com/@spencermountain/compromise-performance)** - scan through a text with minimal analysis
- **[nlp.plugin({})](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - mix in a compromise-plugin
- **[nlp.parseMatch(str)](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - pre-parse any match statements into json
- **[nlp.world()](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - grab or change library internals
- **[nlp.model()](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - grab all current linguistic data
- **[nlp.methods()](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - grab or change internal methods
- **[nlp.hooks()](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - see which compute methods run automatically
- **[nlp.verbose(mode)](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - log our decision-making for debugging
- **[nlp.version](https://observablehq.com/@spencermountain/compromise-constructor-methods)** - current semver version of the library
- **[nlp.addWords(obj, isFrozen?)](https://observablehq.com/@spencermountain/compromise-plugin)** - add new words to the lexicon
- **[nlp.addTags(obj)](https://observablehq.com/@spencermountain/compromise-plugin)** - add new tags to the tagSet
- **[nlp.typeahead(arr)](https://observablehq.com/@spencermountain/compromise-typeahead)** - add words to the auto-fill dictionary
- **[nlp.buildTrie(arr)](https://observablehq.com/@spencermountain/compromise-lookup)** - compile a list of words into a fast lookup form
- **[nlp.buildNet(arr)](https://observablehq.com/@spencermountain/compromise-sweep)** - compile a list of matches into a fast match form
### compromise/two:
##### Contractions
- **[.contractions()](https://observablehq.com/@spencermountain/compromise-contractions)** - things like "didn't"
- **[.contractions().expand()](https://observablehq.com/@spencermountain/compromise-contractions)** - expand contractions, like "didn't" → "did not"
- **[.contract()](https://observablehq.com/@spencermountain/compromise-contractions)** - shorten phrases into contractions, like "did not" → "didn't"
### compromise/three:
- **[.normalize({})](https://observablehq.com/@spencermountain/compromise-normalization)** - clean-up the text in various ways
##### Nouns
- **[.nouns()](https://observablehq.com/@spencermountain/nouns)** - return any subsequent terms tagged as a Noun
- **[.nouns().json()](https://observablehq.com/@spencermountain/nouns)** - overloaded output with noun metadata
- **[.nouns().parse()](https://observablehq.com/@spencermountain/nouns)** - get tokenized noun-phrase
- **[.nouns().isPlural()](https://observablehq.com/@spencermountain/nouns)** - return only plural nouns
- **[.nouns().isSingular()](https://observablehq.com/@spencermountain/nouns)** - return only singular nouns
- **[.nouns().toPlural()](https://observablehq.com/@spencermountain/nouns)** - `'football captain' → 'football captains'`
- **[.nouns().toSingular()](https://observablehq.com/@spencermountain/nouns)** - `'turnovers' → 'turnover'`
- **[.nouns().adjectives()](https://observablehq.com/@spencermountain/nouns)** - get any adjectives describing this noun
##### Verbs
- **[.verbs()](https://observablehq.com/@spencermountain/verbs)** - return any subsequent terms tagged as a Verb
- **[.verbs().json()](https://observablehq.com/@spencermountain/verbs)** - overloaded output with verb metadata
- **[.verbs().parse()](https://observablehq.com/@spencermountain/verbs)** - get tokenized verb-phrase
- **[.verbs().subjects()](https://observablehq.com/@spencermountain/verbs)** - what is doing the verb action
- **[.verbs().adverbs()](https://observablehq.com/@spencermountain/verbs)** - return the adverbs describing this verb.
- **[.verbs().isSingular()](https://observablehq.com/@spencermountain/verbs)** - return singular verbs like 'spencer walks'
- **[.verbs().isPlural()](https://observablehq.com/@spencermountain/verbs)** - return plural verbs like 'we walk'
- **[.verbs().isImperative()](https://observablehq.com/@spencermountain/verbs)** - only instruction verbs like 'eat it!'
- **[.verbs().toPastTense()](https://observablehq.com/@spencermountain/verbs)** - `'will go' → 'went'`
- **[.verbs().toPresentTense()](https://observablehq.com/@spencermountain/verbs)** - `'walked' → 'walks'`
- **[.verbs().toFutureTense()](https://observablehq.com/@spencermountain/verbs)** - `'walked' → 'will walk'`
- **[.verbs().toInfinitive()](https://observablehq.com/@spencermountain/verbs)** - `'walks' → 'walk'`
- **[.verbs().toGerund()](https://observablehq.com/@spencermountain/verbs)** - `'walks' → 'walking'`
- **[.verbs().toPastParticiple()](https://observablehq.com/@spencermountain/verbs)** - `'drive' → 'had driven'`
- **[.verbs().conjugate()](https://observablehq.com/@spencermountain/verbs)** - return all conjugations of these verbs
- **[.verbs().isNegative()](https://observablehq.com/@spencermountain/verbs)** - return verbs with 'not', 'never' or 'no'
- **[.verbs().isPositive()](https://observablehq.com/@spencermountain/verbs)** - only verbs without 'not', 'never' or 'no'
- **[.verbs().toNegative()](https://observablehq.com/@spencermountain/verbs)** - `'went' → 'did not go'`
- **[.verbs().toPositive()](https://observablehq.com/@spencermountain/verbs)** - `"didn't study" → 'studied'`
##### Numbers
- **[.numbers()](https://observablehq.com/@spencermountain/compromise-values)** - grab all written and numeric values
- **[.numbers().parse()](https://observablehq.com/@spencermountain/compromise-values)** - get tokenized number phrase
- **[.numbers().get()](https://observablehq.com/@spencermountain/compromise-values)** - get a simple javascript number
- **[.numbers().json()](https://observablehq.com/@spencermountain/compromise-values)** - overloaded output with number metadata
- **[.numbers().toNumber()](https://observablehq.com/@spencermountain/compromise-values)** - convert 'five' to `5`
- **[.numbers().toLocaleString()](https://observablehq.com/@spencermountain/compromise-values)** - add commas, or nicer formatting for numbers
- **[.numbers().toText()](https://observablehq.com/@spencermountain/compromise-values)** - convert '5' to `five`
- **[.numbers().toOrdinal()](https://observablehq.com/@spencermountain/compromise-values)** - convert 'five' to `fifth` or `5th`
- **[.numbers().toCardinal()](https://observablehq.com/@spencermountain/compromise-values)** - convert 'fifth' to `five` or `5`
- **[.numbers().isOrdinal()](https://observablehq.com/@spencermountain/compromise-values)** - return only ordinal numbers
- **[.numbers().isCardinal()](https://observablehq.com/@spencermountain/compromise-values)** - return only cardinal numbers
- **[.numbers().isEqual(n)](https://observablehq.com/@spencermountain/compromise-values)** - return numbers with this value
- **[.numbers().greaterThan(min)](https://observablehq.com/@spencermountain/compromise-values)** - return numbers bigger than n
- **[.numbers().lessThan(max)](https://observablehq.com/@spencermountain/compromise-values)** - return numbers smaller than n
- **[.numbers().between(min, max)](https://observablehq.com/@spencermountain/compromise-values)** - return numbers between min and max
- **[.numbers().isUnit(unit)](https://observablehq.com/@spencermountain/compromise-values)** - return only numbers in the given unit, like 'km'
- **[.numbers().set(n)](https://observablehq.com/@spencermountain/compromise-values)** - set number to n
- **[.numbers().add(n)](https://observablehq.com/@spencermountain/compromise-values)** - increase number by n
- **[.numbers().subtract(n)](https://observablehq.com/@spencermountain/compromise-values)** - decrease number by n
- **[.numbers().increment()](https://observablehq.com/@spencermountain/compromise-values)** - increase number by 1
- **[.numbers().decrement()](https://observablehq.com/@spencermountain/compromise-values)** - decrease number by 1
- **[.money()](https://observablehq.com/@spencermountain/compromise-values)** - things like `'$2.50'`
- **[.money().get()](https://observablehq.com/@spencermountain/compromise-values)** - retrieve the parsed amount(s) of money
- **[.money().json()](https://observablehq.com/@spencermountain/compromise-values)** - currency + number info
- **[.money().currency()](https://observablehq.com/@spencermountain/compromise-values)** - which currency the money is in
- **[.fractions()](https://observablehq.com/@spencermountain/compromise-values)** - like '2/3rds' or 'one out of five'
- **[.fractions().parse()](https://observablehq.com/@spencermountain/compromise-values)** - get tokenized fraction
- **[.fractions().get()](https://observablehq.com/@spencermountain/compromise-values)** - simple numerator, denominator data
- **[.fractions().json()](https://observablehq.com/@spencermountain/compromise-values)** - json method overloaded with fractions data
- **[.fractions().toDecimal()](https://observablehq.com/@spencermountain/compromise-values)** - '2/3' -> '0.66'
- **[.fractions().normalize()](https://observablehq.com/@spencermountain/compromise-values)** - 'four out of 10' -> '4/10'
- **[.fractions().toText()](https://observablehq.com/@spencermountain/compromise-values)** - '4/10' -> 'four tenths'
- **[.fractions().toPercentage()](https://observablehq.com/@spencermountain/compromise-values)** - '4/10' -> '40%'
- **[.percentages()](https://observablehq.com/@spencermountain/compromise-values)** - like '2.5%'
- **[.percentages().get()](https://observablehq.com/@spencermountain/compromise-values)** - return the percentage number / 100
- **[.percentages().json()](https://observablehq.com/@spencermountain/compromise-values)** - json overloaded with percentage information
- **[.percentages().toFraction()](https://observablehq.com/@spencermountain/compromise-values)** - '80%' -> '8/10'
##### Sentences
- **[.sentences()](https://observablehq.com/@spencermountain/compromise-sentences)** - return a sentence class with additional methods
- **[.sentences().json()](https://observablehq.com/@spencermountain/compromise-sentences)** - overloaded output with sentence metadata
- **[.sentences().toPastTense()](https://observablehq.com/@spencermountain/compromise-sentences)** - `he walks` -> `he walked`
- **[.sentences().toPresentTense()](https://observablehq.com/@spencermountain/compromise-sentences)** - `he walked` -> `he walks`
- **[.sentences().toFutureTense()](https://observablehq.com/@spencermountain/compromise-sentences)** - `he walks` -> `he will walk`
- **[.sentences().toInfinitive()](https://observablehq.com/@spencermountain/compromise-sentences)** - verb root-form `he walks` -> `he walk`
- **[.sentences().toNegative()](https://observablehq.com/@spencermountain/compromise-sentences)** - `he walks` -> `he didn't walk`
- **[.sentences().isQuestion()](https://observablehq.com/@spencermountain/compromise-sentences)** - return questions with a `?`
- **[.sentences().isExclamation()](https://observablehq.com/@spencermountain/compromise-sentences)** - return sentences with a `!`
- **[.sentences().isStatement()](https://observablehq.com/@spencermountain/compromise-sentences)** - return sentences without `?` or `!`
##### Adjectives
- **[.adjectives()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'quick'`
- **[.adjectives().json()](https://observablehq.com/@spencermountain/compromise-selections)** - get adjective metadata
- **[.adjectives().conjugate()](https://observablehq.com/@spencermountain/compromise-selections)** - return all inflections of these adjectives
- **[.adjectives().adverbs()](https://observablehq.com/@spencermountain/compromise-selections)** - get adverbs describing this adjective
- **[.adjectives().toComparative()](https://observablehq.com/@spencermountain/compromise-selections)** - 'quick' -> 'quicker'
- **[.adjectives().toSuperlative()](https://observablehq.com/@spencermountain/compromise-selections)** - 'quick' -> 'quickest'
- **[.adjectives().toAdverb()](https://observablehq.com/@spencermountain/compromise-selections)** - 'quick' -> 'quickly'
- **[.adjectives().toNoun()](https://observablehq.com/@spencermountain/compromise-selections)** - 'quick' -> 'quickness'
##### Misc selections
- **[.clauses()](https://observablehq.com/@spencermountain/compromise-selections)** - split-up sentences into multi-term phrases
- **[.chunks()](https://observablehq.com/@spencermountain/compromise-selections)** - split-up sentences into noun-phrases and verb-phrases
- **[.hyphenated()](https://observablehq.com/@spencermountain/compromise-selections)** - all terms connected with a hyphen or dash like `'wash-out'`
- **[.phoneNumbers()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'(939) 555-0113'`
- **[.hashTags()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'#nlp'`
- **[.emails()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'hi@compromise.cool'`
- **[.emoticons()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `:)`
- **[.emojis()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `💋`
- **[.atMentions()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'@nlp_compromise'`
- **[.urls()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'compromise.cool'`
- **[.pronouns()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'he'`
- **[.conjunctions()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'but'`
- **[.prepositions()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'of'`
- **[.abbreviations()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'Mrs.'`
- **[.people()](https://observablehq.com/@spencermountain/topics-named-entity-recognition)** - names like 'John F. Kennedy'
- **[.people().json()](https://observablehq.com/@spencermountain/topics-named-entity-recognition)** - get person-name metadata
- **[.people().parse()](https://observablehq.com/@spencermountain/topics-named-entity-recognition)** - get person-name interpretation
- **[.places()](https://observablehq.com/@spencermountain/topics-named-entity-recognition)** - like 'Paris, France'
- **[.organizations()](https://observablehq.com/@spencermountain/topics-named-entity-recognition)** - like 'Google, Inc'
- **[.topics()](https://observablehq.com/@spencermountain/topics-named-entity-recognition)** - `people()` + `places()` + `organizations()`
- **[.adverbs()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'quickly'`
- **[.adverbs().json()](https://observablehq.com/@spencermountain/compromise-selections)** - get adverb metadata
- **[.acronyms()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `'FBI'`
- **[.acronyms().strip()](https://observablehq.com/@spencermountain/compromise-selections)** - remove periods from acronyms
- **[.acronyms().addPeriods()](https://observablehq.com/@spencermountain/compromise-selections)** - add periods to acronyms
- **[.parentheses()](https://observablehq.com/@spencermountain/compromise-selections)** - return anything inside (parentheses)
- **[.parentheses().strip()](https://observablehq.com/@spencermountain/compromise-selections)** - remove brackets
- **[.possessives()](https://observablehq.com/@spencermountain/compromise-selections)** - things like `"Spencer's"`
- **[.possessives().strip()](https://observablehq.com/@spencermountain/compromise-selections)** - "Spencer's" -> "Spencer"
- **[.quotations()](https://observablehq.com/@spencermountain/compromise-selections)** - return any terms inside paired quotation marks
- **[.quotations().strip()](https://observablehq.com/@spencermountain/compromise-selections)** - remove quotation marks
- **[.slashes()](https://observablehq.com/@spencermountain/compromise-selections)** - return any terms grouped by slashes
- **[.slashes().split()](https://observablehq.com/@spencermountain/compromise-selections)** - turn 'love/hate' into 'love hate'
nlp('the koala eats/shoots/leaves').has('koala leaves') //false
- **inter-sentence match:**
By default, sentences are the top-level abstraction.
Inter-sentence, or multi-sentence matches aren't supported without a plugin:
nlp("that's it. Back to Winnipeg!").has('it back')//false
- **nested match syntax:**
Nested groups, like `doc.match('(modern (major|minor))? general')`, are not supported.
complex matches must be achieved with successive **.match()** statements.
- **dependency parsing:**
Proper sentence transformation requires understanding the [syntax tree](https://en.wikipedia.org/wiki/Parse_tree) of a sentence, which we don't currently do.
We should! Help wanted with this.
##### FAQ
💃 Can it run on my arduino-watch?
Only if it's water-proof!
Read quick start for running compromise in workers, mobile apps, and all sorts of funny environments.
✨ Partial builds?
we do offer a tokenize-only build, which has the POS-tagger pulled-out.
but otherwise, compromise isn't easily tree-shaken.
the tagging methods are competitive, and greedy, so it's not recommended to pull things out.
Note that without a full POS-tagging, the contraction-parser won't work perfectly. ((spencer's cool) vs. (spencer's house))
It's recommended to run the library fully.
MIT
================================================
FILE: builds/compromise.js
================================================
!function(e,t){"object"==typeof exports&&"undefined"!=typeof module?module.exports=t():"function"==typeof define&&define.amd?define(t):(e="undefined"!=typeof globalThis?globalThis:e||self).nlp=t()}(this,(function(){"use strict";var e={methods:{one:{},two:{},three:{},four:{}},model:{one:{},two:{},three:{}},compute:{},hooks:[]};const t={compute:function(e){const{world:t}=this,n=t.compute;return"string"==typeof e&&n.hasOwnProperty(e)?n[e](this):(e=>"[object Array]"===Object.prototype.toString.call(e))(e)?e.forEach((r=>{t.compute.hasOwnProperty(r)?n[r](this):console.warn("no compute:",e)})):"function"==typeof e?e(this):console.warn("no compute:",e),this}};var n={forEach:function(e){return this.fullPointer.forEach(((t,n)=>{const r=this.update([t]);e(r,n)})),this},map:function(e,t){const n=this.fullPointer.map(((t,n)=>{const r=this.update([t]),o=e(r,n);return void 0===o?this.none():o}));if(0===n.length)return t||this.update([]);if(void 0!==n[0]){if("string"==typeof n[0])return n;if("object"==typeof n[0]&&(null===n[0]||!n[0].isView))return n}let r=[];return n.forEach((e=>{r=r.concat(e.fullPointer)})),this.toView(r)},filter:function(e){let t=this.fullPointer;t=t.filter(((t,n)=>{const r=this.update([t]);return e(r,n)}));return this.update(t)},find:function(e){const t=this.fullPointer.find(((t,n)=>{const r=this.update([t]);return e(r,n)}));return this.update([t])},some:function(e){return this.fullPointer.some(((t,n)=>{const r=this.update([t]);return e(r,n)}))},random:function(e=1){let t=this.fullPointer,n=Math.floor(Math.random()*t.length);return n+e>this.length&&(n=this.length-e,n=n<0?0:n),t=t.slice(n,n+e),this.update(t)}};const r={termList:function(){return this.methods.one.termList(this.docs)},terms:function(e){const t=this.match(".");return"number"==typeof e?t.eq(e):t},groups:function(e){if(e||0===e)return this.update(this._groups[e]||[]);const t={};return Object.keys(this._groups).forEach((e=>{t[e]=this.update(this._groups[e])})),t},eq:function(e){let 
t=this.pointer;return t||(t=this.docs.map(((e,t)=>[t]))),t[e]?this.update([t[e]]):this.none()},first:function(){return this.eq(0)},last:function(){const e=this.fullPointer.length-1;return this.eq(e)},firstTerms:function(){return this.match("^.")},lastTerms:function(){return this.match(".$")},slice:function(e,t){let n=this.pointer||this.docs.map(((e,t)=>[t]));return n=n.slice(e,t),this.update(n)},all:function(){return this.update().toView()},fullSentences:function(){const e=this.fullPointer.map((e=>[e[0]]));return this.update(e).toView()},none:function(){return this.update([])},isDoc:function(e){if(!e||!e.isView)return!1;const t=this.fullPointer,n=e.fullPointer;return!t.length!==n.length&&t.every(((e,t)=>!!n[t]&&(e[0]===n[t][0]&&e[1]===n[t][1]&&e[2]===n[t][2])))},wordCount:function(){return this.docs.reduce(((e,t)=>(e+=t.filter((e=>""!==e.text)).length,e)),0)},isFull:function(){const e=this.pointer;if(!e)return!0;if(0===e.length||0!==e[0][0])return!1;let t=0,n=0;return this.document.forEach((e=>t+=e.length)),this.docs.forEach((e=>n+=e.length)),t===n},getNth:function(e){return"number"==typeof e?this.eq(e):"string"==typeof e?this.if(e):this}};r.group=r.groups,r.fullSentence=r.fullSentences,r.sentence=r.fullSentences,r.lastTerm=r.lastTerms,r.firstTerm=r.firstTerms;const o=Object.assign({},r,t,n);o.get=o.eq;class View{constructor(t,n,r={}){[["document",t],["world",e],["_groups",r],["_cache",null],["viewType","View"]].forEach((e=>{Object.defineProperty(this,e[0],{value:e[1],writable:!0})})),this.ptrs=n}get docs(){let t=this.document;return this.ptrs&&(t=e.methods.one.getDoc(this.ptrs,this.document)),t}get pointer(){return this.ptrs}get methods(){return this.world.methods}get model(){return this.world.model}get hooks(){return this.world.hooks}get isView(){return!0}get found(){return this.docs.length>0}get length(){return this.docs.length}get fullPointer(){const{docs:e,ptrs:t,document:n}=this,r=t||e.map(((e,t)=>[t]));return r.map((e=>{let[t,r,o,a,i]=e;return 
r=r||0,o=o||(n[t]||[]).length,n[t]&&n[t][r]&&(a=a||n[t][r].id,n[t][o-1]&&(i=i||n[t][o-1].id)),[t,r,o,a,i]}))}update(e){const t=new View(this.document,e);if(this._cache&&e&&e.length>0){const n=[];e.forEach(((e,t)=>{const[r,o,a]=e;(1===e.length||0===o&&this.document[r].length===a)&&(n[t]=this._cache[r])})),n.length>0&&(t._cache=n)}return t.world=this.world,t}toView(e){return new View(this.document,e||this.pointer)}fromText(e){const{methods:t}=this,n=t.one.tokenize.fromString(e,this.world),r=new View(n);return r.world=this.world,r.compute(["normal","freeze","lexicon"]),this.world.compute.preTagger&&r.compute("preTagger"),r.compute("unfreeze"),r}clone(){let e=this.document.slice(0);e=e.map((e=>e.map((e=>((e=Object.assign({},e)).tags=new Set(e.tags),e)))));const t=this.update(this.pointer);return t.document=e,t._cache=this._cache,t}}Object.assign(View.prototype,o);const a=function(e){return e&&"object"==typeof e&&!Array.isArray(e)};function i(e,t){if(a(t))for(const n in t)a(t[n])?(e[n]||Object.assign(e,{[n]:{}}),i(e[n],t[n])):Object.assign(e,{[n]:t[n]});return e}const s=function(e,t,n,r){if(o=e,"[object Array]"===Object.prototype.toString.call(o))return void e.forEach((e=>s(e,t,n,r)));var o;const{methods:a,model:l,compute:u,hooks:c}=t;e.methods&&function(e,t){for(const n in t)e[n]=e[n]||{},Object.assign(e[n],t[n])}(a,e.methods),e.model&&i(l,e.model),e.irregulars&&function(e,t){const 
n=e.two.models||{};Object.keys(t).forEach((e=>{t[e].pastTense&&(n.toPast&&(n.toPast.ex[e]=t[e].pastTense),n.fromPast&&(n.fromPast.ex[t[e].pastTense]=e)),t[e].presentTense&&(n.toPresent&&(n.toPresent.ex[e]=t[e].presentTense),n.fromPresent&&(n.fromPresent.ex[t[e].presentTense]=e)),t[e].gerund&&(n.toGerund&&(n.toGerund.ex[e]=t[e].gerund),n.fromGerund&&(n.fromGerund.ex[t[e].gerund]=e)),t[e].comparative&&(n.toComparative&&(n.toComparative.ex[e]=t[e].comparative),n.fromComparative&&(n.fromComparative.ex[t[e].comparative]=e)),t[e].superlative&&(n.toSuperlative&&(n.toSuperlative.ex[e]=t[e].superlative),n.fromSuperlative&&(n.fromSuperlative.ex[t[e].superlative]=e))}))}(l,e.irregulars),e.compute&&Object.assign(u,e.compute),c&&(t.hooks=c.concat(e.hooks||[])),e.api&&e.api(n),e.lib&&Object.keys(e.lib).forEach((t=>r[t]=e.lib[t])),e.tags&&r.addTags(e.tags),e.words&&r.addWords(e.words),e.frozen&&r.addWords(e.frozen,!0),e.mutate&&e.mutate(t,r)},l=function(e){return"[object Array]"===Object.prototype.toString.call(e)},u=function(e,t,n){const{methods:r}=n,o=new t([]);if(o.world=n,"number"==typeof e&&(e=String(e)),!e)return o;if("string"==typeof e){return new t(r.one.tokenize.fromString(e,n))}if(a=e,"[object Object]"===Object.prototype.toString.call(a)&&e.isView)return new t(e.document,e.ptrs);var a;if(l(e)){if(l(e[0])){const n=e.map((e=>e.map((e=>({text:e,normal:e,pre:"",post:" ",tags:new Set})))));return new t(n)}const n=e.map((e=>e.terms.map((e=>(l(e.tags)&&(e.tags=new Set(e.tags)),e)))));return new t(n)}return o},c=Object.assign({},e),h=function(e,t){t&&h.addWords(t);const n=u(e,View,c);return e&&n.compute(c.hooks),n};Object.defineProperty(h,"_world",{value:c,writable:!0}),h.tokenize=function(e,t){const{compute:n}=this._world;t&&h.addWords(t);const r=u(e,View,c);return n.contractions&&r.compute(["alias","normal","machine","contractions"]),r},h.plugin=function(e){return s(e,this._world,View,this),this},h.extend=h.plugin,h.world=function(){return 
this._world},h.model=function(){return this._world.model},h.methods=function(){return this._world.methods},h.hooks=function(){return this._world.hooks},h.verbose=function(e){const t="undefined"!=typeof process&&process.env?process.env:self.env||{};return t.DEBUG_TAGS="tagger"===e||!0===e||"",t.DEBUG_MATCH="match"===e||!0===e||"",t.DEBUG_CHUNKS="chunker"===e||!0===e||"",this},h.version="14.15.0";var d={one:{cacheDoc:function(e){const t=e.map((e=>{const t=new Set;return e.forEach((e=>{""!==e.normal&&t.add(e.normal),e.switch&&t.add(`%${e.switch}%`),e.implicit&&t.add(e.implicit),e.machine&&t.add(e.machine),e.root&&t.add(e.root),e.alias&&e.alias.forEach((e=>t.add(e)));const n=Array.from(e.tags);for(let e=0;eworker output:
================================================ FILE: eslint.config.js ================================================ import * as regexpPlugin from "eslint-plugin-regexp" export default [ regexpPlugin.configs["flat/recommended"], { // "ignorePatterns": ["**/builds/*"], "ignores": ["**/builds/*"], "rules": { "comma-dangle": [1, "only-multiline"], "quotes": [0, "single", "avoid-escape"], "max-nested-callbacks": [1, 4], "max-params": [1, 5], "consistent-return": 1, "no-bitwise": 1, "no-empty": 1, "no-console": 1, "no-duplicate-imports": 1, "no-eval": 2, "no-implied-eval": 2, "no-mixed-operators": 2, "no-multi-assign": 2, "no-nested-ternary": 1, "no-prototype-builtins": 0, "no-self-compare": 1, "no-sequences": 1, "no-shadow": 2, "no-unmodified-loop-condition": 1, "no-use-before-define": 1, "prefer-const": 1, "radix": 1, "no-unused-vars": 1, "regexp/prefer-d": 0, "regexp/prefer-w": 0, "regexp/prefer-range": 0, "regexp/no-unused-capturing-group": 0, "eslint-comments/no-unused-disable": 0 } }, ] ================================================ FILE: one/package.json ================================================ { "name": "compromise-one", "version": "14.14.4", "description": "", "type": "module", "module": "./../src/one.js", "main": "./../src/one.js", "types": "./../types/one.d.ts", "exports": { "./package.json": "./package.json", ".": { "import": { "types": "./../types/one/one.d.ts", "default": "./../src/one.js" }, "require": { "types": "./../types/one.d.cts", "default": "./../builds/one/compromise-one.cjs" } } }, "author": "Spencer Kellynpm install compromise-cmd-k
This library is an earnest attempt to get date information out of text, in a clear way - including all informal text formats, and folksy shorthands.
```js
import nlp from 'compromise'
import datePlugin from 'compromise-dates'
nlp.plugin(datePlugin)
let doc = nlp('the second monday of february')
doc.dates().get()[0]
/*
{ start: '2021-02-08T00:00:00.000Z', end: '2021-02-08T23:59:59.999Z'}
*/
```
• Tokenization and disambiguation with compromise.
• Timezone and DST reckoning with spacetime [1]
• Number-parsing with compromise-numbers [1]
• Timezone reconciliation with spacetime-informal [1]
### _Things it does well:_
| `explicit-dates` | _description_ | `Start` | `End` |
| ----------------------------------- | :-----------------------------------: | ---------------: | ---------------: |
| _march 2nd_ | | March 2, 12:00am | March 2, 11:59pm |
| _2 march_ | | '' | '' |
| _tues march 2_ | | '' | '' |
| _march the second_ | _natural-language number_ | '' | '' |
| _on the 2nd_ | _implicit months_ | '' | '' |
| _tuesday the 2nd_ | _date-reckoning_ | '' | '' |
|
### _Things it does awkwardly:_
| _`hmmm,`_ | _description_ | `Start` | `End` |
| ------------------------ | :--------------------------------------------: | :-----: | :---: |
| _middle of 2019/June_ | tries to find the sorta-center | June 15 | '' |
| _good friday 2025_ | tries to reckon astronomically-set holidays | '' | '' |
| _Oct 22 1975 2am in PST_ | historical DST changes (assumes current dates) | '' | '' |
### _Things it doesn't do:_
| _😓,_ | _description_ | `Start` | `End` |
| ------------------------------------------- | :----------------------: | :-----: | :---: |
| _not this Saturday, but the Saturday after_ | self-reference logic | '' | '' |
| _3 years ago tomorrow_ | folksy short-hand | '' | '' |
| _2100_ | military time formats | '' | '' |
| _may 97_ | 'bare' 2-digit years | '' | '' |
### Configuration:
`.dates()` accepts an optional object, that lets you set the context for the date parsing.
```js
const context = {
timezone: 'Canada/Eastern', //the default timezone is 'ETC/UTC'
today: '2020-02-20', //the implicit, or reference day/year
punt: { weeks: 2 }, // the implied duration to use for 'after june 2nd'
dayStart: '8:00am',
dayEnd: '5:30pm',
dmy : false //assume british-format dates, when unclear
}
nlp('in two days').dates(context).get()
/*
[{ start: '2020-02-22T08:00:00.000-05:00', end: '2020-02-22T17:30:00.000-05:00' }]
*/
```
## _Opinions_:
### _Start of week:_
By default, weeks start on a Monday, and _'next week'_ will run from Monday morning to Sunday night.
This can be configured in spacetime, but right now we are not passing-through this config.
### _Implied durations:_
_'after October'_ returns a range starting **Nov 1st**, and ending **2-weeks** after, by default.
This can be configured by setting `punt` param in the context object:
```js
doc.dates({ punt: { month: 1 } })
```
### _Future bias:_
_'May 7th'_ will prefer a May 7th in the future.
The parser will return a past-date though, in the current-month:
```js
// from march 2nd
nlp('feb 30th').dates({ today: '2021-02-01' }).get()
```
### _This/Next/Last:_
named-weeks or months eg _'this/next/last week'_ are mostly straight-forward.
#### _This monday_
A bare 'monday' will always refer to itself, or the upcoming monday.
- Saying _'this monday'_ on monday, is itself.
- Saying _'this monday'_ on tuesday, is next week.
Likewise, _'this june'_ in June, is itself. _'this june'_ in any other month, is the nearest June in the future.
Future versions of this library could look at sentence-tense to help disambiguate these dates - _'i paid on monday'_ vs _'i will pay on monday'_.
#### _Last monday_
If it's Tuesday, _'last monday'_ will not mean yesterday.
- Saying _'last monday'_ on a tuesday will be -1 week.
- Saying _'a week ago monday'_ will also work.
- Saying _'this past monday'_ will return yesterday.
For reference, **Wit.ai** & **chronic** libraries both return yesterday. **Natty** and **SugarJs** return -1 week, like we do.
_'last X'_ can be less than 7 days backward, if it crosses a week starting-point:
- Saying _'last friday'_ on a monday will be only a few days back.
#### _Next Friday_
If it's Tuesday, _'next wednesday'_ will not be tomorrow. It will be a week after tomorrow.
- Saying _'next wednesday'_ on a tuesday, will be +1 week.
- Saying _'a week wednesday'_ will also be +1 week.
- Saying _'this coming wednesday'_ will be tomorrow.
For reference, **Wit.ai**, **chronic**, and **Natty** libraries all return tomorrow. **SugarJs** returns +1 week, like we do.
### _Nth Week:_
The first week of a month, or a year is the first week _with a thursday in it_. This is a weird, but widely-held standard. I believe it's a military formalism. It cannot be (easily) configured. This means that the start-date for _first week of January_ may be a Monday in December, etc.
As expected, _first monday of January_ will always be in January.
### _British/American ambiguity:_
By default, we use the same interpretation of dates as javascript does - we assume `01/02/2020` is Jan 2nd, (US-version) but allow `13/01/2020` to be Jan 13th (UK-version).
If you want to coerce an interpretation of `02/03/1999`, you can set it with the `dmy:true` option:
```js
nlp('02/03/1999').dates().get() //February 3
nlp('02/03/1999').dates({dmy:true}).get() // March 2
```
ISO dates, (like `1999-03-02`) are unaffected by the change.
### _Seasons:_
By default, _'this summer'_ will return **June 1 - Sept 1**, which is northern hemisphere ISO.
Configuring the default hemisphere should be possible in the future.
### _Day times:_
There are some hardcoded times for _'lunch time'_ and others, but mainly, a day begins at `12:00am` and ends at `11:59pm` - the last millisecond of the day.
### _Invalid dates:_
compromise will tag anything that looks like a date, but not validate the dates until they are parsed.
- _'january 34th 2020'_ will return **Jan 31 2020**.
- _'tomorrow at 2:62pm'_ will just return 'tomorrow'.
- _'6th week of february'_ will return the 2nd week of march.
- Setting an hour that's skipped, or repeated by a DST change will return the closest valid time to the DST change.
### _Inclusive/exclusive ranges:_
_'between january and march'_ will include all of march. This kind of phrase is usually pretty ambiguous.
### _Date greediness:_
This library makes no assumptions about the input text, and is careful to avoid false-positive dates.
If you know your text is a date, you can crank-up the date-tagger with a [compromise-plugin](https://observablehq.com/@spencermountain/compromise-plugins), like so:
```js
nlp.extend(function (Doc, world) {
// ambiguous words
world.addWords({
weds: 'WeekDay',
wed: 'WeekDay',
sat: 'WeekDay',
sun: 'WeekDay',
})
world.postProcess(doc => {
// tag '2nd quarter' as a date
doc.match('#Ordinal quarter').tag('#Date')
// tag '2/2' as a date (not a fraction)
doc.match('/[0-9]{1,2}/[0-9]{1,2}/').tag('#Date')
})
})
```
### _Misc:_
- _'thursday the 16th'_ - will set to the 16th, even if it's not thursday
- _'in a few hours/years'_ - in 2 hours/years
- _'jan 5th 2008 to Jan 6th the following year'_ - date-range explicit references
- assume _'half past 5'_ is 5pm
1 - Regular-expressions are too-brittle to parse dates.
2 - Neural-nets are too-wonky to parse dates.
3 - A corporation, or startup is the wrong place to build a universal date-parser.
Parsing _dates_, _times_, _durations_, and _intervals_ from natural language can be a solved-problem.
A rule-based, community open-source library - _one based on simple NLP_ - is the best way to build a natural language date parser - commercial, or otherwise - for the frontend, or the backend.
The _[match-syntax](https://observablehq.com/@spencermountain/compromise-match-syntax)_ is effective and easy, _javascript_ is prevailing, and the more people who contribute, the better.
### See also
- [Duckling](https://duckling.wit.ai/) - by wit.ai (facebook)
- [Sugarjs/dates](https://sugarjs.com/dates/) - by Andrew Plummer (js)
- [Chronic](https://github.com/mojombo/chronic) - by Tom Preston-Werner (Ruby)
- [SUTime](https://nlp.stanford.edu/software/sutime.shtml) - by Angel Chang, Christopher Manning (Java)
- [Natty](http://natty.joestelmach.com/) - by Joe Stelmach (Java)
- [rrule](https://github.com/jakubroztocil/rrule) - repeating date-interval handler (js)
- [ParseDateTime](https://pypi.org/project/parsedatetime/) by Mike Taylor (Python)
**MIT** licenced
================================================
FILE: plugins/dates/builds/compromise-dates.cjs
================================================
(function (global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory() :
typeof define === 'function' && define.amd ? define(factory) :
(global = typeof globalThis !== 'undefined' ? globalThis : global || self, global.compromiseDates = factory());
})(this, (function () { 'use strict';
// break a found date-phrase apart, wherever it is really several dates
const split = function (dates) {
  // range-like phrases ('between x and y') must stay in one piece
  if (dates.has('^(between|within) #Date')) {
    return dates
  }
  let pieces = dates;
  // cut the current pieces after the given match, when there is one
  const cutAfter = function (hit) {
    if (hit.found) {
      pieces = pieces.splitAfter(hit);
    }
  };

  // --- month-based lists ---
  if (pieces.has('#Month')) {
    // 'june 5, june 10'
    cutAfter(pieces.match('[#Month #Value] and? #Month', 0).ifNo('@hasDash$'));
    // '5 june, 10 june'
    cutAfter(pieces.match('[#Value #Month] and? #Value #Month', 0));
    // 'june, august'
    cutAfter(pieces.match('^[#Month] and? #Month #Ordinal?$', 0));
    // 'june 5th, june 10th'
    cutAfter(pieces.match('[#Month #Value] #Month', 0).ifNo('@hasDash$'));
  }

  // --- weekday-based lists ---
  if (pieces.has('#WeekDay')) {
    // 'tuesday, wednesday'
    cutAfter(pieces.match('^[#WeekDay] and? #WeekDay$', 0).ifNo('@hasDash$'));
    // 'tuesday, wednesday, and friday' - here every weekday becomes its own piece
    const triple = pieces.match('#WeekDay #WeekDay and? #WeekDay');
    if (triple.found) {
      pieces = pieces.splitOn('#WeekDay');
    }
    // 'monday, wednesday' - cut after each weekday, not after the match itself
    const pair = pieces.match('[#WeekDay] (and|or|this|next)? #WeekDay', 0).ifNo('@hasDash$');
    if (pair.found) {
      pieces = pieces.splitAfter('#WeekDay');
    }
  }

  // --- relative-day words ---
  // 'next week tomorrow' - the cut goes in front of the day-word
  const trailingDay = pieces.match('(this|next) #Duration [(today|tomorrow|yesterday)]', 0);
  if (trailingDay.found) {
    pieces = pieces.splitBefore(trailingDay);
  }
  // 'tomorrow 15 march'
  cutAfter(pieces.match('[(today|tomorrow|yesterday)] #Value #Month', 0));
  // 'tomorrow yesterday'
  cutAfter(pieces.match('[(today|tomorrow|yesterday)] (today|tomorrow|yesterday|#WeekDay)', 0).ifNo('@hasDash$'));
  // '1998 and 1999'
  cutAfter(pieces.match('#Year [and] #Year', 0));

  // drop any 'and' left dangling at either edge by the cuts above
  pieces = pieces.not('^and');
  pieces = pieces.not('and$');
  return pieces
};
/**
 * Collect the date-phrases in a document, as individually-parseable matches.
 *
 * Starts from every run of #Date terms, drops the parts that are really
 * durations ('20 minutes', 'for 30 mins'), hands the remainder to `split`,
 * and finally discards phrases that involve money, percentages or 'per <unit>'.
 *
 * @param {object} doc - a compromise view to search
 * @returns {object} a view of the date-phrases that were kept
 */
const findDate = function (doc) {
  let dates = doc.match('#Date+');
  // ignore only-durations like '20 minutes'
  dates = dates.filter(m => {
    const isDuration = m.has('^#Duration+$') || m.has('^#Value #Duration+$');
    // allow 'q4', etc
    if (isDuration === true && m.has('(#FinancialQuarter|quarter)')) {
      return true
    }
    return isDuration === false
  });
  // 30 minutes on tuesday
  let m = dates.match('[#Cardinal #Duration (in|on|this|next|during|for)] #Date', 0);
  if (m.found) {
    dates = dates.not(m);
  }
  // 30 minutes tuesday
  m = dates.match('[#Cardinal #Duration] #WeekDay', 0);
  if (m.found) {
    dates = dates.not(m);
  }
  // tuesday for 30 mins
  m = dates.match('#Date [for #Value #Duration]$', 0);
  if (m.found) {
    dates = dates.not(m);
  }
  // '20 minutes june 5th'
  m = dates.match('[#Cardinal #Duration] #Date', 0); //but allow '20 minutes ago'
  // the guard pattern needs no capture-group, so it is written without brackets
  // (it previously carried a closing ']' that had no opening '[')
  if (m.found && !dates.has('#Cardinal #Duration (ago|from|before|after|back)')) {
    dates = dates.not(m);
  }
  // for 20 minutes
  m = dates.match('for #Cardinal #Duration');
  if (m.found) {
    dates = dates.not(m);
  }
  // 'one saturday'
  dates = dates.notIf('^one (#WeekDay|#Month)$');
  // tokenize the dates
  dates = split(dates);
  // $5 an hour
  dates = dates.notIf('(#Money|#Percentage)');
  dates = dates.notIf('^per #Duration');
  return dates
};
// the duration units that parseShift will accept - a lookup of { unit: true }
const knownUnits = Object.fromEntries(
  [
    'second',
    'minute',
    'hour',
    'day',
    'week',
    'weekend',
    'month',
    'season',
    'quarter',
    'year',
  ].map((unit) => [unit, true])
);
// alternate unit-names, mapped to the unit they stand for
const aliases$2 = {
  wk: 'week',
  min: 'minute',
  sec: 'second',
  weekend: 'week', //for now...
};

// read the #Duration word from a match, as a singular unit-name ('weeks' -> 'week')
const parseUnit = function (m) {
  const word = m.match('#Duration').text('normal');
  // drop a plural 's' from the end
  const singular = word.replace(/s$/, '');
  // support shorthands like 'min'
  return aliases$2.hasOwnProperty(singular) ? aliases$2[singular] : singular
};
//turn '5 weeks before' to {weeks:5}
const parseShift = function (doc) {
let result = {};
let m = doc.none();
let shift = doc.match('#DateShift+');
if (shift.found === false) {
return { res: result, m }
}
// '5 weeks'
shift.match('#Cardinal #Duration').forEach((ts) => {
let num = ts.match('#Cardinal').numbers().get()[0];
if (num && typeof num === 'number') {
let unit = parseUnit(ts);
if (knownUnits[unit] === true) {
result[unit] = num;
}
}
});
//is it 2 weeks ago? → -2
if (shift.has('(before|ago|hence|back)$') === true) {
Object.keys(result).forEach((k) => (result[k] *= -1));
}
m = shift.match('#Cardinal #Duration');
shift = shift.not(m);
// supoprt '1 day after tomorrow'
m = shift.match('[