master 3400c20d43f4 cached
157 files
455.1 KB
120.6k tokens
659 symbols
1 requests
Download .txt
Showing preview only (501K chars total). Download the full file or copy to clipboard to get everything.
Repository: morfologik/morfologik-stemming
Branch: master
Commit: 3400c20d43f4
Files: 157
Total size: 455.1 KB

Directory structure:
gitextract_1f6qqk15/

├── .github/
│   └── workflows/
│       └── ci.yml
├── .gitignore
├── CHANGES.txt
├── CONTRIBUTING.txt
├── LICENSE.txt
├── README.txt
├── etc/
│   ├── eclipse/
│   │   └── settings/
│   │       ├── org.eclipse.jdt.core.prefs
│   │       └── org.eclipse.m2e.core.prefs
│   └── forbidden-apis/
│       └── signatures.txt
├── morfologik-fsa/
│   ├── pom.xml
│   └── src/
│       └── main/
│           └── java/
│               └── morfologik/
│                   └── fsa/
│                       ├── ByteSequenceIterator.java
│                       ├── CFSA.java
│                       ├── CFSA2.java
│                       ├── FSA.java
│                       ├── FSA5.java
│                       ├── FSAFlags.java
│                       ├── FSAHeader.java
│                       ├── FSATraversal.java
│                       ├── MatchResult.java
│                       └── StateVisitor.java
├── morfologik-fsa-builders/
│   ├── pom.xml
│   └── src/
│       ├── main/
│       │   └── java/
│       │       └── morfologik/
│       │           └── fsa/
│       │               └── builders/
│       │                   ├── CFSA2Serializer.java
│       │                   ├── ConstantArcSizeFSA.java
│       │                   ├── FSA5Serializer.java
│       │                   ├── FSABuilder.java
│       │                   ├── FSAInfo.java
│       │                   ├── FSASerializer.java
│       │                   └── FSAUtils.java
│       └── test/
│           ├── java/
│           │   └── morfologik/
│           │       └── fsa/
│           │           └── builders/
│           │               ├── CFSA2SerializerTest.java
│           │               ├── FSA5SerializerTest.java
│           │               ├── FSA5Test.java
│           │               ├── FSABuilderTest.java
│           │               ├── FSATestUtils.java
│           │               ├── FSATraversalTest.java
│           │               ├── MinMax.java
│           │               ├── SerializerTestBase.java
│           │               └── TestBase.java
│           └── resources/
│               └── morfologik/
│                   └── fsa/
│                       └── builders/
│                           ├── abc-numbers.fsa
│                           ├── abc.fsa
│                           ├── abc.in
│                           ├── en_tst.dict
│                           ├── minimal.fsa
│                           ├── minimal.in
│                           ├── minimal2.fsa
│                           └── minimal2.in
├── morfologik-polish/
│   ├── pom.xml
│   └── src/
│       ├── main/
│       │   ├── java/
│       │   │   └── morfologik/
│       │   │       └── stemming/
│       │   │           └── polish/
│       │   │               └── PolishStemmer.java
│       │   └── resources/
│       │       └── morfologik/
│       │           └── stemming/
│       │               └── polish/
│       │                   ├── polish.LICENSE.Polish.txt
│       │                   ├── polish.LICENSE.txt
│       │                   ├── polish.README.Polish.txt
│       │                   ├── polish.README.txt
│       │                   ├── polish.dict
│       │                   └── polish.info
│       └── test/
│           └── java/
│               └── morfologik/
│                   └── stemming/
│                       └── polish/
│                           ├── Gh27Test.java
│                           └── PolishMorfologikStemmerTest.java
├── morfologik-speller/
│   ├── pom.xml
│   └── src/
│       ├── main/
│       │   └── java/
│       │       └── morfologik/
│       │           └── speller/
│       │               ├── HMatrix.java
│       │               └── Speller.java
│       └── test/
│           ├── java/
│           │   └── morfologik/
│           │       └── speller/
│           │           ├── HMatrixTest.java
│           │           └── SpellerTest.java
│           └── resources/
│               └── morfologik/
│                   └── speller/
│                       ├── dict-with-freq.dict
│                       ├── dict-with-freq.info
│                       ├── dict-with-freq.txt
│                       ├── issue38.dict
│                       ├── issue38.info
│                       ├── issue38.input
│                       ├── issue94.dict
│                       ├── issue94.info
│                       ├── pissara-test.dict
│                       ├── pissara-test.info
│                       ├── pissara-test.txt
│                       ├── reps_dist2.dict
│                       ├── reps_dist2.info
│                       ├── reps_dist2.txt
│                       ├── single-char-word.dict
│                       ├── single-char-word.info
│                       ├── slownik.dict
│                       ├── slownik.info
│                       ├── test-infix.dict
│                       ├── test-infix.info
│                       ├── test-utf-spell.dict
│                       ├── test-utf-spell.info
│                       ├── test_freq_iso.dict
│                       └── test_freq_iso.info
├── morfologik-stemming/
│   ├── pom.xml
│   └── src/
│       ├── main/
│       │   └── java/
│       │       └── morfologik/
│       │           └── stemming/
│       │               ├── ArrayViewList.java
│       │               ├── BufferUtils.java
│       │               ├── Dictionary.java
│       │               ├── DictionaryAttribute.java
│       │               ├── DictionaryIterator.java
│       │               ├── DictionaryLookup.java
│       │               ├── DictionaryMetadata.java
│       │               ├── DictionaryMetadataBuilder.java
│       │               ├── EncoderType.java
│       │               ├── ISequenceEncoder.java
│       │               ├── IStemmer.java
│       │               ├── NoEncoder.java
│       │               ├── TrimInfixAndSuffixEncoder.java
│       │               ├── TrimPrefixAndSuffixEncoder.java
│       │               ├── TrimSuffixEncoder.java
│       │               ├── UnmappableInputException.java
│       │               └── WordData.java
│       └── test/
│           ├── java/
│           │   └── morfologik/
│           │       └── stemming/
│           │           ├── DictionaryLookupTest.java
│           │           ├── DictionaryMetadataBuilderTest.java
│           │           ├── DictionaryMetadataTest.java
│           │           ├── DictionaryTest.java
│           │           ├── EncodersTest.java
│           │           └── SequenceEncodersTest.java
│           └── resources/
│               └── morfologik/
│                   └── stemming/
│                       ├── escape-separator.info
│                       ├── test-diacritics-utf8.dict
│                       ├── test-diacritics-utf8.info
│                       ├── test-infix.dict
│                       ├── test-infix.info
│                       ├── test-prefix.dict
│                       ├── test-prefix.info
│                       ├── test-removed-props.dict
│                       ├── test-removed-props.info
│                       ├── test-separator-in-lookup.fsa
│                       ├── test-separator-in-lookup.in
│                       ├── test-separators.dict
│                       ├── test-separators.info
│                       ├── test-separators.txt
│                       ├── test-synth.dict
│                       ├── test-synth.info
│                       └── unicode-separator.info
├── morfologik-tools/
│   ├── pom.xml
│   └── src/
│       ├── main/
│       │   ├── assembly/
│       │   │   └── package.xml
│       │   ├── java/
│       │   │   └── morfologik/
│       │   │       └── tools/
│       │   │           ├── BinaryInput.java
│       │   │           ├── CliTool.java
│       │   │           ├── CustomParameterConverters.java
│       │   │           ├── DictApply.java
│       │   │           ├── DictCompile.java
│       │   │           ├── DictDecompile.java
│       │   │           ├── ExitStatus.java
│       │   │           ├── ExitStatusException.java
│       │   │           ├── FSABuild.java
│       │   │           ├── FSACompile.java
│       │   │           ├── FSADecompile.java
│       │   │           ├── FSADump.java
│       │   │           ├── FSAInfo.java
│       │   │           ├── Launcher.java
│       │   │           ├── SerializationFormat.java
│       │   │           ├── ValidateFileExists.java
│       │   │           └── ValidateParentDirExists.java
│       │   └── package/
│       │       ├── README.txt
│       │       └── examples/
│       │           ├── 01-fsa-build.input
│       │           ├── 01-fsa-build.txt
│       │           ├── 02-fsa-dump.txt
│       │           ├── 03-fsa-info.txt
│       │           ├── 04-dict-compile.info
│       │           ├── 04-dict-compile.input
│       │           ├── 04-dict-compile.txt
│       │           └── 05-dict-decompile.txt
│       └── test/
│           └── java/
│               └── morfologik/
│                   └── tools/
│                       ├── DictCompileBug.java
│                       ├── DictCompileTest.java
│                       └── FSACompileTest.java
└── pom.xml

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/workflows/ci.yml
================================================
name: CI

on:
  push:
    branches: [master]
  pull_request:

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
      - name: Set up JDK 21
        uses: actions/setup-java@v5
        with:
          distribution: temurin
          java-version: 21
          cache: maven

      - run: mvn --batch-mode verify


================================================
FILE: .gitignore
================================================
*.versionsBackup
tmp/
dist/
target/
*.patch
.eclipse/
.project
.classpath
.settings
*.name
*.iml
.idea/

================================================
FILE: CHANGES.txt
================================================

Morfologik, Change Log
======================

For an up-to-date CHANGES file see 
https://github.com/morfologik/morfologik-stemming/blob/master/CHANGES

======================= morfologik-stemming 2.2.0 =======================

Bug Fixes

 * PR #121: fix bug in replacements: s>ss, ss>s (Jaume Ortolà).

 * PR #118: fix HMatrix not being reset between calls to
   Speller.findReplacementCandidates(), causing incorrect candidates to be
   returned on repeated calls (Jaume Ortolà).

 * GH-38: support ^ (start) and $ (end) anchors and _ (space) in
   replacement-pairs, following hunspell REP conventions.

 * GH-75: Fix incorrect and incomplete CharsetDecoder usage in Speller.findRepl():
   missing charBuffer.clear() before decode and missing decoder.flush() after
   decode, which could produce wrong candidates for stateful encodings.

Other Changes

 * Apply spotless (google java format) formatting to sources.

 * Switch to junit5/ jupiter and randomizedtesting-jupiter.

 * Update Maven build plugins to current versions.

 * Require Java 21 for compiling the project. The output jar remains Java 11 
   compatible.

======================= morfologik-stemming 2.1.9 =======================

Other Changes

 * PR #114: improve run-on suggestions for camel case words (Jaume Ortolà)

======================= morfologik-stemming 2.1.8 =======================

Other Changes

 * GH-112: Add automatic module name to all JARs.
 * Upgrade selected build dependencies.

======================= morfologik-stemming 2.1.7 =======================

Bug Fixes

 * PR #103: fix distance value in the result of `Speller.findReplacementCandidates`
   (Daniel Naber).

 * GH-102: upgrade jcommander to newest version. (Dawid Weiss)

Other Changes

 * PR #103: introduce `Speller.replaceRunOnWordCandidates()` which returns
   `CandidateData` (Daniel Naber).

======================= morfologik-stemming 2.1.6 =======================

Other Changes

 * PR #101: fix replaceRunOnWords() not working for words that are uppercase at
   sentence start (Daniel Naber).

======================= morfologik-stemming 2.1.5 =======================

Bug Fixes

 * PR #96: incorrect logic in runOnWords (Jaume Ortolà).

 * PR #97: micro performance optimization (Daniel Naber).

Other Changes

 * GH-95: Speller: findReplacementCandidates returns full CandidateData. This 
          commit also refactors the Speller to use a stateless returned array
          list rather than reuse an internal field. Should not make a 
          practical difference. (Dawid Weiss)

======================= morfologik-stemming 2.1.4 =======================

Bug Fixes

 * PR #93: Case-changed words are always good suggestions (Jaume Ortolà).

 * GH-92: FSATraversal may return NOT_FOUND instead of AUTOMATON_HAS_PREFIX
          (stevendolg via Dawid Weiss)

Other Changes

 * Updated build and test plugins to newer versions.

======================= morfologik-stemming 2.1.3 =======================

Bug Fixes

 * GH-86: Speller: words containing the dictionary separator are not handled
          properly (Jaume Ortolà via Dawid Weiss).

======================= morfologik-stemming 2.1.2 =======================

Bug Fixes

 * GH-85: Encoded sequences can clash with separator byte and cause assertion 
   errors. (Daniel Naber, Dawid Weiss).

======================= morfologik-stemming 2.1.1 =======================

Bug Fixes

 * PR #78: Fix dependency issue in morfologik-speller (Alden Quimby).

 * GH-84: Dictionary resources not found with security manager.
   (Uwe Schindler)

Other Changes

 * GH-79: Corrected a corner case in DictCompileTest. (Dawid Weiss)

 * GH-77: Trailing spaces in encoder name can lead to illegal argument exception.
   (Jaume Ortolà, Dawid Weiss)

======================= morfologik-stemming 2.1.0 =======================

New Features

 * GH-74: Add dict_apply tool to apply a dictionary to a file or stdin. 
   (Dawid Weiss)

 * GH-73: Update Polish stemming dictionaries to polimorfologik 2.1. (Dawid Weiss)

Bug Fixes

 * GH-76: Consolidate and fix character encoding and decoding. (Dawid Weiss)

Other Changes

 * GH-63: BufferUtils.ensureCapacity now clears the input buffer. This also
   affects WordData methods that accept a reusable byte buffer -- it is now
   always cleared prior to being flipped and returned. (Dawid Weiss)

======================= morfologik-stemming 2.0.2 =======================

Bug Fixes

 * GH-68: WordData.clone() should be public. (Dawid Weiss)

Other Changes

 * GH-64: reverted back OSGi annotations (bundle packaging). (Dawid Weiss)

 * GH-72: Rename tools: fsa_dump to fsa_decompile and fsa_build to fsa_compile.
   Existing names remain as aliases but will be removed in 2.1.0. (Dawid Weiss)

======================= morfologik-stemming 2.0.1 =======================

Bug Fixes

 * GH-65: Dictionary.read(URL) ends in NPE when reading from a JAR resource
   (Dawid Weiss)

======================= morfologik-stemming 2.0.0 =======================

This release comes with a cleanup of the API for Java 1.7. There are
several aspects of the code that have been dropped (or added):

  - NIO is used extensively, mostly for better error reporting.

  - There is a simplified lookup of resources, no class-relative loading
    of dictionaries for example. The caller is in charge of either looking
    up a URL to the dictionary or providing an InputStream to it.

  - Removed internal caching of dictionaries from Dictionary. The 
    Polish stemmer is initialized lazily and reuses its dictionary 
    internally.

  - Numerous minor tweaks of parameters. JavaDocs.

  - A complete rewrite of the tools to compile (and decompile) FSA automata
    and complete stemming dictionaries. The tools now assert the validity
    of input data files and ensure no corrupt dictionaries can be produced.

Changes in backwards compatibility policy

 * GH-64: Removed OSGi support because of Maven issues (forks build
   phases, tests, etc.).

 * GH-62: Recompress Polish dictionary to use ';' as the separator.
   (Dawid Weiss)

 * GH-59: Moved Dictionary.convertText utility to 
   DictionaryLookup.applyReplacements and fixed current reliance on map 
   ordering. (Dawid Weiss)

 * GH-55: Removed the "distribution" module entirely. The tools module
   should be self-organizing. Complete overhaul of all the tools. 
   Examples. Simplified syntax, options and assumptions. 
   Input sanity checks and validation. (Dawid Weiss)

 * GH-57: Restructured the project into FSA traversal/ reading (only)
   and FSA Builders (construction). This cleans up dependency
   structure as well (HPPC is not required for FSA traversals).
   (Dawid Weiss)

 * GH-54: Make Java 1.7 the minimum required version. Certain methods
   that relied on File as arguments have been removed or changed to
   accept Path. (Dawid Weiss)

New Features

 * GH-53: Review library dependencies and bring them up to date. 
   (Dawid Weiss)

 * Added OSGi support (Michal Hlavac)

 * GH-51: Remove and fail on deprecated metadata (fsa.dict.uses-*).
   (Dawid Weiss)

Optimizations

 * GH-61: Refactored the code to use one encoding/ decoding routine
   and ByteBuffers. Removed dependency on Guava.

Bug Fixes

 * GH-32: make replaceRunOnWords return "a lot" for "alot", etc. 
   (Daniel Naber)

 * GH-34: ArrayIndexOutOfBoundsException with replacement-pairs. 
   (Jaume Ortolà, Daniel Naber)

======================= morfologik-stemming 1.10.0 =======================

Changes in backwards compatibility policy

New Features
 
 * Added OSGi support (Michal Hlavac)

Bug Fixes

 * GH-32: make replaceRunOnWords return "a lot" for "alot", etc. 
   (Daniel Naber)

 * GH-34: ArrayIndexOutOfBoundsException with replacement-pairs. 
   (Jaume Ortolà, Daniel Naber)

======================= morfologik-stemming 1.9.1 =======================

Changes in backwards compatibility policy

New Features

Bug Fixes

 * Now only the longest replacement key is selected when using replacement
   pairs (thanks to Jaume Ortolà). This fixes a subtle regression
   introduced in 1.9.0.

Optimizations

======================= morfologik-stemming 1.9.0 =======================

Changes in backwards compatibility policy

New Features

* Added capability to normalize input and output strings for dictionaries.
  This is useful for dictionaries that do not support ligatures, for example.
  To specify input conversion, use the property 'fsa.dict.input-conversion'
  in the .info file. The output conversion (for example, to use ligatures)
  is specified by 'fsa.dict.output-conversion'. Note that lengthy 
  conversion tables may negatively affect performance.

Bug Fixes

Optimizations

 * The suggestion search for the speller is now performed directly by traversing
   the dictionary automaton, which makes it much more time-efficient (thanks
   to Jaume Ortolà).

 * Suggestions are generated faster by avoiding unnecessary case conversions.

======================= morfologik-stemming 1.8.3 =======================

Bug Fixes

* Fixed a bug for spelling dictionaries in non-UTF encodings with 
  separators: strings with non-encodable characters might have been 
  accepted as spelled correctly even if they were missing in the 
  dictionary.

======================= morfologik-stemming 1.8.2 =======================

New Features

* Added the option of using frequencies of words for sorting spelling 
  replacements. It can be used in both spelling and tagging dictionaries.
  'fsa.dict.frequency-included=true' must be added to the .info file.
  For building the dictionary, add at the end of each entry a separator and 
  a character between A and Z (A: less frequently used words; 
  Z: more frequently used words). (Jaume Ortolà)

======================= morfologik-stemming 1.8.1 =======================

Changes in backwards compatibility policy

* MorphEncodingTool will *fail* if it detects data/lines that contain the 
  separator annotation byte. This is because such lines get encoded into
  something that the decoder cannot process. You can use \u0000 as the 
  annotation byte to avoid clashes with any existing data.

======================= morfologik-stemming 1.8.0 =======================

Changes in backwards compatibility policy

* Command-line option changes to MorphEncodingTool - it now accepts an explicit
  name of the sequence encoder, not infix/suffix/prefix booleans.  

* Updating dependencies to their newest versions.

New Features

* Dictionary .info files can specify the sequence decoder explicitly:
  suffix, prefix, infix, none are supported. For backwards compatibility,
  fsa.dict.uses-prefixes, fsa.dict.uses-infixes and fsa.dict.uses-suffixes
  are still supported, but will be removed in the next major version.

* Command-line option changes to MorphEncodingTool - it now accepts an explicit
  name of the sequence encoder, not infix/suffix/prefix booleans.  

* Rewritten implementation of tab-separated data files (tab2morph tool).
  The output should yield smaller files, especially for prefix encoding
  and infix encoding. This does *not* necessarily mean smaller automata
  but we're working on getting these as well.

  Example output before and after refactoring:
  
  Prefix coder:
  postmodernizm|modernizm|xyz => [before] postmodernizm+ANmodernizm+xyz
                              => [after ] postmodernizm+EA+xyz
  
  Infix coder:
  laquelle|lequel|D f s       => [before] laquelle+AAHequel+D f s
                              => [after ] laquelle+AGAquel+D f s

* Changed the default format of the Polish dictionary from infix
  encoded to prefix encoded (smaller output size).

Optimizations

* A number of internal implementation cleanups and refactorings.

======================= morfologik-stemming 1.7.2 =======================

* A quick fix for incorrect decoding of certain suffixes (long suffixes).

* Increased max. recursion level in Speller to 6 from 4. (Jaume Ortolà)

======================= morfologik-stemming 1.7.1 =======================

* Fixed a couple of bugs in morfologik-speller (Jaume Ortolà).

======================= morfologik-stemming 1.7.0 =======================

* Changed DictionaryMetadata API (access methods for encoder/decoder).

* Initial version of morfologik-speller component.

* Minor changes to the FSADumpTool: the header block is always UTF-8 
  encoded, the default platform encoding does not matter. This is done to 
  always support certain attributes that may be unicode (and would be 
  incorrectly dumped otherwise).

* Metadata *.info files can now be encoded in UTF-8 to support text 
  attributes that otherwise would require text2ascii conversion.

======================= morfologik-stemming 1.6.0 =======================

* Update morfologik-polish data to Morfologik 2.0 PoliMorf (08.03.2013). 
  Deprecated DICTIONARY constants (unified dictionary only).
          
* Important! The format of encoding tags has changed and is now 
  multiple-tags-per-lemma. The value returned from WordData#getTag 
  may be a number of tags concatenated with a "+" character. Previously
  the same lemma/stem would be returned multiple times, each time with 
  a different tag.

* Moving code from SourceForge to github.

======================= morfologik-stemming 1.5.5 =======================

* Made hppc an optional component of morfologik-fsa. It is required
  for constructing FSA automata only and causes problems with javac.
  http://stackoverflow.com/questions/3800462/can-i-prevent-javac-accessing-the-class-path-from-the-manifests-of-our-third-par

======================= morfologik-stemming 1.5.4 =======================

* Replaced byte-based speller with CharBasedSpeller.

* Warn about UTF-8 files with BOM.
 
* Fixed a typo in package name (speller).

======================= morfologik-stemming 1.5.3 =======================

* Initial release of spelling correction submodule.

* Updated morfologik-polish data to morfologik 1.9 [12.06.2012]

* Updated morfologik-polish licensing info to BSD (yay).

======================= morfologik-stemming 1.5.2 =======================

* An alternative Polish dictionary added (BSD licensed): SGJP (Morfeusz). 
  PolishStemmer can now take an enum switching between the dictionary to 
  be used or combine both.

* Project split into modules. A single jar version (no external 
  dependencies) added by transforming via proguard.

* Enabled use of escaped special characters in the tab2morph tool.

* Added guards against the input term having separator character 
  somewhere (this will now return an empty list of matches). Added 
  getSeparatorChar to DictionaryLookup so that one can check for this 
  condition manually, if needed.

======================= morfologik-stemming 1.5.1 =======================

* Build system switch to Maven (tested with Maven2).

======================= morfologik-stemming 1.5.0 =======================

* Major size saving improvements in CFSA2. Built in Polish dictionary 
  size decreased from 2,811,345 to 1,806,661 (CFSA2 format).

* FSABuilder returns a ready-to-be-used FSA (ConstantArcSizeFSA). 
  Construction overhead for this automaton is a round zero (it is 
  immediately serialized in-memory).

* Polish dictionary updated to Morfologik 1.7. [19.11.2010]

* Added an option to serialize automaton to CFSA2 or FSA5 directly from 
  fsa_build.

* CFSA is now deprecated for serialization (the code still reads CFSA 
  automata, but will not be able to serialize them). Use CFSA2.

* Added immediate state interning. Speedup in automaton construction by 
  about 30%, memory use decreased significantly (did not perform exact 
  measurements, but incremental construction from presorted data should 
  consume way less memory).

* Added an option to build FSA from already sorted data (--sorted). 
  Avoids in-memory sorting. Pipe the input through shell sort if 
  building FSA from large data.

* Changed the default ordering from Java signed-byte to C-like unsigned 
  byte value. This lets one use GNU sort to sort the input using 
  'export LC_ALL=C; sort input'.  

* Added traversal routines to calculate perfect hashing based on 
  FSA with NUMBERS.

* Changed the order of serialized arcs in the binary serializer for FSA5 
  to lexicographic  (consistent with the input). Depth-first traversal 
  recreates the input, in other words.

* Removed character-based automata.

* Incompatible API changes to FSA builders (moved to morfologik.fsa).

* Incompatible API changes to FSATraversalHelper. Cleaned up match 
  types, added unit tests. 

* An external dependency HPPC (high performance primitive collections) 
  is now required.

======================= morfologik-stemming 1.4.1 =======================

* Upgrade of the built-in Morfologik dictionary for Polish (in CFSA 
  format).

* Added options to define custom FILLER and ANNOT_SEPARATOR bytes in the 
  fsa_build tool.

* Corrected an inconsistency with the C fsa package -- FILLER and 
  ANNOT_SEPARATOR characters are now identical with the C version.
          
* Cleanups to the tools' launcher -- will complain about missing JARs, 
  if any.

======================= morfologik-stemming 1.4.0 =======================

* Added FSA5 construction in Java (on byte sequences). Added preliminary 
  support for character sequences. Added a command line tool for FSA5
  construction from unsorted data (sorting is done in-memory).

* Added a tool to encode tab-delimited dictionaries to the format 
  accepted by fsa_build and FSA5 construction tool.

* Added a new version of Morfologik dictionary for Polish (in CFSA format).

======================= morfologik-stemming 1.3.0 =======================

* Added runtime checking for tools availability so that unavailable tools 
  don't show up in the list.

* Recompressed the built-in Polish dictionary to CFSA. 

* Cleaned up FSA/Dictionary separation. FSAs don't store encoding any more 
  (because it does not make sense for them to do so). The FSA is a purely 
  abstract class pushing functionality to sub-classes. Input stream 
  reading cleaned up.

* Added initial code for CFSA (compressed FSA). Reduces automata size 
  about 10%. 

* Changes in the public API. Implementation classes renamed (FSAVer5Impl 
  into FSA5). Major tweaks and tunes to the API.

* Added support for version 5 automata built with NUMBERS flag (an extra 
  field stored for each node).

======================= morfologik-stemming 1.2.2 =======================

* License switch to plain BSD (removed the patent clause which did not 
  make much sense anyway).

* The build ZIP now includes licenses for individual JARs (prevents 
  confusion). 

======================= morfologik-stemming 1.2.1 =======================

* Fixed tool launching routines.

======================= morfologik-stemming 1.2.0 =======================

* Package hierarchy reorganized.

* Removed stempel (heuristic stemmer for Polish).

* Code updated to Java 1.5. 

* The API has changed in many places (enums instead of constants, 
  generics, iterables, removed explicit Arc and Node classes and replaced 
  by int pointers).

* FSA traversal in version 1.2 is implemented on top of primitive data 
  structures (int pointers) to keep memory usage minimal. The speed 
  boost gained from this is enormous and justifies less readable code. We
  strongly advise using the provided iterators and helper functions 
  for matching state sequences in the FSA.

* Tools updated. Dumping existing FSAs is much, much faster now.        

======================= morfologik-stemming 1.1.4 =======================

* Fixed a bug that caused UTF-8 dictionaries to be garbled. Now it 
  should be relatively safe to use UTF-8 dictionaries (note: separators 
  cannot be multibyte UTF-8 characters, yet this is probably a very 
  rare case).

======================= morfologik-stemming 1.1.3 =======================

* Fixed a bug causing NPE when the library is called with null context 
  class loader (happens when the JVM is invoked from a JNI-attached 
  thread). Thanks to Patrick Luby for report and detailed analysis.

* Updated the built-in dictionary to the newest version available. 

======================= morfologik-stemming 1.1.2 =======================

* Fixed a bug causing JAR file locking (by implementing a workaround).

* Fixed the build script (manifest file was broken).

======================= morfologik-stemming 1.1.1 =======================

* Distribution script fixes. The final JAR does not contain test classes 
  and resources. Size reduced by almost half compared to release 1.1.

* Updated the dump tool to accept dictionary metadata files.

======================= morfologik-stemming 1.1 =========================

* Introduced auxiliary "meta" information files about compressed 
  dictionaries. Such information includes the delimiter symbol, encoding 
  and infix/prefix/postfix decoding info.

* The API has changed (repackaging). Some deprecated methods have been 
  removed. This is a major redesign/ upgrade, you will have to adjust 
  your source code.

* Cleaned up APIs and interfaces.

* Added infrastructure for command-line tool launching.

* Cleaned up tests.

* Changed project name to morfologik-stemmers and ownership to 
  (c) Morfologik.

======================= morfologik-stemming 1.0.7 =======================

* Removed one bug in fsa 'compression' decoding.

======================= morfologik-stemming 1.0.6 =======================

* Customized version of stempel replaced with a standard distribution.

* Removed deprecated methods and classes.
          
* Added infix and prefix encoding support for fsa dictionaries.

======================= morfologik-stemming 1.0.5 =======================

* Added filler and separator char dumps to FSADump.
          
* A major bug in automaton traversal corrected. Upgrade when possible.
          
* Certain API changes were introduced; older methods are now deprecated
  and will be removed in the future.

======================= morfologik-stemming 1.0.4 =======================

* Licenses for full and no-dict versions.

======================= morfologik-stemming 1.0.3 =======================

* Project code moved to SourceForge (subproject of Morfologik).
  LICENSE CHANGED FROM PUBLIC DOMAIN TO BSD (doesn't change much, but 
  clarifies legal issues).

======================= morfologik-stemming 1.0.2 =======================

* Added a Lametyzator constructor which allows a custom dictionary stream, 
  field delimiters and encoding. Added an option for building a stand-alone 
  JAR that does not include the default Polish dictionary.

======================= morfologik-stemming 1.0.1 =======================

* Code cleanups. Added a method that returns the third column of the
  automaton (form).

======================= morfologik-stemming 1.0 =========================

* Initial release


================================================
FILE: CONTRIBUTING.txt
================================================
Contributions are welcome!

Use a modern Java version for compilation and testing (JDK 21+ recommended).

If you use Eclipse, set up project formatting and validation with:

mvn -Peclipse

================================================
FILE: LICENSE.txt
================================================

Copyright (c) 2006 Dawid Weiss
Copyright (c) 2007-2015 Dawid Weiss, Marcin Miłkowski
All rights reserved.

Redistribution and use in source and binary forms, with or without modification, 
are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice, 
    this list of conditions and the following disclaimer.
    
    * Redistributions in binary form must reproduce the above copyright notice, 
    this list of conditions and the following disclaimer in the documentation 
    and/or other materials provided with the distribution.
    
    * Neither the name of Morfologik nor the names of its contributors 
    may be used to endorse or promote products derived from this software 
    without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

================================================
FILE: README.txt
================================================
MORFOLOGIK
==========

Tools for finite state automata construction and dictionary-based 
morphological dictionaries.

Morphosyntactic dictionary for the Polish language.

See the following for more information:
  Wiki: https://github.com/morfologik/morfologik-stemming/wiki
  Bugs: https://github.com/morfologik/morfologik-stemming/issues

See CONTRIBUTING.txt if you'd like to add or change something.

See LICENSE.txt to make your company's lawyer happy.

See CHANGES.txt for API changes and updates.

(c) Marcin Miłkowski, Dawid Weiss


================================================
FILE: etc/eclipse/settings/org.eclipse.jdt.core.prefs
================================================
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.annotation.inheritNullAnnotations=disabled
org.eclipse.jdt.core.compiler.annotation.missingNonNullByDefaultAnnotation=ignore
org.eclipse.jdt.core.compiler.annotation.nonnull=org.eclipse.jdt.annotation.NonNull
org.eclipse.jdt.core.compiler.annotation.nonnullbydefault=org.eclipse.jdt.annotation.NonNullByDefault
org.eclipse.jdt.core.compiler.annotation.nullable=org.eclipse.jdt.annotation.Nullable
org.eclipse.jdt.core.compiler.annotation.nullanalysis=disabled
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
org.eclipse.jdt.core.compiler.compliance=1.7
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
org.eclipse.jdt.core.compiler.debug.localVariable=generate
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
org.eclipse.jdt.core.compiler.doc.comment.support=enabled
org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=warning
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.autoboxing=ignore
org.eclipse.jdt.core.compiler.problem.comparingIdentical=warning
org.eclipse.jdt.core.compiler.problem.deadCode=warning
org.eclipse.jdt.core.compiler.problem.deprecation=warning
org.eclipse.jdt.core.compiler.problem.deprecationInDeprecatedCode=disabled
org.eclipse.jdt.core.compiler.problem.deprecationWhenOverridingDeprecatedMethod=disabled
org.eclipse.jdt.core.compiler.problem.discouragedReference=warning
org.eclipse.jdt.core.compiler.problem.emptyStatement=ignore
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.explicitlyClosedAutoCloseable=ignore
org.eclipse.jdt.core.compiler.problem.fallthroughCase=ignore
org.eclipse.jdt.core.compiler.problem.fatalOptionalError=disabled
org.eclipse.jdt.core.compiler.problem.fieldHiding=ignore
org.eclipse.jdt.core.compiler.problem.finalParameterBound=warning
org.eclipse.jdt.core.compiler.problem.finallyBlockNotCompletingNormally=warning
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.problem.hiddenCatchBlock=warning
org.eclipse.jdt.core.compiler.problem.includeNullInfoFromAsserts=disabled
org.eclipse.jdt.core.compiler.problem.incompatibleNonInheritedInterfaceMethod=warning
org.eclipse.jdt.core.compiler.problem.incompleteEnumSwitch=warning
org.eclipse.jdt.core.compiler.problem.indirectStaticAccess=ignore
org.eclipse.jdt.core.compiler.problem.invalidJavadoc=error
org.eclipse.jdt.core.compiler.problem.invalidJavadocTags=enabled
org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsDeprecatedRef=disabled
org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsNotVisibleRef=disabled
org.eclipse.jdt.core.compiler.problem.invalidJavadocTagsVisibility=protected
org.eclipse.jdt.core.compiler.problem.localVariableHiding=ignore
org.eclipse.jdt.core.compiler.problem.methodWithConstructorName=warning
org.eclipse.jdt.core.compiler.problem.missingDefaultCase=ignore
org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=ignore
org.eclipse.jdt.core.compiler.problem.missingEnumCaseDespiteDefault=disabled
org.eclipse.jdt.core.compiler.problem.missingHashCodeMethod=ignore
org.eclipse.jdt.core.compiler.problem.missingJavadocComments=ignore
org.eclipse.jdt.core.compiler.problem.missingJavadocCommentsOverriding=disabled
org.eclipse.jdt.core.compiler.problem.missingJavadocCommentsVisibility=public
org.eclipse.jdt.core.compiler.problem.missingJavadocTagDescription=return_tag
org.eclipse.jdt.core.compiler.problem.missingJavadocTags=error
org.eclipse.jdt.core.compiler.problem.missingJavadocTagsMethodTypeParameters=disabled
org.eclipse.jdt.core.compiler.problem.missingJavadocTagsOverriding=disabled
org.eclipse.jdt.core.compiler.problem.missingJavadocTagsVisibility=protected
org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotation=ignore
org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotationForInterfaceMethodImplementation=enabled
org.eclipse.jdt.core.compiler.problem.missingSerialVersion=warning
org.eclipse.jdt.core.compiler.problem.missingSynchronizedOnInheritedMethod=ignore
org.eclipse.jdt.core.compiler.problem.noEffectAssignment=warning
org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=warning
org.eclipse.jdt.core.compiler.problem.nonExternalizedStringLiteral=ignore
org.eclipse.jdt.core.compiler.problem.nonnullParameterAnnotationDropped=warning
org.eclipse.jdt.core.compiler.problem.nullAnnotationInferenceConflict=error
org.eclipse.jdt.core.compiler.problem.nullReference=warning
org.eclipse.jdt.core.compiler.problem.nullSpecViolation=error
org.eclipse.jdt.core.compiler.problem.nullUncheckedConversion=warning
org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=warning
org.eclipse.jdt.core.compiler.problem.parameterAssignment=ignore
org.eclipse.jdt.core.compiler.problem.possibleAccidentalBooleanAssignment=ignore
org.eclipse.jdt.core.compiler.problem.potentialNullReference=ignore
org.eclipse.jdt.core.compiler.problem.potentiallyUnclosedCloseable=ignore
org.eclipse.jdt.core.compiler.problem.rawTypeReference=warning
org.eclipse.jdt.core.compiler.problem.redundantNullAnnotation=warning
org.eclipse.jdt.core.compiler.problem.redundantNullCheck=ignore
org.eclipse.jdt.core.compiler.problem.redundantSpecificationOfTypeArguments=ignore
org.eclipse.jdt.core.compiler.problem.redundantSuperinterface=ignore
org.eclipse.jdt.core.compiler.problem.reportMethodCanBePotentiallyStatic=ignore
org.eclipse.jdt.core.compiler.problem.reportMethodCanBeStatic=ignore
org.eclipse.jdt.core.compiler.problem.specialParameterHidingField=disabled
org.eclipse.jdt.core.compiler.problem.staticAccessReceiver=warning
org.eclipse.jdt.core.compiler.problem.suppressOptionalErrors=disabled
org.eclipse.jdt.core.compiler.problem.suppressWarnings=enabled
org.eclipse.jdt.core.compiler.problem.syntacticNullAnalysisForFields=disabled
org.eclipse.jdt.core.compiler.problem.syntheticAccessEmulation=ignore
org.eclipse.jdt.core.compiler.problem.typeParameterHiding=warning
org.eclipse.jdt.core.compiler.problem.unavoidableGenericTypeProblems=enabled
org.eclipse.jdt.core.compiler.problem.uncheckedTypeOperation=warning
org.eclipse.jdt.core.compiler.problem.unclosedCloseable=warning
org.eclipse.jdt.core.compiler.problem.undocumentedEmptyBlock=ignore
org.eclipse.jdt.core.compiler.problem.unhandledWarningToken=warning
org.eclipse.jdt.core.compiler.problem.unnecessaryElse=ignore
org.eclipse.jdt.core.compiler.problem.unnecessaryTypeCheck=ignore
org.eclipse.jdt.core.compiler.problem.unqualifiedFieldAccess=ignore
org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownException=ignore
org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionExemptExceptionAndThrowable=enabled
org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionIncludeDocCommentReference=enabled
org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionWhenOverriding=disabled
org.eclipse.jdt.core.compiler.problem.unusedImport=warning
org.eclipse.jdt.core.compiler.problem.unusedLabel=warning
org.eclipse.jdt.core.compiler.problem.unusedLocal=warning
org.eclipse.jdt.core.compiler.problem.unusedObjectAllocation=ignore
org.eclipse.jdt.core.compiler.problem.unusedParameter=ignore
org.eclipse.jdt.core.compiler.problem.unusedParameterIncludeDocCommentReference=enabled
org.eclipse.jdt.core.compiler.problem.unusedParameterWhenImplementingAbstract=disabled
org.eclipse.jdt.core.compiler.problem.unusedParameterWhenOverridingConcrete=disabled
org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=warning
org.eclipse.jdt.core.compiler.problem.unusedTypeParameter=ignore
org.eclipse.jdt.core.compiler.problem.unusedWarningToken=warning
org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=warning
org.eclipse.jdt.core.compiler.source=1.7
org.eclipse.jdt.core.formatter.align_type_members_on_columns=false
org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16
org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation=0
org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16
org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16
org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16
org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16
org.eclipse.jdt.core.formatter.alignment_for_assignment=0
org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16
org.eclipse.jdt.core.formatter.alignment_for_compact_if=16
org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80
org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0
org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16
org.eclipse.jdt.core.formatter.alignment_for_method_declaration=0
org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16
org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16
org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16
org.eclipse.jdt.core.formatter.alignment_for_resources_in_try=80
org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16
org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16
org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16
org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16
org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16
org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16
org.eclipse.jdt.core.formatter.alignment_for_union_type_in_multicatch=16
org.eclipse.jdt.core.formatter.blank_lines_after_imports=1
org.eclipse.jdt.core.formatter.blank_lines_after_package=1
org.eclipse.jdt.core.formatter.blank_lines_before_field=0
org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0
org.eclipse.jdt.core.formatter.blank_lines_before_imports=1
org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1
org.eclipse.jdt.core.formatter.blank_lines_before_method=1
org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1
org.eclipse.jdt.core.formatter.blank_lines_before_package=0
org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1
org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1
org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_lambda_body=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line
org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line
org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false
org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false
org.eclipse.jdt.core.formatter.comment.format_block_comments=false
org.eclipse.jdt.core.formatter.comment.format_header=false
org.eclipse.jdt.core.formatter.comment.format_html=true
org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true
org.eclipse.jdt.core.formatter.comment.format_line_comments=false
org.eclipse.jdt.core.formatter.comment.format_source_code=true
org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true
org.eclipse.jdt.core.formatter.comment.indent_root_tags=true
org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert
org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=insert
org.eclipse.jdt.core.formatter.comment.line_length=80
org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries=true
org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries=true
org.eclipse.jdt.core.formatter.comment.preserve_white_space_between_code_and_line_comments=false
org.eclipse.jdt.core.formatter.compact_else_if=true
org.eclipse.jdt.core.formatter.continuation_indentation=2
org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2
org.eclipse.jdt.core.formatter.disabling_tag=@formatter\:off
org.eclipse.jdt.core.formatter.enabling_tag=@formatter\:on
org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false
org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column=true
org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true
org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true
org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true
org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true
org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true
org.eclipse.jdt.core.formatter.indent_empty_lines=false
org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true
org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true
org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true
org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=true
org.eclipse.jdt.core.formatter.indentation.size=2
org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_field=insert
org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert
org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_method=insert
org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_package=insert
org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_type=insert
org.eclipse.jdt.core.formatter.insert_new_line_after_label=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_after_type_annotation=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert
org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert
org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert
org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert
org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert
org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert
org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert
org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert
org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert
org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert
org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=insert
org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert
org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert
org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert
org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert
org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert
org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert
org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert
org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert
org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert
org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert
org.eclipse.jdt.core.formatter.insert_space_after_lambda_arrow=insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_try=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert
org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert
org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert
org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_try_resources=insert
org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert
org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert
org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_try=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert
org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert
org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert
org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_lambda_arrow=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_try=insert
org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert
org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert
org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert
org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert
org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_try_resources=do not insert
org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert
org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert
org.eclipse.jdt.core.formatter.join_lines_in_comments=true
org.eclipse.jdt.core.formatter.join_wrapped_lines=true
org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false
org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false
org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false
org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false
org.eclipse.jdt.core.formatter.lineSplit=120
org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false
org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false
org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0
org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1
org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true
org.eclipse.jdt.core.formatter.tabulation.char=space
org.eclipse.jdt.core.formatter.tabulation.size=2
org.eclipse.jdt.core.formatter.use_on_off_tags=true
org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false
org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true
org.eclipse.jdt.core.formatter.wrap_before_or_operator_multicatch=true
org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested=true


================================================
FILE: etc/eclipse/settings/org.eclipse.m2e.core.prefs
================================================
activeProfiles=eclipse
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1


================================================
FILE: etc/forbidden-apis/signatures.txt
================================================
@defaultMessage Convert to URI
java.net.URL#getPath()
java.net.URL#getFile()

@defaultMessage spawns threads with vague names; use a custom thread factory and name threads so that you can tell (by its name) which executor it is associated with
java.util.concurrent.Executors#newFixedThreadPool(int)
java.util.concurrent.Executors#newSingleThreadExecutor()
java.util.concurrent.Executors#newCachedThreadPool()
java.util.concurrent.Executors#newSingleThreadScheduledExecutor()
java.util.concurrent.Executors#newScheduledThreadPool(int)
java.util.concurrent.Executors#defaultThreadFactory()
java.util.concurrent.Executors#privilegedThreadFactory()

java.lang.Character#codePointBefore(char[],int) @ Implicit start offset is error-prone when the char[] is a buffer and the first chars are random chars
java.lang.Character#codePointAt(char[],int) @ Implicit end offset is error-prone when the char[] is a buffer and the last chars are random chars

@defaultMessage Please do not try to stop the world
java.lang.System#gc()

@defaultMessage Use Channels.* methods to write to channels. Do not write directly.
java.nio.channels.WritableByteChannel#write(java.nio.ByteBuffer)
java.nio.channels.FileChannel#write(java.nio.ByteBuffer, long)
java.nio.channels.GatheringByteChannel#write(java.nio.ByteBuffer[], int, int)
java.nio.channels.GatheringByteChannel#write(java.nio.ByteBuffer[])
java.nio.channels.ReadableByteChannel#read(java.nio.ByteBuffer)
java.nio.channels.ScatteringByteChannel#read(java.nio.ByteBuffer[])
java.nio.channels.ScatteringByteChannel#read(java.nio.ByteBuffer[], int, int)
java.nio.channels.FileChannel#read(java.nio.ByteBuffer, long)

@defaultMessage Filters are trappy (add suppression or make sure all read methods are redelegated).
java.io.FilterInputStream#<init>(java.io.InputStream)
java.io.FilterOutputStream#<init>(java.io.OutputStream)
java.io.FilterReader#<init>(java.io.Reader)
java.io.FilterWriter#<init>(java.io.Writer)

@defaultMessage Do not use context class loaders, prefer explicit ClassLoader argument.
java.lang.Thread#getContextClassLoader()
java.lang.Thread#setContextClassLoader(java.lang.ClassLoader)


================================================
FILE: morfologik-fsa/pom.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">

  <modelVersion>4.0.0</modelVersion>

  <parent>
    <groupId>org.carrot2</groupId>
    <artifactId>morfologik-parent</artifactId>
    <version>2.2.0-SNAPSHOT</version>
    <relativePath>../pom.xml</relativePath>
  </parent>

  <artifactId>morfologik-fsa</artifactId>
  <packaging>bundle</packaging>

  <name>Morfologik FSA (Traversal)</name>
  <description>Morfologik Finite State Automata Traversal.</description>
  
  <properties>
    <forbiddenapis.signaturefile>../etc/forbidden-apis/signatures.txt</forbiddenapis.signaturefile>
    <project.moduleId>org.carrot2.morfologik.fsa</project.moduleId>
  </properties>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.felix</groupId>
        <artifactId>maven-bundle-plugin</artifactId>
        <configuration>
          <instructions>
            <Export-Package>morfologik.fsa</Export-Package>
            <Import-Package>*</Import-Package>
          </instructions>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>


================================================
FILE: morfologik-fsa/src/main/java/morfologik/fsa/ByteSequenceIterator.java
================================================
package morfologik.fsa;

import java.nio.ByteBuffer;
import java.util.*;

/**
 * An iterator that traverses the right language of a given node (all sequences reachable from a
 * given node).
 */
public final class ByteSequenceIterator implements Iterator<ByteBuffer> {
  /**
   * Initial capacity of both the label buffer and the arc stack. The same value is used as the
   * growth increment whenever either of them turns out to be too small.
   */
  private static final int EXPECTED_MAX_STATES = 15;

  /** The automaton being traversed. */
  private final FSA fsa;

  /**
   * A sequence that {@link #hasNext()} has already computed but {@link #next()} has not handed out
   * yet; <code>null</code> if nothing is pending.
   */
  private ByteBuffer nextElement;

  /** Labels of the arcs followed so far, one byte per level of the traversal. */
  private byte[] buffer = new byte[EXPECTED_MAX_STATES];

  /** A wrapper over {@link #buffer}; this very instance is what {@link #next()} returns. */
  private ByteBuffer bufferWrapper = ByteBuffer.wrap(buffer);

  /** Depth-first traversal stack: for each level, the arc to be visited next (0 if none). */
  private int[] arcs = new int[EXPECTED_MAX_STATES];

  /** The number of levels currently occupied in {@link #arcs}. */
  private int position;

  /**
   * Creates an iterator over every sequence stored in the automaton.
   *
   * @param fsa The automaton to iterate over.
   */
  public ByteSequenceIterator(FSA fsa) {
    this(fsa, fsa.getRootNode());
  }

  /**
   * Creates an iterator over the sequences reachable from <code>node</code>.
   *
   * @param fsa The automaton to iterate over.
   * @param node The starting node's identifier (can be the {@link FSA#getRootNode()}).
   */
  public ByteSequenceIterator(FSA fsa, int node) {
    this.fsa = fsa;

    // A node whose first arc is 0 has nothing to traverse; the stack is then left empty.
    final boolean hasOutgoingArcs = fsa.getFirstArc(node) != 0;
    if (hasOutgoingArcs) {
      restartFrom(node);
    }
  }

  /**
   * Resets the traversal state and starts over from <code>node</code>, so that the same iterator
   * instance can be reused.
   *
   * @param node Restart the iterator from <code>node</code>.
   * @return Returns <code>this</code> for call chaining.
   */
  public ByteSequenceIterator restartFrom(int node) {
    nextElement = null;
    bufferWrapper.clear();
    position = 0;

    pushNode(node);
    return this;
  }

  /**
   * @return Returns <code>true</code> if at least one more sequence is available. The sequence is
   *     computed eagerly and kept until {@link #next()} is called.
   */
  @Override
  public boolean hasNext() {
    if (nextElement != null) {
      return true;
    }

    nextElement = advance();
    return nextElement != null;
  }

  /**
   * @return Returns a {@link ByteBuffer} with the sequence corresponding to the next final state in
   *     the automaton. The returned buffer is the iterator's internal wrapper, so its content
   *     changes on subsequent calls.
   * @throws NoSuchElementException If the traversal is exhausted.
   */
  @Override
  public ByteBuffer next() {
    final ByteBuffer result;
    if (nextElement == null) {
      // Nothing was prefetched by hasNext(); compute the element right now.
      result = advance();
      if (result == null) {
        throw new NoSuchElementException();
      }
    } else {
      result = nextElement;
      nextElement = null;
    }
    return result;
  }

  /**
   * Continues the depth-first walk until an arc marked as final is met.
   *
   * @return The internal buffer wrapper limited to the current sequence, or <code>null</code> when
   *     the stack has been emptied.
   */
  private ByteBuffer advance() {
    while (position > 0) {
      final int depth = position - 1;
      final int current = arcs[depth];

      if (current == 0) {
        // All arcs at this level have been visited; pop the level.
        position = depth;
        continue;
      }

      // Replace the stack entry with the sibling arc. The level itself stays on the stack,
      // which keeps the depth (and thus the buffer index) accurate.
      arcs[depth] = fsa.getNextArc(current);

      // Make room for this level's label if the buffer is too short.
      if (depth >= buffer.length) {
        buffer = Arrays.copyOf(buffer, buffer.length + EXPECTED_MAX_STATES);
        bufferWrapper = ByteBuffer.wrap(buffer);
      }
      buffer[depth] = fsa.getArcLabel(current);

      // Schedule the target node's arcs (one level deeper) unless the arc leads nowhere.
      if (!fsa.isArcTerminal(current)) {
        pushNode(fsa.getEndNode(current));
      }

      if (fsa.isArcFinal(current)) {
        bufferWrapper.clear();
        bufferWrapper.limit(depth + 1);
        return bufferWrapper;
      }
    }

    return null;
  }

  /** Always throws; this iterator is read-only. */
  @Override
  public void remove() {
    throw new UnsupportedOperationException("Read-only iterator.");
  }

  /** Pushes the first arc of <code>node</code> onto the traversal stack, growing it if full. */
  private void pushNode(int node) {
    if (position == arcs.length) {
      arcs = Arrays.copyOf(arcs, position + EXPECTED_MAX_STATES);
    }

    // Claim the slot first, then look up the arc (same evaluation order as arcs[position++] = ...).
    final int slot = position++;
    arcs[slot] = fsa.getFirstArc(node);
  }
}


================================================
FILE: morfologik-fsa/src/main/java/morfologik/fsa/CFSA.java
================================================
package morfologik.fsa;

import static morfologik.fsa.FSAFlags.*;

import java.io.*;
import java.util.*;

/**
 * CFSA (Compact Finite State Automaton) binary format implementation. This is a slightly
 * reorganized version of {@link FSA5} offering smaller automata size at some (minor) performance
 * penalty.
 *
 * <p><b>Note:</b> Serialize to {@link CFSA2} for new code.
 *
 * <p>The encoding of automaton body is as follows.
 *
 * <pre>
 * ---- FSA header (standard)
 * Byte                            Description
 *       +-+-+-+-+-+-+-+-+\
 *     0 | | | | | | | | | +------ '\'
 *       +-+-+-+-+-+-+-+-+/
 *       +-+-+-+-+-+-+-+-+\
 *     1 | | | | | | | | | +------ 'f'
 *       +-+-+-+-+-+-+-+-+/
 *       +-+-+-+-+-+-+-+-+\
 *     2 | | | | | | | | | +------ 's'
 *       +-+-+-+-+-+-+-+-+/
 *       +-+-+-+-+-+-+-+-+\
 *     3 | | | | | | | | | +------ 'a'
 *       +-+-+-+-+-+-+-+-+/
 *       +-+-+-+-+-+-+-+-+\
 *     4 | | | | | | | | | +------ version (fixed 0xc5)
 *       +-+-+-+-+-+-+-+-+/
 *       +-+-+-+-+-+-+-+-+\
 *     5 | | | | | | | | | +------ filler character
 *       +-+-+-+-+-+-+-+-+/
 *       +-+-+-+-+-+-+-+-+\
 *     6 | | | | | | | | | +------ annot character
 *       +-+-+-+-+-+-+-+-+/
 *       +-+-+-+-+-+-+-+-+\
 *     7 |C|C|C|C|G|G|G|G| +------ C - node data size (ctl), G - address size (gotoLength)
 *       +-+-+-+-+-+-+-+-+/
 *       +-+-+-+-+-+-+-+-+\
 *  8-39 | | | | | | | | | +------ labels mapped for type (1) of arc encoding.
 *       : : : : : : : : : |
 *       +-+-+-+-+-+-+-+-+/
 *
 * ---- Start of a node; only if automaton was compiled with NUMBERS option.
 *
 * Byte
 *        +-+-+-+-+-+-+-+-+\
 *      0 | | | | | | | | | \  LSB
 *        +-+-+-+-+-+-+-+-+  +
 *      1 | | | | | | | | |  |      number of strings recognized
 *        +-+-+-+-+-+-+-+-+  +----- by the automaton starting
 *        : : : : : : : : :  |      from this node.
 *        +-+-+-+-+-+-+-+-+  +
 *  ctl-1 | | | | | | | | | /  MSB
 *        +-+-+-+-+-+-+-+-+/
 *
 * ---- A vector of node's arcs. Conditional format, depending on flags.
 *
 * 1) NEXT bit set, mapped arc label.
 *
 *                +--------------- arc's label mapped in M bits if M's field value &gt; 0
 *                | +------------- node pointed to is next
 *                | | +----------- the last arc of the node
 *         _______| | | +--------- the arc is final
 *        /       | | | |
 *       +-+-+-+-+-+-+-+-+\
 *     0 |M|M|M|M|M|1|L|F| +------ flags + (M) index of the mapped label.
 *       +-+-+-+-+-+-+-+-+/
 *
 * 2) NEXT bit set, label separate.
 *
 *                +--------------- arc's label stored separately (M's field is zero).
 *                | +------------- node pointed to is next
 *                | | +----------- the last arc of the node
 *                | | | +--------- the arc is final
 *                | | | |
 *       +-+-+-+-+-+-+-+-+\
 *     0 |0|0|0|0|0|1|L|F| +------ flags
 *       +-+-+-+-+-+-+-+-+/
 *       +-+-+-+-+-+-+-+-+\
 *     1 | | | | | | | | | +------ label
 *       +-+-+-+-+-+-+-+-+/
 *
 * 3) NEXT bit not set. Full arc.
 *
 *                  +------------- node pointed to is next
 *                  | +----------- the last arc of the node
 *                  | | +--------- the arc is final
 *                  | | |
 *       +-+-+-+-+-+-+-+-+\
 *     0 |A|A|A|A|A|0|L|F| +------ flags + (A) address field, lower bits
 *       +-+-+-+-+-+-+-+-+/
 *       +-+-+-+-+-+-+-+-+\
 *     1 | | | | | | | | | +------ label
 *       +-+-+-+-+-+-+-+-+/
 *       : : : : : : : : :
 *       +-+-+-+-+-+-+-+-+\
 * gtl-1 |A|A|A|A|A|A|A|A| +------ address, continuation (MSB)
 *       +-+-+-+-+-+-+-+-+/
 * </pre>
 */
public final class CFSA extends FSA {
  /** Automaton header version value. */
  public static final byte VERSION = (byte) 0xC5;

  /**
   * Flag bit: the arc corresponds to the last character of a sequence that was available when the
   * automaton was built.
   */
  public static final int BIT_FINAL_ARC = 0x01;

  /**
   * Flag bit: the arc is the last one on its node's list; whatever follows belongs to another
   * node.
   */
  public static final int BIT_LAST_ARC = 0x02;

  /**
   * Flag bit: the arc's target node is stored right after the arc in the compressed structure, so
   * the arc carries no goto field.
   */
  public static final int BIT_TARGET_NEXT = 0x04;

  /**
   * Raw bytes of the automaton (nodes and arcs). The layout is described in the documentation of
   * this class.
   */
  public byte[] arcs;

  /**
   * Size, in bytes, of the per-node header present when the automaton was compiled with the <code>
   * NUMBERS</code> option; zero otherwise.
   */
  public final int nodeDataLength;

  /** Flags for this automaton version. */
  private final Set<FSAFlags> flags;

  /** Number of bytes each address takes in full, expanded form (goto length). */
  public final int gtl;

  /**
   * Lookup table for arcs of type (1) (see class documentation): indexed by the mapped label value
   * taken from the flags byte, it yields the original label.
   */
  public final byte[] labelMapping;

  /**
   * Reads the automaton from a stream in the CFSA layout. The stream is read starting at the
   * filler byte; presumably the magic and version bytes were consumed by the caller beforehand.
   */
  CFSA(InputStream stream) throws IOException {
    final DataInputStream input = new DataInputStream(stream);

    // Legacy header fields (filler, annotation) are read and discarded.
    input.readByte();
    input.readByte();

    // One byte packs the node data size (upper nibble) and the goto length (lower nibble).
    final int packedSizes = input.readByte();
    final int nodeDataSize = (packedSizes >>> 4) & 0x0f;

    // A non-zero node data size means the automaton was compiled with NUMBERS.
    final EnumSet<FSAFlags> detected = EnumSet.of(FLEXIBLE, STOPBIT, NEXTBIT);
    if (nodeDataSize != 0) {
      detected.add(NUMBERS);
    }
    this.flags = detected;
    this.nodeDataLength = nodeDataSize;
    this.gtl = packedSizes & 0x0f;

    // The label mapping dictionary has a fixed size of 32 entries.
    this.labelMapping = new byte[1 << 5];
    input.readFully(this.labelMapping);

    // Everything that remains in the stream is the arcs' data.
    this.arcs = readRemaining(input);
  }

  /**
   * Returns the start node of this automaton. May return <code>0</code> if the start node is also
   * an end node.
   */
  @Override
  public int getRootNode() {
    // The structure begins with a dummy node marking the terminating state; step over its arc.
    final int dummyArc = getFirstArc(0);
    final int epsilonNode = skipArc(dummyArc);

    // The epsilon node's first (and only) arc points at the root.
    final int epsilonArc = getFirstArc(epsilonNode);
    return getDestinationNodeOffset(epsilonArc);
  }

  /** {@inheritDoc} */
  @Override
  public final int getFirstArc(int node) {
    // Arcs start right after the (possibly empty) node data header.
    return node + nodeDataLength;
  }

  /** {@inheritDoc} */
  @Override
  public final int getNextArc(int arc) {
    return isArcLast(arc) ? 0 : skipArc(arc);
  }

  /** {@inheritDoc} */
  @Override
  public int getArc(int node, byte label) {
    int candidate = getFirstArc(node);
    while (candidate != 0) {
      if (getArcLabel(candidate) == label) {
        return candidate;
      }
      candidate = getNextArc(candidate);
    }

    // No arc of this node carries the requested label.
    return 0;
  }

  /** {@inheritDoc} */
  @Override
  public int getEndNode(int arc) {
    final int target = getDestinationNodeOffset(arc);
    if (target != 0) {
      return target;
    }
    throw new RuntimeException("This is a terminal arc [" + arc + "]");
  }

  /** {@inheritDoc} */
  @Override
  public byte getArcLabel(int arc) {
    // Unless the label is packed into the flags byte, it is stored in the byte that follows.
    if (!isNextSet(arc) || !isLabelCompressed(arc)) {
      return arcs[arc + 1];
    }

    // The upper five bits of the flags byte index the mapping table.
    final int mappedIndex = (arcs[arc] >>> 3) & 0x1f;
    return labelMapping[mappedIndex];
  }

  /** {@inheritDoc} */
  @Override
  public int getRightLanguageCount(int node) {
    assert getFlags().contains(FSAFlags.NUMBERS) : "This FSA was not compiled with NUMBERS.";
    return FSA5.decodeFromBytes(arcs, node, nodeDataLength);
  }

  /** {@inheritDoc} */
  @Override
  public boolean isArcFinal(int arc) {
    final int flagByte = arcs[arc];
    return (flagByte & BIT_FINAL_ARC) != 0;
  }

  /** {@inheritDoc} */
  @Override
  public boolean isArcTerminal(int arc) {
    return getDestinationNodeOffset(arc) == 0;
  }

  /**
   * Checks whether the arc has the <code>LAST</code> bit set.
   *
   * @see #BIT_LAST_ARC
   * @param arc The node's arc identifier.
   * @return Returns true if the argument is the last arc of a node.
   */
  public boolean isArcLast(int arc) {
    final int flagByte = arcs[arc];
    return (flagByte & BIT_LAST_ARC) != 0;
  }

  /**
   * @see #BIT_TARGET_NEXT
   * @param arc The node's arc identifier.
   * @return Returns true if {@link #BIT_TARGET_NEXT} is set for this arc.
   */
  public boolean isNextSet(int arc) {
    final int flagByte = arcs[arc];
    return (flagByte & BIT_TARGET_NEXT) != 0;
  }

  /**
   * @param arc The node's arc identifier.
   * @return Returns <code>true</code> if the label is compressed inside flags byte.
   */
  public boolean isLabelCompressed(int arc) {
    assert isNextSet(arc) : "Only applicable to arcs with NEXT bit.";
    // Any bit set above the three flag bits means a mapped label index is present.
    return (arcs[arc] & ~0x07) != 0;
  }

  /**
   * {@inheritDoc}
   *
   * <p>For this automaton version, an additional {@link FSAFlags#NUMBERS} flag may be set to
   * indicate the automaton contains extra fields for each node. The returned set is a read-only
   * view.
   */
  public Set<FSAFlags> getFlags() {
    return Collections.unmodifiableSet(flags);
  }

  /** Returns the address of the node pointed to by this arc. */
  final int getDestinationNodeOffset(int arc) {
    if (isNextSet(arc)) {
      // The target node is stored immediately after this arc.
      return skipArc(arc);
    }

    // Otherwise assemble the address from the goto field: continuation bytes first (most
    // significant last in the array), then the flags byte holding the lowest bits.
    int address = 0;
    for (int i = gtl - 1; i >= 1; i--) {
      address = (address << 8) | (arcs[arc + 1 + i] & 0xff);
    }
    address = (address << 8) | (arcs[arc] & 0xff);

    // Drop the three flag bits sharing the first byte with the address.
    return address >>> 3;
  }

  /** Returns the offset just past the arc starting at <code>offset</code>, based on its layout. */
  private int skipArc(int offset) {
    final int arcLength;
    if (!isNextSet(offset)) {
      // Full arc: flags byte plus gtl bytes (label and address continuation).
      arcLength = 1 + gtl;
    } else if (isLabelCompressed(offset)) {
      // Flags byte only; the label is packed inside it.
      arcLength = 1;
    } else {
      // Flags byte followed by an explicit label byte.
      arcLength = 1 + 1;
    }
    return offset + arcLength;
  }
}


================================================
FILE: morfologik-fsa/src/main/java/morfologik/fsa/CFSA2.java
================================================
package morfologik.fsa;

import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.EnumSet;
import java.util.Set;

/**
 * CFSA (Compact Finite State Automaton) binary format implementation, version 2:
 *
 * <ul>
 *   <li>{@link #BIT_TARGET_NEXT} applicable on all arcs, not necessarily the last one.
 *   <li>v-coded goto field
 *   <li>v-coded perfect hashing numbers, if any
 *   <li>31 most frequent labels integrated with flags byte
 * </ul>
 *
 * <p>The encoding of automaton body is as follows.
 *
 * <pre>
 * ---- CFSA header
 * Byte                            Description
 *       +-+-+-+-+-+-+-+-+\
 *     0 | | | | | | | | | +------ '\'
 *       +-+-+-+-+-+-+-+-+/
 *       +-+-+-+-+-+-+-+-+\
 *     1 | | | | | | | | | +------ 'f'
 *       +-+-+-+-+-+-+-+-+/
 *       +-+-+-+-+-+-+-+-+\
 *     2 | | | | | | | | | +------ 's'
 *       +-+-+-+-+-+-+-+-+/
 *       +-+-+-+-+-+-+-+-+\
 *     3 | | | | | | | | | +------ 'a'
 *       +-+-+-+-+-+-+-+-+/
 *       +-+-+-+-+-+-+-+-+\
 *     4 | | | | | | | | | +------ version (fixed 0xc6)
 *       +-+-+-+-+-+-+-+-+/
 *       +-+-+-+-+-+-+-+-+\
 *     5 | | | | | | | | | +----\
 *       +-+-+-+-+-+-+-+-+/      \ flags [MSB first]
 *       +-+-+-+-+-+-+-+-+\      /
 *     6 | | | | | | | | | +----/
 *       +-+-+-+-+-+-+-+-+/
 *       +-+-+-+-+-+-+-+-+\
 *     7 | | | | | | | | | +------ label lookup table size
 *       +-+-+-+-+-+-+-+-+/
 *       +-+-+-+-+-+-+-+-+\
 *  8-32 | | | | | | | | | +------ label value lookup table
 *       : : : : : : : : : |
 *       +-+-+-+-+-+-+-+-+/
 *
 * ---- Start of a node; only if automaton was compiled with NUMBERS option.
 *
 * Byte
 *        +-+-+-+-+-+-+-+-+\
 *      0 | | | | | | | | | \
 *        +-+-+-+-+-+-+-+-+  +
 *      1 | | | | | | | | |  |      number of strings recognized
 *        +-+-+-+-+-+-+-+-+  +----- by the automaton starting
 *        : : : : : : : : :  |      from this node. v-coding
 *        +-+-+-+-+-+-+-+-+  +
 *        | | | | | | | | | /
 *        +-+-+-+-+-+-+-+-+/
 *
 * ---- A vector of this node's arcs. An arc's layout depends on the combination of flags.
 *
 * 1) NEXT bit set, mapped arc label.
 *
 *        +----------------------- node pointed to is next
 *        | +--------------------- the last arc of the node
 *        | | +------------------- this arc leads to a final state (acceptor)
 *        | | |  _______+--------- arc's label; indexed if M &gt; 0, otherwise explicit label follows
 *        | | | / | | | |
 *       +-+-+-+-+-+-+-+-+\
 *     0 |N|L|F|M|M|M|M|M| +------ flags + (M) index of the mapped label.
 *       +-+-+-+-+-+-+-+-+/
 *       +-+-+-+-+-+-+-+-+\
 *     1 | | | | | | | | | +------ optional label if M == 0
 *       +-+-+-+-+-+-+-+-+/
 *       : : : : : : : : :
 *       +-+-+-+-+-+-+-+-+\
 *       |A|A|A|A|A|A|A|A| +------ v-coded goto address
 *       +-+-+-+-+-+-+-+-+/
 * </pre>
 */
public final class CFSA2 extends FSA {
  /** Automaton header version value. */
  public static final byte VERSION = (byte) 0xc6;

  /** The target node of this arc follows the last arc of the current state (no goto field). */
  public static final int BIT_TARGET_NEXT = 1 << 7;

  /** The arc is the last one from the current node's arcs list. */
  public static final int BIT_LAST_ARC = 1 << 6;

  /**
   * The arc corresponds to the last character of a sequence available when building the automaton
   * (acceptor transition).
   */
  public static final int BIT_FINAL_ARC = 1 << 5;

  /** The count of bits assigned to storing an indexed label. */
  static final int LABEL_INDEX_BITS = 5;

  /** Masks only the M bits of a flag byte. */
  static final int LABEL_INDEX_MASK = (1 << LABEL_INDEX_BITS) - 1;

  /** Maximum size of the labels index. */
  public static final int LABEL_INDEX_SIZE = (1 << LABEL_INDEX_BITS) - 1;

  /**
   * An array of bytes with the internal representation of the automaton. Please see the
   * documentation of this class for more information on how this structure is organized.
   */
  public byte[] arcs;

  /** Flags for this automaton version. Never exposed directly; see {@link #getFlags()}. */
  private final EnumSet<FSAFlags> flags;

  /** Label mapping for M-indexed labels. */
  public final byte[] labelMapping;

  /** If <code>true</code> states are prepended with numbers. */
  private final boolean hasNumbers;

  /** Epsilon node's offset. */
  private final int epsilon = 0;

  /**
   * Reads an automaton from a byte stream. The stream is read starting at the two flag bytes.
   *
   * @throws IOException If the stream cannot be read or the flag bits contain values that do not
   *     correspond to any known {@link FSAFlags} constant.
   */
  CFSA2(InputStream stream) throws IOException {
    DataInputStream in = new DataInputStream(stream);

    // Read flags.
    short flagBits = in.readShort();
    flags = EnumSet.noneOf(FSAFlags.class);
    for (FSAFlags f : FSAFlags.values()) {
      if (f.isSet(flagBits)) {
        flags.add(f);
      }
    }

    // Re-encoding the recognized flags must reproduce the input; otherwise unknown bits are set.
    if (flagBits != FSAFlags.asShort(flags)) {
      // Mask to 16 bits so that a value with the top bit set is not printed sign-extended
      // (0x8000 rather than 0xffff8000).
      throw new IOException("Unrecognized flags: 0x" + Integer.toHexString(flagBits & 0xffff));
    }

    this.hasNumbers = flags.contains(FSAFlags.NUMBERS);

    /*
     * Read mapping dictionary. Its size is stored in a single unsigned byte.
     */
    int labelMappingSize = in.readByte() & 0xff;
    labelMapping = new byte[labelMappingSize];
    in.readFully(labelMapping);

    /*
     * Read arcs' data.
     */
    arcs = readRemaining(in);
  }

  /** {@inheritDoc} */
  @Override
  public int getRootNode() {
    // Skip dummy node marking terminating state.
    return getDestinationNodeOffset(getFirstArc(epsilon));
  }

  /** {@inheritDoc} */
  @Override
  public final int getFirstArc(int node) {
    if (hasNumbers) {
      // The node starts with a v-coded number; arcs begin right after it.
      return skipVInt(node);
    } else {
      return node;
    }
  }

  /** {@inheritDoc} */
  @Override
  public final int getNextArc(int arc) {
    if (isArcLast(arc)) {
      return 0;
    } else {
      return skipArc(arc);
    }
  }

  /** {@inheritDoc} */
  @Override
  public int getArc(int node, byte label) {
    for (int arc = getFirstArc(node); arc != 0; arc = getNextArc(arc)) {
      if (getArcLabel(arc) == label) {
        return arc;
      }
    }

    // An arc labeled with "label" not found.
    return 0;
  }

  /** {@inheritDoc} */
  @Override
  public int getEndNode(int arc) {
    final int nodeOffset = getDestinationNodeOffset(arc);
    assert nodeOffset != 0 : "Can't follow a terminal arc: " + arc;
    assert nodeOffset < arcs.length : "Node out of bounds.";
    return nodeOffset;
  }

  /** {@inheritDoc} */
  @Override
  public byte getArcLabel(int arc) {
    // A non-zero index in the flags byte selects a label from the mapping table; a zero index
    // means the label is stored explicitly in the following byte.
    int index = arcs[arc] & LABEL_INDEX_MASK;
    if (index > 0) {
      return this.labelMapping[index];
    } else {
      return arcs[arc + 1];
    }
  }

  /** {@inheritDoc} */
  @Override
  public int getRightLanguageCount(int node) {
    assert getFlags().contains(FSAFlags.NUMBERS) : "This FSA was not compiled with NUMBERS.";
    return readVInt(arcs, node);
  }

  /** {@inheritDoc} */
  @Override
  public boolean isArcFinal(int arc) {
    return (arcs[arc] & BIT_FINAL_ARC) != 0;
  }

  /** {@inheritDoc} */
  @Override
  public boolean isArcTerminal(int arc) {
    return (0 == getDestinationNodeOffset(arc));
  }

  /**
   * Returns <code>true</code> if this arc has the <code>LAST</code> bit set.
   *
   * @see #BIT_LAST_ARC
   * @param arc The node's arc identifier.
   * @return Returns true if the argument is the last arc of a node.
   */
  public boolean isArcLast(int arc) {
    return (arcs[arc] & BIT_LAST_ARC) != 0;
  }

  /**
   * @see #BIT_TARGET_NEXT
   * @param arc The node's arc identifier.
   * @return Returns true if {@link #BIT_TARGET_NEXT} is set for this arc.
   */
  public boolean isNextSet(int arc) {
    return (arcs[arc] & BIT_TARGET_NEXT) != 0;
  }

  /**
   * {@inheritDoc}
   *
   * <p>The returned set is a read-only view, so callers cannot alter the flags this automaton was
   * loaded with.
   */
  public Set<FSAFlags> getFlags() {
    // Fully qualified to avoid depending on an import this file does not declare.
    return java.util.Collections.unmodifiableSet(flags);
  }

  /** Returns the address of the node pointed to by this arc. */
  final int getDestinationNodeOffset(int arc) {
    if (isNextSet(arc)) {
      /* Follow until the last arc of this state. */
      while (!isArcLast(arc)) {
        arc = getNextArc(arc);
      }

      /* And return the byte right after it. */
      return skipArc(arc);
    } else {
      /*
       * The destination node address is v-coded. v-code starts either
       * at the next byte (label indexed) or after the next byte (label explicit).
       */
      return readVInt(arcs, arc + ((arcs[arc] & LABEL_INDEX_MASK) == 0 ? 2 : 1));
    }
  }

  /** Read the arc's layout and skip as many bytes, as needed, to skip it. */
  private int skipArc(int offset) {
    int flag = arcs[offset++];

    // Explicit label?
    if ((flag & LABEL_INDEX_MASK) == 0) {
      offset++;
    }

    // Explicit goto?
    if ((flag & BIT_TARGET_NEXT) == 0) {
      offset = skipVInt(offset);
    }

    assert offset < this.arcs.length;
    return offset;
  }

  /**
   * Read a v-int: seven payload bits per byte, least significant group first; a set top bit (a
   * negative byte) signals that another byte follows.
   *
   * @param array The array to read from.
   * @param offset The offset of the first byte of the v-int.
   * @return The decoded value.
   */
  static int readVInt(byte[] array, int offset) {
    byte b = array[offset];
    int value = b & 0x7F;

    for (int shift = 7; b < 0; shift += 7) {
      b = array[++offset];
      value |= (b & 0x7F) << shift;
    }

    return value;
  }

  /**
   * Return the byte-length of a v-coded int.
   *
   * @param value A non-negative value.
   * @return The number of bytes the v-coded form of <code>value</code> occupies.
   */
  static int vIntLength(int value) {
    assert value >= 0 : "Can't v-code negative ints.";

    int bytes;
    for (bytes = 1; value >= 0x80; bytes++) {
      value >>= 7;
    }

    return bytes;
  }

  /** Skip a v-int: advance past every continuation byte and the terminating byte. */
  private int skipVInt(int offset) {
    while (arcs[offset++] < 0) {
      // Do nothing.
    }
    return offset;
  }
}


================================================
FILE: morfologik-fsa/src/main/java/morfologik/fsa/FSA.java
================================================
package morfologik.fsa;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.BitSet;
import java.util.Collections;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;

/**
 * Abstract base class of all finite state automaton implementations. The automata are arc-based,
 * following the design described in Jan Daciuk's <i>Incremental Construction of Finite-State
 * Automata and Transducers, and Their Use in the Natural Language Processing</i> (PhD thesis,
 * Technical University of Gdansk).
 *
 * <p>Nodes and arcs are addressed with <code>int</code> identifiers. Arc accessors use the value 0
 * to signal "no (more) arcs".
 */
public abstract class FSA implements Iterable<ByteBuffer> {
  /**
   * @return Returns the identifier of the root node of this automaton. Returns 0 if the start node
   *     is also the end node (the automaton is empty).
   */
  public abstract int getRootNode();

  /**
   * @param node Identifier of the node.
   * @return Returns the identifier of the first arc leaving <code>node</code> or 0 if the node has
   *     no outgoing arcs.
   */
  public abstract int getFirstArc(int node);

  /**
   * @param arc The arc's identifier.
   * @return Returns the identifier of the arc that follows <code>arc</code> within the same node,
   *     or 0 if the node has no more arcs.
   */
  public abstract int getNextArc(int arc);

  /**
   * @param node Identifier of the node.
   * @param label The arc's label.
   * @return Returns the identifier of the arc leaving <code>node</code> that is labeled with
   *     <code>label</code>, or 0 if the node has no such outgoing arc.
   */
  public abstract int getArc(int node, byte label);

  /**
   * @param arc The arc's identifier.
   * @return Returns the label associated with the given <code>arc</code>.
   */
  public abstract byte getArcLabel(int arc);

  /**
   * @param arc The arc's identifier.
   * @return Returns <code>true</code> if the destination node at the end of this <code>arc</code>
   *     corresponds to an input sequence created when building this automaton.
   */
  public abstract boolean isArcFinal(int arc);

  /**
   * @param arc The arc's identifier.
   * @return Returns <code>true</code> if this <code>arc</code> does not have a terminating node
   *     ({@link #getEndNode(int)} will throw an exception). Implies {@link #isArcFinal(int)}.
   */
  public abstract boolean isArcTerminal(int arc);

  /**
   * @param arc The arc's identifier.
   * @return Returns the end node pointed to by the given <code>arc</code>. Terminal arcs (those
   *     that point to a terminal state) have no end node representation and throw a runtime
   *     exception.
   */
  public abstract int getEndNode(int arc);

  /**
   * @return Returns a set of flags for this FSA instance.
   */
  public abstract Set<FSAFlags> getFlags();

  /**
   * Counts the arcs of a node by walking its arc list.
   *
   * @param node Identifier of the node.
   * @return Returns the number of arcs leaving the given node.
   */
  public int getArcCount(int node) {
    int total = 0;
    int arc = getFirstArc(node);
    while (arc != 0) {
      total++;
      arc = getNextArc(arc);
    }
    return total;
  }

  /**
   * @param node Identifier of the node.
   * @return Returns the number of sequences reachable from the given state if the automaton was
   *     compiled with {@link FSAFlags#NUMBERS} (in other words: the size of the right language of
   *     the state).
   * @throws UnsupportedOperationException If the automaton was not compiled with {@link
   *     FSAFlags#NUMBERS}. The value can then be computed by manual count of {@link #getSequences}.
   *     This base implementation always throws.
   */
  public int getRightLanguageCount(int node) {
    throw new UnsupportedOperationException("Automaton not compiled with " + FSAFlags.NUMBERS);
  }

  /**
   * Returns an iterable over all binary sequences starting at the given FSA state (node) and ending
   * in final nodes. This corresponds to the set of suffixes of a given prefix from all sequences
   * stored in the automaton.
   *
   * <p>The iterator returns a {@link ByteBuffer} whose contents change on each call to {@link
   * Iterator#next()}. To keep the contents between calls to {@link Iterator#next()}, one must copy
   * the buffer to some other location.
   *
   * <p><b>Important.</b> It is guaranteed that the returned byte buffer is backed by a byte array
   * and that the content of the byte buffer starts at the array's index 0.
   *
   * @param node Identifier of the starting node from which to return subsequences. An empty
   *     iterable is returned for node 0.
   * @return An iterable over all sequences encoded starting at the given node.
   */
  public Iterable<ByteBuffer> getSequences(final int node) {
    if (node != 0) {
      final FSA automaton = this;
      return new Iterable<ByteBuffer>() {
        public Iterator<ByteBuffer> iterator() {
          // Each call creates an independent traversal.
          return new ByteSequenceIterator(automaton, node);
        }
      };
    }

    return Collections.<ByteBuffer>emptyList();
  }

  /**
   * An alias of calling {@link #iterator} directly ({@link FSA} is also {@link Iterable}).
   *
   * @return Returns all sequences encoded in the automaton.
   */
  public final Iterable<ByteBuffer> getSequences() {
    final int root = getRootNode();
    return getSequences(root);
  }

  /**
   * Returns an iterator over all binary sequences starting from the initial FSA state (node) and
   * ending in final nodes. The iterator returns a {@link ByteBuffer} whose contents change on each
   * call to {@link Iterator#next()}. To keep the contents between calls to {@link
   * Iterator#next()}, one must copy the buffer to some other location.
   *
   * <p><b>Important.</b> It is guaranteed that the returned byte buffer is backed by a byte array
   * and that the content of the byte buffer starts at the array's index 0.
   */
  public final Iterator<ByteBuffer> iterator() {
    final Iterable<ByteBuffer> allSequences = getSequences();
    return allSequences.iterator();
  }

  /**
   * Visit all states. The order of visiting is undefined. This method may be faster than traversing
   * the automaton in post or preorder since it can scan states linearly. Returning false from
   * {@link StateVisitor#accept(int)} immediately terminates the traversal.
   *
   * <p>This base implementation delegates to {@link #visitInPostOrder(StateVisitor)}.
   *
   * @param v Visitor to receive traversal calls.
   * @param <T> A subclass of {@link StateVisitor}.
   * @return Returns the argument (for access to anonymous class fields).
   */
  public <T extends StateVisitor> T visitAllStates(T v) {
    return visitInPostOrder(v);
  }

  /**
   * Same as {@link #visitInPostOrder(StateVisitor, int)}, starting from root automaton node.
   *
   * @param v Visitor to receive traversal calls.
   * @param <T> A subclass of {@link StateVisitor}.
   * @return Returns the argument (for access to anonymous class fields).
   */
  public <T extends StateVisitor> T visitInPostOrder(T v) {
    return visitInPostOrder(v, getRootNode());
  }

  /**
   * Visits all states reachable from <code>node</code> in postorder. Returning false from {@link
   * StateVisitor#accept(int)} immediately terminates the traversal.
   *
   * @param v Visitor to receive traversal calls.
   * @param <T> A subclass of {@link StateVisitor}.
   * @param node Identifier of the node.
   * @return Returns the argument (for access to anonymous class fields).
   */
  public <T extends StateVisitor> T visitInPostOrder(T v, int node) {
    final BitSet visited = new BitSet();
    visitInPostOrder(v, node, visited);
    return v;
  }

  /**
   * Recursive postorder worker.
   *
   * @return <code>false</code> as soon as the visitor asks to stop; <code>true</code> otherwise
   *     (including for nodes that were already visited).
   */
  private boolean visitInPostOrder(StateVisitor v, int node, BitSet visited) {
    if (!visited.get(node)) {
      visited.set(node);

      // Descend into all children first; terminal arcs have no end node to descend into.
      int arc = getFirstArc(node);
      while (arc != 0) {
        if (!isArcTerminal(arc) && !visitInPostOrder(v, getEndNode(arc), visited)) {
          return false;
        }
        arc = getNextArc(arc);
      }

      return v.accept(node);
    }

    return true;
  }

  /**
   * Same as {@link #visitInPreOrder(StateVisitor, int)}, starting from root automaton node.
   *
   * @param v Visitor to receive traversal calls.
   * @param <T> A subclass of {@link StateVisitor}.
   * @return Returns the argument (for access to anonymous class fields).
   */
  public <T extends StateVisitor> T visitInPreOrder(T v) {
    return visitInPreOrder(v, getRootNode());
  }

  /**
   * Visits all states in preorder. Returning false from {@link StateVisitor#accept(int)} skips
   * traversal of all sub-states of a given state.
   *
   * @param v Visitor to receive traversal calls.
   * @param <T> A subclass of {@link StateVisitor}.
   * @param node Identifier of the node.
   * @return Returns the argument (for access to anonymous class fields).
   */
  public <T extends StateVisitor> T visitInPreOrder(T v, int node) {
    final BitSet visited = new BitSet();
    visitInPreOrder(v, node, visited);
    return v;
  }

  /**
   * Drains an input stream into memory.
   *
   * @param in The input stream.
   * @return Reads all remaining bytes from an input stream and returns them as a byte array.
   * @throws IOException Rethrown if an I/O exception occurs.
   */
  protected static final byte[] readRemaining(InputStream in) throws IOException {
    final ByteArrayOutputStream collected = new ByteArrayOutputStream();
    final byte[] chunk = new byte[8 * 1024];

    // A negative count from read() marks the end of the stream.
    for (int count = in.read(chunk); count >= 0; count = in.read(chunk)) {
      collected.write(chunk, 0, count);
    }

    return collected.toByteArray();
  }

  /** Recursive preorder worker; a visitor returning false prunes the subtree below the node. */
  private void visitInPreOrder(StateVisitor v, int node, BitSet visited) {
    if (visited.get(node)) {
      return;
    }
    visited.set(node);

    if (!v.accept(node)) {
      return;
    }

    for (int arc = getFirstArc(node); arc != 0; arc = getNextArc(arc)) {
      if (isArcTerminal(arc)) {
        // Terminal arcs have no end node to descend into.
        continue;
      }
      visitInPreOrder(v, getEndNode(arc), visited);
    }
  }

  /**
   * A factory for reading automata in any of the supported versions. The header is consumed first
   * and its version byte selects the implementation that reads the rest of the stream.
   *
   * @param stream The input stream to read automaton data from. The stream is not closed.
   * @return Returns an instantiated automaton. Never null.
   * @throws IOException If the input stream does not represent an automaton or is otherwise
   *     invalid.
   */
  public static FSA read(InputStream stream) throws IOException {
    final byte version = FSAHeader.read(stream).version;

    if (version == FSA5.VERSION) {
      return new FSA5(stream);
    }
    if (version == CFSA.VERSION) {
      return new CFSA(stream);
    }
    if (version == CFSA2.VERSION) {
      return new CFSA2(stream);
    }

    throw new IOException(
        String.format(Locale.ROOT, "Unsupported automaton version: 0x%02x", version & 0xFF));
  }

  /**
   * A factory for reading a specific FSA subclass, including proper casting.
   *
   * @param stream The input stream to read automaton data from. The stream is not closed.
   * @param clazz A subclass of {@link FSA} to cast the read automaton to.
   * @param <T> A subclass of {@link FSA} to cast the read automaton to.
   * @return Returns an instantiated automaton. Never null.
   * @throws IOException If the input stream does not represent an automaton, is otherwise invalid
   *     or the class of the automaton read from the input stream is not assignable to <code>clazz
   *     </code>.
   */
  public static <T extends FSA> T read(InputStream stream, Class<? extends T> clazz)
      throws IOException {
    final FSA automaton = read(stream);
    if (clazz.isInstance(automaton)) {
      return clazz.cast(automaton);
    }

    throw new IOException(
        String.format(
            Locale.ROOT,
            "Expected FSA type %s, but read an incompatible type %s.",
            clazz.getName(),
            automaton.getClass().getName()));
  }
}


================================================
FILE: morfologik-fsa/src/main/java/morfologik/fsa/FSA5.java
================================================
package morfologik.fsa;

import static morfologik.fsa.FSAFlags.*;

import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.EnumSet;
import java.util.Set;

/**
 * Reader for automata stored in the FSA binary format, version 5.
 *
 * <p>Version 5 indicates the dictionary was built with these flags: {@link FSAFlags#FLEXIBLE},
 * {@link FSAFlags#STOPBIT} and {@link FSAFlags#NEXTBIT}. The internal representation of the FSA
 * must therefore follow the description below (please note this format describes only a single
 * transition (arc), not the entire dictionary file).
 *
 * <pre>
 * ---- this node header present only if automaton was compiled with NUMBERS option.
 * Byte
 *        +-+-+-+-+-+-+-+-+\
 *      0 | | | | | | | | | \  LSB
 *        +-+-+-+-+-+-+-+-+  +
 *      1 | | | | | | | | |  |      number of strings recognized
 *        +-+-+-+-+-+-+-+-+  +----- by the automaton starting
 *        : : : : : : : : :  |      from this node.
 *        +-+-+-+-+-+-+-+-+  +
 *  ctl-1 | | | | | | | | | /  MSB
 *        +-+-+-+-+-+-+-+-+/
 *
 * ---- remaining part of the node
 *
 * Byte
 *       +-+-+-+-+-+-+-+-+\
 *     0 | | | | | | | | | +------ label
 *       +-+-+-+-+-+-+-+-+/
 *
 *                  +------------- node pointed to is next
 *                  | +----------- the last arc of the node
 *                  | | +--------- the arc is final
 *                  | | |
 *             +-----------+
 *             |    | | |  |
 *         ___+___  | | |  |
 *        /       \ | | |  |
 *       MSB           LSB |
 *        7 6 5 4 3 2 1 0  |
 *       +-+-+-+-+-+-+-+-+ |
 *     1 | | | | | | | | | \ \
 *       +-+-+-+-+-+-+-+-+  \ \  LSB
 *       +-+-+-+-+-+-+-+-+     +
 *     2 | | | | | | | | |     |
 *       +-+-+-+-+-+-+-+-+     |
 *     3 | | | | | | | | |     +----- target node address (in bytes)
 *       +-+-+-+-+-+-+-+-+     |      (not present except for the byte
 *       : : : : : : : : :     |       with flags if the node pointed to
 *       +-+-+-+-+-+-+-+-+     +       is next)
 *   gtl | | | | | | | | |    /  MSB
 *       +-+-+-+-+-+-+-+-+   /
 * gtl+1                           (gtl = gotoLength)
 * </pre>
 */
public final class FSA5 extends FSA {
  /** Default filler byte. */
  public static final byte DEFAULT_FILLER = '_';

  /** Default annotation byte. */
  public static final byte DEFAULT_ANNOTATION = '+';

  /** Automaton version as in the file header. */
  public static final byte VERSION = 5;

  /**
   * Bit indicating that an arc corresponds to the last character of a sequence available when
   * building the automaton.
   */
  public static final int BIT_FINAL_ARC = 1 << 0;

  /**
   * Bit indicating that an arc is the last one of the node's list and the following one belongs to
   * another node.
   */
  public static final int BIT_LAST_ARC = 1 << 1;

  /**
   * Bit indicating that the target node of this arc follows it in the compressed automaton
   * structure (no goto field).
   */
  public static final int BIT_TARGET_NEXT = 1 << 2;

  /**
   * An offset in the arc structure, where the address and flags field begins. In version 5 of FSA
   * automata, this value is constant (1, skip label).
   */
  public static final int ADDRESS_OFFSET = 1;

  /**
   * An array of bytes with the internal representation of the automaton. Please see the
   * documentation of this class for more information on how this structure is organized.
   */
  public final byte[] arcs;

  /**
   * The length of the node header structure (if the automaton was compiled with <code>NUMBERS
   * </code> option). Otherwise zero.
   */
  public final int nodeDataLength;

  /** Unmodifiable set of flags describing this automaton. */
  private final Set<FSAFlags> flags;

  /** Number of bytes each address takes in full, expanded form (goto length). */
  public final int gtl;

  /** Filler character. */
  public final byte filler;

  /** Annotation character. */
  public final byte annotation;

  /**
   * Reads a version 5 automaton from a stream positioned right after the common FSA header: the
   * filler byte, the annotation byte, one byte packing the node data length (high nibble) and the
   * goto length (low nibble), and then the arcs until the end of the stream.
   */
  FSA5(InputStream stream) throws IOException {
    final DataInputStream in = new DataInputStream(stream);

    this.filler = in.readByte();
    this.annotation = in.readByte();
    final byte hgtl = in.readByte();

    // A non-zero high nibble means every node carries a NUMBERS header of that many bytes.
    final Set<FSAFlags> detected = EnumSet.of(FLEXIBLE, STOPBIT, NEXTBIT);
    if ((hgtl & 0xf0) != 0) {
      detected.add(NUMBERS);
    }
    this.flags = Collections.unmodifiableSet(detected);

    this.nodeDataLength = (hgtl >>> 4) & 0x0f;
    this.gtl = hgtl & 0x0f;

    this.arcs = readRemaining(in);
  }

  /** Returns the start node of this automaton. */
  @Override
  public int getRootNode() {
    // The array begins with a dummy node marking the terminating state; step over its arc.
    final int dummyArc = getFirstArc(0);
    final int epsilonNode = skipArc(dummyArc);

    // The epsilon node has a single arc and that arc leads to the root.
    final int epsilonArc = getFirstArc(epsilonNode);
    return getDestinationNodeOffset(epsilonArc);
  }

  /**
   * {@inheritDoc}
   *
   * <p>The first arc is located right after the node's data header.
   */
  @Override
  public final int getFirstArc(int node) {
    return node + nodeDataLength;
  }

  /** {@inheritDoc} */
  @Override
  public final int getNextArc(int arc) {
    return isArcLast(arc) ? 0 : skipArc(arc);
  }

  /**
   * {@inheritDoc}
   *
   * <p>Implemented as a linear scan of the node's arcs.
   */
  @Override
  public int getArc(int node, byte label) {
    int arc = getFirstArc(node);
    while (arc != 0 && getArcLabel(arc) != label) {
      arc = getNextArc(arc);
    }

    // Either the matching arc, or 0 when the arc list ran out.
    return arc;
  }

  /** {@inheritDoc} */
  @Override
  public int getEndNode(int arc) {
    final int target = getDestinationNodeOffset(arc);
    assert target != 0 : "No target node for terminal arcs.";
    return target;
  }

  /** {@inheritDoc} */
  @Override
  public byte getArcLabel(int arc) {
    // The label is the arc's first byte.
    return arcs[arc];
  }

  /** {@inheritDoc} */
  @Override
  public boolean isArcFinal(int arc) {
    return isFlagSet(arc, BIT_FINAL_ARC);
  }

  /**
   * {@inheritDoc}
   *
   * <p>An arc is terminal when its destination offset is zero.
   */
  @Override
  public boolean isArcTerminal(int arc) {
    return getDestinationNodeOffset(arc) == 0;
  }

  /**
   * Returns the number encoded at the given node. The number equals the count of the set of
   * suffixes reachable from <code>node</code> (called its right language).
   *
   * <p>The presence of {@link FSAFlags#NUMBERS} is only checked with an assertion. With assertions
   * disabled and no node data present ({@link #nodeDataLength} of zero) no bytes are decoded and
   * zero is returned.
   */
  @Override
  public int getRightLanguageCount(int node) {
    assert getFlags().contains(FSAFlags.NUMBERS) : "This FSA was not compiled with NUMBERS.";
    return decodeFromBytes(arcs, node, nodeDataLength);
  }

  /**
   * {@inheritDoc}
   *
   * <p>For this automaton version, an additional {@link FSAFlags#NUMBERS} flag may be set to
   * indicate the automaton contains extra fields for each node.
   */
  @Override
  public Set<FSAFlags> getFlags() {
    return flags;
  }

  /**
   * Returns <code>true</code> if this arc has the <code>LAST</code> bit set.
   *
   * @see #BIT_LAST_ARC
   * @param arc The node's arc identifier.
   * @return Returns true if the argument is the last arc of a node.
   */
  public boolean isArcLast(int arc) {
    return isFlagSet(arc, BIT_LAST_ARC);
  }

  /**
   * @see #BIT_TARGET_NEXT
   * @param arc The node's arc identifier.
   * @return Returns true if {@link #BIT_TARGET_NEXT} is set for this arc.
   */
  public boolean isNextSet(int arc) {
    return isFlagSet(arc, BIT_TARGET_NEXT);
  }

  /** Tests a single flag bit in the flags/address byte that follows the arc's label. */
  private boolean isFlagSet(int arc, int bit) {
    return (arcs[arc + ADDRESS_OFFSET] & bit) != 0;
  }

  /**
   * Returns an n-byte integer encoded in byte-packed representation (least significant byte
   * first). Zero is returned when <code>n</code> is zero.
   */
  static final int decodeFromBytes(final byte[] arcs, final int start, final int n) {
    int value = 0;
    // Fold from the most significant (last) byte down to the first.
    for (int i = n - 1; i >= 0; i--) {
      value = (value << 8) | (arcs[start + i] & 0xff);
    }
    return value;
  }

  /** Returns the address of the node pointed to by this arc. */
  final int getDestinationNodeOffset(int arc) {
    if (!isNextSet(arc)) {
      // The goto field shares its lowest three bits with the arc flags; shift them away.
      return decodeFromBytes(arcs, arc + ADDRESS_OFFSET, gtl) >>> 3;
    }

    // The destination node is stored immediately after this arc.
    return skipArc(arc);
  }

  /** Returns the offset of the first byte following the arc stored at <code>offset</code>. */
  private int skipArc(int offset) {
    // label + flags only (target is next), or label + full flags/address field.
    final int arcLength = isNextSet(offset) ? 1 + 1 : 1 + gtl;
    return offset + arcLength;
  }
}


================================================
FILE: morfologik-fsa/src/main/java/morfologik/fsa/FSAFlags.java
================================================
package morfologik.fsa;

import java.util.Set;

/**
 * Flags describing features of an FSA automaton. Where applicable, flags follow Daciuk's <code>fsa
 * </code> package.
 */
public enum FSAFlags {
  /** Daciuk: flexible FSA encoding. */
  FLEXIBLE(1 << 0),

  /** Daciuk: stop bit in use. */
  STOPBIT(1 << 1),

  /** Daciuk: next bit in use. */
  NEXTBIT(1 << 2),

  /** Daciuk: tails compression. */
  TAILS(1 << 3),

  /*
   * The flags below are outside of byte range (they never occur in Daciuk's FSA).
   */

  /**
   * The FSA contains right-language count numbers on states.
   *
   * @see FSA#getRightLanguageCount(int)
   */
  NUMBERS(1 << 8),

  /**
   * The FSA supports legacy built-in separator and filler characters (Daciuk's FSA package
   * compatibility).
   */
  SEPARATORS(1 << 9);

  /** Bit mask for the corresponding flag. */
  public final int bits;

  /**
   * @param bits The bit mask assigned to the flag.
   */
  FSAFlags(int bits) {
    this.bits = bits;
  }

  /**
   * @param flags The bitset with flags.
   * @return Returns <code>true</code> iff this flag is set in <code>flags</code>.
   */
  public boolean isSet(int flags) {
    final int masked = flags & this.bits;
    return masked != 0;
  }

  /**
   * @param flags A set of flags to encode.
   * @return Returns the set of flags encoded as packed <code>short</code> (the bitwise OR of the
   *     masks of all flags in the set).
   */
  public static short asShort(Set<FSAFlags> flags) {
    int packed = 0;
    for (FSAFlags flag : flags) {
      packed |= flag.bits;
    }
    return (short) packed;
  }
}


================================================
FILE: morfologik-fsa/src/main/java/morfologik/fsa/FSAHeader.java
================================================
package morfologik.fsa;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

/**
 * Standard FSA file header, as described in <code>fsa</code> package documentation: four magic
 * bytes followed by a single version byte.
 */
public final class FSAHeader {
  /** FSA magic (4 bytes). */
  static final int FSA_MAGIC = ('\\' << 24) | ('f' << 16) | ('s' << 8) | ('a');

  /** Maximum length of the header block. */
  static final int MAX_HEADER_LENGTH = 4 + 8;

  /** FSA version number. */
  final byte version;

  FSAHeader(byte version) {
    this.version = version;
  }

  /**
   * Read FSA header and version from a stream, consuming read bytes. Reading stops at the first
   * magic byte that does not match.
   *
   * @param in The input stream to read data from.
   * @return Returns a valid {@link FSAHeader} with version information.
   * @throws IOException If the stream ends prematurely or if it contains invalid data.
   */
  public static FSAHeader read(InputStream in) throws IOException {
    // Compare the magic one byte at a time, most significant byte first.
    for (int shift = 24; shift >= 0; shift -= 8) {
      final int expected = (FSA_MAGIC >>> shift) & 0xff;
      if (in.read() != expected) {
        throw new IOException("Invalid file header, probably not an FSA.");
      }
    }

    final int versionByte = in.read();
    if (versionByte == -1) {
      throw new IOException("Truncated file, no version number.");
    }

    return new FSAHeader((byte) versionByte);
  }

  /**
   * Writes FSA magic bytes and version information.
   *
   * @param os The stream to write to.
   * @param version Automaton version.
   * @throws IOException Rethrown if writing fails.
   */
  public static void write(OutputStream os, byte version) throws IOException {
    // OutputStream.write(int) emits only the low eight bits of its argument.
    for (int shift = 24; shift >= 0; shift -= 8) {
      os.write(FSA_MAGIC >> shift);
    }
    os.write(version);
  }
}


================================================
FILE: morfologik-fsa/src/main/java/morfologik/fsa/FSATraversal.java
================================================
package morfologik.fsa;

import static morfologik.fsa.MatchResult.*;

/** Common matching and scanning operations that work on any {@link FSA} implementation. */
public final class FSATraversal {
  /** Target automaton. */
  private final FSA fsa;

  /**
   * Traversals of the given FSA.
   *
   * @param fsa The target automaton for traversals.
   */
  public FSATraversal(FSA fsa) {
    this.fsa = fsa;
  }

  /**
   * Calculate perfect hash for a given input sequence of bytes. The perfect hash requires that
   * {@link FSA} is built with {@link FSAFlags#NUMBERS} and corresponds to the sequential order of
   * input sequences used at automaton construction time.
   *
   * <p>Both preconditions (the NUMBERS flag and a non-empty sequence) are verified with assertions
   * only.
   *
   * @param sequence The byte sequence to calculate perfect hash for.
   * @param start Start index in the sequence array.
   * @param length Length of the byte sequence, must be at least 1.
   * @param node The node to start traversal from, typically the {@linkplain FSA#getRootNode() root
   *     node}.
   * @return Returns a unique integer assigned to the input sequence in the automaton (reflecting
   *     the number of that sequence in the input used to build the automaton). Returns a negative
   *     integer if the input sequence was not part of the input from which the automaton was
   *     created. The type of mismatch is a constant defined in {@link MatchResult}.
   */
  public int perfectHash(byte[] sequence, int start, int length, int node) {
    assert fsa.getFlags().contains(FSAFlags.NUMBERS) : "FSA not built with NUMBERS option.";
    assert length > 0 : "Must be a non-empty sequence.";

    final int end = start + length - 1;
    int hash = 0;
    int seqIndex = start;
    byte label = sequence[seqIndex];

    int arc = fsa.getFirstArc(node);
    while (arc != 0) {
      if (fsa.getArcLabel(arc) != label) {
        // Skipping this arc: account for every sequence that passes through it.
        if (fsa.isArcFinal(arc)) {
          hash++;
        }
        if (!fsa.isArcTerminal(arc)) {
          hash += fsa.getRightLanguageCount(fsa.getEndNode(arc));
        }

        arc = fsa.getNextArc(arc);
        continue;
      }

      // The arc's label matches the current input byte.
      final boolean atLastByte = (seqIndex == end);

      if (fsa.isArcFinal(arc)) {
        if (atLastByte) {
          return hash;
        }

        // A shorter stored sequence ends here and is ordered before the input.
        hash++;
      }

      if (fsa.isArcTerminal(arc)) {
        /* The automaton contains a prefix of the input sequence. */
        return AUTOMATON_HAS_PREFIX;
      }

      if (atLastByte) {
        // The sequence is a prefix of one of the sequences stored in the automaton.
        return SEQUENCE_IS_A_PREFIX;
      }

      // Follow the arc and continue with the next input byte at the target node's first arc.
      arc = fsa.getFirstArc(fsa.getEndNode(arc));
      seqIndex++;
      label = sequence[seqIndex];
    }

    // The current node's arcs ran out without matching the current byte. If nothing was consumed
    // at all, a perfect hash does not exist.
    return seqIndex > start ? AUTOMATON_HAS_PREFIX : NO_MATCH;
  }

  /**
   * Calculates the perfect hash of the entire <code>sequence</code>, starting at the automaton's
   * root node.
   *
   * @param sequence The byte sequence to calculate perfect hash for.
   * @see #perfectHash(byte[], int, int, int)
   * @return Returns a unique integer assigned to the input sequence in the automaton (reflecting
   *     the number of that sequence in the input used to build the automaton). Returns a negative
   *     integer if the input sequence was not part of the input from which the automaton was
   *     created. The type of mismatch is a constant defined in {@link MatchResult}.
   */
  public int perfectHash(byte[] sequence) {
    final int root = fsa.getRootNode();
    return perfectHash(sequence, 0, sequence.length, root);
  }

  /**
   * Same as {@link #match(byte[], int, int, int)}, but allows passing a reusable {@link
   * MatchResult} object so that no intermediate garbage is produced.
   *
   * @param reuse The {@link MatchResult} to reuse.
   * @param sequence Input sequence to look for in the automaton.
   * @param start Start index in the sequence array.
   * @param length Length of the byte sequence, must be at least 1.
   * @param node The node to start traversal from, typically the {@linkplain FSA#getRootNode() root
   *     node}.
   * @return The same object as <code>reuse</code>, but with updated match {@link MatchResult#kind}
   *     and other relevant fields.
   */
  public MatchResult match(MatchResult reuse, byte[] sequence, int start, int length, int node) {
    if (node == 0) {
      reuse.reset(NO_MATCH, start, node);
      return reuse;
    }

    final FSA automaton = this.fsa;
    final int end = start + length;

    int current = node;
    int i = start;
    while (i < end) {
      final int arc = automaton.getArc(current, sequence[i]);

      if (arc == 0) {
        // No transition for this byte: a partial path exists only if something was consumed.
        reuse.reset(i > start ? AUTOMATON_HAS_PREFIX : NO_MATCH, i, current);
        return reuse;
      }

      if (i + 1 == end && automaton.isArcFinal(arc)) {
        /* The automaton has an exact match of the input sequence. */
        reuse.reset(EXACT_MATCH, i, current);
        return reuse;
      }

      if (automaton.isArcTerminal(arc)) {
        /* The automaton contains a prefix of the input sequence. */
        reuse.reset(AUTOMATON_HAS_PREFIX, i + 1, current);
        return reuse;
      }

      // Make a transition along the arc.
      current = automaton.getEndNode(arc);
      i++;
    }

    /* The sequence is a prefix of at least one sequence in the automaton. */
    reuse.reset(SEQUENCE_IS_A_PREFIX, 0, current);
    return reuse;
  }

  /**
   * Finds a matching path in the dictionary for a given sequence of labels from <code>sequence
   * </code> and starting at node <code>node</code>. A new {@link MatchResult} is allocated for
   * each call.
   *
   * @param sequence Input sequence to look for in the automaton.
   * @param start Start index in the sequence array.
   * @param length Length of the byte sequence, must be at least 1.
   * @param node The node to start traversal from, typically the {@linkplain FSA#getRootNode() root
   *     node}.
   * @see #match(byte [], int)
   * @return {@link MatchResult} with updated match {@link MatchResult#kind}.
   */
  public MatchResult match(byte[] sequence, int start, int length, int node) {
    final MatchResult result = new MatchResult();
    return match(result, sequence, start, length, node);
  }

  /**
   * Matches the entire <code>sequence</code>, starting at the given node.
   *
   * @param sequence Input sequence to look for in the automaton.
   * @param node The node to start traversal from, typically the {@linkplain FSA#getRootNode() root
   *     node}.
   * @see #match(byte [], int)
   * @return {@link MatchResult} with updated match {@link MatchResult#kind}.
   */
  public MatchResult match(byte[] sequence, int node) {
    return match(sequence, 0, sequence.length, node);
  }

  /**
   * Matches the entire <code>sequence</code>, starting at the automaton's root node.
   *
   * @param sequence Input sequence to look for in the automaton.
   * @see #match(byte [], int)
   * @return {@link MatchResult} with updated match {@link MatchResult#kind}.
   */
  public MatchResult match(byte[] sequence) {
    final int root = fsa.getRootNode();
    return match(sequence, root);
  }
}


================================================
FILE: morfologik-fsa/src/main/java/morfologik/fsa/MatchResult.java
================================================
package morfologik.fsa;

/**
 * The outcome of a matching operation performed by {@link FSATraversal}: the kind of match plus
 * the sequence index and automaton node associated with it.
 *
 * @see FSATraversal
 */
public final class MatchResult {
  /** The automaton has exactly one match for the input sequence. */
  public static final int EXACT_MATCH = 0;

  /**
   * The automaton has no match for the input sequence and no sequence in the automaton is a prefix
   * of the input.
   *
   * <p>Note that to check for a general "input does not exist in the automaton" you have to check
   * for both {@link #NO_MATCH} and {@link #AUTOMATON_HAS_PREFIX}.
   */
  public static final int NO_MATCH = -1;

  /**
   * The automaton contains a prefix of the input sequence (but the full sequence does not exist).
   * This translates to: one of the input sequences used to build the automaton is a prefix of the
   * input sequence, but the input sequence contains a non-existent suffix.
   *
   * <p>{@link MatchResult#index} will contain an index of the first character of the input sequence
   * not present in the dictionary.
   */
  public static final int AUTOMATON_HAS_PREFIX = -3;

  /**
   * The sequence is a prefix of at least one sequence in the automaton. {@link MatchResult#node}
   * returns the node from which all sequences with the given prefix start in the automaton.
   */
  public static final int SEQUENCE_IS_A_PREFIX = -4;

  /**
   * One of the match types defined in this class.
   *
   * @see #NO_MATCH
   * @see #EXACT_MATCH
   * @see #AUTOMATON_HAS_PREFIX
   * @see #SEQUENCE_IS_A_PREFIX
   */
  public int kind;

  /** Input sequence's index, interpretation depends on {@link #kind}. */
  public int index;

  /** Automaton node, interpretation depends on the {@link #kind}. */
  public int node;

  /** Creates a result with all three fields given explicitly. */
  MatchResult(int kind, int index, int node) {
    this.kind = kind;
    this.index = index;
    this.node = node;
  }

  /** Creates a result of the given kind with index and node set to zero. */
  MatchResult(int kind) {
    this(kind, 0, 0);
  }

  /** Creates a {@link #NO_MATCH} result with index and node set to zero. */
  public MatchResult() {
    this(NO_MATCH, 0, 0);
  }

  /** Overwrites all three fields so that the instance can be reused. */
  final void reset(int kind, int index, int node) {
    this.node = node;
    this.index = index;
    this.kind = kind;
  }
}


================================================
FILE: morfologik-fsa/src/main/java/morfologik/fsa/StateVisitor.java
================================================
package morfologik.fsa;

/**
 * A callback receiving automaton state identifiers, one state per invocation.
 *
 * @see FSA#visitInPostOrder(StateVisitor)
 * @see FSA#visitInPreOrder(StateVisitor)
 */
public interface StateVisitor {
  /**
   * Invoked for a single state.
   *
   * @param state Identifier of the visited state.
   * @return NOTE(review): presumably <code>true</code> to continue the traversal and <code>false
   *     </code> to stop it; confirm against the visiting methods in {@link FSA}.
   */
  boolean accept(int state);
}


================================================
FILE: morfologik-fsa-builders/pom.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">

  <modelVersion>4.0.0</modelVersion>

  <parent>
    <groupId>org.carrot2</groupId>
    <artifactId>morfologik-parent</artifactId>
    <version>2.2.0-SNAPSHOT</version>
    <relativePath>../pom.xml</relativePath>
  </parent>

  <artifactId>morfologik-fsa-builders</artifactId>
  <packaging>bundle</packaging>

  <name>Morfologik FSA (Builder)</name>
  <description>Morfologik Finite State Automata Builder</description>

  <properties>
    <forbiddenapis.signaturefile>../etc/forbidden-apis/signatures.txt</forbiddenapis.signaturefile>
    <project.moduleId>org.carrot2.morfologik.fsa_builders</project.moduleId>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.carrot2</groupId>
      <artifactId>morfologik-fsa</artifactId>
      <version>${project.version}</version>
    </dependency>
    
    <dependency>
      <groupId>com.carrotsearch</groupId>
      <artifactId>hppc</artifactId>
    </dependency>    
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.felix</groupId>
        <artifactId>maven-bundle-plugin</artifactId>
        <configuration>
          <instructions>
            <Export-Package>morfologik.fsa.builders</Export-Package>
            <Import-Package>*</Import-Package>
          </instructions>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>


================================================
FILE: morfologik-fsa-builders/src/main/java/morfologik/fsa/builders/CFSA2Serializer.java
================================================
package morfologik.fsa.builders;

import static morfologik.fsa.CFSA2.*;
import static morfologik.fsa.FSAFlags.*;

import com.carrotsearch.hppc.BoundedProportionalArraySizingStrategy;
import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.IntStack;
import com.carrotsearch.hppc.cursors.IntCursor;
import com.carrotsearch.hppc.cursors.IntIntCursor;
import java.io.IOException;
import java.io.OutputStream;
import java.util.BitSet;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.Locale;
import java.util.PriorityQueue;
import java.util.Set;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
import morfologik.fsa.CFSA2;
import morfologik.fsa.FSA;
import morfologik.fsa.FSAFlags;
import morfologik.fsa.FSAHeader;
import morfologik.fsa.StateVisitor;
import morfologik.fsa.builders.FSAUtils.IntIntHolder;

/**
 * Serializes in-memory {@link FSA} graphs to {@link CFSA2}.
 *
 * <p>It is possible to serialize the automaton with numbers required for perfect hashing. See
 * {@link #withNumbers()} method.
 *
 * @see CFSA2
 */
public final class CFSA2Serializer implements FSASerializer {
  private final Logger logger = Logger.getLogger(getClass().getName());

  /** Supported flags. */
  private static final EnumSet<FSAFlags> flags = EnumSet.of(NUMBERS, FLEXIBLE, STOPBIT, NEXTBIT);

  /** No-state id. */
  private static final int NO_STATE = -1;

  /**
   * <code>true</code> if we should serialize with numbers.
   *
   * @see #withNumbers()
   */
  private boolean withNumbers;

  /** A hash map of [state, offset] pairs. */
  private IntIntHashMap offsets = new IntIntHashMap();

  /** A hash map of [state, right-language-count] pairs. */
  private IntIntHashMap numbers = new IntIntHashMap();

  /** Scratch array for serializing vints. */
  private final byte[] scratch = new byte[5];

  /** The most frequent labels for integrating with the flags field. */
  private byte[] labelsIndex;

  /**
   * Inverted index of labels to be integrated with flags field. A label at
   * index <code>i</code> has the index or zero (no integration).
   */
  private int[] labelsInvIndex;

  /**
   * Requests that each node is serialized together with the number of right-language sequences
   * reachable from it. This is required to implement perfect hashing. The numbering also preserves
   * the order of input sequences.
   *
   * @return Returns the same object for easier call chaining.
   */
  public CFSA2Serializer withNumbers() {
    this.withNumbers = true;
    return this;
  }

  /**
   * Serializes any {@link FSA} to {@link CFSA2} stream.
   *
   * <p>The output consists of the format header, two bytes of flags (high byte first), the length
   * of the labels index followed by the index itself, and finally the automaton's nodes.
   *
   * @param fsa The automaton to serialize.
   * @param os The stream to write to.
   * @see #withNumbers()
   * @return Returns <code>os</code> for chaining.
   * @throws IOException If writing to <code>os</code> fails.
   */
  @Override
  public <T extends OutputStream> T serialize(final FSA fsa, T os) throws IOException {
    // The most frequent labels get folded into the flags byte of an arc.
    computeLabelsIndex(fsa);

    // Per-state right-language counts are only needed when numbers are emitted.
    if (withNumbers) {
      this.numbers = FSAUtils.rightLanguageForAllStates(fsa);
    }

    // Decide on the order of states in the output; this also settles their offsets.
    final IntArrayList stateOrder = linearize(fsa);

    // Header: magic/version first, then the flags as a big-endian short.
    FSAHeader.write(os, CFSA2.VERSION);

    final EnumSet<FSAFlags> headerFlags = EnumSet.of(FLEXIBLE, STOPBIT, NEXTBIT);
    if (withNumbers) {
      headerFlags.add(NUMBERS);
    }

    final short encodedFlags = FSAFlags.asShort(headerFlags);
    os.write((encodedFlags >> 8) & 0xFF);
    os.write(encodedFlags & 0xFF);

    // Labels index: its length, then the labels themselves.
    os.write(labelsIndex.length);
    os.write(labelsIndex);

    // The automaton itself. Offsets must already be stable at this point.
    final int changedSize = emitNodes(fsa, os, stateOrder);
    assert changedSize == 0 : "Size changed in the final pass?";

    return os;
  }

  /**
   * Compute a set of labels to be integrated with the flags field.
   *
   * <p>Counts how often each label (as an unsigned byte) occurs on the automaton's arcs and stores
   * the most frequent ones in {@link #labelsIndex}, with the reverse mapping in {@link
   * #labelsInvIndex}. Slot zero of the index is never assigned to a label.
   */
  private void computeLabelsIndex(final FSA fsa) {
    // Histogram of arc labels over all states.
    final int[] histogram = new int[256];

    fsa.visitAllStates(
        new StateVisitor() {
          public boolean accept(int state) {
            int arc = fsa.getFirstArc(state);
            while (arc != 0) {
              histogram[fsa.getArcLabel(arc) & 0xff]++;
              arc = fsa.getNextArc(arc);
            }
            return true;
          }
        });

    // Most frequent labels come first; ties are resolved in favor of the smaller label value.
    final TreeSet<IntIntHolder> byFrequency =
        new TreeSet<IntIntHolder>(
            new Comparator<IntIntHolder>() {
              public int compare(IntIntHolder left, IntIntHolder right) {
                if (left.b != right.b) {
                  return right.b - left.b;
                }
                return left.a - right.a;
              }
            });

    for (int label = 0; label < histogram.length; label++) {
      final int count = histogram[label];
      if (count > 0) {
        byFrequency.add(new IntIntHolder(label, count));
      }
    }

    labelsIndex = new byte[1 + Math.min(byFrequency.size(), CFSA2.LABEL_INDEX_SIZE)];
    labelsInvIndex = new int[256];

    // Fill the index from its highest slot downwards, taking the most frequent label each time.
    int slot = labelsIndex.length - 1;
    while (slot > 0 && !byFrequency.isEmpty()) {
      final IntIntHolder mostFrequent = byFrequency.pollFirst();
      labelsInvIndex[mostFrequent.a] = slot;
      labelsIndex[slot] = (byte) mostFrequent.a;
      slot--;
    }
  }

  /**
   * Return supported flags.
   *
   * @return A read-only view of the flags this serializer supports. The view wraps a set shared by
   *     all instances, which is why modifications through it are rejected.
   */
  @Override
  public Set<FSAFlags> getFlags() {
    // The backing EnumSet is a static shared by every serializer; never hand it out writable.
    return java.util.Collections.unmodifiableSet(flags);
  }

  /**
   * Linearization of states.
   *
   * <p>Determines the order in which states are serialized. States with many in-links are
   * candidates for being placed at the front of the output so that v-coded addresses pointing at
   * them are short. A handful of candidate counts (in steps of 25, up to 150) is probed; each probe
   * is compared against the size of the initial layout that moves no states at all.
   *
   * @return The final order of states. As a side effect, {@link #offsets} holds the offsets
   *     computed for that order.
   */
  private IntArrayList linearize(final FSA fsa) throws IOException {
    // In-link counts decide which states are worth moving to the front.
    final IntIntHashMap inlinkCount = computeInlinkCount(fsa);

    // The ordered list of states for serialization, reused by every probe below.
    final IntArrayList linearized =
        new IntArrayList(0, new BoundedProportionalArraySizingStrategy(1000, 10000, 1.5f));

    // Candidates for fixed positions at the front, most in-links first.
    final int[] candidates = computeFirstStates(inlinkCount, Integer.MAX_VALUE, 2);

    // Baseline: the output size without any node rearrangements.
    final int baselineSize =
        linearizeAndCalculateOffsets(fsa, new IntArrayList(), linearized, offsets);

    // A list view over the candidates array; its length is adjusted for each probe.
    final IntArrayList front = new IntArrayList();
    front.buffer = candidates;
    front.elementsCount = candidates.length;

    // The result isn't monotonic in the number of moved states, so no binary search here.
    log(Level.FINE, "Compacting, initial output size: %,d", baselineSize);
    final int lastCut = Math.min(150, candidates.length);
    int bestCut = 0;
    int cut = Math.min(25, candidates.length);
    while (cut <= lastCut) {
      front.elementsCount = cut;
      final int probedSize = linearizeAndCalculateOffsets(fsa, front, linearized, offsets);
      log(Level.FINE, "Moved %,d states, output size: %,d", front.size(), probedSize);
      if (probedSize >= baselineSize) {
        break;
      }
      bestCut = cut;
      cut += 25;
    }

    // Redo the linearization with the selected number of moved states.
    front.elementsCount = bestCut;
    final int finalSize = linearizeAndCalculateOffsets(fsa, front, linearized, offsets);
    log(Level.FINE, "%,d states moved, final size: %,d", front.size(), finalSize);
    return linearized;
  }

  /**
   * Logs a message formatted with {@link String#format(Locale, String, Object...)} using {@link
   * Locale#ROOT}.
   *
   * @param level The level to log at.
   * @param msg Format string.
   * @param args Format arguments.
   */
  private void log(Level level, String msg, Object... args) {
    // Skip the formatting work entirely when nobody listens at this level.
    if (logger.isLoggable(level)) {
      logger.log(level, String.format(Locale.ROOT, msg, args));
    }
  }

  /**
   * Linearize all states, putting <code>states</code> in front of the automaton and calculating
   * stable state offsets.
   *
   * <p>Note that {@link #emitNodes} works on the {@link #offsets} field; all callers in this class
   * pass that very field as the <code>offsets</code> argument.
   *
   * @param fsa The automaton being serialized.
   * @param states States to place first, in the given order.
   * @param linearized Output list; cleared and then filled with the resulting order.
   * @param offsets Map of state offsets; every linearized state is reset in it before the
   *     offsets are recomputed.
   * @return The size reported by the last pass in which any offset still changed, or zero if no
   *     pass changed anything.
   */
  private int linearizeAndCalculateOffsets(
      FSA fsa, IntArrayList states, IntArrayList linearized, IntIntHashMap offsets)
      throws IOException {
    final BitSet seen = new BitSet();
    final IntStack pending = new IntStack();
    linearized.clear();

    // The preferred states (most in-links) go first.
    final int preferredCount = states.size();
    for (int k = 0; k < preferredCount; k++) {
      linearizeState(fsa, pending, linearized, seen, states.get(k));
    }

    // Everything else follows, chained one after another in depth-first order.
    pending.push(fsa.getRootNode());
    while (!pending.isEmpty()) {
      final int node = pending.pop();
      if (!seen.get(node)) {
        linearizeState(fsa, pending, linearized, seen, node);
      }
    }

    // Start from the largest possible offsets and iterate until nothing changes anymore.
    for (IntCursor c : linearized) {
      offsets.put(c.value, Integer.MAX_VALUE);
    }

    int lastChangedSize = 0;
    for (int size = emitNodes(fsa, null, linearized);
        size > 0;
        size = emitNodes(fsa, null, linearized)) {
      lastChangedSize = size;
    }
    return lastChangedSize;
  }

  /**
   * Add a state to linearized list.
   *
   * <p>The state is marked as visited and the not-yet-visited targets of its non-terminal arcs are
   * pushed onto <code>nodes</code> for later processing.
   */
  private void linearizeState(
      final FSA fsa, IntStack nodes, IntArrayList linearized, BitSet visited, int node) {
    linearized.add(node);
    visited.set(node);

    int arc = fsa.getFirstArc(node);
    while (arc != 0) {
      if (!fsa.isArcTerminal(arc)) {
        final int target = fsa.getEndNode(arc);
        if (!visited.get(target)) {
          nodes.push(target);
        }
      }
      arc = fsa.getNextArc(arc);
    }
  }

  /**
   * Compute the set of states that should be linearized first to minimize other states goto length.
   *
   * @param inlinkCount Map of [state, in-link count] pairs.
   * @param maxStates Upper bound on the number of returned states.
   * @param minInlinkCount Only states with strictly more in-links than this are considered.
   * @return Selected states, ordered from the highest in-link count to the lowest.
   */
  private int[] computeFirstStates(IntIntHashMap inlinkCount, int maxStates, int minInlinkCount) {
    // Ascending by in-link count (field a), then by state (field b).
    final Comparator<IntIntHolder> ascending =
        new Comparator<FSAUtils.IntIntHolder>() {
          public int compare(IntIntHolder left, IntIntHolder right) {
            final int byCount = left.a - right.a;
            if (byCount != 0) {
              return byCount;
            }
            return left.b - right.b;
          }
        };

    // A min-heap: its head is the weakest candidate kept so far.
    final PriorityQueue<IntIntHolder> heap = new PriorityQueue<IntIntHolder>(1, ascending);
    final IntIntHolder probe = new IntIntHolder();
    for (IntIntCursor c : inlinkCount) {
      if (c.value <= minInlinkCount) {
        continue;
      }

      probe.a = c.value;
      probe.b = c.key;

      final boolean hasRoom = heap.size() < maxStates;
      if (hasRoom || ascending.compare(probe, heap.peek()) > 0) {
        heap.add(new IntIntHolder(c.value, c.key));
        if (heap.size() > maxStates) {
          heap.remove();
        }
      }
    }

    // The heap yields the smallest element first, so fill the result from its end.
    final int[] states = new int[heap.size()];
    int position = states.length;
    while (!heap.isEmpty()) {
      states[--position] = heap.remove().b;
    }

    return states;
  }

  /**
   * Compute in-link count for each state.
   *
   * <p>Every state reachable from the root is expanded once; each of its non-terminal arcs adds
   * one to the count of the arc's target state.
   *
   * @return A map of [state, in-link count] pairs.
   */
  private IntIntHashMap computeInlinkCount(final FSA fsa) {
    final IntIntHashMap counts = new IntIntHashMap();
    final BitSet seen = new BitSet();
    final IntStack pending = new IntStack();
    pending.push(fsa.getRootNode());

    while (!pending.isEmpty()) {
      final int node = pending.pop();
      if (seen.get(node)) {
        continue;
      }
      seen.set(node);

      int arc = fsa.getFirstArc(node);
      while (arc != 0) {
        if (!fsa.isArcTerminal(arc)) {
          final int target = fsa.getEndNode(arc);
          counts.putOrAdd(target, 1, 1);
          if (!seen.get(target)) {
            pending.push(target);
          }
        }
        arc = fsa.getNextArc(arc);
      }
    }

    return counts;
  }

  /**
   * Emits the epsilon state followed by all states in the given order.
   *
   * <p>With a <code>null</code> stream nothing is written; instead, the offset of every state is
   * recorded in {@link #offsets}. With a non-null stream the recorded offsets are only verified
   * (by assertion) against the actual positions.
   *
   * @param fsa The automaton being serialized.
   * @param os The output stream or <code>null</code> to only recompute offsets.
   * @param linearized States in serialization order.
   * @return The total number of emitted bytes if any state offset changed during this pass, zero
   *     otherwise (always zero when writing to a stream).
   */
  private int emitNodes(FSA fsa, OutputStream os, IntArrayList linearized) throws IOException {
    final boolean offsetsOnly = (os == null);

    // The epsilon state: node data plus a single arc leading to the root (if there is one).
    int offset = emitNodeData(os, 0);
    final int rootOffset = fsa.getRootNode() != 0 ? offsets.get(fsa.getRootNode()) : 0;
    offset += emitArc(os, BIT_LAST_ARC, (byte) '^', rootOffset);

    boolean changed = false;
    final int count = linearized.size();
    for (int i = 0; i < count; i++) {
      final int state = linearized.get(i);
      final int nextState = (i + 1 < count) ? linearized.get(i + 1) : NO_STATE;

      if (offsetsOnly) {
        if (offsets.get(state) != offset) {
          changed = true;
        }
        offsets.put(state, offset);
      } else {
        assert offsets.get(state) == offset : state + " " + offsets.get(state) + " " + offset;
      }

      offset += emitNodeData(os, withNumbers ? numbers.get(state) : 0);
      offset += emitNodeArcs(fsa, os, state, nextState);
    }

    return changed ? offset : 0;
  }

  /**
   * Emit all arcs of a single node.
   *
   * @param fsa The automaton being serialized.
   * @param os The output stream or <code>null</code> to only compute sizes.
   * @param state The state whose arcs are emitted.
   * @param nextState The state serialized right after <code>state</code>; arcs pointing at it are
   *     flagged with the "target next" bit instead of carrying an address.
   * @return The number of bytes taken by the arcs.
   */
  private int emitNodeArcs(FSA fsa, OutputStream os, final int state, final int nextState)
      throws IOException {
    int written = 0;

    int arc = fsa.getFirstArc(state);
    while (arc != 0) {
      // Terminal arcs have no target; both the target and its offset stay at zero.
      final boolean terminal = fsa.isArcTerminal(arc);
      final int target = terminal ? 0 : fsa.getEndNode(arc);
      int targetOffset = terminal ? 0 : offsets.get(target);

      int flags = fsa.isArcFinal(arc) ? BIT_FINAL_ARC : 0;

      final int following = fsa.getNextArc(arc);
      if (following == 0) {
        flags |= BIT_LAST_ARC;
      }

      // The target immediately follows this state: no explicit address needed.
      if (targetOffset != 0 && target == nextState) {
        flags |= BIT_TARGET_NEXT;
        targetOffset = 0;
      }

      written += emitArc(os, flags, fsa.getArcLabel(arc), targetOffset);
      arc = following;
    }

    return written;
  }

  /**
   * Emits a single arc.
   *
   * <p>If the label is present in the labels index, its index is merged into the flags byte;
   * otherwise the flags byte is followed by the label byte. Unless the "target next" bit is set,
   * the v-coded target offset follows.
   *
   * @param os The output stream or <code>null</code> to only compute the size.
   * @param flags Arc flags.
   * @param label Arc label.
   * @param targetOffset Offset of the target state.
   * @return The number of bytes the arc occupies.
   */
  private int emitArc(OutputStream os, int flags, byte label, int targetOffset) throws IOException {
    final boolean write = (os != null);
    final int labelIndex = labelsInvIndex[label & 0xff];

    final int headerLength;
    if (labelIndex > 0) {
      if (write) {
        os.write(flags | labelIndex);
      }
      headerLength = 1;
    } else {
      if (write) {
        os.write(flags);
        os.write(label);
      }
      headerLength = 2;
    }

    if ((flags & BIT_TARGET_NEXT) != 0) {
      return headerLength;
    }

    final int addressLength = writeVInt(scratch, 0, targetOffset);
    if (write) {
      os.write(scratch, 0, addressLength);
    }
    return headerLength + addressLength;
  }

  /**
   * Emits per-node data: the v-coded <code>number</code>, but only when serializing with numbers.
   *
   * @param os The output stream or <code>null</code> to only compute the size.
   * @param number The number to store with the node.
   * @return The number of bytes the node data occupies (zero when numbers are disabled).
   */
  private int emitNodeData(OutputStream os, int number) throws IOException {
    if (!withNumbers) {
      return 0;
    }

    final int length = writeVInt(scratch, 0, number);
    if (os != null) {
      os.write(scratch, 0, length);
    }
    return length;
  }

  /**
   * Not supported by this serializer.
   *
   * @throws UnsupportedOperationException Always.
   */
  @Override
  public CFSA2Serializer withFiller(byte filler) {
    final String reason = "CFSA2 does not support filler. Use .info file.";
    throw new UnsupportedOperationException(reason);
  }

  /**
   * Not supported by this serializer.
   *
   * @throws UnsupportedOperationException Always.
   */
  @Override
  public CFSA2Serializer withAnnotationSeparator(byte annotationSeparator) {
    final String reason = "CFSA2 does not support separator. Use .info file.";
    throw new UnsupportedOperationException(reason);
  }

  /**
   * Write a v-int to a byte array.
   *
   * <p>The value is stored in groups of seven bits, least significant group first. Every byte
   * except the last one has its highest bit set.
   *
   * @param array The destination array.
   * @param offset Index at which the first byte is stored.
   * @param value The value to encode, must not be negative.
   * @return The index right after the last byte written (not the number of bytes).
   */
  static int writeVInt(byte[] array, int offset, int value) {
    assert value >= 0 : "Can't v-code negative ints.";

    int position = offset;
    int remaining = value;
    for (; remaining > 0x7F; remaining >>= 7) {
      array[position++] = (byte) ((remaining & 0x7F) | 0x80);
    }
    array[position++] = (byte) remaining;

    return position;
  }
}


================================================
FILE: morfologik-fsa-builders/src/main/java/morfologik/fsa/builders/ConstantArcSizeFSA.java
================================================
package morfologik.fsa.builders;

import java.util.Collections;
import java.util.Set;
import morfologik.fsa.FSA;
import morfologik.fsa.FSAFlags;

/**
 * An FSA with constant-size arc representation produced directly by {@link FSABuilder}.
 *
 * <p>Each arc occupies {@link #ARC_SIZE} bytes of the backing array: a flags byte, a label byte
 * and a four-byte target address. Both nodes and arcs are identified by offsets into that array; a
 * node's identifier is the offset of its first arc.
 *
 * @see FSABuilder
 */
final class ConstantArcSizeFSA extends FSA {
  /** Size of the target address field (constant for the builder). */
  public static final int TARGET_ADDRESS_SIZE = 4;

  /** Size of the flags field (constant for the builder). */
  public static final int FLAGS_SIZE = 1;

  /** Size of the label field (constant for the builder). */
  public static final int LABEL_SIZE = 1;

  /** Size of a single arc structure. */
  public static final int ARC_SIZE = FLAGS_SIZE + LABEL_SIZE + TARGET_ADDRESS_SIZE;

  /** Offset of the flags field inside an arc. */
  public static final int FLAGS_OFFSET = 0;

  /** Offset of the label field inside an arc. */
  public static final int LABEL_OFFSET = FLAGS_SIZE;

  /** Offset of the address field inside an arc. */
  public static final int ADDRESS_OFFSET = LABEL_OFFSET + LABEL_SIZE;

  /** A dummy address of the terminal state. */
  static final int TERMINAL_STATE = 0;

  /** An arc flag indicating the target node of an arc corresponds to a final state. */
  public static final int BIT_ARC_FINAL = 1 << 1;

  /** An arc flag indicating the arc is last within its state. */
  public static final int BIT_ARC_LAST = 1 << 0;

  /**
   * An epsilon state. The first and only arc of this state points either to the root or to the
   * terminal state, indicating an empty automaton.
   */
  private final int epsilon;

  /** FSA data, serialized as a byte array. */
  private final byte[] data;

  /**
   * @param data FSA data. There must be no trailing bytes after the last state.
   * @param epsilon Address of the epsilon state; asserted to be zero.
   */
  ConstantArcSizeFSA(byte[] data, int epsilon) {
    assert epsilon == 0 : "Epsilon is not zero?";

    this.data = data;
    this.epsilon = epsilon;
  }

  /** Returns the target of the epsilon state's first arc. */
  @Override
  public int getRootNode() {
    final int epsilonArc = getFirstArc(epsilon);
    return getEndNode(epsilonArc);
  }

  /** A node is addressed by its first arc, so the node identifier is returned as is. */
  @Override
  public int getFirstArc(int node) {
    return node;
  }

  /** Scans the node's arcs in order; returns the first one carrying the label or zero if none. */
  @Override
  public int getArc(int node, byte label) {
    int arc = getFirstArc(node);
    while (arc != 0) {
      if (getArcLabel(arc) == label) {
        return arc;
      }
      arc = getNextArc(arc);
    }
    return 0;
  }

  /** Returns the arc stored right after the given one, or zero if the given arc is the last. */
  @Override
  public int getNextArc(int arc) {
    return isArcLast(arc) ? 0 : arc + ARC_SIZE;
  }

  @Override
  public byte getArcLabel(int arc) {
    return data[arc + LABEL_OFFSET];
  }

  /** Reads the four-byte target state address of an arc, most significant byte first. */
  private int getArcTarget(int arc) {
    final int at = arc + ADDRESS_OFFSET;
    final int b3 = data[at] << 24;
    final int b2 = (data[at + 1] & 0xff) << 16;
    final int b1 = (data[at + 2] & 0xff) << 8;
    final int b0 = data[at + 3] & 0xff;
    return b3 | b2 | b1 | b0;
  }

  @Override
  public boolean isArcFinal(int arc) {
    final int arcFlags = data[arc + FLAGS_OFFSET];
    return (arcFlags & BIT_ARC_FINAL) != 0;
  }

  /** An arc is terminal when its target address is zero. */
  @Override
  public boolean isArcTerminal(int arc) {
    return getArcTarget(arc) == 0;
  }

  private boolean isArcLast(int arc) {
    final int arcFlags = data[arc + FLAGS_OFFSET];
    return (arcFlags & BIT_ARC_LAST) != 0;
  }

  @Override
  public int getEndNode(int arc) {
    return getArcTarget(arc);
  }

  /** This in-memory representation declares no flags. */
  @Override
  public Set<FSAFlags> getFlags() {
    return Collections.emptySet();
  }
}


================================================
FILE: morfologik-fsa-builders/src/main/java/morfologik/fsa/builders/FSA5Serializer.java
================================================
package morfologik.fsa.builders;

import static morfologik.fsa.FSAFlags.*;

import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.IntStack;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.BitSet;
import java.util.EnumSet;
import java.util.Set;
import morfologik.fsa.FSA;
import morfologik.fsa.FSA5;
import morfologik.fsa.FSAFlags;
import morfologik.fsa.FSAHeader;

/**
 * Serializes in-memory {@link FSA} graphs to a binary format compatible with Jan Daciuk's <code>fsa
 * </code>'s package <code>FSA5</code> format.
 *
 * <p>It is possible to serialize the automaton with numbers required for perfect hashing. See
 * {@link #withNumbers()} method.
 *
 * @see FSA5
 * @see FSA#read(java.io.InputStream)
 */
public final class FSA5Serializer implements FSASerializer {
  /** Maximum number of bytes for a serialized arc. */
  private static final int MAX_ARC_SIZE = 1 + 5;

  /** Maximum number of bytes for per-node data. */
  private static final int MAX_NODE_DATA_SIZE = 16;

  /** Number of bytes for the arc's flags header (arc representation without the goto address). */
  private static final int SIZEOF_FLAGS = 1;

  /** Supported flags. */
  private static final EnumSet<FSAFlags> flags =
      EnumSet.of(NUMBERS, SEPARATORS, FLEXIBLE, STOPBIT, NEXTBIT);

  /**
   * @see FSA5#filler
   */
  public byte fillerByte = FSA5.DEFAULT_FILLER;

  /**
   * @see FSA5#annotation
   */
  public byte annotationByte = FSA5.DEFAULT_ANNOTATION;

  /**
   * <code>true</code> if we should serialize with numbers.
   *
   * @see #withNumbers()
   */
  private boolean withNumbers;

  /** A hash map of [state, offset] pairs. */
  private IntIntHashMap offsets = new IntIntHashMap();

  /** A hash map of [state, right-language-count] pairs. */
  private IntIntHashMap numbers = new IntIntHashMap();

  /**
   * Requests that each node is serialized together with the number of right-language sequences
   * reachable from it. This is required to implement perfect hashing. The numbering also preserves
   * the order of input sequences.
   *
   * @return Returns the same object for easier call chaining.
   */
  public FSA5Serializer withNumbers() {
    this.withNumbers = true;
    return this;
  }

  /**
   * {@inheritDoc}
   *
   * <p>The byte is stored in {@link #fillerByte} and written to the header on serialization.
   */
  @Override
  public FSA5Serializer withFiller(byte filler) {
    fillerByte = filler;
    return this;
  }

  /**
   * {@inheritDoc}
   *
   * <p>The byte is stored in {@link #annotationByte} and written to the header on serialization.
   */
  @Override
  public FSA5Serializer withAnnotationSeparator(byte annotationSeparator) {
    annotationByte = annotationSeparator;
    return this;
  }

  /**
   * Serialize the automaton to an output stream in <code>FSA5</code> format.
   *
   * <p>The smallest goto length (the number of bytes of an arc's flags-and-address field) for
   * which all target offsets fit is determined first. Then the header is written (format header,
   * filler byte, annotation byte and a byte combining the node data length with the goto length),
   * followed by the automaton itself.
   *
   * @param fsa The automaton to serialize.
   * @param os The stream to write to.
   * @see #withNumbers()
   * @return Returns <code>os</code> for chaining.
   * @throws IOException If writing to <code>os</code> fails.
   */
  @Override
  public <T extends OutputStream> T serialize(final FSA fsa, T os) throws IOException {
    // Settle the order of states in the output.
    final int[] stateOrder = linearize(fsa);

    // With numbers enabled, each node carries as many bytes as the root's number requires.
    int nodeDataLength = 0;
    if (withNumbers) {
      this.numbers = FSAUtils.rightLanguageForAllStates(fsa);
      for (int number = numbers.get(fsa.getRootNode()); number > 0; number >>>= 8) {
        nodeDataLength++;
      }
    }

    // Find the minimal goto length. The first dry run computes state offsets, the second one
    // verifies that no address overflows with those offsets; either failing means "try more".
    int gtl = 1;
    while (!(emitArcs(fsa, null, stateOrder, gtl, nodeDataLength)
        && emitArcs(fsa, null, stateOrder, gtl, nodeDataLength))) {
      gtl++;
    }

    // The header.
    FSAHeader.write(os, FSA5.VERSION);
    os.write(fillerByte);
    os.write(annotationByte);
    os.write((nodeDataLength << 4) | gtl);

    // The automaton.
    final boolean gtlUnchanged = emitArcs(fsa, os, stateOrder, gtl, nodeDataLength);
    assert gtlUnchanged : "gtl changed in the final pass.";

    return os;
  }

  /**
   * Return supported flags.
   *
   * @return A read-only view of the flags this serializer supports. The view wraps a set shared by
   *     all instances, which is why modifications through it are rejected.
   */
  @Override
  public Set<FSAFlags> getFlags() {
    // The backing EnumSet is a static shared by every serializer; never hand it out writable.
    return java.util.Collections.unmodifiableSet(flags);
  }

  /**
   * Linearization of states.
   *
   * <p>Collects all states reachable from the root, each exactly once, in the order in which a
   * stack-based traversal pops them.
   *
   * @return The states in serialization order.
   */
  private int[] linearize(final FSA fsa) {
    int[] order = new int[0];
    int count = 0;

    final BitSet seen = new BitSet();
    final IntStack pending = new IntStack();
    pending.push(fsa.getRootNode());

    while (!pending.isEmpty()) {
      final int node = pending.pop();
      if (seen.get(node)) {
        continue;
      }

      // Grow the buffer in fixed-size steps when it is full.
      if (count >= order.length) {
        order = Arrays.copyOf(order, order.length + 100000);
      }

      seen.set(node);
      order[count++] = node;

      int arc = fsa.getFirstArc(node);
      while (arc != 0) {
        if (!fsa.isArcTerminal(arc)) {
          final int target = fsa.getEndNode(arc);
          if (!seen.get(target)) {
            pending.push(target);
          }
        }
        arc = fsa.getNextArc(arc);
      }
    }

    // Trim the unused tail of the buffer.
    return Arrays.copyOf(order, count);
  }

  /**
   * Update arc offsets assuming the given goto length.
   *
   * <p>Emits a dummy terminal state, the epsilon state and then all states in the given order.
   * With a <code>null</code> stream nothing is written and the offset of each state is recorded in
   * {@link #offsets}; with a non-null stream the recorded offsets are verified by assertion.
   *
   * @param fsa The automaton being serialized.
   * @param os The output stream or <code>null</code> for a dry run.
   * @param linearized States in serialization order.
   * @param gtl Goto length to use for arcs.
   * @param nodeDataLength Number of bytes of per-node data.
   * @return <code>false</code> if some arc did not fit in the given goto length (the pass is
   *     aborted at that arc), <code>true</code> otherwise.
   */
  private boolean emitArcs(FSA fsa, OutputStream os, int[] linearized, int gtl, int nodeDataLength)
      throws IOException {
    final ByteBuffer bb = ByteBuffer.allocate(Math.max(MAX_NODE_DATA_SIZE, MAX_ARC_SIZE));
    final boolean dryRun = (os == null);

    // Dummy terminal state.
    int offset = emitNodeData(bb, os, nodeDataLength, 0);
    offset += emitArc(bb, os, gtl, 0, (byte) 0, 0);

    // Epsilon state; its arc points at the state that follows, unless there is no root.
    offset += emitNodeData(bb, os, nodeDataLength, 0);
    final int epsilonFlags =
        fsa.getRootNode() != 0 ? (FSA5.BIT_LAST_ARC | FSA5.BIT_TARGET_NEXT) : FSA5.BIT_LAST_ARC;
    offset += emitArc(bb, os, gtl, epsilonFlags, (byte) '^', 0);

    final int stateCount = linearized.length;
    for (int j = 0; j < stateCount; j++) {
      final int s = linearized[j];

      if (dryRun) {
        offsets.put(s, offset);
      } else {
        assert offsets.get(s) == offset : s + " " + offsets.get(s) + " " + offset;
      }

      offset += emitNodeData(bb, os, nodeDataLength, withNumbers ? numbers.get(s) : 0);

      int arc = fsa.getFirstArc(s);
      while (arc != 0) {
        // Terminal arcs have no target; both the target and its offset stay at zero.
        final boolean terminal = fsa.isArcTerminal(arc);
        final int target = terminal ? 0 : fsa.getEndNode(arc);
        int targetOffset = terminal ? 0 : offsets.get(target);

        int flags = fsa.isArcFinal(arc) ? FSA5.BIT_FINAL_ARC : 0;

        final int following = fsa.getNextArc(arc);
        if (following == 0) {
          flags |= FSA5.BIT_LAST_ARC;

          // Only the last arc of a state may use the "target is the next state" shortcut.
          final boolean targetFollows =
              j + 1 < stateCount && target == linearized[j + 1] && targetOffset != 0;
          if (targetFollows) {
            flags |= FSA5.BIT_TARGET_NEXT;
            targetOffset = 0;
          }
        }

        final int bytes = emitArc(bb, os, gtl, flags, fsa.getArcLabel(arc), targetOffset);
        if (bytes < 0) {
          // gtl too small. interrupt eagerly.
          return false;
        }

        offset += bytes;
        arc = following;
      }
    }

    return true;
  }

  /**
   * Emits a single arc: the label byte followed by the flags combined with the target offset
   * (shifted left by three bits), stored least significant byte first.
   *
   * @param bb Scratch buffer; expected to be empty on entry and left empty on return.
   * @param os The output stream or <code>null</code> to only compute the size.
   * @param gtl Goto length: the number of bytes of the combined flags-and-address field.
   * @param flags Arc flags. If the "target next" bit is set, only a single flags byte is stored.
   * @param label Arc label.
   * @param targetOffset Offset of the target state.
   * @return The number of bytes the arc occupies or <code>-1</code> if the combined field does not
   *     fit in the available bytes.
   * @throws IOException If writing fails or if <code>targetOffset</code> cannot be combined with
   *     the flags in a 32-bit field at all.
   */
  private int emitArc(
      ByteBuffer bb, OutputStream os, int gtl, int flags, byte label, int targetOffset)
      throws IOException {
    // The offset shares an int with three flag bits. Offsets of 2^29 and above (or negative
    // ones) would silently lose their high bits in the shift below and corrupt the output.
    if ((targetOffset >>> 29) != 0) {
      throw new IOException("Target offset out of range for FSA5: " + targetOffset);
    }

    int arcBytes = (flags & FSA5.BIT_TARGET_NEXT) != 0 ? SIZEOF_FLAGS : gtl;

    flags |= (targetOffset << 3);
    bb.put(label);
    for (int b = 0; b < arcBytes; b++) {
      bb.put((byte) flags);
      flags >>>= 8;
    }

    if (flags != 0) {
      // gtl too small. interrupt eagerly, leaving the scratch buffer clean.
      bb.clear();
      return -1;
    }

    bb.flip();
    int bytes = bb.remaining();
    if (os != null) {
      os.write(bb.array(), bb.position(), bb.remaining());
    }
    bb.clear();

    return bytes;
  }

  /**
   * Emits per-node data: <code>number</code> stored in <code>nodeDataLength</code> bytes, least
   * significant byte first.
   *
   * @param bb Scratch buffer; expected to be empty on entry and left empty on return.
   * @param os The output stream or <code>null</code> to only compute the size.
   * @param nodeDataLength Number of bytes of per-node data.
   * @param number The number to store with the node.
   * @return Always <code>nodeDataLength</code>.
   */
  private int emitNodeData(ByteBuffer bb, OutputStream os, int nodeDataLength, int number)
      throws IOException {
    // Nothing to write: either there is no node data or this is a dry run.
    if (nodeDataLength <= 0 || os == null) {
      return nodeDataLength;
    }

    int remaining = number;
    for (int i = 0; i < nodeDataLength; i++, remaining >>>= 8) {
      bb.put((byte) remaining);
    }

    bb.flip();
    os.write(bb.array(), bb.position(), bb.remaining());
    bb.clear();

    return nodeDataLength;
  }
}


================================================
FILE: morfologik-fsa-builders/src/main/java/morfologik/fsa/builders/FSABuilder.java
================================================
package morfologik.fsa.builders;

import static morfologik.fsa.builders.ConstantArcSizeFSA.*;

import java.util.*;
import morfologik.fsa.FSA;

/**
 * Fast, memory-conservative finite state automaton builder, returning an in-memory {@link FSA} that
 * is a tradeoff between construction speed and memory consumption. Use serializers to compress the
 * returned automaton into more compact form.
 *
 * @see FSASerializer
 */
public final class FSABuilder {
  /**
   * Keys of the debug and information entries collected while building an automaton.
   *
   * @see FSABuilder#getInfo()
   */
  public enum InfoEntry {
    SERIALIZATION_BUFFER_SIZE("Serialization buffer size"),
    SERIALIZATION_BUFFER_REALLOCATIONS("Serialization buffer reallocs"),
    CONSTANT_ARC_AUTOMATON_SIZE("Constant arc FSA size"),
    MAX_ACTIVE_PATH_LENGTH("Max active path"),
    STATE_REGISTRY_TABLE_SLOTS("Registry hash slots"),
    STATE_REGISTRY_SIZE("Registry hash entries"),
    ESTIMATED_MEMORY_CONSUMPTION_MB("Estimated mem consumption (MB)");

    /** Human-readable label returned from {@link #toString()}. */
    private final String label;

    InfoEntry(String label) {
      this.label = label;
    }

    /**
     * @return The human-readable label of this entry.
     */
    @Override
    public String toString() {
      return label;
    }
  }

  /** A megabyte. */
  private static final int MB = 1024 * 1024;

  /** Default growth size (in bytes) of the internal serialized FSA buffer. */
  private static final int BUFFER_GROWTH_SIZE = 5 * MB;

  /** Maximum number of labels from a single state. */
  private static final int MAX_LABELS = 256;

  /**
   * Orders byte arrays lexicographically, comparing whole arrays byte by byte with each byte
   * treated as an unsigned value ('C'-locale ordering).
   */
  public static final Comparator<byte[]> LEXICAL_ORDERING =
      new Comparator<byte[]>() {
        @Override
        public int compare(byte[] left, byte[] right) {
          final int leftLength = left.length;
          final int rightLength = right.length;
          return FSABuilder.compare(left, 0, leftLength, right, 0, rightLength);
        }
      };

  /** Growth size (in bytes) of the internal serialized FSA buffer used by this builder. */
  private final int bufferGrowthSize;

  /**
   * Holds serialized and mutable states. Each state is a sequential list of arcs, the last arc is
   * marked with {@link #BIT_ARC_LAST}.
   */
  private byte[] serialized = new byte[0];

  /**
   * Number of bytes already taken in {@link #serialized}. Start from 1 to keep 0 a sentinel value
   * (for the hash set and final state).
   */
  private int size;

  /**
   * States on the "active path" (still mutable). Values are addresses of each state's first arc.
   */
  private int[] activePath = new int[0];

  /** Current length of the active path. */
  private int activePathLen;

  /** The next offset at which an arc will be added to the given state on {@link #activePath}. */
  private int[] nextArcOffset = new int[0];

  /** Root state. If negative, the automaton has been built already and cannot be extended. */
  private int root;

  /**
   * An epsilon state. The first and only arc of this state points either to the root or to the
   * terminal state, indicating an empty automaton.
   */
  private int epsilon;

  /**
   * Hash set of state addresses in {@link #serialized}, hashed by {@link #hash(int, int)}. Zero
   * reserved for an unoccupied slot.
   */
  private int[] hashSet = new int[2];

  /** Number of entries currently stored in {@link #hashSet}. */
  private int hashSize = 0;

  /**
   * Previous sequence added to the automaton in {@link #add(byte[], int, int)}. Used in assertions
   * only.
   */
  private byte[] previous;

  /** Information about the automaton and its compilation. */
  private TreeMap<InfoEntry, Object> info;

  /** {@link #previous} sequence's length, used in assertions only. */
  private int previousLength;

  /**
   * Creates a builder that uses the default serialization buffer growth size ({@link
   * #BUFFER_GROWTH_SIZE}).
   */
  public FSABuilder() {
    this(BUFFER_GROWTH_SIZE);
  }

  /**
   * Creates a builder and allocates its two initial states: the epsilon state and the (mutable)
   * root state.
   *
   * @param bufferGrowthSize Buffer growth size (in bytes) when constructing the automaton. Values
   *     smaller than the space needed by a state with {@link #MAX_LABELS} arcs are raised to that
   *     minimum.
   */
  public FSABuilder(int bufferGrowthSize) {
    // Every buffer expansion must make room for at least one maximum-size state.
    this.bufferGrowthSize = Math.max(bufferGrowthSize, ARC_SIZE * MAX_LABELS);

    // Allocate epsilon state.
    // It has exactly one arc, so that arc is also the state's last one.
    epsilon = allocateState(1);
    serialized[epsilon + FLAGS_OFFSET] |= BIT_ARC_LAST;

    // Allocate root, with an initial empty set of output arcs.
    // The root is the first state on the active path.
    expandActivePath(1);
    root = activePath[0];
  }

  /**
   * Add a single sequence of bytes to the FSA. The input must be lexicographically greater than or
   * equal to any previously added sequence (bytes compared as unsigned values, see {@link
   * #LEXICAL_ORDERING}). The ordering is only verified when assertions are enabled.
   *
   * @param sequence The array holding input sequence of bytes.
   * @param start Starting offset (inclusive)
   * @param len Length of the input sequence (at least 1 byte).
   */
  public void add(byte[] sequence, int start, int len) {
    assert serialized != null : "Automaton already built.";
    // Note: copyOfRange takes an end index (exclusive), not a length.
    assert previous == null
            || len == 0
            || compare(previous, 0, previousLength, sequence, start, len) <= 0
        : "Input must be sorted: "
            + Arrays.toString(Arrays.copyOf(previous, previousLength))
            + " >= "
            + Arrays.toString(Arrays.copyOfRange(sequence, start, start + len));
    assert setPrevious(sequence, start, len);

    // Determine common prefix length.
    final int commonPrefix = commonPrefix(sequence, start, len);

    // Make room for extra states on active path, if needed.
    expandActivePath(len);

    // Freeze all the states after the common prefix.
    for (int i = activePathLen - 1; i > commonPrefix; i--) {
      final int frozenState = freezeState(i);
      // Redirect the parent's most recently added arc to the frozen (possibly shared) state and
      // reset the mutable slot so that it can be reused for the new suffix.
      setArcTarget(nextArcOffset[i - 1] - ARC_SIZE, frozenState);
      nextArcOffset[i] = activePath[i];
    }

    // Create arcs to new suffix states.
    for (int i = commonPrefix + 1, j = start + commonPrefix; i <= len; i++) {
      final int p = nextArcOffset[i - 1];

      // The arc consuming the last byte is final and leads to the terminal state.
      serialized[p + FLAGS_OFFSET] = (byte) (i == len ? BIT_ARC_FINAL : 0);
      serialized[p + LABEL_OFFSET] = sequence[j++];
      setArcTarget(p, i == len ? TERMINAL_STATE : activePath[i]);

      nextArcOffset[i - 1] = p + ARC_SIZE;
    }

    // Save last sequence's length so that we don't need to calculate it again.
    this.activePathLen = len;
  }

  /** Number of serialization buffer reallocations. */
  private int serializationBufferReallocations;

  /**
   * Finalizes the construction of the automaton and returns it. Build statistics become available
   * from {@link #getInfo()} afterwards. The builder releases its internal buffers and cannot be
   * used again.
   *
   * @return The constructed automaton.
   * @throws IllegalStateException If the automaton has already been completed.
   */
  public FSA complete() {
    if (serialized == null) {
      // The buffers are released at the end of this method; fail with a clear message instead of
      // a NullPointerException further down.
      throw new IllegalStateException("Automaton already built.");
    }

    // Adding an empty sequence freezes every state below the root.
    add(new byte[0], 0, 0);

    if (nextArcOffset[0] - activePath[0] == 0) {
      // An empty FSA.
      setArcTarget(epsilon, TERMINAL_STATE);
    } else {
      // An automaton with at least a single arc from root.
      root = freezeState(0);
      setArcTarget(epsilon, root);
    }

    info = new TreeMap<InfoEntry, Object>();
    info.put(InfoEntry.SERIALIZATION_BUFFER_SIZE, serialized.length);
    info.put(InfoEntry.SERIALIZATION_BUFFER_REALLOCATIONS, serializationBufferReallocations);
    info.put(InfoEntry.CONSTANT_ARC_AUTOMATON_SIZE, size);
    info.put(InfoEntry.MAX_ACTIVE_PATH_LENGTH, activePath.length);
    info.put(InfoEntry.STATE_REGISTRY_TABLE_SLOTS, hashSet.length);
    info.put(InfoEntry.STATE_REGISTRY_SIZE, hashSize);
    // Sum in long arithmetic: for large automata the byte count does not fit in an int.
    info.put(
        InfoEntry.ESTIMATED_MEMORY_CONSUMPTION_MB,
        ((long) this.serialized.length + this.hashSet.length * 4L) / (double) MB);

    final FSA fsa =
        new ConstantArcSizeFSA(java.util.Arrays.copyOf(this.serialized, this.size), epsilon);
    this.serialized = null;
    this.hashSet = null;
    return fsa;
  }

  /**
   * Builds a minimal, deterministic automaton from an array of byte sequences. The sequences are
   * added in array order, so the array must already be sorted (see {@link #LEXICAL_ORDERING}).
   *
   * @param input Input sequences to build automaton from.
   * @return Returns the automaton encoding all input sequences.
   */
  public static FSA build(byte[][] input) {
    final FSABuilder fsaBuilder = new FSABuilder();

    for (int i = 0; i < input.length; i++) {
      final byte[] sequence = input[i];
      fsaBuilder.add(sequence, 0, sequence.length);
    }

    return fsaBuilder.complete();
  }

  /**
   * Builds a minimal, deterministic automaton from an iterable of byte sequences. The sequences
   * are added in iteration order, so they must already be sorted (see {@link #LEXICAL_ORDERING}).
   *
   * @param input Input sequences to build automaton from.
   * @return Returns the automaton encoding all input sequences.
   */
  public static FSA build(Iterable<byte[]> input) {
    final FSABuilder fsaBuilder = new FSABuilder();

    final Iterator<byte[]> sequences = input.iterator();
    while (sequences.hasNext()) {
      final byte[] sequence = sequences.next();
      fsaBuilder.add(sequence, 0, sequence.length);
    }

    return fsaBuilder.complete();
  }

  /**
   * Returns the statistics gathered by {@link #complete()}.
   *
   * @return Returns various statistics concerning the FSA and its compilation, or <code>null
   *     </code> if {@link #complete()} has not been called yet.
   * @see InfoEntry
   */
  public Map<InfoEntry, Object> getInfo() {
    return info;
  }

  /** Tells whether the arc at the given offset carries the <code>BIT_ARC_LAST</code> flag. */
  private boolean isArcLast(int arc) {
    final int flags = serialized[arc + FLAGS_OFFSET];
    return (flags & BIT_ARC_LAST) != 0;
  }

  /** Tells whether the arc at the given offset carries the <code>BIT_ARC_FINAL</code> flag. */
  private boolean isArcFinal(int arc) {
    final int flags = serialized[arc + FLAGS_OFFSET];
    return (flags & BIT_ARC_FINAL) != 0;
  }

  /** Returns the label of the arc at the given offset. */
  private byte getArcLabel(int arc) {
    final int labelIndex = arc + LABEL_OFFSET;
    return serialized[labelIndex];
  }

  /**
   * Stores the target state address of an arc. The address occupies <code>TARGET_ADDRESS_SIZE
   * </code> bytes with the most significant byte first.
   */
  private void setArcTarget(int arc, int state) {
    final int first = arc + ADDRESS_OFFSET;
    int remaining = state;
    // Walk from the last (least significant) byte back to the first one.
    for (int i = TARGET_ADDRESS_SIZE - 1; i >= 0; i--) {
      serialized[first + i] = (byte) remaining;
      remaining >>>= 8;
    }
  }

  /**
   * Returns the target state address of an arc, decoded from four bytes stored with the most
   * significant byte first.
   */
  private int getArcTarget(int arc) {
    final int first = arc + ADDRESS_OFFSET;
    final int b3 = (serialized[first]) << 24;
    final int b2 = (serialized[first + 1] & 0xff) << 16;
    final int b1 = (serialized[first + 2] & 0xff) << 8;
    final int b0 = (serialized[first + 3] & 0xff);
    return b3 | b2 | b1 | b0;
  }

  /**
   * Compares the given sequence against the labels of the most recently added arc of each state
   * on the active path.
   *
   * @return The number of common prefix characters with the previous sequence.
   */
  private int commonPrefix(byte[] sequence, int start, int len) {
    // Also covers the empty root state: the limit is zero when nothing is on the active path.
    final int limit = Math.min(len, activePathLen);

    int matched = 0;
    while (matched < limit) {
      final int lastArc = nextArcOffset[matched] - ARC_SIZE;
      if (sequence[start + matched] != getArcLabel(lastArc)) {
        break;
      }
      matched++;
    }

    return matched;
  }

  /**
   * Freeze a state: try to find an equivalent state in the interned states dictionary first, if
   * found, return it, otherwise, serialize the mutable state at <code>activePathIndex</code> and
   * return it.
   *
   * @param activePathIndex Index of the state on the active path.
   * @return The address of the interned state (an existing equivalent one or the newly serialized
   *     copy).
   */
  private int freezeState(final int activePathIndex) {
    // The mutable state occupies [start, end) in the serialized buffer.
    final int start = activePath[activePathIndex];
    final int end = nextArcOffset[activePathIndex];
    final int len = end - start;

    // Set the last arc flag on the current active path's state.
    // This must happen before the lookup: equivalence is a byte-wise comparison against
    // interned states, which were copied with this flag already set.
    serialized[end - ARC_SIZE + FLAGS_OFFSET] |= BIT_ARC_LAST;

    // Try to locate a state with an identical content in the hash set.
    // The table length is assumed to be a power of two so that masking yields a valid slot.
    final int bucketMask = (hashSet.length - 1);
    int slot = hash(start, len) & bucketMask;
    for (int i = 0; ; ) {
      int state = hashSet[slot];
      if (state == 0) {
        // Empty slot: no equivalent state exists, intern a copy of this one.
        state = hashSet[slot] = serialize(activePathIndex);
        // Keep the table at most half full.
        if (++hashSize > hashSet.length / 2) expandAndRehash();
        return state;
      } else if (equivalent(state, start, len)) {
        return state;
      }

      // Collision: probe the next slot, with the step growing by one on each attempt.
      slot = (slot + (++i)) & bucketMask;
    }
  }

  /** Doubles the size of the hash set and reinserts all the states stored in it. */
  private void expandAndRehash() {
    final int[] grown = new int[hashSet.length * 2];
    final int mask = grown.length - 1;

    for (final int state : hashSet) {
      if (state <= 0) {
        // Unoccupied slot.
        continue;
      }

      int slot = hash(state, stateLength(state)) & mask;
      int probe = 0;
      while (grown[slot] > 0) {
        probe++;
        slot = (slot + probe) & mask;
      }
      grown[slot] = state;
    }

    this.hashSet = grown;
  }

  /**
   * Returns the number of bytes taken by all arcs of a serialized state, found by scanning
   * forward until the arc flagged as the last one.
   */
  private int stateLength(int state) {
    int length = ARC_SIZE;
    for (int arc = state; !isArcLast(arc); arc += ARC_SIZE) {
      length += ARC_SIZE;
    }
    return length;
  }

  /**
   * Return <code>true</code> if two regions of {@link #serialized}, both <code>len</code> bytes
   * long, are identical. Regions reaching beyond the used part of the buffer never match.
   */
  private boolean equivalent(int start1, int start2, int len) {
    if (start1 + len > size || start2 + len > size) {
      return false;
    }

    for (int i = 0; i < len; i++) {
      if (serialized[start1 + i] != serialized[start2 + i]) {
        return false;
      }
    }

    return true;
  }

  /**
   * Copies the arcs of the given active path state to the end of the used part of {@link
   * #serialized} and returns the address of the copy.
   */
  private int serialize(final int activePathIndex) {
    // May reallocate the buffer, so it has to run before the buffer is read.
    expandBuffers();

    final int source = activePath[activePathIndex];
    final int length = nextArcOffset[activePathIndex] - source;
    final int target = size;
    System.arraycopy(serialized, source, serialized, target, length);

    size = target + length;
    return target;
  }

  /**
   * Computes the hash code of a run of arcs in {@link #serialized}. Each arc contributes its
   * label, its target address and its final flag.
   */
  private int hash(int start, int byteCount) {
    assert byteCount % ARC_SIZE == 0 : "Not an arc multiply?";

    final int arcCount = byteCount / ARC_SIZE;
    int code = 0;
    int arc = start;
    for (int n = 0; n < arcCount; n++) {
      code = 17 * code + getArcLabel(arc);
      code = 17 * code + getArcTarget(arc);
      if (isArcFinal(arc)) {
        code += 17;
      }
      arc += ARC_SIZE;
    }

    return code;
  }

  /**
   * Makes sure the active path can hold at least <code>size</code> states, allocating a new
   * mutable state (with room for the maximum number of labels) for every added position.
   */
  private void expandActivePath(int size) {
    final int current = activePath.length;
    if (current >= size) {
      return;
    }

    activePath = java.util.Arrays.copyOf(activePath, size);
    nextArcOffset = java.util.Arrays.copyOf(nextArcOffset, size);

    for (int i = current; i < size; i++) {
      // A fresh state has no arcs yet, so the next arc goes at the state's own address.
      final int state = allocateState(MAX_LABELS);
      activePath[i] = state;
      nextArcOffset[i] = state;
    }
  }

  /**
   * Grows {@link #serialized} by the buffer growth size if it cannot accommodate one more state
   * with the maximum number of labels.
   */
  private void expandBuffers() {
    final int required = size + ARC_SIZE * MAX_LABELS;
    if (this.serialized.length >= required) {
      return;
    }

    serialized = java.util.Arrays.copyOf(serialized, serialized.length + bufferGrowthSize);
    serializationBufferReallocations++;
  }

  /**
   * Reserves space at the end of the used part of the buffer for a state with the given number of
   * outgoing labels.
   *
   * @return state offset
   */
  private int allocateState(int labels) {
    expandBuffers();

    final int offset = size;
    size = offset + labels * ARC_SIZE;
    return offset;
  }

  /**
   * Copies the given range of <code>sequence</code> into the {@link #previous} buffer, growing
   * the buffer if needed.
   *
   * @return Always <code>true</code>, so that the call can be placed inside an assertion.
   */
  private boolean setPrevious(byte[] sequence, int start, int length) {
    final boolean needsNewBuffer = previous == null || previous.length < length;
    if (needsNewBuffer) {
      previous = new byte[length];
    }

    System.arraycopy(sequence, start, previous, 0, length);
    previousLength = length;
    return true;
  }

  /**
   * Lexicographic order of input sequences, consistent with the "C" sort: bytes are compared as
   * unsigned values (0-255) and a sequence that is a prefix of another one sorts first.
   *
   * @return A negative value, zero or a positive value if the first range is less than, equal to
   *     or greater than the second range.
   */
  private static int compare(byte[] s1, int start1, int lens1, byte[] s2, int start2, int lens2) {
    final int common = Math.min(lens1, lens2);

    for (int i = 0; i < common; i++) {
      final int difference = (s1[start1 + i] & 0xff) - (s2[start2 + i] & 0xff);
      if (difference != 0) {
        return difference;
      }
    }

    return lens1 - lens2;
  }
}


================================================
FILE: morfologik-fsa-builders/src/main/java/morfologik/fsa/builders/FSAInfo.java
================================================
package morfologik.fsa.builders;

import com.carrotsearch.hppc.IntIntHashMap;
import java.util.BitSet;
import morfologik.fsa.FSA;
import morfologik.fsa.FSA5;

/** Compute additional information about an FSA: number of arcs, nodes, etc. */
public final class FSAInfo {
  /** Computes the exact number of nodes and arcs by recursively traversing the FSA. */
  private static class NodeVisitor {
    final BitSet visitedArcs = new BitSet();
    final BitSet visitedNodes = new BitSet();

    /** Number of distinct nodes visited. */
    int nodes;

    /** Number of distinct arc identifiers seen. */
    int arcs;

    /** Number of arcs traversed, including those with an identifier seen before. */
    int totalArcs;

    private final FSA fsa;

    NodeVisitor(FSA fsa) {
      this.fsa = fsa;
    }

    /** Visits the given node and everything reachable from it, unless it was visited before. */
    public void visitNode(final int node) {
      if (visitedNodes.get(node)) {
        return;
      }
      visitedNodes.set(node);
      nodes++;

      int arc = fsa.getFirstArc(node);
      while (arc != 0) {
        totalArcs++;
        if (!visitedArcs.get(arc)) {
          visitedArcs.set(arc);
          arcs++;
        }

        if (!fsa.isArcTerminal(arc)) {
          visitNode(fsa.getEndNode(arc));
        }
        arc = fsa.getNextArc(arc);
      }
    }
  }

  /**
   * Computes the exact number of final states. Per-node results are cached, so a node reachable
   * along several paths is traversed only once.
   */
  private static class FinalStateVisitor {
    final IntIntHashMap visitedNodes = new IntIntHashMap();

    private final FSA fsa;

    FinalStateVisitor(FSA fsa) {
      this.fsa = fsa;
    }

    /**
     * @return The number of final arcs reachable from the given node.
     */
    public int visitNode(int node) {
      final int cached = visitedNodes.indexOf(node);
      if (cached >= 0) {
        return visitedNodes.indexGet(cached);
      }

      int total = 0;
      int arc = fsa.getFirstArc(node);
      while (arc != 0) {
        if (fsa.isArcFinal(arc)) {
          total++;
        }
        if (!fsa.isArcTerminal(arc)) {
          total += visitNode(fsa.getEndNode(arc));
        }
        arc = fsa.getNextArc(arc);
      }

      visitedNodes.put(node, total);
      return total;
    }
  }

  /** Number of nodes in the automaton. */
  public final int nodeCount;

  /**
   * Number of arcs in the automaton, excluding arcs from the zero node (initial) and the arc from
   * the start node to the root node.
   */
  public final int arcsCount;

  /** Total number of arcs, counting arcs that physically overlap due to merging. */
  public final int arcsCountTotal;

  /** Number of final states (number of input sequences stored in the automaton). */
  public final int finalStatesCount;

  /** Arcs size (in serialized form). */
  public final int size;

  /**
   * Computes the statistics by traversing the automaton from its root node. The traversals are
   * skipped when the root node is not a positive value, in which case only the constant terms
   * remain in the counters.
   *
   * @param fsa The automaton to compute statistics for.
   */
  public FSAInfo(FSA fsa) {
    final int root = fsa.getRootNode();

    final NodeVisitor w = new NodeVisitor(fsa);
    if (root > 0) {
      w.visitNode(root);
    }

    this.nodeCount = 1 + w.nodes;
    this.arcsCount = 1 + w.arcs;
    this.arcsCountTotal = 1 + w.totalArcs;

    // Apply the same guard as for the node traversal above: with no valid root there is nothing
    // to traverse and no sequence is stored in the automaton.
    if (root > 0) {
      this.finalStatesCount = new FinalStateVisitor(fsa).visitNode(root);
    } else {
      this.finalStatesCount = 0;
    }

    // The serialized size is only known for FSA5 automata.
    if (fsa instanceof FSA5) {
      this.size = ((FSA5) fsa).arcs.length;
    } else {
      this.size = 0;
    }
  }

  /**
   * Creates an instance from precomputed values. The {@link #size} is set to zero.
   *
   * @param nodeCount Number of nodes.
   * @param arcsCount Number of arcs.
   * @param arcsCountTotal Total number of arcs.
   * @param finalStatesCount Number of final states.
   */
  public FSAInfo(int nodeCount, int arcsCount, int arcsCountTotal, int finalStatesCount) {
    this.size = 0;
    this.finalStatesCount = finalStatesCount;
    this.arcsCountTotal = arcsCountTotal;
    this.arcsCount = arcsCount;
    this.nodeCount = nodeCount;
  }

  /**
   * @return A single-line, human-readable summary of all the counters.
   */
  @Override
  public String toString() {
    final String nodesPart = "Nodes: " + nodeCount;
    final String arcsPart = ", arcs visited: " + arcsCount + ", arcs total: " + arcsCountTotal;
    final String finalsPart = ", final states: " + finalStatesCount;
    final String sizePart = ", size: " + size;
    return nodesPart + arcsPart + finalsPart + sizePart;
  }
}


================================================
FILE: morfologik-fsa-builders/src/main/java/morfologik/fsa/builders/FSASerializer.java
================================================
package morfologik.fsa.builders;

import java.io.IOException;
import java.io.OutputStream;
import java.util.Set;
import morfologik.fsa.FSA;
import morfologik.fsa.FSAFlags;

/**
 * All FSA serializers (to binary formats) will implement this interface. The configuration
 * methods return the serializer itself so that calls can be chained.
 */
public interface FSASerializer {
  /**
   * Serialize a finite state automaton to an output stream.
   *
   * @param fsa The automaton to serialize.
   * @param os The output stream to serialize to.
   * @param <T> A subclass of {@link OutputStream}, returned for chaining.
   * @return Returns <code>T</code> for chaining.
   * @throws IOException Rethrown if an I/O error occurs.
   */
  public <T extends OutputStream> T serialize(FSA fsa, T os) throws IOException;

  /**
   * @return Returns the set of flags supported by the serializer (and the output automaton).
   */
  public Set<FSAFlags> getFlags();

  /**
   * Sets the filler separator (only if {@link #getFlags()} returns {@link FSAFlags#SEPARATORS}).
   *
   * @param filler The filler separator byte.
   * @return Returns <code>this</code> for call chaining.
   */
  public FSASerializer withFiller(byte filler);

  /**
   * Sets the annotation separator (only if {@link #getFlags()} returns {@link
   * FSAFlags#SEPARATORS}).
   *
   * @param annotationSeparator The annotation separator byte.
   * @return Returns <code>this</code> for call chaining.
   */
  public FSASerializer withAnnotationSeparator(byte annotationSeparator);

  /**
   * Enables support for right language count on nodes, speeding up perfect hash counts (only if
   * {@link #getFlags()} returns {@link FSAFlags#NUMBERS}).
   *
   * @return Returns <code>this</code> for call chaining.
   */
  public FSASerializer withNumbers();
}


================================================
FILE: morfologik-fsa-builders/src/main/java/morfologik/fsa/builders/FSAUtils.java
================================================
package morfologik.fsa.builders;

import com.carrotsearch.hppc.IntIntHashMap;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.BitSet;
import java.util.TreeMap;
import morfologik.fsa.FSA;
import morfologik.fsa.FSA5;
import morfologik.fsa.FSAFlags;
import morfologik.fsa.StateVisitor;

/** Other FSA-related utilities not directly associated with the class hierarchy. */
public final class FSAUtils {
  /** A mutable pair of two <code>int</code> values. */
  public static final class IntIntHolder {
    /** The first value. */
    public int a;

    /** The second value. */
    public int b;

    /** Creates a holder with both values left at zero. */
    public IntIntHolder() {}

    /** Creates a holder with the given values. */
    public IntIntHolder(int a, int b) {
      this.b = b;
      this.a = a;
    }
  }

  /**
   * Returns the right-language reachable from a given FSA node, formatted as an input for the
   * graphviz package (expressed in the <code>dot</code> language).
   *
   * @param fsa The automaton to visualize.
   * @param node Starting node (subgraph will be visualized unless it's the automaton's root node).
   * @return Returns the dot language description of the automaton.
   * @throws RuntimeException Wrapping any {@link IOException} raised while writing.
   */
  public static String toDot(FSA fsa, int node) {
    final StringWriter buffer = new StringWriter();
    try {
      toDot(buffer, fsa, node);
    } catch (IOException ioe) {
      throw new RuntimeException(ioe);
    }
    return buffer.toString();
  }

  /**
   * Saves the right-language reachable from a given FSA node, formatted as an input for the
   * graphviz package (expressed in the <code>dot</code> language), to the given writer.
   *
   * @param w The writer to write dot language description of the automaton.
   * @param fsa The automaton to visualize.
   * @param node Starting node (subgraph will be visualized unless it's the automaton's root node).
   * @throws IOException Rethrown if an I/O exception occurs.
   */
  public static void toDot(Writer w, FSA fsa, int node) throws IOException {
    final BitSet seen = new BitSet();

    // Graph header and layout direction.
    w.write("digraph Automaton {\n");
    w.write("  rankdir = LR;\n");

    // Pseudo-nodes: the common sink and the entry marker pointing at the starting node.
    w.write("  stop [shape=doublecircle,label=\"\"];\n");
    w.write("  initial [shape=plaintext,label=\"\"];\n");
    w.write("  initial -> " + node + "\n\n");

    visitNode(w, 0, fsa, node, seen);

    w.write("}\n");
  }

  /**
   * Writes the dot description of a node and of its outgoing arcs, then recursively descends to
   * all target nodes that have not been visited yet.
   *
   * @param w The writer to emit to.
   * @param d Depth of the recursion (only passed along).
   * @param fsa The automaton being visualized.
   * @param s The node to emit.
   * @param visited Nodes emitted so far.
   */
  private static void visitNode(Writer w, int d, FSA fsa, int s, BitSet visited)
      throws IOException {
    visited.set(s);

    final String nodeId = Integer.toString(s);
    w.write("  ");
    w.write(nodeId);

    final boolean numbered = fsa.getFlags().contains(FSAFlags.NUMBERS);
    if (!numbered) {
      w.write(" [shape=circle,label=\"\"];\n");
    } else {
      int nodeNumber = fsa.getRightLanguageCount(s);
      w.write(" [shape=circle,label=\"" + nodeNumber + "\"];\n");
    }

    // First pass: emit all the outgoing arcs of this node.
    int arc = fsa.getFirstArc(s);
    while (arc != 0) {
      w.write("  ");
      w.write(nodeId);
      w.write(" -> ");
      w.write(fsa.isArcTerminal(arc) ? "stop" : Integer.toString(fsa.getEndNode(arc)));

      // Letters and digits are shown verbatim, any other byte as a hexadecimal value.
      final byte label = fsa.getArcLabel(arc);
      w.write(" [label=\"");
      if (Character.isLetterOrDigit(label)) {
        w.write((char) label);
      } else {
        w.write("0x");
        w.write(Integer.toHexString(label & 0xFF));
      }
      w.write("\"");

      if (fsa.isArcFinal(arc)) {
        w.write(" arrowhead=\"tee\"");
      }
      if (fsa instanceof FSA5 && ((FSA5) fsa).isNextSet(arc)) {
        w.write(" color=\"blue\"");
      }

      w.write("]\n");
      arc = fsa.getNextArc(arc);
    }

    // Second pass: descend into the targets not emitted yet.
    for (arc = fsa.getFirstArc(s); arc != 0; arc = fsa.getNextArc(arc)) {
      if (fsa.isArcTerminal(arc)) {
        continue;
      }

      final int endNode = fsa.getEndNode(arc);
      if (visited.get(endNode)) {
        continue;
      }
      visitNode(w, d + 1, fsa, endNode, visited);
    }
  }

  /**
   * Calculate fan-out ratio (how many nodes have a given number of outgoing arcs).
   *
   * @param fsa The automaton to calculate fanout for.
   * @param root The starting node for calculations; only nodes reachable from it are counted.
   * @return The returned map contains keys for the number of outgoing arcs and an associated value
   *     being the number of nodes with that arc number. The keys span the range between the
   *     smallest and the largest non-zero fan-out observed (nodes with no arcs are omitted).
   */
  public static TreeMap<Integer, Integer> calculateFanOuts(final FSA fsa, int root) {
    // Arc labels are bytes, so a node can have anywhere between 0 and 256 outgoing arcs
    // (inclusive); the histogram needs a slot for each of these counts.
    final int[] result = new int[256 + 1];
    fsa.visitInPreOrder(
        new StateVisitor() {
          public boolean accept(int state) {
            int count = 0;
            for (int arc = fsa.getFirstArc(state); arc != 0; arc = fsa.getNextArc(arc)) {
              count++;
            }
            result[count]++;
            return true;
          }
        },
        root);

    TreeMap<Integer, Integer> output = new TreeMap<Integer, Integer>();

    int low = 1; // Omit #0, there is always a single node like that (dummy).
    while (low < result.length && result[low] == 0) {
      low++;
    }

    int high = result.length - 1;
    while (high >= 0 && result[high] == 0) {
      high--;
    }

    for (int i = low; i <= high; i++) {
      output.put(i, result[i]);
    }

    return output;
  }

  /**
   * Calculate the size of "right language" for each state in an FSA. The right language is the
   * number of sequences encoded from a given node in the automaton.
   *
   * @param fsa The automaton to calculate right language for.
   * @return Returns a map with node identifiers as keys and their right language counts as
   *     associated values.
   */
  public static IntIntHashMap rightLanguageForAllStates(final FSA fsa) {
    final IntIntHashMap counts = new IntIntHashMap();

    // Post-order traversal: the counts of all target states are looked up while computing the
    // count of the state pointing at them.
    fsa.visitInPostOrder(
        new StateVisitor() {
          public boolean accept(int state) {
            int sequences = 0;
            int arc = fsa.getFirstArc(state);
            while (arc != 0) {
              final int endingHere = fsa.isArcFinal(arc) ? 1 : 0;
              final int continuing =
                  fsa.isArcTerminal(arc) ? 0 : counts.get(fsa.getEndNode(arc));
              sequences += endingHere + continuing;
              arc = fsa.getNextArc(arc);
            }
            counts.put(state, sequences);

            return true;
          }
        });

    return counts;
  }
}


================================================
FILE: morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/CFSA2SerializerTest.java
================================================
package morfologik.fsa.builders;

/**
 * Supplies a {@link CFSA2Serializer} to the tests inherited from {@link SerializerTestBase}
 * (presumably through {@link #createSerializer()}; the base class is not shown here).
 */
public class CFSA2SerializerTest extends SerializerTestBase {
  protected CFSA2Serializer createSerializer() {
    return new CFSA2Serializer();
  }
}


================================================
FILE: morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/FSA5SerializerTest.java
================================================
package morfologik.fsa.builders;

/**
 * Supplies an {@link FSA5Serializer} to the tests inherited from {@link SerializerTestBase}
 * (presumably through {@link #createSerializer()}; the base class is not shown here).
 */
public class FSA5SerializerTest extends SerializerTestBase {
  protected FSA5Serializer createSerializer() {
    return new FSA5Serializer();
  }
}


================================================
FILE: morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/FSA5Test.java
================================================
package morfologik.fsa.builders;

import static morfologik.fsa.FSAFlags.*;
import static org.junit.jupiter.api.Assertions.*;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import morfologik.fsa.FSA;
import morfologik.fsa.FSA5;
import morfologik.fsa.FSAFlags;
import org.junit.jupiter.api.Test;

/** Additional tests for {@link FSA5}, run against prebuilt automata loaded from resources. */
public final class FSA5Test extends TestBase {
  /** Sequences expected in both test automata, in sorted order. */
  public List<String> expected = Arrays.asList("a", "aba", "ac", "b", "ba", "c");

  /** The plain automaton has no node numbers and contains exactly the expected sequences. */
  @Test
  public void testVersion5() throws IOException {
    final FSA automaton = FSA.read(this.getClass().getResourceAsStream("abc.fsa"));
    final boolean numbered = automaton.getFlags().contains(FSAFlags.NUMBERS);
    assertFalse(numbered);
    verifyContent(expected, automaton);
  }

  /** The numbered automaton contains the same sequences and reports the numbers flag. */
  @Test
  public void testVersion5WithNumbers() throws IOException {
    final FSA automaton = FSA.read(this.getClass().getResourceAsStream("abc-numbers.fsa"));

    verifyContent(expected, automaton);
    final boolean numbered = automaton.getFlags().contains(FSAFlags.NUMBERS);
    assertTrue(numbered);
  }

  /** Both automata must report identical (and known) node and arc counts. */
  @Test
  public void testArcsAndNodes() throws IOException {
    final FSA plain = FSA.read(this.getClass().getResourceAsStream("abc.fsa"));
    final FSA numbered = FSA.read(this.getClass().getResourceAsStream("abc-numbers.fsa"));

    final FSAInfo plainInfo = new FSAInfo(plain);
    final FSAInfo numberedInfo = new FSAInfo(numbered);

    assertEquals(plainInfo.arcsCount, numberedInfo.arcsCount);
    assertEquals(plainInfo.nodeCount, numberedInfo.nodeCount);

    assertEquals(4, numberedInfo.nodeCount);
    assertEquals(7, numberedInfo.arcsCount);
  }

  /** Checks the number assigned to each sequence when walking the numbered automaton. */
  @Test
  public void testNumbers() throws IOException {
    final FSA automaton = FSA.read(this.getClass().getResourceAsStream("abc-numbers.fsa"));

    assertTrue(automaton.getFlags().contains(NEXTBIT));

    // Get all numbers for nodes.
    final byte[] scratch = new byte[128];
    final ArrayList<String> numbered = new ArrayList<String>();
    walkNode(scratch, 0, automaton, automaton.getRootNode(), 0, numbered);

    Collections.sort(numbered);
    assertEquals(Arrays.asList("0 c", "1 b", "2 ba", "3 a", "4 ac", "5 aba"), numbered);
  }

  /**
   * Recursively walks the automaton from <code>node</code>, adding a "number sequence" entry to
   * <code>result</code> for every arc that is final or terminal.
   *
   * @param buffer Scratch buffer holding the labels on the current path.
   * @param depth Index in <code>buffer</code> at which this node's arc labels are stored.
   * @param fsa The automaton to walk.
   * @param node The node to start from.
   * @param cnt The number to assign to the next sequence found.
   * @param result Receives the collected entries.
   */
  public static void walkNode(
      byte[] buffer, int depth, FSA fsa, int node, int cnt, List<String> result)
      throws IOException {
    int arc = fsa.getFirstArc(node);
    while (arc != 0) {
      buffer[depth] = fsa.getArcLabel(arc);

      final boolean isFinal = fsa.isArcFinal(arc);
      final boolean isTerminal = fsa.isArcTerminal(arc);

      if (isFinal || isTerminal) {
        result.add(cnt + " " + new String(buffer, 0, depth + 1, "UTF-8"));
      }

      if (isFinal) {
        cnt++;
      }

      if (!isTerminal) {
        final int target = fsa.getEndNode(arc);
        walkNode(buffer, depth + 1, fsa, target, cnt, result);
        // Skip over all the sequences stored below the target node.
        cnt += fsa.getRightLanguageCount(target);
      }

      arc = fsa.getNextArc(arc);
    }
  }

  /** Asserts that the automaton contains exactly the expected sequences. */
  private static void verifyContent(List<String> expected, FSA fsa) throws IOException {
    final ArrayList<String> actual = new ArrayList<String>();

    for (ByteBuffer bb : fsa.getSequences()) {
      assertEquals(0, bb.arrayOffset());
      assertEquals(0, bb.position());
      actual.add(new String(bb.array(), 0, bb.remaining(), "UTF-8"));
    }

    assertEquals(expected.size(), actual.size());
    Collections.sort(actual);
    assertEquals(expected, actual);
  }
}


================================================
FILE: morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/FSABuilderTest.java
================================================
package morfologik.fsa.builders;

import static morfologik.fsa.builders.FSATestUtils.*;
import static org.junit.jupiter.api.Assertions.assertEquals;

import java.io.IOException;
import java.util.Arrays;
import java.util.Random;
import morfologik.fsa.FSA;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

/** Tests of {@link FSABuilder}: correctness and minimality of the automata it constructs. */
public class FSABuilderTest extends TestBase {
  /** 25000 random sequences over the full byte alphabet. */
  private static byte[][] input;

  /** 40 random sequences over a small alphabet. */
  private static byte[][] input2;

  @BeforeAll
  public static void prepareByteInput(Random rnd) {
    input = generateRandom(rnd, 25000, new MinMax(1, 20), new MinMax(0, 255));
    input2 = generateRandom(rnd, 40, new MinMax(1, 20), new MinMax(0, 3));
  }

  @Test
  public void testEmptyInput() {
    byte[][] input = {};
    checkCorrect(input, FSABuilder.build(input));
  }

  @Test
  public void testHashResizeBug() throws Exception {
    byte[][] input = {
      {0, 1}, {0, 2}, {1, 1}, {2, 1},
    };

    // Verify correctness and minimality on the very same automaton.
    FSA fsa = FSABuilder.build(input);
    checkCorrect(input, fsa);
    checkMinimal(fsa);
  }

  @Test
  public void testSmallInput() throws Exception {
    byte[][] input = {
      "abc".getBytes("UTF-8"), "bbc".getBytes("UTF-8"), "d".getBytes("UTF-8"),
    };
    checkCorrect(input, FSABuilder.build(input));
  }

  @Test
  public void testLexicographicOrder() throws IOException {
    byte[][] input = {
      {0}, {1}, {(byte) 0xff},
    };
    Arrays.sort(input, FSABuilder.LEXICAL_ORDERING);

    // Check if lexical ordering is consistent with absolute byte value.
    assertEquals(0, input[0][0]);
    assertEquals(1, input[1][0]);
    assertEquals((byte) 0xff, input[2][0]);

    final FSA fsa;
    checkCorrect(input, fsa = FSABuilder.build(input));

    // The arcs leaving the root must come out in the same order.
    int arc = fsa.getFirstArc(fsa.getRootNode());
    assertEquals(0, fsa.getArcLabel(arc));
    arc = fsa.getNextArc(arc);
    assertEquals(1, fsa.getArcLabel(arc));
    arc = fsa.getNextArc(arc);
    assertEquals((byte) 0xff, fsa.getArcLabel(arc));
  }

  @Test
  public void testRandom25000_largerAlphabet() {
    FSA fsa = FSABuilder.build(input);
    checkCorrect(input, fsa);
    checkMinimal(fsa);
  }

  // NOTE: despite the name, this test runs on the 40-sequence input (input2).
  @Test
  public void testRandom25000_smallAlphabet() throws IOException {
    FSA fsa = FSABuilder.build(input2);
    checkCorrect(input2, fsa);
    checkMinimal(fsa);
  }
}


================================================
FILE: morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/FSATestUtils.java
================================================
package morfologik.fsa.builders;

import static org.junit.jupiter.api.Assertions.*;

import java.nio.ByteBuffer;
import java.util.*;
import morfologik.fsa.FSA;
import morfologik.fsa.StateVisitor;

/** Static helpers shared by tests: random input generation and automaton verification. */
public class FSATestUtils {
  /**
   * Generates {@code count} random byte sequences and returns them sorted with
   * {@link FSABuilder#LEXICAL_ORDERING}. Duplicates are not removed.
   *
   * @param rnd source of randomness
   * @param count number of sequences to generate
   * @param length sequence length bounds; {@code min} is inclusive, {@code max} exclusive
   * @param alphabet generated value bounds; {@code min} is inclusive, {@code max} exclusive
   * @return the generated sequences, sorted
   */
  public static byte[][] generateRandom(Random rnd, int count, MinMax length, MinMax alphabet) {
    final byte[][] input = new byte[count][];
    for (int i = 0; i < count; i++) {
      input[i] = randomByteSequence(rnd, length, alphabet);
    }
    Arrays.sort(input, FSABuilder.LEXICAL_ORDERING);
    return input;
  }

  /**
   * Generates a single random byte sequence.
   *
   * <p>Both bounds are drawn with {@link Random#nextInt(int)}, so the upper limits are
   * exclusive and a {@link MinMax} with {@code min == max} is rejected by
   * {@code nextInt} with an {@link IllegalArgumentException}.
   */
  private static byte[] randomByteSequence(Random rnd, MinMax length, MinMax alphabet) {
    byte[] bytes = new byte[length.min + rnd.nextInt(length.range())];
    for (int i = 0; i < bytes.length; i++) {
      bytes[i] = (byte) (alphabet.min + rnd.nextInt(alphabet.range()));
    }
    return bytes;
  }

  /**
   * Copies the remaining content of an array-backed buffer into a fresh array,
   * honoring the buffer's array offset and current position.
   */
  private static byte[] copyOfRemaining(ByteBuffer bb) {
    final int from = bb.arrayOffset() + bb.position();
    return Arrays.copyOfRange(bb.array(), from, from + bb.remaining());
  }

  /**
   * Checks that the set of sequences accepted by the automaton is exactly the set of
   * (unique) input sequences. Fails the test otherwise.
   *
   * @param input the expected sequences; duplicates are tolerated
   * @param fsa the automaton to verify
   */
  public static void checkCorrect(byte[][] input, FSA fsa) {
    // Collect everything the automaton accepts.
    HashSet<ByteBuffer> rl = new HashSet<ByteBuffer>();
    for (ByteBuffer bb : fsa) {
      rl.add(ByteBuffer.wrap(copyOfRemaining(bb)));
    }

    HashSet<ByteBuffer> uniqueInput = new HashSet<ByteBuffer>();
    for (byte[] sequence : input) {
      uniqueInput.add(ByteBuffer.wrap(sequence));
    }

    // (1) All input sequences are in the right language.
    for (ByteBuffer sequence : uniqueInput) {
      if (!rl.remove(sequence)) {
        fail("Not present in the right language: " + SerializerTestBase.toString(sequence));
      }
    }

    // (2) No other sequence _other_ than the input is in the right language.
    assertEquals(0, rl.size());
  }

  /**
   * Checks that the automaton is minimal, that is: no two states visited in
   * post-order have the same right language.
   *
   * @param fsa the automaton to verify
   */
  public static void checkMinimal(final FSA fsa) {
    // Maps a textual rendering of a state's right language to the state that produced it.
    final HashMap<String, Integer> stateLanguages = new HashMap<String, Integer>();

    fsa.visitInPostOrder(
        new StateVisitor() {
          private StringBuilder b = new StringBuilder();

          public boolean accept(int state) {
            List<byte[]> rightLanguage = allSequences(fsa, state);
            Collections.sort(rightLanguage, FSABuilder.LEXICAL_ORDERING);

            b.setLength(0);
            for (byte[] seq : rightLanguage) {
              b.append(Arrays.toString(seq));
              b.append(',');
            }

            final String full = b.toString();
            // The message embeds the whole right language, so build it lazily:
            // it is only needed when the assertion actually fails.
            assertFalse(
                stateLanguages.containsKey(full),
                () -> "State exists: " + state + " " + full + " " + stateLanguages.get(full));
            stateLanguages.put(full, state);

            return true;
          }
        });
  }

  /**
   * Returns copies of all sequences reachable from the given state.
   *
   * @param fsa the automaton to read from
   * @param state the state to start from
   * @return a mutable list with one array per sequence
   */
  static List<byte[]> allSequences(FSA fsa, int state) {
    ArrayList<byte[]> seq = new ArrayList<byte[]>();
    for (ByteBuffer bb : fsa.getSequences(state)) {
      seq.add(copyOfRemaining(bb));
    }
    return seq;
  }

  /**
   * Checks that two automata are structurally identical when traversed in parallel
   * from their root nodes.
   *
   * @throws RuntimeException on the first difference found
   */
  public static void checkIdentical(FSA fsa1, FSA fsa2) {
    ArrayDeque<String> fromRoot = new ArrayDeque<String>();
    checkIdentical(
        fromRoot, fsa1, fsa1.getRootNode(), new BitSet(), fsa2, fsa2.getRootNode(), new BitSet());
  }

  /**
   * Recursive step of {@link #checkIdentical(FSA, FSA)}: compares the outgoing arcs of
   * {@code node1} and {@code node2} (label sets, final and terminal flags) and then
   * descends into the target nodes of non-terminal arcs.
   *
   * @param fromRoot labels on the path from the root to the current nodes, root first;
   *     used only for diagnostics
   * @param visited1 nodes of {@code fsa1} seen so far
   * @param visited2 nodes of {@code fsa2} seen so far
   * @throws RuntimeException on the first difference found
   */
  static void checkIdentical(
      ArrayDeque<String> fromRoot,
      FSA fsa1,
      int node1,
      BitSet visited1,
      FSA fsa2,
      int node2,
      BitSet visited2) {
    int arc1 = fsa1.getFirstArc(node1);
    int arc2 = fsa2.getFirstArc(node2);

    if (visited1.get(node1) != visited2.get(node2)) {
      throw new RuntimeException(
          "Two nodes should either be visited or not visited: "
              + Arrays.toString(fromRoot.toArray())
              + " "
              + " node1: "
              + node1
              + " "
              + " node2: "
              + node2);
    }
    visited1.set(node1);
    visited2.set(node2);

    // Collect the labels of all arcs leaving both nodes, walking them in lock-step.
    TreeSet<Character> labels1 = new TreeSet<Character>();
    TreeSet<Character> labels2 = new TreeSet<Character>();
    while (true) {
      labels1.add((char) fsa1.getArcLabel(arc1));
      labels2.add((char) fsa2.getArcLabel(arc2));

      arc1 = fsa1.getNextArc(arc1);
      arc2 = fsa2.getNextArc(arc2);

      // Zero marks the end of an arc list; both lists must end at the same time.
      if (arc1 == 0 || arc2 == 0) {
        if (arc1 != arc2) {
          throw new RuntimeException(
              "Different number of labels at path: " + Arrays.toString(fromRoot.toArray()));
        }
        break;
      }
    }

    if (!labels1.equals(labels2)) {
      throw new RuntimeException(
          "Different sets of labels at path: "
              + Arrays.toString(fromRoot.toArray())
              + ":\n"
              + labels1
              + "\n"
              + labels2);
    }

    // recurse.
    for (char chr : labels1) {
      byte label = (byte) chr;
      // Append at the tail so that the path prints root-first in diagnostics.
      fromRoot.addLast(
          Character.isLetterOrDigit(chr) ? Character.toString(chr) : Integer.toString(chr));

      arc1 = fsa1.getArc(node1, label);
      arc2 = fsa2.getArc(node2, label);

      if (fsa1.isArcFinal(arc1) != fsa2.isArcFinal(arc2)) {
        throw new RuntimeException(
            "Different final flag on arcs at: "
                + Arrays.toString(fromRoot.toArray())
                + ", label: "
                + label);
      }

      if (fsa1.isArcTerminal(arc1) != fsa2.isArcTerminal(arc2)) {
        throw new RuntimeException(
            "Different terminal flag on arcs at: "
                + Arrays.toString(fromRoot.toArray())
                + ", label: "
                + label);
      }

      if (!fsa1.isArcTerminal(arc1)) {
        checkIdentical(
            fromRoot, fsa1, fsa1.getEndNode(arc1), visited1, fsa2, fsa2.getEndNode(arc2), visited2);
      }

      fromRoot.removeLast();
    }
  }
}


================================================
FILE: morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/FSATraversalTest.java
================================================
package morfologik.fsa.builders;

import static java.nio.charset.StandardCharsets.*;
import static morfologik.fsa.MatchResult.*;
import static org.junit.jupiter.api.Assertions.*;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.HashSet;
import morfologik.fsa.FSA;
import morfologik.fsa.FSA5;
import morfologik.fsa.FSATraversal;
import morfologik.fsa.MatchResult;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/** Tests {@link FSATraversal}: sequence matching, perfect hashing and plain arc traversal. */
public final class FSATraversalTest extends TestBase {
  /** Automaton loaded from {@code en_tst.dict} before each test. */
  private FSA fsa;

  @BeforeEach
  public void setUp() throws Exception {
    fsa = readFsa("en_tst.dict");
  }

  /**
   * Reads an automaton from a class-relative resource and closes the resource stream
   * afterwards.
   */
  private FSA readFsa(String resource) throws IOException {
    try (java.io.InputStream is = this.getClass().getResourceAsStream(resource)) {
      return FSA.read(is);
    }
  }

  /** Checks the match kind (and mismatch index) reported for a small, hand-built automaton. */
  @Test
  public void testAutomatonHasPrefixBug() throws Exception {
    FSA fsa =
        FSABuilder.build(
            Arrays.asList(
                "a".getBytes(UTF_8),
                "ab".getBytes(UTF_8),
                "abc".getBytes(UTF_8),
                "ad".getBytes(UTF_8),
                "bcd".getBytes(UTF_8),
                "bce".getBytes(UTF_8)));

    FSATraversal fsaTraversal = new FSATraversal(fsa);
    assertEquals(EXACT_MATCH, fsaTraversal.match("a".getBytes(UTF_8)).kind);
    assertEquals(EXACT_MATCH, fsaTraversal.match("ab".getBytes(UTF_8)).kind);
    assertEquals(EXACT_MATCH, fsaTraversal.match("abc".getBytes(UTF_8)).kind);
    assertEquals(EXACT_MATCH, fsaTraversal.match("ad".getBytes(UTF_8)).kind);

    assertEquals(SEQUENCE_IS_A_PREFIX, fsaTraversal.match("b".getBytes(UTF_8)).kind);
    assertEquals(SEQUENCE_IS_A_PREFIX, fsaTraversal.match("bc".getBytes(UTF_8)).kind);

    MatchResult m;

    m = fsaTraversal.match("abcd".getBytes(UTF_8));
    assertEquals(AUTOMATON_HAS_PREFIX, m.kind);
    assertEquals(3, m.index);

    m = fsaTraversal.match("ade".getBytes(UTF_8));
    assertEquals(AUTOMATON_HAS_PREFIX, m.kind);
    assertEquals(2, m.index);

    m = fsaTraversal.match("ax".getBytes(UTF_8));
    assertEquals(AUTOMATON_HAS_PREFIX, m.kind);
    assertEquals(1, m.index);

    assertEquals(NO_MATCH, fsaTraversal.match("d".getBytes(UTF_8)).kind);
  }

  /**
   * Iterates all sequences of the test dictionary, checking that each returned buffer
   * starts at array offset 0 and position 0, and that the total count is as expected.
   */
  @Test
  public void testTraversalWithIterable() {
    int count = 0;
    for (ByteBuffer bb : fsa.getSequences()) {
      assertEquals(0, bb.arrayOffset());
      assertEquals(0, bb.position());
      count++;
    }
    assertEquals(346773, count);
  }

  /**
   * Serializes a small automaton with numbers enabled and checks that
   * {@code perfectHash} returns the position of each sequence in the sorted input,
   * and a negative result code for sequences that are not in the automaton.
   */
  @Test
  public void testPerfectHash() throws IOException {
    byte[][] input =
        new byte[][] {
          {'a'}, {'a', 'b', 'a'}, {'a', 'c'}, {'b'}, {'b', 'a'}, {'c'},
        };

    Arrays.sort(input, FSABuilder.LEXICAL_ORDERING);
    FSA s = FSABuilder.build(input);

    final byte[] fsaData =
        new FSA5Serializer().withNumbers().serialize(s, new ByteArrayOutputStream()).toByteArray();

    final FSA5 fsa = FSA.read(new ByteArrayInputStream(fsaData), FSA5.class);
    final FSATraversal traversal = new FSATraversal(fsa);

    int i = 0;
    for (byte[] seq : input) {
      Assertions.assertEquals(i++, traversal.perfectHash(seq));
    }

    // Check if the total number of sequences is encoded at the root node.
    assertEquals(6, fsa.getRightLanguageCount(fsa.getRootNode()));

    // Check sub/super sequence scenarios.
    assertEquals(AUTOMATON_HAS_PREFIX, traversal.perfectHash("abax".getBytes(UTF_8)));
    assertEquals(AUTOMATON_HAS_PREFIX, traversal.perfectHash("abx".getBytes(UTF_8)));
    assertEquals(SEQUENCE_IS_A_PREFIX, traversal.perfectHash("ab".getBytes(UTF_8)));
    assertEquals(NO_MATCH, traversal.perfectHash("d".getBytes(UTF_8)));
    assertEquals(NO_MATCH, traversal.perfectHash(new byte[] {0}));

    // Result codes must be negative so they cannot be confused with a hash value.
    assertTrue(AUTOMATON_HAS_PREFIX < 0);
    assertTrue(SEQUENCE_IS_A_PREFIX < 0);
    assertTrue(NO_MATCH < 0);
  }

  /**
   * Walks the test dictionary recursively, arc by arc, counting final arcs; the count
   * must equal the number of sequences in the dictionary.
   */
  @Test
  public void testRecursiveTraversal() {
    // Single-element array so that the local class below can mutate the counter.
    final int[] counter = new int[] {0};

    class Recursion {
      public void dumpNode(final int node) {
        int arc = fsa.getFirstArc(node);
        do {
          if (fsa.isArcFinal(arc)) {
            counter[0]++;
          }

          if (!fsa.isArcTerminal(arc)) {
            dumpNode(fsa.getEndNode(arc));
          }

          arc = fsa.getNextArc(arc);
        } while (arc != 0);
      }
    }

    new Recursion().dumpNode(fsa.getRootNode());

    assertEquals(346773, counter[0]);
  }

  /** Checks match kind, mismatch index and the node returned for partial matches. */
  @Test
  public void testMatch() throws IOException {
    final FSA fsa = readFsa("abc.fsa");
    final FSATraversal traversalHelper = new FSATraversal(fsa);

    // Encode explicitly as UTF-8 (as elsewhere in this class) rather than relying on
    // the platform default charset.
    MatchResult m = traversalHelper.match("ax".getBytes(UTF_8));
    assertEquals(AUTOMATON_HAS_PREFIX, m.kind);
    assertEquals(1, m.index);
    assertEquals(new HashSet<String>(Arrays.asList("ba", "c")), suffixes(fsa, m.node));

    assertEquals(EXACT_MATCH, traversalHelper.match("aba".getBytes(UTF_8)).kind);

    m = traversalHelper.match("abalonger".getBytes(UTF_8));
    assertEquals(AUTOMATON_HAS_PREFIX, m.kind);
    assertEquals("longer", "abalonger".substring(m.index));

    m = traversalHelper.match("ab".getBytes(UTF_8));
    assertEquals(SEQUENCE_IS_A_PREFIX, m.kind);
    assertEquals(new HashSet<String>(Arrays.asList("a")), suffixes(fsa, m.node));
  }

  /** Return all sequences reachable from a given node, as strings. */
  private HashSet<String> suffixes(FSA fsa, int node) {
    HashSet<String> result = new HashSet<String>();
    for (ByteBuffer bb : fsa.getSequences(node)) {
      result.add(new String(bb.array(), bb.position(), bb.remaining(), UTF_8));
    }
    return result;
  }
}


================================================
FILE: morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/MinMax.java
================================================
package morfologik.fsa.builders;

/**
 * An immutable pair of integer bounds. The constructor accepts the two values in any
 * order and stores the smaller one as {@link #min} and the larger one as {@link #max}.
 */
final class MinMax {
  public final int min;
  public final int max;

  MinMax(int min, int max) {
    // Normalize the argument order so that min <= max always holds.
    if (min <= max) {
      this.min = min;
      this.max = max;
    } else {
      this.min = max;
      this.max = min;
    }
  }

  /** Returns the distance between the bounds, {@code max - min}. */
  public int range() {
    return this.max - this.min;
  }
}


================================================
FILE: morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/SerializerTestBase.java
================================================
package morfologik.fsa.builders;

import static morfologik.fsa.FSAFlags.*;
import static org.junit.jupiter.api.Assertions.*;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import morfologik.fsa.FSA;
import morfologik.fsa.FSAFlags;
import org.junit.jupiter.api.Assumptions;
import org.junit.jupiter.api.Test;

public abstract class SerializerTestBase extends TestBase {
  /** Round-trips an automaton that contains the single one-byte sequence {@code a}. */
  @Test
  public void testA() throws IOException {
    final byte[][] input = {{'a'}};
    Arrays.sort(input, FSABuilder.LEXICAL_ORDERING);

    checkSerialization(input, FSABuilder.build(input));
  }

  /** Round-trips an automaton built from sequences with common two-byte endings. */
  @Test
  public void testArcsSharing() throws IOException {
    final byte[][] input = {
      {'a', 'c', 'f'},
      {'a', 'd', 'g'},
      {'a', 'e', 'h'},
      {'b', 'd', 'g'},
      {'b', 'e', 'h'},
    };
    Arrays.sort(input, FSABuilder.LEXICAL_ORDERING);

    checkSerialization(input, FSABuilder.build(input));
  }

  /** Round-trips a small automaton in which some sequences are prefixes of others. */
  @Test
  public void testFSA5SerializerSimple() throws IOException {
    final byte[][] input = {
      {'a'}, {'a', 'b', 'a'}, {'a', 'c'}, {'b'}, {'b', 'a'}, {'c'},
    };
    Arrays.sort(input, FSABuilder.LEXICAL_ORDERING);

    checkSerialization(input, FSABuilder.build(input));
  }

  /** Round-trips the automaton built from {@code aba}, {@code b} and {@code ba}. */
  @Test
  public void testNotMinimal() throws IOException {
    final byte[][] input = {
      {'a', 'b', 'a'},
      {'b'},
      {'b', 'a'},
    };
    Arrays.sort(input, FSABuilder.LEXICAL_ORDERING);

    checkSerialization(input, FSABuilder.build(input));
  }

  /** Regression input: four tagged entries, round-tripped through the serializer. */
  @Test
  public void testFSA5Bug0() throws IOException {
    final String[] entries = {
      "3-D+A+JJ", "3-D+A+NN", "4-F+A+NN", "z+A+NN",
    };
    checkCorrect(entries);
  }

  /** Regression input: three short tagged entries, round-tripped through the serializer. */
  @Test
  public void testFSA5Bug1() throws IOException {
    final String[] entries = {
      "+NP", "n+N", "n+NP",
    };
    checkCorrect(entries);
  }

  /**
   * Encodes the given strings as ISO-8859-1 bytes, sorts them, builds an automaton
   * and verifies that it survives serialization.
   */
  private void checkCorrect(String[] strings) throws IOException {
    final byte[][] input = new byte[strings.length][];
    int next = 0;
    for (String entry : strings) {
      input[next++] = entry.getBytes("ISO8859-1");
    }
    Arrays.sort(input, FSABuilder.LEXICAL_ORDERING);

    checkSerialization(input, FSABuilder.build(input));
  }

  /** Round-trips the automaton built from no sequences at all. */
  @Test
  public void testEmptyInput() throws IOException {
    final byte[][] nothing = new byte[0][];
    checkSerialization(nothing, FSABuilder.build(nothing));
  }

  /** Rebuilds and round-trips the sequences stored in the {@code abc.fsa} resource. */
  @Test
  public void test_abc() throws IOException {
    // Close the resource stream once the automaton has been read.
    try (java.io.InputStream is = FSA5Test.class.getResourceAsStream("abc.fsa")) {
      testBuiltIn(FSA.read(is));
    }
  }

  /** Rebuilds and round-trips the sequences stored in the {@code minimal.fsa} resource. */
  @Test
  public void test_minimal() throws IOException {
    // Close the resource stream once the automaton has been read.
    try (java.io.InputStream is = FSA5Test.class.getResourceAsStream("minimal.fsa")) {
      testBuiltIn(FSA.read(is));
    }
  }

  /** Rebuilds and round-trips the sequences stored in the {@code minimal2.fsa} resource. */
  @Test
  public void test_minimal2() throws IOException {
    // Close the resource stream once the automaton has been read.
    try (java.io.InputStream is = FSA5Test.class.getResourceAsStream("minimal2.fsa")) {
      testBuiltIn(FSA.read(is));
    }
  }

  /** Rebuilds and round-trips the sequences stored in the {@code en_tst.dict} resource. */
  @Test
  public void test_en_tst() throws IOException {
    // Close the resource stream once the automaton has been read.
    try (java.io.InputStream is = FSA5Test.class.getResourceAsStream("en_tst.dict")) {
      testBuiltIn(FSA.read(is));
    }
  }

  /**
   * Extracts all sequences from an existing automaton, rebuilds an automaton from
   * them and verifies both the rebuilt automaton and its serialized form.
   *
   * @param fsa the automaton to take the sequences from
   */
  private void testBuiltIn(FSA fsa) throws IOException {
    final ArrayList<byte[]> sequences = new ArrayList<byte[]>();
    for (ByteBuffer bb : fsa) {
      // Copy the remaining bytes, honoring the buffer's array offset and position.
      final int from = bb.arrayOffset() + bb.position();
      sequences.add(Arrays.copyOfRange(bb.array(), from, from + bb.remaining()));
    }

    // The builder is fed sorted input.
    Collections.sort(sequences, FSABuilder.LEXICAL_ORDERING);

    final byte[][] in = sequences.toArray(new byte[sequences.size()][]);
    FSA root = FSABuilder.build(in);

    // Check if the DFSA is correct first.
    FSATestUtils.checkCorrect(in, root);

    // Check serialization.
    checkSerialization(in, root);
  }

  /**
   * Verifies a serialization round-trip with a default serializer and, when the
   * serializer's flags contain {@link FSAFlags#NUMBERS}, once more with numbers enabled.
   */
  private void checkSerialization(byte[][] input, FSA root) throws IOException {
    // Plain round-trip first.
    final FSASerializer plain = createSerializer();
    checkSerialization0(plain, input, root);

    final boolean hasNumbersFlag = createSerializer().getFlags().contains(FSAFlags.NUMBERS);
    if (!hasNumbersFlag) {
      return;
    }

    // A fresh serializer instance, this time with numbers enabled.
    checkSerialization0(createSerializer().withNumbers(), input, root);
  }

  /**
   * Serializes {@code root} to memory with the given serializer, reads the result back
   * and checks that the re-read automaton is correct with respect to {@code in}.
   */
  private void checkSerialization0(FSASerializer serializer, final byte[][] in, FSA root)
      throws IOException {
    final ByteArrayOutputStream sink = serializer.serialize(root, new ByteArrayOutputStream());
    final byte[] serialized = sink.toByteArray();

    final FSA reloaded = FSA.read(new ByteArrayInputStream(serialized));
    checkCorrect(in, reloaded);
  }

  /*
   * Check if the FSA is correct with respect to the given input.
   */
  protected void checkCorrect(byte[][] input, FSA fsa) {
    // (1) All input sequences are in the right language.
    HashSet<ByteBuffer> rl = new HashSet<ByteBuffer>();
    for (ByteBuffer bb : fsa) {
      byte[] array = bb.array();
      int length = bb.remaining();
      rl.add(ByteBuffer.wrap(A
Download .txt
gitextract_1f6qqk15/

├── .github/
│   └── workflows/
│       └── ci.yml
├── .gitignore
├── CHANGES.txt
├── CONTRIBUTING.txt
├── LICENSE.txt
├── README.txt
├── etc/
│   ├── eclipse/
│   │   └── settings/
│   │       ├── org.eclipse.jdt.core.prefs
│   │       └── org.eclipse.m2e.core.prefs
│   └── forbidden-apis/
│       └── signatures.txt
├── morfologik-fsa/
│   ├── pom.xml
│   └── src/
│       └── main/
│           └── java/
│               └── morfologik/
│                   └── fsa/
│                       ├── ByteSequenceIterator.java
│                       ├── CFSA.java
│                       ├── CFSA2.java
│                       ├── FSA.java
│                       ├── FSA5.java
│                       ├── FSAFlags.java
│                       ├── FSAHeader.java
│                       ├── FSATraversal.java
│                       ├── MatchResult.java
│                       └── StateVisitor.java
├── morfologik-fsa-builders/
│   ├── pom.xml
│   └── src/
│       ├── main/
│       │   └── java/
│       │       └── morfologik/
│       │           └── fsa/
│       │               └── builders/
│       │                   ├── CFSA2Serializer.java
│       │                   ├── ConstantArcSizeFSA.java
│       │                   ├── FSA5Serializer.java
│       │                   ├── FSABuilder.java
│       │                   ├── FSAInfo.java
│       │                   ├── FSASerializer.java
│       │                   └── FSAUtils.java
│       └── test/
│           ├── java/
│           │   └── morfologik/
│           │       └── fsa/
│           │           └── builders/
│           │               ├── CFSA2SerializerTest.java
│           │               ├── FSA5SerializerTest.java
│           │               ├── FSA5Test.java
│           │               ├── FSABuilderTest.java
│           │               ├── FSATestUtils.java
│           │               ├── FSATraversalTest.java
│           │               ├── MinMax.java
│           │               ├── SerializerTestBase.java
│           │               └── TestBase.java
│           └── resources/
│               └── morfologik/
│                   └── fsa/
│                       └── builders/
│                           ├── abc-numbers.fsa
│                           ├── abc.fsa
│                           ├── abc.in
│                           ├── en_tst.dict
│                           ├── minimal.fsa
│                           ├── minimal.in
│                           ├── minimal2.fsa
│                           └── minimal2.in
├── morfologik-polish/
│   ├── pom.xml
│   └── src/
│       ├── main/
│       │   ├── java/
│       │   │   └── morfologik/
│       │   │       └── stemming/
│       │   │           └── polish/
│       │   │               └── PolishStemmer.java
│       │   └── resources/
│       │       └── morfologik/
│       │           └── stemming/
│       │               └── polish/
│       │                   ├── polish.LICENSE.Polish.txt
│       │                   ├── polish.LICENSE.txt
│       │                   ├── polish.README.Polish.txt
│       │                   ├── polish.README.txt
│       │                   ├── polish.dict
│       │                   └── polish.info
│       └── test/
│           └── java/
│               └── morfologik/
│                   └── stemming/
│                       └── polish/
│                           ├── Gh27Test.java
│                           └── PolishMorfologikStemmerTest.java
├── morfologik-speller/
│   ├── pom.xml
│   └── src/
│       ├── main/
│       │   └── java/
│       │       └── morfologik/
│       │           └── speller/
│       │               ├── HMatrix.java
│       │               └── Speller.java
│       └── test/
│           ├── java/
│           │   └── morfologik/
│           │       └── speller/
│           │           ├── HMatrixTest.java
│           │           └── SpellerTest.java
│           └── resources/
│               └── morfologik/
│                   └── speller/
│                       ├── dict-with-freq.dict
│                       ├── dict-with-freq.info
│                       ├── dict-with-freq.txt
│                       ├── issue38.dict
│                       ├── issue38.info
│                       ├── issue38.input
│                       ├── issue94.dict
│                       ├── issue94.info
│                       ├── pissara-test.dict
│                       ├── pissara-test.info
│                       ├── pissara-test.txt
│                       ├── reps_dist2.dict
│                       ├── reps_dist2.info
│                       ├── reps_dist2.txt
│                       ├── single-char-word.dict
│                       ├── single-char-word.info
│                       ├── slownik.dict
│                       ├── slownik.info
│                       ├── test-infix.dict
│                       ├── test-infix.info
│                       ├── test-utf-spell.dict
│                       ├── test-utf-spell.info
│                       ├── test_freq_iso.dict
│                       └── test_freq_iso.info
├── morfologik-stemming/
│   ├── pom.xml
│   └── src/
│       ├── main/
│       │   └── java/
│       │       └── morfologik/
│       │           └── stemming/
│       │               ├── ArrayViewList.java
│       │               ├── BufferUtils.java
│       │               ├── Dictionary.java
│       │               ├── DictionaryAttribute.java
│       │               ├── DictionaryIterator.java
│       │               ├── DictionaryLookup.java
│       │               ├── DictionaryMetadata.java
│       │               ├── DictionaryMetadataBuilder.java
│       │               ├── EncoderType.java
│       │               ├── ISequenceEncoder.java
│       │               ├── IStemmer.java
│       │               ├── NoEncoder.java
│       │               ├── TrimInfixAndSuffixEncoder.java
│       │               ├── TrimPrefixAndSuffixEncoder.java
│       │               ├── TrimSuffixEncoder.java
│       │               ├── UnmappableInputException.java
│       │               └── WordData.java
│       └── test/
│           ├── java/
│           │   └── morfologik/
│           │       └── stemming/
│           │           ├── DictionaryLookupTest.java
│           │           ├── DictionaryMetadataBuilderTest.java
│           │           ├── DictionaryMetadataTest.java
│           │           ├── DictionaryTest.java
│           │           ├── EncodersTest.java
│           │           └── SequenceEncodersTest.java
│           └── resources/
│               └── morfologik/
│                   └── stemming/
│                       ├── escape-separator.info
│                       ├── test-diacritics-utf8.dict
│                       ├── test-diacritics-utf8.info
│                       ├── test-infix.dict
│                       ├── test-infix.info
│                       ├── test-prefix.dict
│                       ├── test-prefix.info
│                       ├── test-removed-props.dict
│                       ├── test-removed-props.info
│                       ├── test-separator-in-lookup.fsa
│                       ├── test-separator-in-lookup.in
│                       ├── test-separators.dict
│                       ├── test-separators.info
│                       ├── test-separators.txt
│                       ├── test-synth.dict
│                       ├── test-synth.info
│                       └── unicode-separator.info
├── morfologik-tools/
│   ├── pom.xml
│   └── src/
│       ├── main/
│       │   ├── assembly/
│       │   │   └── package.xml
│       │   ├── java/
│       │   │   └── morfologik/
│       │   │       └── tools/
│       │   │           ├── BinaryInput.java
│       │   │           ├── CliTool.java
│       │   │           ├── CustomParameterConverters.java
│       │   │           ├── DictApply.java
│       │   │           ├── DictCompile.java
│       │   │           ├── DictDecompile.java
│       │   │           ├── ExitStatus.java
│       │   │           ├── ExitStatusException.java
│       │   │           ├── FSABuild.java
│       │   │           ├── FSACompile.java
│       │   │           ├── FSADecompile.java
│       │   │           ├── FSADump.java
│       │   │           ├── FSAInfo.java
│       │   │           ├── Launcher.java
│       │   │           ├── SerializationFormat.java
│       │   │           ├── ValidateFileExists.java
│       │   │           └── ValidateParentDirExists.java
│       │   └── package/
│       │       ├── README.txt
│       │       └── examples/
│       │           ├── 01-fsa-build.input
│       │           ├── 01-fsa-build.txt
│       │           ├── 02-fsa-dump.txt
│       │           ├── 03-fsa-info.txt
│       │           ├── 04-dict-compile.info
│       │           ├── 04-dict-compile.input
│       │           ├── 04-dict-compile.txt
│       │           └── 05-dict-decompile.txt
│       └── test/
│           └── java/
│               └── morfologik/
│                   └── tools/
│                       ├── DictCompileBug.java
│                       ├── DictCompileTest.java
│                       └── FSACompileTest.java
└── pom.xml
Download .txt
SYMBOL INDEX (659 symbols across 76 files)

FILE: morfologik-fsa-builders/src/main/java/morfologik/fsa/builders/CFSA2Serializer.java
  class CFSA2Serializer (line 38) | public final class CFSA2Serializer implements FSASerializer {
    method withNumbers (line 79) | public CFSA2Serializer withNumbers() {
    method serialize (line 90) | @Override
    method computeLabelsIndex (line 140) | private void computeLabelsIndex(final FSA fsa) {
    method getFlags (line 183) | @Override
    method linearize (line 189) | private IntArrayList linearize(final FSA fsa) throws IOException {
    method log (line 248) | private void log(Level level, String msg, Object... args) {
    method linearizeAndCalculateOffsets (line 256) | private int linearizeAndCalculateOffsets(
    method linearizeState (line 298) | private void linearizeState(
    method computeFirstStates (line 313) | private int[] computeFirstStates(IntIntHashMap inlinkCount, int maxSta...
    method computeInlinkCount (line 348) | private IntIntHashMap computeInlinkCount(final FSA fsa) {
    method emitNodes (line 373) | private int emitNodes(FSA fsa, OutputStream os, IntArrayList linearize...
    method emitNodeArcs (line 403) | private int emitNodeArcs(FSA fsa, OutputStream os, final int state, fi...
    method emitArc (line 440) | private int emitArc(OutputStream os, int flags, byte label, int target...
    method emitNodeData (line 467) | private int emitNodeData(OutputStream os, int number) throws IOExcepti...
    method withFiller (line 481) | @Override
    method withAnnotationSeparator (line 487) | @Override
    method writeVInt (line 493) | static int writeVInt(byte[] array, int offset, int value) {

FILE: morfologik-fsa-builders/src/main/java/morfologik/fsa/builders/ConstantArcSizeFSA.java
  class ConstantArcSizeFSA (line 13) | final class ConstantArcSizeFSA extends FSA {
    method ConstantArcSizeFSA (line 56) | ConstantArcSizeFSA(byte[] data, int epsilon) {
    method getRootNode (line 63) | @Override
    method getFirstArc (line 68) | @Override
    method getArc (line 73) | @Override
    method getNextArc (line 81) | @Override
    method getArcLabel (line 87) | @Override
    method getArcTarget (line 93) | private int getArcTarget(int arc) {
    method isArcFinal (line 101) | @Override
    method isArcTerminal (line 106) | @Override
    method isArcLast (line 111) | private boolean isArcLast(int arc) {
    method getEndNode (line 115) | @Override
    method getFlags (line 120) | @Override

FILE: morfologik-fsa-builders/src/main/java/morfologik/fsa/builders/FSA5Serializer.java
  class FSA5Serializer (line 29) | public final class FSA5Serializer implements FSASerializer {
    method withNumbers (line 73) | public FSA5Serializer withNumbers() {
    method withFiller (line 79) | @Override
    method withAnnotationSeparator (line 86) | @Override
    method serialize (line 98) | @Override
    method getFlags (line 151) | @Override
    method linearize (line 157) | private int[] linearize(final FSA fsa) {
    method emitArcs (line 190) | private boolean emitArcs(FSA fsa, OutputStream os, int[] linearized, i...
    method emitArc (line 256) | private int emitArc(
    method emitNodeData (line 284) | private int emitNodeData(ByteBuffer bb, OutputStream os, int nodeDataL...

FILE: morfologik-fsa-builders/src/main/java/morfologik/fsa/builders/FSABuilder.java
  class FSABuilder (line 15) | public final class FSABuilder {
    type InfoEntry (line 21) | public enum InfoEntry {
      method InfoEntry (line 32) | InfoEntry(String stringified) {
      method toString (line 36) | @Override
    method compare (line 54) | public int compare(byte[] o1, byte[] o2) {
    method FSABuilder (line 116) | public FSABuilder() {
    method FSABuilder (line 123) | public FSABuilder(int bufferGrowthSize) {
    method add (line 143) | public void add(byte[] sequence, int start, int len) {
    method complete (line 188) | public FSA complete() {
    method build (line 224) | public static FSA build(byte[][] input) {
    method build (line 240) | public static FSA build(Iterable<byte[]> input) {
    method getInfo (line 254) | public Map<InfoEntry, Object> getInfo() {
    method isArcLast (line 259) | private boolean isArcLast(int arc) {
    method isArcFinal (line 264) | private boolean isArcFinal(int arc) {
    method getArcLabel (line 269) | private byte getArcLabel(int arc) {
    method setArcTarget (line 274) | private void setArcTarget(int arc, int state) {
    method getArcTarget (line 283) | private int getArcTarget(int arc) {
    method commonPrefix (line 294) | private int commonPrefix(byte[] sequence, int start, int len) {
    method freezeState (line 313) | private int freezeState(final int activePathIndex) {
    method expandAndRehash (line 339) | private void expandAndRehash() {
    method stateLength (line 357) | private int stateLength(int state) {
    method equivalent (line 366) | private boolean equivalent(int start1, int start2, int len) {
    method serialize (line 375) | private int serialize(final int activePathIndex) {
    method hash (line 388) | private int hash(int start, int byteCount) {
    method expandActivePath (line 402) | private void expandActivePath(int size) {
    method expandBuffers (line 415) | private void expandBuffers() {
    method allocateState (line 427) | private int allocateState(int labels) {
    method setPrevious (line 435) | private boolean setPrevious(byte[] sequence, int start, int length) {
    method compare (line 449) | private static int compare(byte[] s1, int start1, int lens1, byte[] s2...

FILE: morfologik-fsa-builders/src/main/java/morfologik/fsa/builders/FSAInfo.java
  class FSAInfo (line 9) | public final class FSAInfo {
    class NodeVisitor (line 11) | private static class NodeVisitor {
      method NodeVisitor (line 21) | NodeVisitor(FSA fsa) {
      method visitNode (line 25) | public void visitNode(final int node) {
    class FinalStateVisitor (line 47) | private static class FinalStateVisitor {
      method FinalStateVisitor (line 52) | FinalStateVisitor(FSA fsa) {
      method visitNode (line 56) | public int visitNode(int node) {
    method FSAInfo (line 96) | public FSAInfo(FSA fsa) {
    method FSAInfo (line 120) | public FSAInfo(int nodeCount, int arcsCount, int arcsCountTotal, int f...
    method toString (line 131) | @Override

FILE: morfologik-fsa-builders/src/main/java/morfologik/fsa/builders/FSASerializer.java
  type FSASerializer (line 10) | public interface FSASerializer {
    method serialize (line 20) | public <T extends OutputStream> T serialize(FSA fsa, T os) throws IOEx...
    method getFlags (line 25) | public Set<FSAFlags> getFlags();
    method withFiller (line 33) | public FSASerializer withFiller(byte filler);
    method withAnnotationSeparator (line 42) | public FSASerializer withAnnotationSeparator(byte annotationSeparator);
    method withNumbers (line 50) | public FSASerializer withNumbers();

FILE: morfologik-fsa-builders/src/main/java/morfologik/fsa/builders/FSAUtils.java
  class FSAUtils (line 15) | public final class FSAUtils {
    class IntIntHolder (line 16) | public static final class IntIntHolder {
      method IntIntHolder (line 20) | public IntIntHolder(int a, int b) {
      method IntIntHolder (line 25) | public IntIntHolder() {}
    method toDot (line 36) | public static String toDot(FSA fsa, int node) {
    method toDot (line 55) | public static void toDot(Writer w, FSA fsa, int node) throws IOExcepti...
    method visitNode (line 69) | private static void visitNode(Writer w, int d, FSA fsa, int s, BitSet ...
    method calculateFanOuts (line 128) | public static TreeMap<Integer, Integer> calculateFanOuts(final FSA fsa...
    method rightLanguageForAllStates (line 169) | public static IntIntHashMap rightLanguageForAllStates(final FSA fsa) {

FILE: morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/CFSA2SerializerTest.java
  class CFSA2SerializerTest (line 4) | public class CFSA2SerializerTest extends SerializerTestBase {
    method createSerializer (line 5) | protected CFSA2Serializer createSerializer() {

FILE: morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/FSA5SerializerTest.java
  class FSA5SerializerTest (line 4) | public class FSA5SerializerTest extends SerializerTestBase {
    method createSerializer (line 5) | protected FSA5Serializer createSerializer() {

FILE: morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/FSA5Test.java
  class FSA5Test (line 18) | public final class FSA5Test extends TestBase {
    method testVersion5 (line 21) | @Test
    method testVersion5WithNumbers (line 28) | @Test
    method testArcsAndNodes (line 36) | @Test
    method testNumbers (line 51) | @Test
    method walkNode (line 66) | public static void walkNode(
    method verifyContent (line 87) | private static void verifyContent(List<String> expected, FSA fsa) thro...

FILE: morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/FSABuilderTest.java
  class FSABuilderTest (line 13) | public class FSABuilderTest extends TestBase {
    method prepareByteInput (line 17) | @BeforeAll
    method testEmptyInput (line 23) | @Test
    method testHashResizeBug (line 29) | @Test
    method testSmallInput (line 40) | @Test
    method testLexicographicOrder (line 48) | @Test
    method testRandom25000_largerAlphabet (line 71) | @Test
    method testRandom25000_smallAlphabet (line 78) | @Test

FILE: morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/FSATestUtils.java
  class FSATestUtils (line 10) | public class FSATestUtils {
    method generateRandom (line 14) | public static byte[][] generateRandom(Random rnd, int count, MinMax le...
    method randomByteSequence (line 24) | private static byte[] randomByteSequence(Random rnd, MinMax length, Mi...
    method checkCorrect (line 35) | public static void checkCorrect(byte[][] input, FSA fsa) {
    method checkMinimal (line 61) | public static void checkMinimal(final FSA fsa) {
    method allSequences (line 89) | static List<byte[]> allSequences(FSA fsa, int state) {
    method checkIdentical (line 100) | public static void checkIdentical(FSA fsa1, FSA fsa2) {
    method checkIdentical (line 109) | static void checkIdentical(

FILE: morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/FSATraversalTest.java
  class FSATraversalTest (line 22) | public final class FSATraversalTest extends TestBase {
    method setUp (line 25) | @BeforeEach
    method testAutomatonHasPrefixBug (line 30) | @Test
    method testTraversalWithIterable (line 68) | @Test
    method testPerfectHash (line 79) | @Test
    method testRecursiveTraversal (line 116) | @Test
    method testMatch (line 142) | @Test
    method suffixes (line 164) | private HashSet<String> suffixes(FSA fsa, int node) {

FILE: morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/MinMax.java
  class MinMax (line 4) | final class MinMax {
    method MinMax (line 8) | MinMax(int min, int max) {
    method range (line 13) | public int range() {

FILE: morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/SerializerTestBase.java
  class SerializerTestBase (line 19) | public abstract class SerializerTestBase extends TestBase {
    method testA (line 20) | @Test
    method testArcsSharing (line 33) | @Test
    method testFSA5SerializerSimple (line 50) | @Test
    method testNotMinimal (line 63) | @Test
    method testFSA5Bug0 (line 78) | @Test
    method testFSA5Bug1 (line 86) | @Test
    method checkCorrect (line 94) | private void checkCorrect(String[] strings) throws IOException {
    method testEmptyInput (line 106) | @Test
    method test_abc (line 114) | @Test
    method test_minimal (line 119) | @Test
    method test_minimal2 (line 124) | @Test
    method test_en_tst (line 129) | @Test
    method testBuiltIn (line 134) | private void testBuiltIn(FSA fsa) throws IOException {
    method checkSerialization (line 154) | private void checkSerialization(byte[][] input, FSA root) throws IOExc...
    method checkSerialization0 (line 161) | private void checkSerialization0(FSASerializer serializer, final byte[...
    method checkCorrect (line 172) | protected void checkCorrect(byte[][] input, FSA fsa) {
    method testAutomatonWithNodeNumbers (line 197) | @Test
    method createSerializer (line 226) | protected abstract FSASerializer createSerializer();
    method toString (line 231) | public static String toString(ByteBuffer sequence) {

FILE: morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/TestBase.java
  class TestBase (line 7) | @Randomized
    class CustomThreadFilter (line 13) | public static class CustomThreadFilter implements Predicate<Thread> {
      method test (line 14) | @Override

FILE: morfologik-fsa/src/main/java/morfologik/fsa/ByteSequenceIterator.java
  class ByteSequenceIterator (line 10) | public final class ByteSequenceIterator implements Iterator<ByteBuffer> {
    method ByteSequenceIterator (line 40) | public ByteSequenceIterator(FSA fsa) {
    method ByteSequenceIterator (line 50) | public ByteSequenceIterator(FSA fsa, int node) {
    method restartFrom (line 64) | public ByteSequenceIterator restartFrom(int node) {
    method hasNext (line 74) | @Override
    method next (line 87) | @Override
    method advance (line 103) | private final ByteBuffer advance() {
    method remove (line 146) | @Override
    method pushNode (line 152) | private void pushNode(int node) {

FILE: morfologik-fsa/src/main/java/morfologik/fsa/CFSA.java
  class CFSA (line 107) | public final class CFSA extends FSA {
    method CFSA (line 154) | CFSA(InputStream stream) throws IOException {
    method getRootNode (line 192) | @Override
    method getFirstArc (line 202) | @Override
    method getNextArc (line 208) | @Override
    method getArc (line 215) | @Override
    method getEndNode (line 226) | @Override
    method getArcLabel (line 236) | @Override
    method getRightLanguageCount (line 246) | @Override
    method isArcFinal (line 253) | @Override
    method isArcTerminal (line 259) | @Override
    method isArcLast (line 271) | public boolean isArcLast(int arc) {
    method isNextSet (line 280) | public boolean isNextSet(int arc) {
    method isLabelCompressed (line 288) | public boolean isLabelCompressed(int arc) {
    method getFlags (line 299) | public Set<FSAFlags> getFlags() {
    method getDestinationNodeOffset (line 304) | final int getDestinationNodeOffset(int arc) {
    method skipArc (line 323) | private int skipArc(int offset) {

FILE: morfologik-fsa/src/main/java/morfologik/fsa/CFSA2.java
  class CFSA2 (line 87) | public final class CFSA2 extends FSA {
    method CFSA2 (line 131) | CFSA2(InputStream stream) throws IOException {
    method getRootNode (line 163) | @Override
    method getFirstArc (line 170) | @Override
    method getNextArc (line 180) | @Override
    method getArc (line 190) | @Override
    method getEndNode (line 203) | @Override
    method getArcLabel (line 212) | @Override
    method getRightLanguageCount (line 223) | @Override
    method isArcFinal (line 230) | @Override
    method isArcTerminal (line 236) | @Override
    method isArcLast (line 248) | public boolean isArcLast(int arc) {
    method isNextSet (line 257) | public boolean isNextSet(int arc) {
    method getFlags (line 262) | public Set<FSAFlags> getFlags() {
    method getDestinationNodeOffset (line 267) | final int getDestinationNodeOffset(int arc) {
    method skipArc (line 286) | private int skipArc(int offset) {
    method readVInt (line 304) | static int readVInt(byte[] array, int offset) {
    method vIntLength (line 317) | static int vIntLength(int value) {
    method skipVInt (line 329) | private int skipVInt(int offset) {

FILE: morfologik-fsa/src/main/java/morfologik/fsa/FSA.java
  class FSA (line 19) | public abstract class FSA implements Iterable<ByteBuffer> {
    method getRootNode (line 24) | public abstract int getRootNode();
    method getFirstArc (line 31) | public abstract int getFirstArc(int node);
    method getNextArc (line 38) | public abstract int getNextArc(int arc);
    method getArc (line 47) | public abstract int getArc(int node, byte label);
    method getArcLabel (line 53) | public abstract byte getArcLabel(int arc);
    method isArcFinal (line 60) | public abstract boolean isArcFinal(int arc);
    method isArcTerminal (line 67) | public abstract boolean isArcTerminal(int arc);
    method getEndNode (line 74) | public abstract int getEndNode(int arc);
    method getFlags (line 79) | public abstract Set<FSAFlags> getFlags();
    method getArcCount (line 85) | public int getArcCount(int node) {
    method getRightLanguageCount (line 101) | public int getRightLanguageCount(int node) {
    method getSequences (line 120) | public Iterable<ByteBuffer> getSequences(final int node) {
    method getSequences (line 137) | public final Iterable<ByteBuffer> getSequences() {
    method iterator (line 150) | public final Iterator<ByteBuffer> iterator() {
    method visitAllStates (line 163) | public <T extends StateVisitor> T visitAllStates(T v) {
    method visitInPostOrder (line 174) | public <T extends StateVisitor> T visitInPostOrder(T v) {
    method visitInPostOrder (line 187) | public <T extends StateVisitor> T visitInPostOrder(T v, int node) {
    method visitInPostOrder (line 193) | private boolean visitInPostOrder(StateVisitor v, int node, BitSet visi...
    method visitInPreOrder (line 213) | public <T extends StateVisitor> T visitInPreOrder(T v) {
    method visitInPreOrder (line 226) | public <T extends StateVisitor> T visitInPreOrder(T v, int node) {
    method readRemaining (line 236) | protected static final byte[] readRemaining(InputStream in) throws IOE...
    method visitInPreOrder (line 247) | private void visitInPreOrder(StateVisitor v, int node, BitSet visited) {
    method read (line 270) | public static FSA read(InputStream stream) throws IOException {
    method read (line 298) | public static <T extends FSA> T read(InputStream stream, Class<? exten...

FILE: morfologik-fsa/src/main/java/morfologik/fsa/FSA5.java
  class FSA5 (line 65) | public final class FSA5 extends FSA {
    method FSA5 (line 124) | FSA5(InputStream stream) throws IOException {
    method getRootNode (line 149) | @Override
    method getFirstArc (line 159) | @Override
    method getNextArc (line 165) | @Override
    method getArc (line 172) | @Override
    method getEndNode (line 183) | @Override
    method getArcLabel (line 191) | @Override
    method isArcFinal (line 197) | @Override
    method isArcTerminal (line 203) | @Override
    method getRightLanguageCount (line 212) | @Override
    method getFlags (line 224) | @Override
    method isArcLast (line 236) | public boolean isArcLast(int arc) {
    method isNextSet (line 245) | public boolean isNextSet(int arc) {
    method decodeFromBytes (line 250) | static final int decodeFromBytes(final byte[] arcs, final int start, f...
    method getDestinationNodeOffset (line 259) | final int getDestinationNodeOffset(int arc) {
    method skipArc (line 273) | private int skipArc(int offset) {

FILE: morfologik-fsa/src/main/java/morfologik/fsa/FSAFlags.java
  type FSAFlags (line 6) | public enum FSAFlags {
    method FSAFlags (line 40) | private FSAFlags(int bits) {
    method isSet (line 48) | public boolean isSet(int flags) {
    method asShort (line 56) | public static short asShort(Set<FSAFlags> flags) {

FILE: morfologik-fsa/src/main/java/morfologik/fsa/FSAHeader.java
  class FSAHeader (line 8) | public final class FSAHeader {
    method FSAHeader (line 18) | FSAHeader(byte version) {
    method read (line 29) | public static FSAHeader read(InputStream in) throws IOException {
    method write (line 52) | public static void write(OutputStream os, byte version) throws IOExcep...

FILE: morfologik-fsa/src/main/java/morfologik/fsa/FSATraversal.java
  class FSATraversal (line 6) | public final class FSATraversal {
    method FSATraversal (line 15) | public FSATraversal(FSA fsa) {
    method perfectHash (line 34) | public int perfectHash(byte[] sequence, int start, int length, int nod...
    method perfectHash (line 98) | public int perfectHash(byte[] sequence) {
    method match (line 115) | public MatchResult match(MatchResult reuse, byte[] sequence, int start...
    method match (line 167) | public MatchResult match(byte[] sequence, int start, int length, int n...
    method match (line 178) | public MatchResult match(byte[] sequence, int node) {
    method match (line 187) | public MatchResult match(byte[] sequence) {

FILE: morfologik-fsa/src/main/java/morfologik/fsa/MatchResult.java
  class MatchResult (line 8) | public final class MatchResult {
    method MatchResult (line 53) | MatchResult(int kind, int index, int node) {
    method MatchResult (line 57) | MatchResult(int kind) {
    method MatchResult (line 61) | public MatchResult() {
    method reset (line 65) | final void reset(int kind, int index, int node) {

FILE: morfologik-fsa/src/main/java/morfologik/fsa/StateVisitor.java
  type StateVisitor (line 9) | public interface StateVisitor {
    method accept (line 10) | public boolean accept(int state);

FILE: morfologik-polish/src/main/java/morfologik/stemming/polish/PolishStemmer.java
  class PolishStemmer (line 20) | public final class PolishStemmer implements IStemmer, Iterable<WordData> {
    method PolishStemmer (line 27) | public PolishStemmer() {
    method getDictionary (line 55) | public Dictionary getDictionary() {
    method lookup (line 60) | public List<WordData> lookup(CharSequence word) {
    method iterator (line 65) | public Iterator<WordData> iterator() {

FILE: morfologik-polish/src/test/java/morfologik/stemming/polish/Gh27Test.java
  class Gh27Test (line 11) | public class Gh27Test {
    method gh27 (line 13) | @Test

FILE: morfologik-polish/src/test/java/morfologik/stemming/polish/PolishMorfologikStemmerTest.java
  class PolishMorfologikStemmerTest (line 18) | public class PolishMorfologikStemmerTest {
    method testLexemes (line 20) | @Test
    method listUniqueTags (line 36) | @Test
    method testWordDataFields (line 54) | @Test
    method asString (line 122) | public static String asString(CharSequence s) {
    method stem (line 128) | public static String[] stem(IStemmer s, String word) {
    method assertNoStemFor (line 138) | public static void assertNoStemFor(IStemmer s, String word) {

FILE: morfologik-speller/src/main/java/morfologik/speller/HMatrix.java
  class HMatrix (line 9) | public class HMatrix {
    method HMatrix (line 24) | public HMatrix(final int distance, final int maxLength) {
    method init (line 33) | private void init() {
    method reset (line 47) | public void reset() {
    method get (line 77) | public int get(final int i, final int j) {
    method set (line 88) | public void set(final int i, final int j, final int val) {

FILE: morfologik-speller/src/main/java/morfologik/speller/Speller.java
  class Speller (line 31) | public class Speller {
    class Pattern (line 55) | private static final class Pattern {
      method Pattern (line 60) | Pattern(char[] chars, boolean startAnchor, boolean endAnchor) {
    method Speller (line 109) | public Speller(final Dictionary dictionary) {
    method Speller (line 113) | public Speller(final Dictionary dictionary, final int editDistance) {
    method isStartAnchored (line 140) | private static boolean isStartAnchored(String key) {
    method isEndAnchored (line 144) | private static boolean isEndAnchored(String key) {
    method stripAnchors (line 148) | private static String stripAnchors(String key) {
    method createReplacementsMaps (line 154) | private void createReplacementsMaps() {
    method charSequenceToBytes (line 198) | private ByteBuffer charSequenceToBytes(final CharSequence word) throws...
    method isMisspelled (line 235) | public boolean isMisspelled(final String word) {
    method initialUppercase (line 258) | private CharSequence initialUppercase(final String wordToCheck) {
    method isInDictionary (line 269) | public boolean isInDictionary(final CharSequence word) {
    method getFrequency (line 307) | public int getFrequency(final CharSequence word) {
    method replaceRunOnWordCandidates (line 344) | public List<CandidateData> replaceRunOnWordCandidates(final String ori...
    method replaceRunOnWords (line 381) | public List<String> replaceRunOnWords(final String original) {
    method addReplacement (line 390) | private void addReplacement(List<CandidateData> candidates, String rep...
    method findSimilarWordCandidates (line 408) | public ArrayList<CandidateData> findSimilarWordCandidates(String word) {
    method findSimilarWords (line 412) | public ArrayList<String> findSimilarWords(String word) {
    method findReplacements (line 428) | public ArrayList<String> findReplacements(String word) {
    method findReplacementCandidates (line 446) | public ArrayList<CandidateData> findReplacementCandidates(String word) {
    method findReplacementCandidates (line 450) | private ArrayList<CandidateData> findReplacementCandidates(
    method findRepl (line 535) | private void findRepl(
    method isArcNotTerminal (line 667) | private boolean isArcNotTerminal(final int arc, final int candIndex) {
    method isEndOfCandidate (line 672) | private boolean isEndOfCandidate(final int arc, final int wordIndex) {
    method isBeforeSeparator (line 678) | private boolean isBeforeSeparator(final int arc) {
    method ed (line 695) | public int ed(final int i, final int j, final int wordIndex, final int...
    method areEqual (line 724) | private boolean areEqual(final char x, final char y) {
    method cuted (line 765) | public int cuted(final int depth, final int wordIndex, final int candI...
    method matchAnyToOne (line 785) | private int matchAnyToOne(final int wordIndex, final int candIndex) {
    method matchAnyToTwo (line 804) | private int matchAnyToTwo(
    method min (line 847) | private static int min(final int a, final int b, final int c) {
    method isAlphabetic (line 857) | static boolean isAlphabetic(final int codePoint) {
    method containsNoDigit (line 875) | static boolean containsNoDigit(final String s) {
    method isAllUppercase (line 888) | boolean isAllUppercase(final String str) {
    method isNotAllLowercase (line 902) | boolean isNotAllLowercase(final String str) {
    method isNotCapitalizedWord (line 915) | boolean isNotCapitalizedWord(final String str) {
    method isNotEmpty (line 934) | static boolean isNotEmpty(final String str) {
    method isMixedCase (line 942) | boolean isMixedCase(final String str) {
    method isCamelCase (line 951) | public boolean isCamelCase(final String str) {
    method convertsCase (line 966) | public boolean convertsCase() {
    method getAllReplacements (line 976) | public List<String> getAllReplacements(final String str, final int fro...
    method setWordAndCandidate (line 1052) | void setWordAndCandidate(final String word, final String candidate) {
    method getWordLen (line 1060) | public final int getWordLen() {
    method getCandLen (line 1064) | public final int getCandLen() {
    method getEffectiveED (line 1068) | public final int getEffectiveED() {
    class CandidateData (line 1076) | public final class CandidateData implements Comparable<CandidateData> {
      method CandidateData (line 1081) | CandidateData(final String word, final int distance) {
      method getWord (line 1087) | public final String getWord() {
      method getDistance (line 1091) | public final int getDistance() {
      method compareTo (line 1095) | @Override
      method toString (line 1101) | @Override

FILE: morfologik-speller/src/test/java/morfologik/speller/HMatrixTest.java
  class HMatrixTest (line 7) | public class HMatrixTest {
    method stressTestInit (line 10) | @Test

FILE: morfologik-speller/src/test/java/morfologik/speller/SpellerTest.java
  class SpellerTest (line 16) | public class SpellerTest {
    method setup (line 19) | @BeforeAll
    method testRunonWords (line 36) | @Test
    method testIsInDictionary (line 67) | @Test
    method testFindReplacements (line 88) | @Test
    method testFrequencyNonUTFDictionary (line 121) | @Test
    method testFindReplacementsInUTF (line 129) | @Test
    method testFindReplacementsUsingFrequency (line 156) | @Test
    method testFindSimilarWords (line 176) | @Test
    method testConcurrentReplacements (line 191) | @Test
    method testIsMisspelled (line 202) | @Test
    method testCamelCase (line 225) | @Test
    method testCapitalizedWord (line 238) | @Test
    method testGetAllReplacements (line 250) | @Test
    method testEditDistanceCalculation (line 260) | @Test
    method testCutOffEditDistance (line 275) | @Test
    method testReplacementsAndDistance2 (line 283) | @Test
    method testFindReplacementsConsistentAcrossRepeatedCalls (line 351) | @Test
    method testIssue38AnchoredReplacementPairs (line 366) | @Test
    method testIssue94 (line 384) | @Test
    method testReciprocalReplacementPairsDoNotProduceZeroDistance (line 393) | @Test
    method getCutOffDistance (line 422) | private int getCutOffDistance(final Speller spell, final String word, ...
    method getEditDistance (line 437) | private int getEditDistance(final Speller spell, final String word, fi...

FILE: morfologik-stemming/src/main/java/morfologik/stemming/ArrayViewList.java
  class ArrayViewList (line 6) | @SuppressWarnings("serial")
    method ArrayViewList (line 17) | ArrayViewList(E[] array, int start, int length) {
    method size (line 25) | public int size() {
    method get (line 32) | public E get(int index) {
    method set (line 39) | public E set(int index, E element) {
    method add (line 46) | public void add(int index, E element) {
    method remove (line 53) | public E remove(int index) {
    method addAll (line 60) | public boolean addAll(int index, Collection<? extends E> c) {
    method indexOf (line 67) | public int indexOf(Object o) {
    method listIterator (line 76) | public ListIterator<E> listIterator() {
    method listIterator (line 83) | public ListIterator<E> listIterator(final int index) {
    method contains (line 90) | public boolean contains(Object o) {
    method wrap (line 97) | void wrap(E[] array, int start, int length) {

FILE: morfologik-stemming/src/main/java/morfologik/stemming/BufferUtils.java
  class BufferUtils (line 13) | public final class BufferUtils {
    method BufferUtils (line 15) | private BufferUtils() {
    method clearAndEnsureCapacity (line 27) | public static ByteBuffer clearAndEnsureCapacity(ByteBuffer buffer, int...
    method clearAndEnsureCapacity (line 44) | public static CharBuffer clearAndEnsureCapacity(CharBuffer buffer, int...
    method toString (line 58) | public static String toString(ByteBuffer buffer, Charset charset) {
    method toString (line 65) | public static String toString(CharBuffer buffer) {
    method toArray (line 76) | public static byte[] toArray(ByteBuffer buffer) {
    method sharedPrefixLength (line 85) | static int sharedPrefixLength(ByteBuffer a, int aStart, ByteBuffer b, ...
    method sharedPrefixLength (line 97) | static int sharedPrefixLength(ByteBuffer a, ByteBuffer b) {
    method bytesToChars (line 105) | public static CharBuffer bytesToChars(
    method charsToBytes (line 139) | public static ByteBuffer charsToBytes(CharsetEncoder encoder, CharBuff...

FILE: morfologik-stemming/src/main/java/morfologik/stemming/Dictionary.java
  class Dictionary (line 22) | public final class Dictionary {
    method Dictionary (line 37) | public Dictionary(FSA fsa, DictionaryMetadata metadata) {
    method read (line 50) | public static Dictionary read(Path location) throws IOException {
    method read (line 67) | public static Dictionary read(URL dictURL) throws IOException {
    method read (line 91) | public static Dictionary read(InputStream fsaStream, InputStream metad...

FILE: morfologik-stemming/src/main/java/morfologik/stemming/DictionaryAttribute.java
  type DictionaryAttribute (line 13) | public enum DictionaryAttribute {
    method fromString (line 16) | @Override
    method fromString (line 35) | @Override
    method fromString (line 43) | @Override
    method fromString (line 51) | @Override
    method fromString (line 59) | @Override
    method fromString (line 67) | @Override
    method fromString (line 75) | @Override
    method fromString (line 86) | @Override
    method fromString (line 94) | @Override
    method fromString (line 102) | @Override
    method fromString (line 110) | @Override
    method fromString (line 118) | @Override
    method fromString (line 137) | @Override
    method fromString (line 167) | @Override
    method fromString (line 196) | @Override
    method fromString (line 230) | @Override
    method fromString (line 274) | public Object fromString(String value) throws IllegalArgumentException {
    method fromPropertyName (line 282) | public static DictionaryAttribute fromPropertyName(String propertyName) {
    method DictionaryAttribute (line 302) | private DictionaryAttribute(String propertyName) {
    method booleanValue (line 306) | private static Boolean booleanValue(String value) {

FILE: morfologik-stemming/src/main/java/morfologik/stemming/DictionaryIterator.java
  class DictionaryIterator (line 12) | public final class DictionaryIterator implements Iterator<WordData> {
    method DictionaryIterator (line 24) | public DictionaryIterator(Dictionary dictionary, CharsetDecoder decode...
    method hasNext (line 33) | public boolean hasNext() {
    method next (line 37) | public WordData next() {
    method remove (line 108) | public void remove() {

FILE: morfologik-stemming/src/main/java/morfologik/stemming/DictionaryLookup.java
  class DictionaryLookup (line 23) | public final class DictionaryLookup implements IStemmer, Iterable<WordDa...
    method DictionaryLookup (line 85) | public DictionaryLookup(Dictionary dictionary) throws IllegalArgumentE...
    method lookup (line 108) | @Override
    method applyReplacements (line 237) | public static String applyReplacements(
    method iterator (line 256) | @Override
    method getDictionary (line 264) | public Dictionary getDictionary() {
    method getSeparatorChar (line 274) | public char getSeparatorChar() {

FILE: morfologik-stemming/src/main/java/morfologik/stemming/DictionaryMetadata.java
  class DictionaryMetadata (line 30) | public final class DictionaryMetadata {
    method getAttributes (line 94) | public Map<DictionaryAttribute, String> getAttributes() {
    method getEncoding (line 99) | public String getEncoding() {
    method getSeparator (line 103) | public byte getSeparator() {
    method getLocale (line 107) | public Locale getLocale() {
    method getInputConversionPairs (line 111) | public LinkedHashMap<String, String> getInputConversionPairs() {
    method getOutputConversionPairs (line 115) | public LinkedHashMap<String, String> getOutputConversionPairs() {
    method getReplacementPairs (line 119) | public LinkedHashMap<String, List<String>> getReplacementPairs() {
    method getEquivalentChars (line 123) | public LinkedHashMap<Character, List<Character>> getEquivalentChars() {
    method isFrequencyIncluded (line 128) | public boolean isFrequencyIncluded() {
    method isIgnoringPunctuation (line 132) | public boolean isIgnoringPunctuation() {
    method isIgnoringNumbers (line 136) | public boolean isIgnoringNumbers() {
    method isIgnoringCamelCase (line 140) | public boolean isIgnoringCamelCase() {
    method isIgnoringAllUppercase (line 144) | public boolean isIgnoringAllUppercase() {
    method isIgnoringDiacritics (line 148) | public boolean isIgnoringDiacritics() {
    method isConvertingCase (line 152) | public boolean isConvertingCase() {
    method isSupportingRunOnWords (line 156) | public boolean isSupportingRunOnWords() {
    method DictionaryMetadata (line 166) | public DictionaryMetadata(Map<DictionaryAttribute, String> attrs) {
    method getDecoder (line 290) | public CharsetDecoder getDecoder() {
    method getEncoder (line 304) | public CharsetEncoder getEncoder() {
    method getSequenceEncoderType (line 318) | public EncoderType getSequenceEncoderType() {
    method getSeparatorAsChar (line 327) | public char getSeparatorAsChar() {
    method builder (line 334) | public static DictionaryMetadataBuilder builder() {
    method getExpectedMetadataFileName (line 346) | public static String getExpectedMetadataFileName(String dictionaryFile) {
    method getExpectedMetadataLocation (line 362) | public static Path getExpectedMetadataLocation(Path dictionary) {
    method read (line 374) | public static DictionaryMetadata read(InputStream metadataStream) thro...
    method write (line 434) | public void write(Writer writer) throws IOException {

FILE: morfologik-stemming/src/main/java/morfologik/stemming/DictionaryMetadataBuilder.java
  class DictionaryMetadataBuilder (line 10) | public final class DictionaryMetadataBuilder {
    method separator (line 14) | public DictionaryMetadataBuilder separator(char c) {
    method encoding (line 19) | public DictionaryMetadataBuilder encoding(Charset charset) {
    method encoding (line 23) | public DictionaryMetadataBuilder encoding(String charsetName) {
    method frequencyIncluded (line 28) | public DictionaryMetadataBuilder frequencyIncluded() {
    method frequencyIncluded (line 32) | public DictionaryMetadataBuilder frequencyIncluded(boolean v) {
    method ignorePunctuation (line 37) | public DictionaryMetadataBuilder ignorePunctuation() {
    method ignorePunctuation (line 41) | public DictionaryMetadataBuilder ignorePunctuation(boolean v) {
    method ignoreNumbers (line 46) | public DictionaryMetadataBuilder ignoreNumbers() {
    method ignoreNumbers (line 50) | public DictionaryMetadataBuilder ignoreNumbers(boolean v) {
    method ignoreCamelCase (line 55) | public DictionaryMetadataBuilder ignoreCamelCase() {
    method ignoreCamelCase (line 59) | public DictionaryMetadataBuilder ignoreCamelCase(boolean v) {
    method ignoreAllUppercase (line 64) | public DictionaryMetadataBuilder ignoreAllUppercase() {
    method ignoreAllUppercase (line 68) | public DictionaryMetadataBuilder ignoreAllUppercase(boolean v) {
    method ignoreDiacritics (line 73) | public DictionaryMetadataBuilder ignoreDiacritics() {
    method ignoreDiacritics (line 77) | public DictionaryMetadataBuilder ignoreDiacritics(boolean v) {
    method convertCase (line 82) | public DictionaryMetadataBuilder convertCase() {
    method convertCase (line 86) | public DictionaryMetadataBuilder convertCase(boolean v) {
    method supportRunOnWords (line 91) | public DictionaryMetadataBuilder supportRunOnWords() {
    method supportRunOnWords (line 95) | public DictionaryMetadataBuilder supportRunOnWords(boolean v) {
    method encoder (line 100) | public DictionaryMetadataBuilder encoder(EncoderType type) {
    method locale (line 105) | public DictionaryMetadataBuilder locale(Locale locale) {
    method locale (line 109) | public DictionaryMetadataBuilder locale(String localeName) {
    method withReplacementPairs (line 114) | public DictionaryMetadataBuilder withReplacementPairs(
    method withEquivalentChars (line 128) | public DictionaryMetadataBuilder withEquivalentChars(
    method withInputConversionPairs (line 142) | public DictionaryMetadataBuilder withInputConversionPairs(Map<String, ...
    method withOutputConversionPairs (line 153) | public DictionaryMetadataBuilder withOutputConversionPairs(Map<String,...
    method author (line 164) | public DictionaryMetadataBuilder author(String author) {
    method creationDate (line 169) | public DictionaryMetadataBuilder creationDate(String creationDate) {
    method license (line 174) | public DictionaryMetadataBuilder license(String license) {
    method build (line 179) | public DictionaryMetadata build() {
    method toMap (line 183) | public EnumMap<DictionaryAttribute, String> toMap() {

FILE: morfologik-stemming/src/main/java/morfologik/stemming/EncoderType.java
  type EncoderType (line 4) | public enum EncoderType {
    method get (line 6) | @Override
    method get (line 12) | @Override
    method get (line 18) | @Override
    method get (line 24) | @Override
    method get (line 30) | public abstract ISequenceEncoder get();

FILE: morfologik-stemming/src/main/java/morfologik/stemming/ISequenceEncoder.java
  type ISequenceEncoder (line 14) | public interface ISequenceEncoder {
    method encode (line 25) | public ByteBuffer encode(ByteBuffer reuse, ByteBuffer source, ByteBuff...
    method decode (line 37) | public ByteBuffer decode(ByteBuffer reuse, ByteBuffer source, ByteBuff...
    method prefixBytes (line 46) | @Deprecated

FILE: morfologik-stemming/src/main/java/morfologik/stemming/IStemmer.java
  type IStemmer (line 6) | public interface IStemmer {
    method lookup (line 18) | public List<WordData> lookup(CharSequence word);

FILE: morfologik-stemming/src/main/java/morfologik/stemming/NoEncoder.java
  class NoEncoder (line 6) | public class NoEncoder implements ISequenceEncoder {
    method encode (line 7) | @Override
    method decode (line 18) | @Override
    method prefixBytes (line 29) | @Override
    method toString (line 34) | @Override

FILE: morfologik-stemming/src/main/java/morfologik/stemming/TrimInfixAndSuffixEncoder.java
  class TrimInfixAndSuffixEncoder (line 29) | public class TrimInfixAndSuffixEncoder implements ISequenceEncoder {
    method encode (line 35) | public ByteBuffer encode(ByteBuffer reuse, ByteBuffer source, ByteBuff...
    method prefixBytes (line 105) | @Override
    method decode (line 110) | public ByteBuffer decode(ByteBuffer reuse, ByteBuffer source, ByteBuff...
    method toString (line 140) | @Override

FILE: morfologik-stemming/src/main/java/morfologik/stemming/TrimPrefixAndSuffixEncoder.java
  class TrimPrefixAndSuffixEncoder (line 29) | public class TrimPrefixAndSuffixEncoder implements ISequenceEncoder {
    method encode (line 33) | public ByteBuffer encode(ByteBuffer reuse, ByteBuffer source, ByteBuff...
    method prefixBytes (line 70) | @Override
    method decode (line 75) | public ByteBuffer decode(ByteBuffer reuse, ByteBuffer source, ByteBuff...
    method toString (line 102) | @Override

FILE: morfologik-stemming/src/main/java/morfologik/stemming/TrimSuffixEncoder.java
  class TrimSuffixEncoder (line 28) | public class TrimSuffixEncoder implements ISequenceEncoder {
    method encode (line 32) | public ByteBuffer encode(ByteBuffer reuse, ByteBuffer source, ByteBuff...
    method prefixBytes (line 53) | @Override
    method decode (line 58) | public ByteBuffer decode(ByteBuffer reuse, ByteBuffer source, ByteBuff...
    method toString (line 81) | @Override

FILE: morfologik-stemming/src/main/java/morfologik/stemming/UnmappableInputException.java
  class UnmappableInputException (line 9) | @SuppressWarnings("serial")
    method UnmappableInputException (line 11) | UnmappableInputException(String message, CharacterCodingException caus...

FILE: morfologik-stemming/src/main/java/morfologik/stemming/WordData.java
  class WordData (line 23) | public final class WordData implements Cloneable {
    method WordData (line 51) | WordData(CharsetDecoder decoder) {
    method WordData (line 61) | WordData(String stem, String tag, String encoding) {
    method getStemBytes (line 83) | public ByteBuffer getStemBytes(ByteBuffer target) {
    method getTagBytes (line 103) | public ByteBuffer getTagBytes(ByteBuffer target) {
    method getWordBytes (line 123) | public ByteBuffer getWordBytes(ByteBuffer target) {
    method getTag (line 136) | public CharSequence getTag() {
    method getStem (line 145) | public CharSequence getStem() {
    method getWord (line 154) | public CharSequence getWord() {
    method equals (line 161) | @Override
    method hashCode (line 169) | @Override
    method toString (line 174) | @Override
    method clone (line 183) | @Override
    method cloneCharSequence (line 194) | private CharSequence cloneCharSequence(CharSequence chs) {
    method update (line 199) | void update(ByteBuffer wordBuffer, CharSequence word) {

FILE: morfologik-stemming/src/test/java/morfologik/stemming/DictionaryLookupTest.java
  class DictionaryLookupTest (line 16) | public class DictionaryLookupTest {
    method testApplyReplacements (line 17) | @Test
    method testRemovedEncoderProperties (line 31) | @Test
    method testPrefixDictionaries (line 42) | @Test
    method testInputConversion (line 54) | @Test
    method testInfixDictionaries (line 67) | @Test
    method testWordDataIterator (line 88) | @Test
    method testWordDataCloning (line 110) | @Test
    method assertEqualSequences (line 141) | private void assertEqualSequences(CharSequence s1, CharSequence s2) {
    method testMultibyteEncodingUTF8 (line 146) | @Test
    method testSynthesis (line 157) | @Test
    method testInputWithSeparators (line 171) | @Test
    method testSeparatorInLookupTerm (line 196) | @Test
    method testGetSeparator (line 212) | @Test
    method asString (line 220) | public static String asString(CharSequence s) {
    method stem (line 226) | public static String[] stem(IStemmer s, String word) {
    method assertNoStemFor (line 236) | public static void assertNoStemFor(IStemmer s, String word) {

FILE: morfologik-stemming/src/test/java/morfologik/stemming/DictionaryMetadataBuilderTest.java
  class DictionaryMetadataBuilderTest (line 13) | public class DictionaryMetadataBuilderTest {
    method testAllConstantsHaveBuilderMethods (line 14) | @Test

FILE: morfologik-stemming/src/test/java/morfologik/stemming/DictionaryMetadataTest.java
  class DictionaryMetadataTest (line 16) | @Randomized
    method testEscapeSeparator (line 18) | @Test
    method testUnicodeSeparator (line 25) | @Test
    method testWriteMetadata (line 32) | @Test

FILE: morfologik-stemming/src/test/java/morfologik/stemming/DictionaryTest.java
  class DictionaryTest (line 14) | @Randomized
    method testReadFromFile (line 16) | @Test

FILE: morfologik-stemming/src/test/java/morfologik/stemming/EncodersTest.java
  class EncodersTest (line 11) | @Randomized
    method testSharedPrefix (line 13) | @Test
    method b (line 30) | private static byte[] b(String arg) {

FILE: morfologik-stemming/src/test/java/morfologik/stemming/SequenceEncodersTest.java
  class SequenceEncodersTest (line 14) | @Randomized
    method SequenceEncodersTest (line 20) | public SequenceEncodersTest(EncoderType coderType) {
    method testEncodeSuffixOnRandomSequences (line 24) | @Test
    method testEncodeSamples (line 34) | @Test
    method assertRoundtripEncode (line 50) | private void assertRoundtripEncode(Random rnd, String srcString, Strin...

FILE: morfologik-tools/src/main/java/morfologik/tools/BinaryInput.java
  class BinaryInput (line 13) | final class BinaryInput {
    type LineConsumer (line 18) | private static interface LineConsumer {
      method process (line 19) | byte[] process(byte[] buffer, int length);
    method BinaryInput (line 40) | BinaryInput() {}
    method BinaryInput (line 42) | public BinaryInput(boolean acceptBom, boolean acceptCr, boolean ignore...
    method readBinarySequences (line 48) | List<byte[]> readBinarySequences(Path input, byte separator) throws IO...
    method hasCr (line 96) | private static boolean hasCr(byte[] seq, int length) {
    method forAllLines (line 106) | private static int forAllLines(InputStream is, byte separator, LineCon...

FILE: morfologik-tools/src/main/java/morfologik/tools/CliTool.java
  class CliTool (line 14) | public abstract class CliTool implements Callable<ExitStatus> {
    method CliTool (line 32) | public CliTool() {
    method setCallSystemExit (line 43) | public void setCallSystemExit(boolean flag) {
    method main (line 53) | protected static void main(String[] args, CliTool... commands) {
    method main (line 118) | protected static void main(String[] args, CliTool command) {
    method printf (line 163) | protected static void printf(String msg, Object... args) {
    method checkNotNull (line 167) | protected static <T> T checkNotNull(T arg) {
    method helpDisplayCommandOptions (line 174) | private static void helpDisplayCommandOptions(PrintStream pw, String c...
    method helpDisplayCommandOptions (line 181) | private static void helpDisplayCommandOptions(PrintStream pw, JCommand...

FILE: morfologik-tools/src/main/java/morfologik/tools/CustomParameterConverters.java
  class CustomParameterConverters (line 8) | class CustomParameterConverters implements IStringConverterFactory {
    class PathConverter (line 9) | public static class PathConverter implements IStringConverter<Path> {
      method convert (line 10) | @Override
    method getConverter (line 16) | @Override

FILE: morfologik-tools/src/main/java/morfologik/tools/DictApply.java
  class DictApply (line 20) | @Parameters(
    class LineSupplier (line 52) | private abstract class LineSupplier implements Closeable {
      method nextLine (line 53) | public abstract String nextLine() throws IOException;
      method close (line 55) | @Override
    class ReaderLineSupplier (line 61) | private class ReaderLineSupplier extends LineSupplier {
      method ReaderLineSupplier (line 64) | public ReaderLineSupplier(BufferedReader reader) {
      method nextLine (line 68) | @Override
      method close (line 73) | @Override
    method DictApply (line 79) | DictApply() {}
    method DictApply (line 81) | public DictApply(Path dictionary, Path input, String inputEncoding) {
    method call (line 86) | @Override
    method determineInput (line 118) | private LineSupplier determineInput() throws IOException {
    method validateArguments (line 148) | private ExitStatus validateArguments() {
    method main (line 164) | public static void main(String[] args) {

FILE: morfologik-tools/src/main/java/morfologik/tools/DictCompile.java
  class DictCompile (line 28) | @Parameters(
    method DictCompile (line 56) | DictCompile() {
    method DictCompile (line 60) | public DictCompile(
    method call (line 73) | @Override
    method countOf (line 213) | private static int countOf(byte separator, byte[] row) {
    method indexOf (line 223) | private static int indexOf(byte separator, byte[] row, int fromIndex) {
    method main (line 233) | public static void main(String[] args) {

FILE: morfologik-tools/src/main/java/morfologik/tools/DictDecompile.java
  class DictDecompile (line 16) | @Parameters(
    method DictDecompile (line 41) | DictDecompile() {}
    method DictDecompile (line 43) | public DictDecompile(Path input, Path output, boolean overwrite, boole...
    method call (line 50) | @Override
    method write (line 115) | private void write(OutputStream os, ByteBuffer bb) throws IOException {
    method ensureNoSeparator (line 119) | private boolean ensureNoSeparator(ByteBuffer bb, byte marker) {
    method main (line 129) | public static void main(String[] args) {

FILE: morfologik-tools/src/main/java/morfologik/tools/ExitStatus.java
  type ExitStatus (line 3) | public enum ExitStatus {
    method ExitStatus (line 18) | private ExitStatus(int systemExitCode) {

FILE: morfologik-tools/src/main/java/morfologik/tools/ExitStatusException.java
  class ExitStatusException (line 5) | @SuppressWarnings("serial")
    method ExitStatusException (line 9) | public ExitStatusException(ExitStatus status, String message, Object.....
    method ExitStatusException (line 13) | public ExitStatusException(ExitStatus status, Throwable t, String mess...

FILE: morfologik-tools/src/main/java/morfologik/tools/FSABuild.java
  class FSABuild (line 5) | @Parameters(

FILE: morfologik-tools/src/main/java/morfologik/tools/FSACompile.java
  class FSACompile (line 17) | @Parameters(
    method FSACompile (line 42) | FSACompile() {
    method FSACompile (line 46) | public FSACompile(
    method call (line 58) | @Override
    method main (line 73) | public static void main(String[] args) {

FILE: morfologik-tools/src/main/java/morfologik/tools/FSADecompile.java
  class FSADecompile (line 15) | @Parameters(
    method FSADecompile (line 33) | FSADecompile() {}
    method FSADecompile (line 35) | public FSADecompile(Path input, Path output) {
    method call (line 40) | @Override
    method main (line 58) | public static void main(String[] args) {

FILE: morfologik-tools/src/main/java/morfologik/tools/FSADump.java
  class FSADump (line 5) | @Parameters(

FILE: morfologik-tools/src/main/java/morfologik/tools/FSAInfo.java
  class FSAInfo (line 16) | @Parameters(
    method FSAInfo (line 27) | FSAInfo() {}
    method FSAInfo (line 29) | public FSAInfo(Path input) {
    method call (line 33) | @Override
    method byteAsChar (line 80) | static String byteAsChar(byte v) {
    method main (line 91) | public static void main(String[] args) {

FILE: morfologik-tools/src/main/java/morfologik/tools/Launcher.java
  class Launcher (line 4) | public final class Launcher {
    method Launcher (line 5) | private Launcher() {}
    method main (line 7) | @SuppressWarnings("deprecation")

FILE: morfologik-tools/src/main/java/morfologik/tools/SerializationFormat.java
  type SerializationFormat (line 8) | public enum SerializationFormat {
    method getSerializer (line 10) | @Override
    method getSerializer (line 17) | @Override
    method getSerializer (line 23) | abstract FSASerializer getSerializer();

FILE: morfologik-tools/src/main/java/morfologik/tools/ValidateFileExists.java
  class ValidateFileExists (line 9) | public final class ValidateFileExists implements IValueValidator<Path> {
    method validate (line 10) | @Override

FILE: morfologik-tools/src/main/java/morfologik/tools/ValidateParentDirExists.java
  class ValidateParentDirExists (line 9) | public final class ValidateParentDirExists implements IValueValidator<Pa...
    method validate (line 10) | @Override

FILE: morfologik-tools/src/test/java/morfologik/tools/DictCompileBug.java
  class DictCompileBug (line 22) | @Randomized
    method testSeparatorInEncoded (line 24) | @Test

FILE: morfologik-tools/src/test/java/morfologik/tools/DictCompileTest.java
  class DictCompileTest (line 25) | @Randomized
    method testRoundTrip (line 27) | @RepeatedTest(200)

FILE: morfologik-tools/src/test/java/morfologik/tools/FSACompileTest.java
  class FSACompileTest (line 29) | @Randomized
    method testCliInvocation (line 31) | @RepeatedTest(100)
    method testEmptyWarning (line 76) | @Test
    method testCrWarning (line 101) | @Test
    method testBomWarning (line 126) | @Test
    method sysouts (line 155) | private String sysouts(Callable<Void> callable) throws Exception {
Condensed preview — 157 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (496K chars).
[
  {
    "path": ".github/workflows/ci.yml",
    "chars": 347,
    "preview": "name: CI\n\non:\n  push:\n    branches: [master]\n  pull_request:\n\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps:\n     "
  },
  {
    "path": ".gitignore",
    "chars": 114,
    "preview": "*.versionsBackup\r\ntmp/\r\ndist/\r\ntarget/\r\n*.patch\r\n.eclipse/\r\n.project\r\n.classpath\r\n.settings\r\n*.name\r\n*.iml\r\n.idea/"
  },
  {
    "path": "CHANGES.txt",
    "chars": 23549,
    "preview": "\r\nMorfologik, Change Log\r\n======================\r\n\r\nFor an up-to-date CHANGES file see \r\nhttps://github.com/morfologik/m"
  },
  {
    "path": "CONTRIBUTING.txt",
    "chars": 193,
    "preview": "Contributions are welcome!\r\n\r\nUse a modern Java version for compilation and testing (JDK 21+ recommended).\r\n\r\nIf you use"
  },
  {
    "path": "LICENSE.txt",
    "chars": 1605,
    "preview": "\r\nCopyright (c) 2006 Dawid Weiss\r\nCopyright (c) 2007-2015 Dawid Weiss, Marcin Miłkowski\r\nAll rights reserved.\r\n\r\nRedistr"
  },
  {
    "path": "README.txt",
    "chars": 558,
    "preview": "MORFOLOGIK\r\n==========\r\n\r\nTools for finite state automata construction and dictionary-based \r\nmorphological dictionaries"
  },
  {
    "path": "etc/eclipse/settings/org.eclipse.jdt.core.prefs",
    "chars": 32196,
    "preview": "eclipse.preferences.version=1\r\norg.eclipse.jdt.core.compiler.annotation.inheritNullAnnotations=disabled\r\norg.eclipse.jdt"
  },
  {
    "path": "etc/eclipse/settings/org.eclipse.m2e.core.prefs",
    "chars": 97,
    "preview": "activeProfiles=eclipse\r\neclipse.preferences.version=1\r\nresolveWorkspaceProjects=true\r\nversion=1\r\n"
  },
  {
    "path": "etc/forbidden-apis/signatures.txt",
    "chars": 2120,
    "preview": "@defaultMessage Convert to URI\njava.net.URL#getPath()\njava.net.URL#getFile()\n\n@defaultMessage spawns threads with vague "
  },
  {
    "path": "morfologik-fsa/pom.xml",
    "chars": 1260,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n<project xmlns=\"http://maven.apache.org/POM/4.0.0\" xmlns:xsi=\"http://www.w3.org/"
  },
  {
    "path": "morfologik-fsa/src/main/java/morfologik/fsa/ByteSequenceIterator.java",
    "chars": 4384,
    "preview": "package morfologik.fsa;\n\nimport java.nio.ByteBuffer;\nimport java.util.*;\n\n/**\n * An iterator that traverses the right la"
  },
  {
    "path": "morfologik-fsa/src/main/java/morfologik/fsa/CFSA.java",
    "chars": 9997,
    "preview": "package morfologik.fsa;\n\nimport static morfologik.fsa.FSAFlags.*;\n\nimport java.io.*;\nimport java.util.*;\n\n/**\n * CFSA (C"
  },
  {
    "path": "morfologik-fsa/src/main/java/morfologik/fsa/CFSA2.java",
    "chars": 9318,
    "preview": "package morfologik.fsa;\n\nimport java.io.DataInputStream;\nimport java.io.IOException;\nimport java.io.InputStream;\nimport "
  },
  {
    "path": "morfologik-fsa/src/main/java/morfologik/fsa/FSA.java",
    "chars": 11443,
    "preview": "package morfologik.fsa;\n\nimport java.io.ByteArrayOutputStream;\nimport java.io.IOException;\nimport java.io.InputStream;\ni"
  },
  {
    "path": "morfologik-fsa/src/main/java/morfologik/fsa/FSA5.java",
    "chars": 8308,
    "preview": "package morfologik.fsa;\n\nimport static morfologik.fsa.FSAFlags.*;\n\nimport java.io.DataInputStream;\nimport java.io.IOExce"
  },
  {
    "path": "morfologik-fsa/src/main/java/morfologik/fsa/FSAFlags.java",
    "chars": 1400,
    "preview": "package morfologik.fsa;\n\nimport java.util.Set;\n\n/** FSA automaton flags. Where applicable, flags follow Daciuk's <code>f"
  },
  {
    "path": "morfologik-fsa/src/main/java/morfologik/fsa/FSAHeader.java",
    "chars": 1788,
    "preview": "package morfologik.fsa;\n\nimport java.io.IOException;\nimport java.io.InputStream;\nimport java.io.OutputStream;\n\n/** Stand"
  },
  {
    "path": "morfologik-fsa/src/main/java/morfologik/fsa/FSATraversal.java",
    "chars": 6927,
    "preview": "package morfologik.fsa;\n\nimport static morfologik.fsa.MatchResult.*;\n\n/** This class implements some common matching and"
  },
  {
    "path": "morfologik-fsa/src/main/java/morfologik/fsa/MatchResult.java",
    "chars": 2042,
    "preview": "package morfologik.fsa;\n\n/**\n * A matching result returned from {@link FSATraversal}.\n *\n * @see FSATraversal\n */\npublic"
  },
  {
    "path": "morfologik-fsa/src/main/java/morfologik/fsa/StateVisitor.java",
    "chars": 209,
    "preview": "package morfologik.fsa;\n\n/**\n * State visitor.\n *\n * @see FSA#visitInPostOrder(StateVisitor)\n * @see FSA#visitInPreOrder"
  },
  {
    "path": "morfologik-fsa-builders/pom.xml",
    "chars": 1600,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n<project xmlns=\"http://maven.apache.org/POM/4.0.0\" xmlns:xsi=\"http://www.w3.org/"
  },
  {
    "path": "morfologik-fsa-builders/src/main/java/morfologik/fsa/builders/CFSA2Serializer.java",
    "chars": 15144,
    "preview": "package morfologik.fsa.builders;\n\nimport static morfologik.fsa.CFSA2.*;\nimport static morfologik.fsa.FSAFlags.*;\n\nimport"
  },
  {
    "path": "morfologik-fsa-builders/src/main/java/morfologik/fsa/builders/ConstantArcSizeFSA.java",
    "chars": 3260,
    "preview": "package morfologik.fsa.builders;\n\nimport java.util.Collections;\nimport java.util.Set;\nimport morfologik.fsa.FSA;\nimport "
  },
  {
    "path": "morfologik-fsa-builders/src/main/java/morfologik/fsa/builders/FSA5Serializer.java",
    "chars": 8153,
    "preview": "package morfologik.fsa.builders;\n\nimport static morfologik.fsa.FSAFlags.*;\n\nimport com.carrotsearch.hppc.IntIntHashMap;\n"
  },
  {
    "path": "morfologik-fsa-builders/src/main/java/morfologik/fsa/builders/FSABuilder.java",
    "chars": 14451,
    "preview": "package morfologik.fsa.builders;\n\nimport static morfologik.fsa.builders.ConstantArcSizeFSA.*;\n\nimport java.util.*;\nimpor"
  },
  {
    "path": "morfologik-fsa-builders/src/main/java/morfologik/fsa/builders/FSAInfo.java",
    "chars": 3507,
    "preview": "package morfologik.fsa.builders;\n\nimport com.carrotsearch.hppc.IntIntHashMap;\nimport java.util.BitSet;\nimport morfologik"
  },
  {
    "path": "morfologik-fsa-builders/src/main/java/morfologik/fsa/builders/FSASerializer.java",
    "chars": 1719,
    "preview": "package morfologik.fsa.builders;\n\nimport java.io.IOException;\nimport java.io.OutputStream;\nimport java.util.Set;\nimport "
  },
  {
    "path": "morfologik-fsa-builders/src/main/java/morfologik/fsa/builders/FSAUtils.java",
    "chars": 5937,
    "preview": "package morfologik.fsa.builders;\n\nimport com.carrotsearch.hppc.IntIntHashMap;\nimport java.io.IOException;\nimport java.io"
  },
  {
    "path": "morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/CFSA2SerializerTest.java",
    "chars": 192,
    "preview": "package morfologik.fsa.builders;\n\n/** */\npublic class CFSA2SerializerTest extends SerializerTestBase {\n  protected CFSA2"
  },
  {
    "path": "morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/FSA5SerializerTest.java",
    "chars": 189,
    "preview": "package morfologik.fsa.builders;\n\n/** */\npublic class FSA5SerializerTest extends SerializerTestBase {\n  protected FSA5Se"
  },
  {
    "path": "morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/FSA5Test.java",
    "chars": 3237,
    "preview": "package morfologik.fsa.builders;\n\nimport static morfologik.fsa.FSAFlags.*;\nimport static org.junit.jupiter.api.Assertion"
  },
  {
    "path": "morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/FSABuilderTest.java",
    "chars": 2290,
    "preview": "package morfologik.fsa.builders;\n\nimport static morfologik.fsa.builders.FSATestUtils.*;\nimport static org.junit.jupiter."
  },
  {
    "path": "morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/FSATestUtils.java",
    "chars": 5862,
    "preview": "package morfologik.fsa.builders;\n\nimport static org.junit.jupiter.api.Assertions.*;\n\nimport java.nio.ByteBuffer;\nimport "
  },
  {
    "path": "morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/FSATraversalTest.java",
    "chars": 5627,
    "preview": "package morfologik.fsa.builders;\n\nimport static java.nio.charset.StandardCharsets.*;\nimport static morfologik.fsa.MatchR"
  },
  {
    "path": "morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/MinMax.java",
    "chars": 293,
    "preview": "package morfologik.fsa.builders;\n\n/** Minimum/maximum and range. */\nfinal class MinMax {\n  public final int min;\n  publi"
  },
  {
    "path": "morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/SerializerTestBase.java",
    "chars": 6553,
    "preview": "package morfologik.fsa.builders;\n\nimport static morfologik.fsa.FSAFlags.*;\nimport static org.junit.jupiter.api.Assertion"
  },
  {
    "path": "morfologik-fsa-builders/src/test/java/morfologik/fsa/builders/TestBase.java",
    "chars": 891,
    "preview": "package morfologik.fsa.builders;\n\nimport com.carrotsearch.randomizedtesting.jupiter.DetectThreadLeaks;\nimport com.carrot"
  },
  {
    "path": "morfologik-fsa-builders/src/test/resources/morfologik/fsa/builders/abc.in",
    "chars": 16,
    "preview": "a\naba\nac\nb\nba\nc\n"
  },
  {
    "path": "morfologik-fsa-builders/src/test/resources/morfologik/fsa/builders/minimal.in",
    "chars": 13,
    "preview": "+NP\nn+N\nn+NP\n"
  },
  {
    "path": "morfologik-fsa-builders/src/test/resources/morfologik/fsa/builders/minimal2.in",
    "chars": 291,
    "preview": "3-D+A+JJ\n3-D+A+NN\n4-F+A+NN\n4-H+A+JJ\nz+A+NN\nz-axis+A+NN\nzB+A+NN\nzZt+A+NNP\nza-zen+A+NN\nzabaglione+A+NN\nzabagliones+B+NNS\nz"
  },
  {
    "path": "morfologik-polish/pom.xml",
    "chars": 1488,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n<project xmlns=\"http://maven.apache.org/POM/4.0.0\" xmlns:xsi=\"http://www.w3.org/"
  },
  {
    "path": "morfologik-polish/src/main/java/morfologik/stemming/polish/PolishStemmer.java",
    "chars": 2141,
    "preview": "package morfologik.stemming.polish;\n\nimport java.io.IOException;\nimport java.net.URL;\nimport java.security.AccessControl"
  },
  {
    "path": "morfologik-polish/src/main/resources/morfologik/stemming/polish/polish.LICENSE.Polish.txt",
    "chars": 1708,
    "preview": "Morfologik\n\nVERSION: 2.1 PoliMorf\nBUILD:   2016-02-13 19:37:51+01:00\nGIT:     6e63b53\n\nCopyright (c) 2016, Marcin Miłkow"
  },
  {
    "path": "morfologik-polish/src/main/resources/morfologik/stemming/polish/polish.LICENSE.txt",
    "chars": 1392,
    "preview": "Morfologik\n\nVERSION: 2.1 PoliMorf\nBUILD:   2016-02-13 19:37:50+01:00\nGIT:     6e63b53\n\nCopyright (c) 2016, Marcin Miłkow"
  },
  {
    "path": "morfologik-polish/src/main/resources/morfologik/stemming/polish/polish.README.Polish.txt",
    "chars": 5603,
    "preview": "Morfologik to projekt tworzenia polskich słowników morfosyntaktycznych (stąd \nnazwa) służących do znakowania morfosyntak"
  },
  {
    "path": "morfologik-polish/src/main/resources/morfologik/stemming/polish/polish.README.txt",
    "chars": 372,
    "preview": "Morfologik is a project aiming at generating Polish morphosyntactic\ndictionaries (hence the name) used for part-of-speec"
  },
  {
    "path": "morfologik-polish/src/main/resources/morfologik/stemming/polish/polish.info",
    "chars": 1675,
    "preview": "#\n# Morfologik Polish (stemming dictionary)\n# Version: 2.1 PoliMorf\n# Date: 2016-02-13 19:32:15+01:00\n# Git: 6e63b53\n#\n#"
  },
  {
    "path": "morfologik-polish/src/test/java/morfologik/stemming/polish/Gh27Test.java",
    "chars": 848,
    "preview": "package morfologik.stemming.polish;\n\nimport java.io.IOException;\nimport java.util.Locale;\nimport morfologik.stemming.Wor"
  },
  {
    "path": "morfologik-polish/src/test/java/morfologik/stemming/polish/PolishMorfologikStemmerTest.java",
    "chars": 4472,
    "preview": "package morfologik.stemming.polish;\n\nimport static org.junit.jupiter.api.Assertions.*;\n\nimport java.io.IOException;\nimpo"
  },
  {
    "path": "morfologik-speller/pom.xml",
    "chars": 1440,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n<project xmlns=\"http://maven.apache.org/POM/4.0.0\" xmlns:xsi=\"http://www.w3.org/"
  },
  {
    "path": "morfologik-speller/src/main/java/morfologik/speller/HMatrix.java",
    "chars": 3209,
    "preview": "package morfologik.speller;\n\nimport java.util.Arrays;\n\n/**\n * Keeps track of already computed values of edit distance. R"
  },
  {
    "path": "morfologik-speller/src/main/java/morfologik/speller/Speller.java",
    "chars": 40464,
    "preview": "package morfologik.speller;\n\nimport static morfologik.fsa.MatchResult.EXACT_MATCH;\nimport static morfologik.fsa.MatchRes"
  },
  {
    "path": "morfologik-speller/src/test/java/morfologik/speller/HMatrixTest.java",
    "chars": 418,
    "preview": "package morfologik.speller;\n\nimport static org.junit.jupiter.api.Assertions.*;\n\nimport org.junit.jupiter.api.Test;\n\npubl"
  },
  {
    "path": "morfologik-speller/src/test/java/morfologik/speller/SpellerTest.java",
    "chars": 20225,
    "preview": "package morfologik.speller;\n\nimport static org.junit.jupiter.api.Assertions.assertEquals;\nimport static org.junit.jupite"
  },
  {
    "path": "morfologik-speller/src/test/resources/morfologik/speller/dict-with-freq.info",
    "chars": 931,
    "preview": "#\r\n# Dictionary properties.\r\n#\r\n\r\nfsa.dict.separator=+\r\nfsa.dict.encoding=iso-8859-2\r\n\r\nfsa.dict.encoder=suffix\r\n\r\nfsa.d"
  },
  {
    "path": "morfologik-speller/src/test/resources/morfologik/speller/dict-with-freq.txt",
    "chars": 162,
    "preview": "ageist+C\ndeist+G\ndidst+A\ndigest+J\ndirest+E\ndist+G\ndivest+I\nfist+J\ngist+G\ngrist+I\nheist+I\nhist+A\njest+H\njilt+D\njoist+F\nju"
  },
  {
    "path": "morfologik-speller/src/test/resources/morfologik/speller/issue38.info",
    "chars": 136,
    "preview": "fsa.dict.separator=+\nfsa.dict.encoding=utf-8\nfsa.dict.encoder=suffix\nfsa.dict.speller.replacement-pairs=^Ij IJ,^alot a_l"
  },
  {
    "path": "morfologik-speller/src/test/resources/morfologik/speller/issue38.input",
    "chars": 40,
    "preview": "IJsland+IJsland\na lot+a lot\npå ny+på ny\n"
  },
  {
    "path": "morfologik-speller/src/test/resources/morfologik/speller/issue94.info",
    "chars": 149,
    "preview": "fsa.dict.speller.replacement-pairs=ä e\nfsa.dict.encoder=SUFFIX\nfsa.dict.separator=+\nfsa.dict.encoding=utf-8\nfsa.dict.spe"
  },
  {
    "path": "morfologik-speller/src/test/resources/morfologik/speller/pissara-test.info",
    "chars": 112,
    "preview": "fsa.dict.separator=+\nfsa.dict.encoding=utf-8\nfsa.dict.encoder=NONE\nfsa.dict.speller.replacement-pairs=s ss,ss s\n"
  },
  {
    "path": "morfologik-speller/src/test/resources/morfologik/speller/pissara-test.txt",
    "chars": 25,
    "preview": "passara\npassarà\npissarra\n"
  },
  {
    "path": "morfologik-speller/src/test/resources/morfologik/speller/reps_dist2.info",
    "chars": 132,
    "preview": "fsa.dict.separator=+\nfsa.dict.encoding=utf-8\nfsa.dict.speller.replacement-pairs=s ss,t d,R Rh,y ij,ę em,em ę\nfsa.dict.en"
  },
  {
    "path": "morfologik-speller/src/test/resources/morfologik/speller/reps_dist2.txt",
    "chars": 27,
    "preview": "Mitmuss\nRhythmus\nWald\nBand\n"
  },
  {
    "path": "morfologik-speller/src/test/resources/morfologik/speller/single-char-word.info",
    "chars": 258,
    "preview": "#\n# Dictionary properties.\n#\n\nfsa.dict.separator=+\nfsa.dict.encoding=Cp1250\n\nfsa.dict.encoder=suffix\n\nfsa.dict.speller.i"
  },
  {
    "path": "morfologik-speller/src/test/resources/morfologik/speller/slownik.info",
    "chars": 270,
    "preview": "#\r\n# Dictionary properties.\r\n#\r\n\r\nfsa.dict.separator=+\r\nfsa.dict.encoding=Cp1250\r\n\r\nfsa.dict.encoder=suffix\r\n\r\nfsa.dict."
  },
  {
    "path": "morfologik-speller/src/test/resources/morfologik/speller/test-infix.info",
    "chars": 157,
    "preview": "#\r\n# Dictionary properties.\r\n#\r\n\r\nfsa.dict.separator=+\r\nfsa.dict.encoding=iso-8859-2\r\n\r\nfsa.dict.encoder=infix\r\n\r\nfsa.di"
  },
  {
    "path": "morfologik-speller/src/test/resources/morfologik/speller/test-utf-spell.info",
    "chars": 376,
    "preview": "#\r\n# Dictionary properties.\r\n# UTF-8 encoding or native2ascii has to be used for non-ASCII data.\r\n#\r\n\r\nfsa.dict.separato"
  },
  {
    "path": "morfologik-speller/src/test/resources/morfologik/speller/test_freq_iso.info",
    "chars": 982,
    "preview": "#\n# Dictionary properties.\n#\n\nfsa.dict.separator=+\nfsa.dict.encoding=iso-8859-2\n\nfsa.dict.encoder=suffix\n\nfsa.dict.frequ"
  },
  {
    "path": "morfologik-stemming/pom.xml",
    "chars": 1451,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n<project xmlns=\"http://maven.apache.org/POM/4.0.0\" xmlns:xsi=\"http://www.w3.org/"
  },
  {
    "path": "morfologik-stemming/src/main/java/morfologik/stemming/ArrayViewList.java",
    "chars": 1806,
    "preview": "package morfologik.stemming;\n\nimport java.util.*;\n\n/** A view over a range of an array. */\n@SuppressWarnings(\"serial\")\nf"
  },
  {
    "path": "morfologik-stemming/src/main/java/morfologik/stemming/BufferUtils.java",
    "chars": 5494,
    "preview": "package morfologik.stemming;\n\nimport java.nio.ByteBuffer;\nimport java.nio.CharBuffer;\nimport java.nio.charset.CharacterC"
  },
  {
    "path": "morfologik-stemming/src/main/java/morfologik/stemming/Dictionary.java",
    "chars": 3401,
    "preview": "package morfologik.stemming;\n\nimport java.io.IOException;\nimport java.io.InputStream;\nimport java.net.MalformedURLExcept"
  },
  {
    "path": "morfologik-stemming/src/main/java/morfologik/stemming/DictionaryAttribute.java",
    "chars": 11296,
    "preview": "package morfologik.stemming;\n\nimport java.nio.charset.Charset;\nimport java.util.ArrayList;\nimport java.util.Arrays;\nimpo"
  },
  {
    "path": "morfologik-stemming/src/main/java/morfologik/stemming/DictionaryIterator.java",
    "chars": 3446,
    "preview": "package morfologik.stemming;\n\nimport java.nio.ByteBuffer;\nimport java.nio.CharBuffer;\nimport java.nio.charset.CharsetDec"
  },
  {
    "path": "morfologik-stemming/src/main/java/morfologik/stemming/DictionaryLookup.java",
    "chars": 9509,
    "preview": "package morfologik.stemming;\n\nimport static morfologik.fsa.MatchResult.SEQUENCE_IS_A_PREFIX;\n\nimport java.nio.ByteBuffer"
  },
  {
    "path": "morfologik-stemming/src/main/java/morfologik/stemming/DictionaryMetadata.java",
    "chars": 14347,
    "preview": "package morfologik.stemming;\n\nimport static morfologik.stemming.DictionaryAttribute.*;\n\nimport java.io.IOException;\nimpo"
  },
  {
    "path": "morfologik-stemming/src/main/java/morfologik/stemming/DictionaryMetadataBuilder.java",
    "chars": 5975,
    "preview": "package morfologik.stemming;\n\nimport java.nio.charset.Charset;\nimport java.util.EnumMap;\nimport java.util.List;\nimport j"
  },
  {
    "path": "morfologik-stemming/src/main/java/morfologik/stemming/EncoderType.java",
    "chars": 587,
    "preview": "package morfologik.stemming;\n\n/** Known {@link ISequenceEncoder}s. */\npublic enum EncoderType {\n  SUFFIX {\n    @Override"
  },
  {
    "path": "morfologik-stemming/src/main/java/morfologik/stemming/ISequenceEncoder.java",
    "chars": 2014,
    "preview": "package morfologik.stemming;\n\nimport java.nio.ByteBuffer;\n\n/**\n * The logic of encoding one sequence of bytes relative t"
  },
  {
    "path": "morfologik-stemming/src/main/java/morfologik/stemming/IStemmer.java",
    "chars": 801,
    "preview": "package morfologik.stemming;\n\nimport java.util.List;\n\n/** A generic &quot;stemmer&quot; interface in Morfologik. */\npubl"
  },
  {
    "path": "morfologik-stemming/src/main/java/morfologik/stemming/NoEncoder.java",
    "chars": 862,
    "preview": "package morfologik.stemming;\n\nimport java.nio.ByteBuffer;\n\n/** No relative encoding at all (full target form is returned"
  },
  {
    "path": "morfologik-stemming/src/main/java/morfologik/stemming/TrimInfixAndSuffixEncoder.java",
    "chars": 5109,
    "preview": "package morfologik.stemming;\n\nimport java.nio.ByteBuffer;\n\n/**\n * Encodes <code>dst</code> relative to <code>src</code> "
  },
  {
    "path": "morfologik-stemming/src/main/java/morfologik/stemming/TrimPrefixAndSuffixEncoder.java",
    "chars": 3611,
    "preview": "package morfologik.stemming;\n\nimport java.nio.ByteBuffer;\n\n/**\n * Encodes <code>dst</code> relative to <code>src</code> "
  },
  {
    "path": "morfologik-stemming/src/main/java/morfologik/stemming/TrimSuffixEncoder.java",
    "chars": 2380,
    "preview": "package morfologik.stemming;\n\nimport java.nio.ByteBuffer;\n\n/**\n * Encodes <code>dst</code> relative to <code>src</code> "
  },
  {
    "path": "morfologik-stemming/src/main/java/morfologik/stemming/UnmappableInputException.java",
    "chars": 411,
    "preview": "package morfologik.stemming;\n\nimport java.nio.charset.CharacterCodingException;\n\n/**\n * Thrown when some input cannot be"
  },
  {
    "path": "morfologik-stemming/src/main/java/morfologik/stemming/WordData.java",
    "chars": 7099,
    "preview": "package morfologik.stemming;\n\nimport java.io.UnsupportedEncodingException;\nimport java.nio.ByteBuffer;\nimport java.nio.C"
  },
  {
    "path": "morfologik-stemming/src/test/java/morfologik/stemming/DictionaryLookupTest.java",
    "chars": 8364,
    "preview": "package morfologik.stemming;\n\nimport static org.assertj.core.api.Assertions.*;\nimport static org.junit.jupiter.api.Asser"
  },
  {
    "path": "morfologik-stemming/src/test/java/morfologik/stemming/DictionaryMetadataBuilderTest.java",
    "chars": 1536,
    "preview": "package morfologik.stemming;\n\nimport java.io.IOException;\nimport java.nio.charset.Charset;\nimport java.util.Collections;"
  },
  {
    "path": "morfologik-stemming/src/test/java/morfologik/stemming/DictionaryMetadataTest.java",
    "chars": 2113,
    "preview": "package morfologik.stemming;\n\nimport com.carrotsearch.randomizedtesting.jupiter.Randomized;\nimport com.carrotsearch.rand"
  },
  {
    "path": "morfologik-stemming/src/test/java/morfologik/stemming/DictionaryTest.java",
    "chars": 1021,
    "preview": "package morfologik.stemming;\n\nimport static org.junit.jupiter.api.Assertions.*;\n\nimport com.carrotsearch.randomizedtesti"
  },
  {
    "path": "morfologik-stemming/src/test/java/morfologik/stemming/EncodersTest.java",
    "chars": 1139,
    "preview": "package morfologik.stemming;\n\nimport com.carrotsearch.randomizedtesting.jupiter.Randomized;\nimport com.carrotsearch.rand"
  },
  {
    "path": "morfologik-stemming/src/test/java/morfologik/stemming/SequenceEncodersTest.java",
    "chars": 2551,
    "preview": "package morfologik.stemming;\n\nimport com.carrotsearch.randomizedtesting.jupiter.Randomized;\nimport com.carrotsearch.rand"
  },
  {
    "path": "morfologik-stemming/src/test/resources/morfologik/stemming/escape-separator.info",
    "chars": 122,
    "preview": "#\r\n# An escape sequence for the separator.\r\n#\r\n\r\nfsa.dict.separator=\\t\r\nfsa.dict.encoding=UTF-8\r\nfsa.dict.encoder=suffix"
  },
  {
    "path": "morfologik-stemming/src/test/resources/morfologik/stemming/test-diacritics-utf8.info",
    "chars": 108,
    "preview": "#\r\n# Dictionary properties.\r\n#\r\n\r\nfsa.dict.separator=+\r\nfsa.dict.encoding=UTF-8\r\n\r\nfsa.dict.encoder=suffix\r\n"
  },
  {
    "path": "morfologik-stemming/src/test/resources/morfologik/stemming/test-infix.info",
    "chars": 110,
    "preview": "#\r\n# Dictionary properties.\r\n#\r\n\r\nfsa.dict.separator=+\r\nfsa.dict.encoding=iso-8859-2\r\n\r\nfsa.dict.encoder=infix"
  },
  {
    "path": "morfologik-stemming/src/test/resources/morfologik/stemming/test-prefix.info",
    "chars": 159,
    "preview": "#\r\n# Dictionary properties.\r\n#\r\n\r\nfsa.dict.separator=+\r\nfsa.dict.encoding=iso-8859-2\r\n\r\nfsa.dict.encoder=prefix\r\n\r\nfsa.d"
  },
  {
    "path": "morfologik-stemming/src/test/resources/morfologik/stemming/test-removed-props.info",
    "chars": 114,
    "preview": "#\r\n# Dictionary properties.\r\n#\r\n\r\nfsa.dict.separator=+\r\nfsa.dict.encoding=iso-8859-2\r\n\r\nfsa.dict.uses-infixes=true"
  },
  {
    "path": "morfologik-stemming/src/test/resources/morfologik/stemming/test-separator-in-lookup.in",
    "chars": 15,
    "preview": "l+A+LW\nl+A+NN1d"
  },
  {
    "path": "morfologik-stemming/src/test/resources/morfologik/stemming/test-separators.info",
    "chars": 110,
    "preview": "#\r\n# Dictionary properties.\r\n#\r\n\r\nfsa.dict.separator=+\r\nfsa.dict.encoding=iso8859-1\r\n\r\nfsa.dict.encoder=none\r\n"
  },
  {
    "path": "morfologik-stemming/src/test/resources/morfologik/stemming/test-separators.txt",
    "chars": 116,
    "preview": "token1+\ntoken2++\ntoken3+++\ntoken4+token2\ntoken5+token2+\ntoken6+token2++\ntoken7+token2+token3+\ntoken8+token2+token3++"
  },
  {
    "path": "morfologik-stemming/src/test/resources/morfologik/stemming/test-synth.info",
    "chars": 111,
    "preview": "#\r\n# Dictionary properties.\r\n#\r\n\r\nfsa.dict.separator=+\r\nfsa.dict.encoding=iso-8859-2\r\n\r\nfsa.dict.encoder=suffix"
  },
  {
    "path": "morfologik-stemming/src/test/resources/morfologik/stemming/unicode-separator.info",
    "chars": 126,
    "preview": "#\r\n# An escape sequence for the separator.\r\n#\r\n\r\nfsa.dict.separator=\\u0009\r\nfsa.dict.encoding=UTF-8\r\nfsa.dict.encoder=su"
  },
  {
    "path": "morfologik-tools/pom.xml",
    "chars": 3958,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n<project xmlns=\"http://maven.apache.org/POM/4.0.0\" xmlns:xsi=\"http://www.w3.org/"
  },
  {
    "path": "morfologik-tools/src/main/assembly/package.xml",
    "chars": 1181,
    "preview": "<assembly xmlns=\"http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0\"\n  xmlns:xsi=\"http://www.w3.org/20"
  },
  {
    "path": "morfologik-tools/src/main/java/morfologik/tools/BinaryInput.java",
    "chars": 3892,
    "preview": "package morfologik.tools;\n\nimport com.beust.jcommander.Parameter;\nimport java.io.BufferedInputStream;\nimport java.io.IOE"
  },
  {
    "path": "morfologik-tools/src/main/java/morfologik/tools/CliTool.java",
    "chars": 6013,
    "preview": "package morfologik.tools;\n\nimport com.beust.jcommander.JCommander;\nimport com.beust.jcommander.MissingCommandException;\n"
  },
  {
    "path": "morfologik-tools/src/main/java/morfologik/tools/CustomParameterConverters.java",
    "chars": 611,
    "preview": "package morfologik.tools;\n\nimport com.beust.jcommander.IStringConverter;\nimport com.beust.jcommander.IStringConverterFac"
  },
  {
    "path": "morfologik-tools/src/main/java/morfologik/tools/DictApply.java",
    "chars": 5055,
    "preview": "package morfologik.tools;\n\nimport com.beust.jcommander.Parameter;\nimport com.beust.jcommander.Parameters;\nimport java.io"
  },
  {
    "path": "morfologik-tools/src/main/java/morfologik/tools/DictCompile.java",
    "chars": 7807,
    "preview": "package morfologik.tools;\n\nimport com.beust.jcommander.Parameter;\nimport com.beust.jcommander.Parameters;\nimport com.beu"
  },
  {
    "path": "morfologik-tools/src/main/java/morfologik/tools/DictDecompile.java",
    "chars": 4210,
    "preview": "package morfologik.tools;\n\nimport com.beust.jcommander.Parameter;\nimport com.beust.jcommander.Parameters;\nimport java.io"
  },
  {
    "path": "morfologik-tools/src/main/java/morfologik/tools/ExitStatus.java",
    "chars": 477,
    "preview": "package morfologik.tools;\n\npublic enum ExitStatus {\n  /** The command was successful. */\n  SUCCESS(0),\n\n  /** Unknown er"
  },
  {
    "path": "morfologik-tools/src/main/java/morfologik/tools/ExitStatusException.java",
    "chars": 480,
    "preview": "package morfologik.tools;\n\nimport java.util.Locale;\n\n@SuppressWarnings(\"serial\")\nclass ExitStatusException extends Runti"
  },
  {
    "path": "morfologik-tools/src/main/java/morfologik/tools/FSABuild.java",
    "chars": 272,
    "preview": "package morfologik.tools;\n\nimport com.beust.jcommander.Parameters;\n\n@Parameters(\n    hidden = true,\n    commandNames = \""
  },
  {
    "path": "morfologik-tools/src/main/java/morfologik/tools/FSACompile.java",
    "chars": 2278,
    "preview": "package morfologik.tools;\n\nimport com.beust.jcommander.Parameter;\nimport com.beust.jcommander.Parameters;\nimport com.beu"
  },
  {
    "path": "morfologik-tools/src/main/java/morfologik/tools/FSADecompile.java",
    "chars": 1709,
    "preview": "package morfologik.tools;\n\nimport com.beust.jcommander.Parameter;\nimport com.beust.jcommander.Parameters;\nimport java.io"
  },
  {
    "path": "morfologik-tools/src/main/java/morfologik/tools/FSADump.java",
    "chars": 261,
    "preview": "package morfologik.tools;\n\nimport com.beust.jcommander.Parameters;\n\n@Parameters(\n    hidden = true,\n    commandNames = \""
  },
  {
    "path": "morfologik-tools/src/main/java/morfologik/tools/FSAInfo.java",
    "chars": 2976,
    "preview": "package morfologik.tools;\n\nimport com.beust.jcommander.Parameter;\nimport com.beust.jcommander.Parameters;\nimport java.io"
  },
  {
    "path": "morfologik-tools/src/main/java/morfologik/tools/Launcher.java",
    "chars": 428,
    "preview": "package morfologik.tools;\n\n/** JAR entry point. */\npublic final class Launcher {\n  private Launcher() {}\n\n  @SuppressWar"
  },
  {
    "path": "morfologik-tools/src/main/java/morfologik/tools/SerializationFormat.java",
    "chars": 546,
    "preview": "package morfologik.tools;\n\nimport morfologik.fsa.builders.CFSA2Serializer;\nimport morfologik.fsa.builders.FSA5Serializer"
  },
  {
    "path": "morfologik-tools/src/main/java/morfologik/tools/ValidateFileExists.java",
    "chars": 823,
    "preview": "package morfologik.tools;\n\nimport com.beust.jcommander.IValueValidator;\nimport com.beust.jcommander.ParameterException;\n"
  },
  {
    "path": "morfologik-tools/src/main/java/morfologik/tools/ValidateParentDirExists.java",
    "chars": 885,
    "preview": "package morfologik.tools;\n\nimport com.beust.jcommander.IValueValidator;\nimport com.beust.jcommander.ParameterException;\n"
  },
  {
    "path": "morfologik-tools/src/main/package/README.txt",
    "chars": 232,
    "preview": "${project.artifactId}, ${project.version}\r\n\r\nTools for morphological dictionary and finite state automata construction.\r"
  },
  {
    "path": "morfologik-tools/src/main/package/examples/01-fsa-build.input",
    "chars": 37,
    "preview": "black sabbath\nmetallica\njudas priest\n"
  },
  {
    "path": "morfologik-tools/src/main/package/examples/01-fsa-build.txt",
    "chars": 759,
    "preview": "# This example constructs a finite state automaton (FSA) out\r\n# of byte sequences in the input file:\r\n#\r\n# https://en.wi"
  },
  {
    "path": "morfologik-tools/src/main/package/examples/02-fsa-dump.txt",
    "chars": 276,
    "preview": "# This example dumps byte sequences from a finite\r\n# state automaton (created in a previous example), \r\n# separating eac"
  },
  {
    "path": "morfologik-tools/src/main/package/examples/03-fsa-info.txt",
    "chars": 307,
    "preview": "# This example prints diagnostic information about\r\n# a compiled automaton.\r\n\r\necho \"FSA5:\"\r\njava -jar ../lib/${project."
  },
  {
    "path": "morfologik-tools/src/main/package/examples/04-dict-compile.info",
    "chars": 852,
    "preview": "#\r\n# Dictionary metadata. A Java property file, read as UTF-8.\r\n#\r\n\r\n#\r\n# REQUIRED PROPERTIES\r\n#\r\n\r\n# Column (lemma, inf"
  },
  {
    "path": "morfologik-tools/src/main/package/examples/04-dict-compile.input",
    "chars": 220,
    "preview": "jawa;jawy;subst:pl:acc:f\njawa;jawy;subst:pl:nom:f\njawa;jawy;subst:pl:voc:f\njawa;jawy;subst:sg:gen:f\njawór;jawór;subst:sg"
  },
  {
    "path": "morfologik-tools/src/main/package/examples/04-dict-compile.txt",
    "chars": 801,
    "preview": "#\r\n# This example compiles a dictionary for use with DictionaryLookup \r\n# (dictionary-driven stemming and morphological "
  },
  {
    "path": "morfologik-tools/src/main/package/examples/05-dict-decompile.txt",
    "chars": 352,
    "preview": "#\r\n# This example decompiles an existing dictionary into\r\n# its source form (columns).\r\n#\r\n# The input file must point a"
  },
  {
    "path": "morfologik-tools/src/test/java/morfologik/tools/DictCompileBug.java",
    "chars": 2280,
    "preview": "package morfologik.tools;\n\nimport com.carrotsearch.randomizedtesting.jupiter.Randomized;\nimport com.carrotsearch.randomi"
  },
  {
    "path": "morfologik-tools/src/test/java/morfologik/tools/DictCompileTest.java",
    "chars": 5075,
    "preview": "package morfologik.tools;\n\nimport com.carrotsearch.randomizedtesting.jupiter.Randomized;\nimport com.carrotsearch.randomi"
  },
  {
    "path": "morfologik-tools/src/test/java/morfologik/tools/FSACompileTest.java",
    "chars": 5845,
    "preview": "package morfologik.tools;\n\nimport com.carrotsearch.randomizedtesting.jupiter.Randomized;\nimport com.carrotsearch.randomi"
  },
  {
    "path": "pom.xml",
    "chars": 21385,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n<project xmlns=\"http://maven.apache.org/POM/4.0.0\" xmlns:xsi=\"http://www.w3.org/"
  }
]

// ... and 23 more files (download for full content)

About this extraction

This page contains the full source code of the morfologik/morfologik-stemming GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 157 files (455.1 KB), approximately 120.6k tokens, and a symbol index with 659 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!