Showing preview only (8,569K chars total). Download the full file or copy to clipboard to get everything.
Repository: OpenCCG/openccg
Branch: master
Commit: 9968e814a490
Files: 981
Total size: 8.0 MB
Directory structure:
gitextract_fqyg89tm/
├── .gitignore
├── AUTHORS
├── CHANGES
├── LICENSE
├── README.md
├── SAMPLE_GRAMMARS
├── TODO
├── bin/
│ ├── ccg-build
│ ├── ccg-build.bat
│ ├── ccg-cvr
│ ├── ccg-cvr.bat
│ ├── ccg-draw-graph
│ ├── ccg-draw-graph.bat
│ ├── ccg-draw-tree
│ ├── ccg-draw-tree.bat
│ ├── ccg-env
│ ├── ccg-env.bat
│ ├── ccg-grammardoc
│ ├── ccg-grammardoc.bat
│ ├── ccg-gt
│ ├── ccg-gt.bat
│ ├── ccg-ht-factors
│ ├── ccg-hypertagger
│ ├── ccg-hypertagger.bat
│ ├── ccg-parse
│ ├── ccg-parse.bat
│ ├── ccg-postagger
│ ├── ccg-postagger.bat
│ ├── ccg-realize
│ ├── ccg-realize.bat
│ ├── ccg-supertagger
│ ├── ccg-supertagger.bat
│ ├── ccg-test
│ ├── ccg-test.bat
│ ├── ccg-update
│ ├── ccg-update.bat
│ ├── ccg2xml
│ ├── ccg2xml.bat
│ ├── ccg_draw_tree.py
│ ├── dlf_parser.py
│ ├── tccg
│ ├── tccg.bat
│ ├── visccg
│ ├── visccg.bat
│ └── wccg
├── build.xml
├── ccg-format-grammars/
│ ├── arabic/
│ │ └── arabic.ccg
│ ├── inherit/
│ │ └── inherit.ccg
│ ├── tiny/
│ │ └── tiny.ccg
│ └── tinytiny/
│ └── tinytiny.ccg
├── ccgbank/
│ ├── bin/
│ │ ├── american-to-logical-quotes.py
│ │ ├── convert-mtc-systems.py
│ │ ├── convert-spaces-to-newlines.py
│ │ ├── convert_all
│ │ ├── correlate-to-judgments.py
│ │ ├── filter_feats.py
│ │ ├── find-betas-no-gold.py
│ │ ├── gen_parser_events_a
│ │ ├── gen_parser_events_b
│ │ ├── gen_parser_events_c
│ │ ├── gen_parser_events_d
│ │ ├── gen_parser_events_e
│ │ ├── gen_realizer_events_a
│ │ ├── gen_realizer_events_b
│ │ ├── gen_realizer_events_c
│ │ ├── gen_realizer_events_d
│ │ ├── gen_realizer_events_e
│ │ ├── get-text-from-mtc-style.py
│ │ ├── get-truecase-list.py
│ │ ├── get-uniq-nbest.py
│ │ ├── get_factors_from_parse.py
│ │ ├── get_just_words_from_ner_text.py
│ │ ├── lowercase_tagged_text.py
│ │ ├── merge-mtc-ids.py
│ │ ├── merge-stanford-morpha-with-pos.py
│ │ ├── merge_pos_ne.py
│ │ ├── my_unicode.py
│ │ ├── nbest-mtc-to-bleu-nist.py
│ │ ├── ner/
│ │ │ ├── NERApp/
│ │ │ │ └── src/
│ │ │ │ └── nerapp/
│ │ │ │ └── NERApp.java
│ │ │ ├── build-ner-api.properties
│ │ │ ├── build-ner-api.xml
│ │ │ ├── ner-tag.sh
│ │ │ ├── ner_word.py
│ │ │ └── post-process-stanford-ner.py
│ │ ├── normalize_text.py
│ │ ├── post-process-metricsmatr.py
│ │ ├── prepare-for-stanford-morpha.py
│ │ ├── reverse-spaces-to-newlines.py
│ │ ├── run-all-bleu.sh
│ │ ├── stem_nns_vbx
│ │ ├── toUTF-8.py
│ │ └── write_morph.py
│ ├── build-ht.properties
│ ├── build-ht.xml
│ ├── build-models.properties
│ ├── build-models.xml
│ ├── build-original.properties
│ ├── build-original.xml
│ ├── build-ps.properties
│ ├── build-ps.xml
│ ├── build-release.xml
│ ├── build-rz.properties
│ ├── build-rz.xml
│ ├── build-st.properties
│ ├── build-st.xml
│ ├── build.properties
│ ├── build.xml
│ ├── data/
│ │ ├── README
│ │ ├── get_wsj_nns_vb
│ │ ├── novel/
│ │ │ └── two-sents
│ │ ├── sample/
│ │ │ └── AUTO/
│ │ │ └── 00/
│ │ │ └── wsj_0001.auto
│ │ ├── stem_wsj_nns_vb
│ │ ├── wsj-nns-vb
│ │ ├── wsj-nns-vb-stems
│ │ └── wsj_0595Corrected.auto
│ ├── extract/
│ │ ├── add-chunks.xsl
│ │ ├── convert-to-graph.xsl
│ │ ├── convert-to-hlds.xsl
│ │ ├── grammar.xml
│ │ └── raise-nodes.xsl
│ ├── models/
│ │ ├── hypertagger/
│ │ │ ├── ht-prior.flm
│ │ │ ├── ht.config
│ │ │ ├── ht2.config
│ │ │ ├── ht2.train.config
│ │ │ ├── pos.config
│ │ │ ├── posprior.flm
│ │ │ └── vocab.flm
│ │ ├── parser/
│ │ │ ├── binary.flm
│ │ │ ├── gen-events.prefs
│ │ │ ├── leaf.flm
│ │ │ ├── model.init
│ │ │ ├── parse.prefs
│ │ │ ├── top.flm
│ │ │ ├── unary.flm
│ │ │ └── vocab.flm
│ │ ├── realizer/
│ │ │ ├── alph.init
│ │ │ ├── gen-events.prefs
│ │ │ ├── model.init
│ │ │ ├── rz-test.prefs
│ │ │ └── stp3.flm
│ │ └── supertagger/
│ │ ├── pos.config
│ │ ├── posprior.flm
│ │ ├── st.config
│ │ ├── st.config.train
│ │ ├── st.noprior.config
│ │ ├── stprior.flm
│ │ └── vocab.flm
│ ├── original/
│ │ └── models/
│ │ ├── postagger/
│ │ │ ├── pos.config
│ │ │ └── posprior.flm
│ │ └── supertagger/
│ │ ├── st.config
│ │ ├── stprior.flm
│ │ └── vocab.flm
│ ├── plugins/
│ │ ├── MyGenSynScorer.java
│ │ ├── MyNgramCombo.java
│ │ ├── MyNgramGenSynProduct.java
│ │ ├── MyNgramPrecisionBaselineGenInterp.java
│ │ ├── MyNgramPrecisionPerceptronInterp.java
│ │ ├── MyParserPerceptronScorer.java
│ │ ├── MyRealizerPerceptronScorer.java
│ │ ├── MySynAgrFeatureExtractor.java
│ │ ├── MySynSemAgrFeatureExtractor.java
│ │ └── MySynSemFeatureExtractor.java
│ ├── stanford-nlp/
│ │ ├── classifiers/
│ │ │ └── stanfordner-README
│ │ └── stanfordnlp-README
│ └── templates/
│ ├── addFilterLexFeats.xsl
│ ├── addStems.xsl
│ ├── adjustAppos.xsl
│ ├── adjustCandCcats1.xsl
│ ├── adjustCats.xsl
│ ├── adjustParenthetical.xsl
│ ├── adjustReportedSpeech.xsl
│ ├── adjustRoles.xsl
│ ├── adv-placement.xsl
│ ├── agr-macroInsert.xsl
│ ├── allotIdLeaf.xsl
│ ├── allotIdTree.xsl
│ ├── allotIndexRel.xsl
│ ├── anim-macroInsert.xsl
│ ├── annotateAppos-Dash.xsl
│ ├── annotateAppos1.xsl
│ ├── annotateAppos2.xsl
│ ├── annotateAppos3.xsl
│ ├── annotateBrackets.xsl
│ ├── annotateColons.xsl
│ ├── annotateDots.xsl
│ ├── annotateExtraposedAppos.xsl
│ ├── annotateNom-AdjConj.xsl
│ ├── annotateParentheticals1.xsl
│ ├── annotateParentheticals2.xsl
│ ├── annotatePlace.xsl
│ ├── annotatePrtConjs.xsl
│ ├── annotateQuotes.xsl
│ ├── annotateReportedSpeech.xsl
│ ├── annotateStrayAppos.xsl
│ ├── annotateVPCommas.xsl
│ ├── balanceAppos.xsl
│ ├── balanceDash-Paren.xsl
│ ├── catCheck.xsl
│ ├── ccgRules.xsl
│ ├── changePunct.xsl
│ ├── closedCatInsert.xsl
│ ├── collapseMWUFull.xsl
│ ├── collapseMWUPart.xsl
│ ├── collapseMWUSharedTask.xsl
│ ├── computeCats.xsl
│ ├── convTags.xsl
│ ├── correctMistakes1.xsl
│ ├── correctPPHeads.xsl
│ ├── exportToAuto.xsl
│ ├── filterLex.xsl
│ ├── find-s-back-n.xsl
│ ├── genchal11-out.xsl
│ ├── inferConjRules.xsl
│ ├── insertLF.xsl
│ ├── insertOrigPunctsLF.xsl
│ ├── insertPTBInfo.xsl
│ ├── insertPunctLF-PosMod.xsl
│ ├── insertPunctLF.xsl
│ ├── insertQuoteSemClassInfo.xsl
│ ├── insertSemFeats.xsl
│ ├── introduceMMExtns.xsl
│ ├── labelAppos.xsl
│ ├── labelConj1.xsl
│ ├── labelConj2.xsl
│ ├── labelConj3.xsl
│ ├── labelPlace1.xsl
│ ├── labelPlace2.xsl
│ ├── labelPuncts.xsl
│ ├── lexExtr.xsl
│ ├── macroInsert.xsl
│ ├── macroLexDef.xsl
│ ├── markMistakes.xsl
│ ├── markUnmatched.xsl
│ ├── mergeMorph.xsl
│ ├── morphExtr.xsl
│ ├── normPTBTags.xsl
│ ├── normPunctPos.xsl
│ ├── origPunctRules.xsl
│ ├── overtWHLexRels.xsl
│ ├── overtWHPronouns.xsl
│ ├── phraseExtractor.xsl
│ ├── preSentAdj.xsl
│ ├── punctLexConjRules.xsl
│ ├── reinsertPTBInfo.xsl
│ ├── repairUnmatched.xsl
│ ├── replaceColons.xsl
│ ├── rulesExtr.xsl
│ ├── sentFinalPuncts.xsl
│ ├── trueCaser.xsl
│ └── uncurryBareParse.xsl
├── devel/
│ ├── BEN.TODO
│ └── schedule.txt
├── docs/
│ ├── build.xml
│ ├── ccgbank-README
│ ├── guide/
│ │ ├── build.xml
│ │ ├── cgloss4e.sty
│ │ ├── gb4e.sty
│ │ ├── guide.tex
│ │ ├── openccg.bib
│ │ └── openccg.sty
│ ├── index.html
│ ├── maxent.cpp.patch
│ ├── realizer/
│ │ ├── build.xml
│ │ ├── cgloss4e.sty
│ │ ├── gb4e.sty
│ │ ├── manual.tex
│ │ ├── openccg.sty
│ │ └── refs.bib
│ ├── style.css
│ └── taggers-README
├── grammars/
│ ├── add-chunks.xsl
│ ├── add-family-members.xsl
│ ├── append.xsl
│ ├── categories.xsd
│ ├── comic/
│ │ ├── build.xml
│ │ ├── dict.xml
│ │ ├── dict.xsl
│ │ ├── grammar.xml
│ │ ├── lexicon-base.xsl
│ │ ├── lexicon.xml
│ │ ├── morph.xml
│ │ ├── rules-base.xml
│ │ ├── rules.xml
│ │ ├── testbed.xml
│ │ ├── types-extras.xml
│ │ └── types.xml
│ ├── convert-lists.xsl
│ ├── convert-to-graph.xsl
│ ├── convert-to-hlds.xsl
│ ├── core-en/
│ │ ├── add-chunks.xsl
│ │ ├── add-intonation-info.xsl
│ │ ├── adj.xsl
│ │ ├── adv.xsl
│ │ ├── auxv.xsl
│ │ ├── cats.xsl
│ │ ├── conj.xsl
│ │ ├── derive-features.xsl
│ │ ├── det.xsl
│ │ ├── dict.xsl
│ │ ├── drop-features.xsl
│ │ ├── lexicon.xsl
│ │ ├── misc.xsl
│ │ ├── np.xsl
│ │ ├── pp.xsl
│ │ ├── punct.xsl
│ │ ├── raise-nodes.xsl
│ │ ├── templates.xsl
│ │ ├── types.xml
│ │ ├── unary-rules.xsl
│ │ └── v.xsl
│ ├── dict.xsd
│ ├── extract-morph.xsl
│ ├── flights/
│ │ ├── build.xml
│ │ ├── dict.xml
│ │ ├── dict.xsl
│ │ ├── flairs.xml
│ │ ├── grammar.xml
│ │ ├── lexicon-base.xsl
│ │ ├── lexicon.xml
│ │ ├── morph.xml
│ │ ├── nina107.xml
│ │ ├── rules-base.xml
│ │ ├── rules.xml
│ │ ├── testbed.xml
│ │ ├── types-extras.xml
│ │ ├── types.xml
│ │ └── vera.xml
│ ├── grammar.xsd
│ ├── hlds.xsd
│ ├── lexicon.xsd
│ ├── mini-basque/
│ │ ├── build.xml
│ │ ├── dict.xml
│ │ ├── grammar.xml
│ │ ├── lexicon-base.xml
│ │ ├── lexicon.xml
│ │ ├── morph.xml
│ │ ├── parameters.xml
│ │ ├── preset-families.xml
│ │ ├── rules.xml
│ │ ├── testbed.out
│ │ ├── testbed.xml
│ │ └── types.xml
│ ├── mini-dyirbal/
│ │ ├── build.xml
│ │ ├── dict.xml
│ │ ├── grammar.xml
│ │ ├── lexicon-base.xml
│ │ ├── lexicon.xml
│ │ ├── morph.xml
│ │ ├── parameters.xml
│ │ ├── preset-families.xml
│ │ ├── rules.xml
│ │ ├── testbed.out
│ │ ├── testbed.xml
│ │ └── types.xml
│ ├── mini-english/
│ │ ├── build.xml
│ │ ├── dict.xml
│ │ ├── grammar.xml
│ │ ├── lexicon-base.xml
│ │ ├── lexicon.xml
│ │ ├── morph.xml
│ │ ├── parameters.xml
│ │ ├── preset-families.xml
│ │ ├── rules.xml
│ │ ├── testbed.out
│ │ ├── testbed.xml
│ │ └── types.xml
│ ├── mini-inuit/
│ │ ├── build.xml
│ │ ├── dict.xml
│ │ ├── grammar.xml
│ │ ├── lexicon-base.xml
│ │ ├── lexicon.xml
│ │ ├── morph.xml
│ │ ├── parameters.xml
│ │ ├── preset-families.xml
│ │ ├── rules.xml
│ │ ├── testbed.out
│ │ ├── testbed.xml
│ │ └── types.xml
│ ├── mini-nezperce/
│ │ ├── build.xml
│ │ ├── parameters.xml
│ │ ├── preset-families.xml
│ │ └── types.xml
│ ├── mini-tagalog/
│ │ ├── build.xml
│ │ ├── dict.xml
│ │ ├── grammar.xml
│ │ ├── lexicon-base.xml
│ │ ├── lexicon.xml
│ │ ├── morph.xml
│ │ ├── parameters.xml
│ │ ├── preset-families.xml
│ │ ├── rules.xml
│ │ ├── testbed.out
│ │ ├── testbed.xml
│ │ └── types.xml
│ ├── mini-turkish/
│ │ ├── build.xml
│ │ ├── dict.xml
│ │ ├── grammar.xml
│ │ ├── lexicon-base.xml
│ │ ├── lexicon.xml
│ │ ├── morph.xml
│ │ ├── parameters.xml
│ │ ├── preset-families.xml
│ │ ├── rules.xml
│ │ ├── testbed.out
│ │ ├── testbed.xml
│ │ └── types.xml
│ ├── morph.xsd
│ ├── parameters.xsd
│ ├── parametric-lexicon.xsl
│ ├── parametric-types.xsl
│ ├── raise-nodes.xsl
│ ├── routes/
│ │ ├── build.xml
│ │ ├── dict.xml
│ │ ├── dlf_test.xml
│ │ ├── grammar.xml
│ │ ├── lexicon-base.xml
│ │ ├── lexicon.xml
│ │ ├── morph.xml
│ │ ├── rules.xml
│ │ ├── testbed.xml
│ │ └── types.xml
│ ├── rules.xsd
│ ├── simplify-lists.xsl
│ ├── tiny/
│ │ ├── build.xml
│ │ ├── grammar.xml
│ │ ├── lexicon.xml
│ │ ├── morph.xml
│ │ ├── rules.xml
│ │ ├── testbed.xml
│ │ └── types.xml
│ ├── tokens.xsd
│ ├── treeify-lists.xsl
│ ├── types.xsd
│ └── worldcup/
│ ├── add-chunks.xsl
│ ├── build.xml
│ ├── dict.xml
│ ├── grammar.xml
│ ├── lexicon-base.xsl
│ ├── lexicon.xml
│ ├── morph.xml
│ ├── raise-nodes.xsl
│ ├── rules.xml
│ └── testbed.xml
├── lib/
│ ├── ASL
│ ├── LGPL
│ ├── LIBNOTES
│ ├── MIT
│ ├── SUN
│ ├── jdom.license
│ └── jline.license
├── pom.xml
├── src/
│ ├── ccg2xml/
│ │ ├── README
│ │ ├── Tree.py
│ │ ├── arabic.ccg
│ │ ├── build.xml
│ │ ├── ccg.ply
│ │ ├── ccg_editor.py
│ │ ├── convert-ply.py
│ │ ├── grammar_template.ccg
│ │ ├── lex.py
│ │ └── yacc.py
│ ├── kenlm/
│ │ ├── COPYING
│ │ ├── COPYING.LESSER
│ │ ├── LICENSE
│ │ ├── README
│ │ ├── build_jnilib.sh
│ │ ├── clean_query_only.sh
│ │ ├── jni/
│ │ │ └── wrap.cc
│ │ ├── lm/
│ │ │ ├── Jamfile
│ │ │ ├── bhiksha.cc
│ │ │ ├── bhiksha.hh
│ │ │ ├── binary_format.cc
│ │ │ ├── binary_format.hh
│ │ │ ├── blank.hh
│ │ │ ├── build_binary_main.cc
│ │ │ ├── config.cc
│ │ │ ├── config.hh
│ │ │ ├── enumerate_vocab.hh
│ │ │ ├── facade.hh
│ │ │ ├── fragment_main.cc
│ │ │ ├── kenlm_max_order_main.cc
│ │ │ ├── left.hh
│ │ │ ├── left_test.cc
│ │ │ ├── lm_exception.cc
│ │ │ ├── lm_exception.hh
│ │ │ ├── max_order.hh
│ │ │ ├── model.cc
│ │ │ ├── model.hh
│ │ │ ├── model_test.cc
│ │ │ ├── model_type.hh
│ │ │ ├── ngram_query.hh
│ │ │ ├── partial.hh
│ │ │ ├── partial_test.cc
│ │ │ ├── quantize.cc
│ │ │ ├── quantize.hh
│ │ │ ├── query_main.cc
│ │ │ ├── read_arpa.cc
│ │ │ ├── read_arpa.hh
│ │ │ ├── return.hh
│ │ │ ├── search_hashed.cc
│ │ │ ├── search_hashed.hh
│ │ │ ├── search_trie.cc
│ │ │ ├── search_trie.hh
│ │ │ ├── sizes.cc
│ │ │ ├── sizes.hh
│ │ │ ├── state.hh
│ │ │ ├── test.arpa
│ │ │ ├── test_nounk.arpa
│ │ │ ├── trie.cc
│ │ │ ├── trie.hh
│ │ │ ├── trie_sort.cc
│ │ │ ├── trie_sort.hh
│ │ │ ├── value.hh
│ │ │ ├── value_build.cc
│ │ │ ├── value_build.hh
│ │ │ ├── virtual_interface.cc
│ │ │ ├── virtual_interface.hh
│ │ │ ├── weights.hh
│ │ │ └── word_index.hh
│ │ └── util/
│ │ ├── Jamfile
│ │ ├── bit_packing.cc
│ │ ├── bit_packing.hh
│ │ ├── bit_packing_test.cc
│ │ ├── double-conversion/
│ │ │ ├── Jamfile
│ │ │ ├── LICENSE
│ │ │ ├── bignum-dtoa.cc
│ │ │ ├── bignum-dtoa.h
│ │ │ ├── bignum.cc
│ │ │ ├── bignum.h
│ │ │ ├── cached-powers.cc
│ │ │ ├── cached-powers.h
│ │ │ ├── diy-fp.cc
│ │ │ ├── diy-fp.h
│ │ │ ├── double-conversion.cc
│ │ │ ├── double-conversion.h
│ │ │ ├── fast-dtoa.cc
│ │ │ ├── fast-dtoa.h
│ │ │ ├── fixed-dtoa.cc
│ │ │ ├── fixed-dtoa.h
│ │ │ ├── ieee.h
│ │ │ ├── strtod.cc
│ │ │ ├── strtod.h
│ │ │ └── utils.h
│ │ ├── ersatz_progress.cc
│ │ ├── ersatz_progress.hh
│ │ ├── exception.cc
│ │ ├── exception.hh
│ │ ├── fake_ofstream.hh
│ │ ├── file.cc
│ │ ├── file.hh
│ │ ├── file_piece.cc
│ │ ├── file_piece.hh
│ │ ├── file_piece_test.cc
│ │ ├── getopt.c
│ │ ├── getopt.hh
│ │ ├── have.hh
│ │ ├── joint_sort.hh
│ │ ├── joint_sort_test.cc
│ │ ├── mmap.cc
│ │ ├── mmap.hh
│ │ ├── multi_intersection.hh
│ │ ├── multi_intersection_test.cc
│ │ ├── murmur_hash.cc
│ │ ├── murmur_hash.hh
│ │ ├── pcqueue.hh
│ │ ├── pool.cc
│ │ ├── pool.hh
│ │ ├── probing_hash_table.hh
│ │ ├── probing_hash_table_test.cc
│ │ ├── proxy_iterator.hh
│ │ ├── read_compressed.cc
│ │ ├── read_compressed.hh
│ │ ├── read_compressed_test.cc
│ │ ├── scoped.cc
│ │ ├── scoped.hh
│ │ ├── sized_iterator.hh
│ │ ├── sorted_uniform.hh
│ │ ├── sorted_uniform_test.cc
│ │ ├── string_piece.cc
│ │ ├── string_piece.hh
│ │ ├── string_piece_hash.hh
│ │ ├── thread_pool.hh
│ │ ├── tokenize_piece.hh
│ │ ├── tokenize_piece_test.cc
│ │ ├── usage.cc
│ │ └── usage.hh
│ ├── opennlp/
│ │ ├── ccg/
│ │ │ ├── Parse.java
│ │ │ ├── Realize.java
│ │ │ ├── TextCCG.java
│ │ │ ├── WebCCG.java
│ │ │ ├── alignment/
│ │ │ │ ├── AbstractEncodingScheme.java
│ │ │ │ ├── Alignment.java
│ │ │ │ ├── Alignments.java
│ │ │ │ ├── EncodingScheme.java
│ │ │ │ ├── IdentifiedPhraseReader.java
│ │ │ │ ├── IdentifiedPhraseWriter.java
│ │ │ │ ├── IndexBase.java
│ │ │ │ ├── Mapping.java
│ │ │ │ ├── MappingFormat.java
│ │ │ │ ├── MappingGroup.java
│ │ │ │ ├── MappingReader.java
│ │ │ │ ├── MappingWriter.java
│ │ │ │ ├── MosesEncodingScheme.java
│ │ │ │ ├── NAACLEncodingScheme.java
│ │ │ │ ├── Phrase.java
│ │ │ │ ├── PhrasePosition.java
│ │ │ │ ├── PhraseReader.java
│ │ │ │ ├── PhraseWriter.java
│ │ │ │ ├── Status.java
│ │ │ │ └── package.html
│ │ │ ├── disjunctivizer/
│ │ │ │ ├── AlignedEdgeFilter.java
│ │ │ │ ├── Disjunctivizer.java
│ │ │ │ ├── EdgeMatchFilter.java
│ │ │ │ ├── FilteredLFEdgeSet.java
│ │ │ │ ├── LFGraphDifference.java
│ │ │ │ ├── LabelMatchFilter.java
│ │ │ │ ├── MatchType.java
│ │ │ │ ├── MatchTypeFilter.java
│ │ │ │ ├── VertexMatchFilter.java
│ │ │ │ └── package.html
│ │ │ ├── grammar/
│ │ │ │ ├── AbstractApplicationRule.java
│ │ │ │ ├── AbstractCompositionRule.java
│ │ │ │ ├── AbstractRule.java
│ │ │ │ ├── AbstractSubstitutionRule.java
│ │ │ │ ├── AbstractTypeRaisingRule.java
│ │ │ │ ├── BackwardApplication.java
│ │ │ │ ├── BackwardComposition.java
│ │ │ │ ├── BackwardSubstitution.java
│ │ │ │ ├── BackwardTypeRaising.java
│ │ │ │ ├── ForwardApplication.java
│ │ │ │ ├── ForwardComposition.java
│ │ │ │ ├── ForwardSubstitution.java
│ │ │ │ ├── ForwardTypeRaising.java
│ │ │ │ ├── FragmentJoining.java
│ │ │ │ ├── GlueRule.java
│ │ │ │ ├── Grammar.java
│ │ │ │ ├── Rule.java
│ │ │ │ ├── RuleGroup.java
│ │ │ │ ├── TypeChangingRule.java
│ │ │ │ ├── Types.java
│ │ │ │ └── to-apml.xsl
│ │ │ ├── grammardoc/
│ │ │ │ ├── AbstractDocumenter.java
│ │ │ │ ├── Documenter.java
│ │ │ │ ├── DocumenterContext.java
│ │ │ │ ├── DocumenterException.java
│ │ │ │ ├── DocumenterFactory.java
│ │ │ │ ├── DocumenterName.java
│ │ │ │ ├── DocumenterNotFoundException.java
│ │ │ │ ├── DocumenterSourceException.java
│ │ │ │ ├── GrammarDoc.java
│ │ │ │ ├── GrammarDocException.java
│ │ │ │ ├── SourceGrammar.java
│ │ │ │ ├── SourceGrammarFile.java
│ │ │ │ ├── SourceGrammarFileType.java
│ │ │ │ └── html/
│ │ │ │ ├── HTMLDocumenter.java
│ │ │ │ ├── base.xsl
│ │ │ │ ├── categories.xsl
│ │ │ │ ├── comments.xsl
│ │ │ │ ├── grammar.xsl
│ │ │ │ ├── grammardoc.css
│ │ │ │ ├── lexicon.js
│ │ │ │ ├── lexicon.xsl
│ │ │ │ ├── morph.xsl
│ │ │ │ ├── navigation.xsl
│ │ │ │ ├── rules.xsl
│ │ │ │ └── types.xsl
│ │ │ ├── hylo/
│ │ │ │ ├── Alt.java
│ │ │ │ ├── Box.java
│ │ │ │ ├── Compacter.java
│ │ │ │ ├── Converter.java
│ │ │ │ ├── Diamond.java
│ │ │ │ ├── EPsScorer.java
│ │ │ │ ├── EnglishAgreementExtractor.java
│ │ │ │ ├── Flattener.java
│ │ │ │ ├── HyloAtom.java
│ │ │ │ ├── HyloFormula.java
│ │ │ │ ├── HyloHelper.java
│ │ │ │ ├── HyloVar.java
│ │ │ │ ├── LexDepFeatureExtractor.java
│ │ │ │ ├── LexDependency.java
│ │ │ │ ├── ModalOp.java
│ │ │ │ ├── Mode.java
│ │ │ │ ├── ModeLabel.java
│ │ │ │ ├── ModeVar.java
│ │ │ │ ├── Nominal.java
│ │ │ │ ├── NominalAtom.java
│ │ │ │ ├── NominalVar.java
│ │ │ │ ├── Op.java
│ │ │ │ ├── Proposition.java
│ │ │ │ ├── SatOp.java
│ │ │ │ └── graph/
│ │ │ │ ├── DefaultLFEdgeFactory.java
│ │ │ │ ├── LFEdge.java
│ │ │ │ ├── LFEdgeFactory.java
│ │ │ │ ├── LFEdgeLabel.java
│ │ │ │ ├── LFGraph.java
│ │ │ │ ├── LFGraphFactory.java
│ │ │ │ ├── LFVertex.java
│ │ │ │ ├── LFVertexType.java
│ │ │ │ └── package.html
│ │ │ ├── lexicon/
│ │ │ │ ├── DataItem.java
│ │ │ │ ├── DefaultTokenizer.java
│ │ │ │ ├── EnglishExpander.java
│ │ │ │ ├── EntriesItem.java
│ │ │ │ ├── FactorChainWord.java
│ │ │ │ ├── FactorKey.java
│ │ │ │ ├── Family.java
│ │ │ │ ├── FullWord.java
│ │ │ │ ├── LexException.java
│ │ │ │ ├── Lexicon.java
│ │ │ │ ├── LicensingFeature.java
│ │ │ │ ├── ListPairWord.java
│ │ │ │ ├── MacroAdder.java
│ │ │ │ ├── MacroItem.java
│ │ │ │ ├── MorphItem.java
│ │ │ │ ├── SimpleWord.java
│ │ │ │ ├── SupertaggerAdapter.java
│ │ │ │ ├── Tokenizer.java
│ │ │ │ ├── TrueCaser.java
│ │ │ │ ├── Word.java
│ │ │ │ └── WordWithPitchAccent.java
│ │ │ ├── ngrams/
│ │ │ │ ├── AAnFilter.java
│ │ │ │ ├── AbstractStandardNgramModel.java
│ │ │ │ ├── ConditionalProbabilityTable.java
│ │ │ │ ├── FactoredNgramModel.java
│ │ │ │ ├── FactoredNgramModelFamily.java
│ │ │ │ ├── KenNgramModel.java
│ │ │ │ ├── LinearNgramScorerCombo.java
│ │ │ │ ├── NgramDiversityPruningStrategy.java
│ │ │ │ ├── NgramFilter.java
│ │ │ │ ├── NgramPrecisionModel.java
│ │ │ │ ├── NgramScorer.java
│ │ │ │ ├── RepetitionScorer.java
│ │ │ │ ├── Reversible.java
│ │ │ │ ├── SRILMNgramModel.java
│ │ │ │ ├── SRILMNgramModelType.java
│ │ │ │ ├── SRILM_FactoredScorerMaker.java
│ │ │ │ ├── SRILM_ScorerMaker.java
│ │ │ │ ├── SelfParaphraseBiaser.java
│ │ │ │ ├── SignScorerInterpolation.java
│ │ │ │ ├── SignScorerProduct.java
│ │ │ │ ├── StandardNgramModel.java
│ │ │ │ └── kenlm/
│ │ │ │ ├── MurmurHash.java
│ │ │ │ └── jni/
│ │ │ │ └── KenLM.java
│ │ │ ├── parse/
│ │ │ │ ├── Chart.java
│ │ │ │ ├── DerivationHistory.java
│ │ │ │ ├── Edge.java
│ │ │ │ ├── EdgeHash.java
│ │ │ │ ├── ParseException.java
│ │ │ │ ├── Parser.java
│ │ │ │ ├── Supertagger.java
│ │ │ │ ├── postagger/
│ │ │ │ │ ├── BasicPOSTagger.java
│ │ │ │ │ ├── DummyPOSTagger.java
│ │ │ │ │ ├── POSTagSequenceGetter.java
│ │ │ │ │ ├── POSTagger.java
│ │ │ │ │ └── ml/
│ │ │ │ │ ├── POSPriorModel.java
│ │ │ │ │ └── POSTagFex.java
│ │ │ │ ├── supertagger/
│ │ │ │ │ ├── JavaSupertaggingApp.java
│ │ │ │ │ ├── LabellingStrategy.java
│ │ │ │ │ ├── WordAndPOSDictionaryLabellingStrategy.java
│ │ │ │ │ ├── io/
│ │ │ │ │ │ ├── XMLPOSDictionaryReader.java
│ │ │ │ │ │ └── XMLWordDictionaryReader.java
│ │ │ │ │ ├── ml/
│ │ │ │ │ │ ├── FeatureExtractor.java
│ │ │ │ │ │ ├── STFex.java
│ │ │ │ │ │ ├── STPriorModel.java
│ │ │ │ │ │ └── ZhangLeTrainingExtractor.java
│ │ │ │ │ └── util/
│ │ │ │ │ ├── PipedTokenizer.java
│ │ │ │ │ ├── ProbPairComparator.java
│ │ │ │ │ ├── STTaggerDictionary.java
│ │ │ │ │ ├── STTaggerPOSDictionary.java
│ │ │ │ │ ├── STTaggerWordDictionary.java
│ │ │ │ │ ├── SupertagSequenceGetter.java
│ │ │ │ │ └── TaggingDictionaryExtractor.java
│ │ │ │ └── tagger/
│ │ │ │ ├── Constants.java
│ │ │ │ ├── ProbIndexPair.java
│ │ │ │ ├── TaggedWord.java
│ │ │ │ ├── io/
│ │ │ │ │ ├── CorpusIterator.java
│ │ │ │ │ ├── PipeDelimitedFactoredBundleCorpusIterator.java
│ │ │ │ │ └── SRILMFactoredBundleCorpusIterator.java
│ │ │ │ ├── ml/
│ │ │ │ │ ├── MaxentModel.java
│ │ │ │ │ ├── TaggerFeature.java
│ │ │ │ │ ├── ZLMEM.java
│ │ │ │ │ └── ZLMaxentModel.java
│ │ │ │ ├── sequencescoring/
│ │ │ │ │ ├── Backpointer.java
│ │ │ │ │ ├── FBNode.java
│ │ │ │ │ ├── SequenceScorer.java
│ │ │ │ │ └── Trellis.java
│ │ │ │ └── util/
│ │ │ │ ├── CCGBankToSRILMFLM.java
│ │ │ │ ├── ConfigFileProcessor.java
│ │ │ │ └── ResultSink.java
│ │ │ ├── perceptron/
│ │ │ │ ├── Alphabet.java
│ │ │ │ ├── ComposedFeatureExtractor.java
│ │ │ │ ├── ComposedFeatureVector.java
│ │ │ │ ├── EventFile.java
│ │ │ │ ├── FeatureExtractor.java
│ │ │ │ ├── FeatureList.java
│ │ │ │ ├── FeatureMap.java
│ │ │ │ ├── FeatureVector.java
│ │ │ │ ├── Model.java
│ │ │ │ ├── PerceptronScorer.java
│ │ │ │ ├── ReRankingPerceptronScorer.java
│ │ │ │ └── Trainer.java
│ │ │ ├── realize/
│ │ │ │ ├── Chart.java
│ │ │ │ ├── DiversityPruningStrategy.java
│ │ │ │ ├── Edge.java
│ │ │ │ ├── EdgeCombos.java
│ │ │ │ ├── EdgeFactory.java
│ │ │ │ ├── EdgeHash.java
│ │ │ │ ├── FeatureLicenser.java
│ │ │ │ ├── Hypertagger.java
│ │ │ │ ├── LexicalDiversityPruningStrategy.java
│ │ │ │ ├── NBestPruningStrategy.java
│ │ │ │ ├── PruningStrategy.java
│ │ │ │ ├── Realizer.java
│ │ │ │ ├── RuleInstance.java
│ │ │ │ ├── StemPruningStrategy.java
│ │ │ │ ├── Tracker.java
│ │ │ │ └── hypertagger/
│ │ │ │ ├── FeatureExtractionException.java
│ │ │ │ ├── LFInfo.java
│ │ │ │ ├── LFLoader.java
│ │ │ │ ├── LMFactorExtractor.java
│ │ │ │ ├── TagExtract.java
│ │ │ │ ├── TagExtractor.java
│ │ │ │ ├── ZLMaxentHypertagger.java
│ │ │ │ ├── ZLMaxentModel.java
│ │ │ │ └── ZLPOSTagger.java
│ │ │ ├── synsem/
│ │ │ │ ├── AbstractCat.java
│ │ │ │ ├── Arg.java
│ │ │ │ ├── ArgStack.java
│ │ │ │ ├── AtomCat.java
│ │ │ │ ├── BasicArg.java
│ │ │ │ ├── CatReader.java
│ │ │ │ ├── Category.java
│ │ │ │ ├── CategoryFcn.java
│ │ │ │ ├── CategoryFcnAdapter.java
│ │ │ │ ├── ComplexCat.java
│ │ │ │ ├── DerivationHandler.java
│ │ │ │ ├── Dollar.java
│ │ │ │ ├── GenerativeSyntacticModel.java
│ │ │ │ ├── LF.java
│ │ │ │ ├── LexLogProbFeatureExtractor.java
│ │ │ │ ├── LexSemOrigin.java
│ │ │ │ ├── Modality.java
│ │ │ │ ├── ReRankingScorer.java
│ │ │ │ ├── SetArg.java
│ │ │ │ ├── Sign.java
│ │ │ │ ├── SignHash.java
│ │ │ │ ├── SignScorer.java
│ │ │ │ ├── Slash.java
│ │ │ │ ├── SlashMode.java
│ │ │ │ ├── SyntacticFeatureExtractor.java
│ │ │ │ ├── TargetCat.java
│ │ │ │ └── VarModality.java
│ │ │ ├── test/
│ │ │ │ ├── CrossValidateRealizer.java
│ │ │ │ ├── DerivMaker.java
│ │ │ │ ├── GenTargets.java
│ │ │ │ ├── Regression.java
│ │ │ │ ├── RegressionInfo.java
│ │ │ │ ├── ScorerMaker.java
│ │ │ │ ├── TimingMap.java
│ │ │ │ ├── UpdateTestbed.java
│ │ │ │ └── Validator.java
│ │ │ ├── unify/
│ │ │ │ ├── EmptySubstitution.java
│ │ │ │ ├── Feature.java
│ │ │ │ ├── FeatureStructure.java
│ │ │ │ ├── GFeatStruc.java
│ │ │ │ ├── GFeatVar.java
│ │ │ │ ├── GSubstitution.java
│ │ │ │ ├── GUnifier.java
│ │ │ │ ├── Indexed.java
│ │ │ │ ├── ModFcn.java
│ │ │ │ ├── Mutable.java
│ │ │ │ ├── SelfCondensingSub.java
│ │ │ │ ├── SimpleSubstitution.java
│ │ │ │ ├── SimpleType.java
│ │ │ │ ├── Substitution.java
│ │ │ │ ├── Unifiable.java
│ │ │ │ ├── Unifier.java
│ │ │ │ ├── UnifyControl.java
│ │ │ │ ├── UnifyFailure.java
│ │ │ │ └── Variable.java
│ │ │ └── util/
│ │ │ ├── ArrayListWithIdentityEquals.java
│ │ │ ├── CompositeFilter.java
│ │ │ ├── DelegatedFilter.java
│ │ │ ├── DisplayPrefs.java
│ │ │ ├── Filter.java
│ │ │ ├── FilteredMap.java
│ │ │ ├── FilteredSet.java
│ │ │ ├── GroupMap.java
│ │ │ ├── IntHashSetMap.java
│ │ │ ├── Interner.java
│ │ │ ├── InverseFilter.java
│ │ │ ├── JLineReader.java
│ │ │ ├── LineReader.java
│ │ │ ├── ListMap.java
│ │ │ ├── MembershipFilter.java
│ │ │ ├── Pair.java
│ │ │ ├── SingletonList.java
│ │ │ ├── StructureSharingList.java
│ │ │ ├── TrieMap.java
│ │ │ ├── VisitedFilter.java
│ │ │ ├── Visualizer.java
│ │ │ └── XmlScanner.java
│ │ └── ccgbank/
│ │ ├── CCGBankConvert.java
│ │ ├── CCGBankExtract.java
│ │ ├── CCGBankTask.java
│ │ ├── CCGBankTaskFileGroup.java
│ │ ├── CCGBankTaskSources.java
│ │ ├── CCGBankTaskTemplates.java
│ │ ├── CCGBankTaskTestbed.java
│ │ ├── InputSourceAdapter.java
│ │ ├── TemplatesProcessor.java
│ │ ├── XMLFilterProcessor.java
│ │ ├── XSLTProcessor.java
│ │ ├── ccgbank.properties
│ │ ├── convert/
│ │ │ ├── ApposTally.java
│ │ │ ├── DiscrCheck.java
│ │ │ ├── GenChal11Adjuster.java
│ │ │ ├── GenConjRule.java
│ │ │ ├── InfoHelper.java
│ │ │ ├── Javafns.java
│ │ │ ├── MWHelper.java
│ │ │ ├── MorphLookup.java
│ │ │ ├── OrigPunctRules.java
│ │ │ ├── PunctHelper.java
│ │ │ ├── RoleAdjuster.java
│ │ │ └── XSLTTrueCaser.java
│ │ ├── extract/
│ │ │ ├── CatNode.java
│ │ │ ├── DebugHelper.java
│ │ │ ├── DefaultLFHelper.java
│ │ │ ├── ExtractGrammar.java
│ │ │ ├── FreqTally.java
│ │ │ ├── InsertLFHelper.java
│ │ │ ├── LexExtract.java
│ │ │ ├── MorphExtrHelper.java
│ │ │ ├── MorphExtract.java
│ │ │ ├── RulesExtract.java
│ │ │ ├── RulesTally.java
│ │ │ └── Testbed.java
│ │ ├── lexicon-base.xsl
│ │ ├── parse/
│ │ │ ├── CCGbankDerivation.jjt
│ │ │ ├── SimpleNode.java
│ │ │ └── grammarInsert
│ │ └── rules-base.xsl
│ ├── pom.xml
│ └── srilmbridge/
│ ├── Makefile
│ └── srilmbridge.cpp
└── test/
├── grammar.xml
├── lexicon.xml
├── morph.xml
├── opennlp/
│ └── ccg/
│ ├── alignment/
│ │ ├── AlignmentTest.java
│ │ ├── IdentifiedPhraseReaderWriterTest.java
│ │ ├── IndexBaseTest.java
│ │ ├── MappingFormatTest.java
│ │ ├── MappingGroupTest.java
│ │ ├── MappingReaderWriterTest.java
│ │ ├── MappingTest.java
│ │ ├── PhraseReaderWriterTest.java
│ │ └── PhraseTest.java
│ ├── disjunctivizer/
│ │ ├── AlignedEdgeFilterTest.java
│ │ ├── DisjunctivizerTest.java
│ │ ├── EdgeMatchFilterTest.java
│ │ ├── FilteredLFEdgeSetTest.java
│ │ ├── LFGraphDifferenceTest.java
│ │ ├── LabelMatchFilterTest.java
│ │ └── VertexMatchFilterTest.java
│ ├── hylo/
│ │ └── graph/
│ │ ├── LFBaseTest.java
│ │ ├── LFEdgeFactoryTest.java
│ │ ├── LFEdgeTest.java
│ │ ├── LFGraphTest.java
│ │ └── LFVertexTest.java
│ └── util/
│ ├── CompositeFilterTest.java
│ ├── DelegatedFilterTest.java
│ ├── FilteredMapTest.java
│ ├── FilteredSetTest.java
│ ├── InverseFilterTest.java
│ ├── MembershipFilterTest.java
│ └── VisitedFilterTest.java
├── output.xml
├── paraphrases.xml
├── rules.xml
└── testlf.xml
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
*.sw?
*~
*.jar
*.so
*.class
*.pyc
*.tgz
*.gz
*.lm
*.3bo
*.mod
vocab.*
!vocab.flm
.project
.classpath
bin/ccg2xml.py
bin/lex.py
bin/yacc.py
bin/ccg_editor.py
bin/Tree.py
ccgbank/convert/
ccgbank/data/novel/two-sents.dir/
ccgbank/extract/*
!ccgbank/extract/grammar.xml
!ccgbank/extract/*.xsl
ccgbank/feats/hypertagger/
ccgbank/feats/parser/
ccgbank/feats/realizer/
ccgbank/feats/supertagger/
ccgbank/logs/
ccgbank/models/*/*dict*
ccgbank/models/realizer/excl/
ccgbank/original/corpus/
ccgbank/original/feats/postagger/
ccgbank/original/feats/supertagger/
ccgbank/original/logs/
ccgbank/propccgbank/
ccgbank/stanford-nlp/*.jar
ccgbank/stanford-nlp/classifiers/*.prop
docs/api/
docs/grammars-rough-guide.pdf
docs/realizer-manual.pdf
docs/guide/guide.*
!docs/guide/guide.tex
docs/realizer/manual.*
!docs/realizer/manual.tex
lib/openccg.jar
output/
src/ccg2xml/ccg2xml.py
src/srilmbridge/*.h
================================================
FILE: AUTHORS
================================================
Main Authors:
Core Java Code:
Jason Baldridge <jmb -at- cogsci.ed.ac.uk>
Gann Bierner <gbierner -at- cogsci.ed.ac.uk>
Michael White <mwhite -at- inf.ed.ac.uk>
CCG-to-XML:
Ben Wing <ben -at- 666.com>
Hypertagger:
Dominic Espinosa <espinosa -at- ling.osu.edu>
Supertagger:
Dennis Mehay <mehay -at- ling.osu.edu>
Disjunctivizer:
Scott Martin <scott -at- ling.osu.edu>
Additional Contributors:
Jonathan Barker <barker -at- ling.osu.edu>
semantic graph visualization tool
Cem Bozsahin <cem.bozsahin -at- ed.ac.uk / bozsahin -at- metu.edu.tr>
grammars from Bozsahin and Steedman (2003)
Gunes Erkan <gunes -at- ceng.metu.edu.tr>
handling of type hierarchies
Dennis Mehay <mehay -at- ling.osu.edu>
KenLM interface
(in addition to supertagger)
Scott Martin <scott -at- ling.osu.edu>
GrammarDoc
incorporating the SRILM toolkit for scoring
build process for CCGbank grammar extraction
(in addition to disjunctivizer)
Rajakrishnan Rajkumar <raja -at- ling.osu.edu>
build files and XSLT transforms for CCGbank grammar extraction
English agreement model
David Reitter <dreitter -at- inf.ed.ac.uk>
command completion and per grammar history
"tiny" grammar
Alexandros Triantafyllidis <s0345201 -at- sms.ed.ac.uk>
visualization of derivations via latex
Ben Wing <ben -at- 666.com>
wccg and WebCCG code; CCG-format grammars
(in addition to ccg2xml)
================================================
FILE: CHANGES
================================================
0.9.6 - ...
-----------
* Updated .gitignore, CHANGES and docs/index.html for transition to
GitHub
0.9.5 - dependency length minimization, disjunctivizer, KenLM
-------------------------------------------------------------
* Added features for dependency ordering and dependency length
minimization in realization.
* Added disjunctivizer package, for creating a disjunctive LF XML
structure based on an LF graph difference.
* Added support for using a very large 5-gram memory-mapped language
model with KenLM on linux.
* Added n-best parser output.
* Added option for in-memory perceptron training.
0.9.4 - broad coverage paraphrasing, CCGbank training
-----------------------------------------------------
* Added Hockenmaier-style generative probability model for parsing and
realization.
* Added supertagger and use of adaptive supertagging in parsing.
* Added build files for CCGbank training, documented in
docs/ccgbank-README, as well as ones for parsing and realizing
novel text (thereby generating grammatical paraphrases).
* Added release targets for CCGbank data and pre-built English models.
* Added use of Stanford tokenizer, morphological analyzer and named
entity recognizer in parsing novel text.
* Added use of ordinary hashing for lex signs, so that signs that
differ only in the pos tag can be distinguished (for robustness).
* Added hypertagger input option and derivation history output to ccg-realize.
* Added n-best realization output to ccg-test.
* Added tracking of lex heads to signs via modifier attr on slashes.
* Added gold standard pred info for training with hypertagger.
* Added initial syntactic feature extractor.
* Added caching of supertags in cats.
* Added option to use word positions in converting atoms in the LFs,
which is now the default. Added :nowordpos command in tccg to
change the preference to the lexical naming option.
* Changed tccg to also update Grammar.theGrammar.prefs, which seems
to have fixed issue with :nosem option not working.
* Refactored feature extraction to use a trie for representing features
as a sequence of interned string keys, to allow for lazy feature
extraction that more quickly filters features not in the alphabet.
* Added serialization of signs.
* Added python script for drawing derivs in .auto files as trees (uses NLTK).
* Added cell pruning limit in realization.
* Added support for 'magic tokens' (like numbers) in ccg2xml,
contributed by vanjena@users.sourceforge.net.
* Turned off caching of category hash codes b/c of problems with stale
values (a method of checking for staleness might be added later).
* Improved utf8 support (esp. for macs). Note that utf8 support seems
hopelessly broken for the Windows command-line, in that none of the
available terminal apps (including for cygwin) both display
characters correctly and work with tccg. I/O to files works fine
though.
* Added xml escaping for bleu and nbest output.
* Added ccg-draw-graph tool for visualizing semantic dependency
graphs.
0.9.3 - minor changes
-------------------
* Added runCommand method in Visualizer so that the latex
visualization works on Linux
* Added id info to test items and bleu output.
* Changed default lex licensing feature to be last in Lexicon.loadLicensingFeatures.
* Added loop for computing closure of licensed no sem edges in EdgeFactory.
* Changed FeatureLicenser to unify feat strucs instead of cats.
0.9.2 - VisCCG release plus initial hypertagger support
-------------------------------------------------------
* Added check for unary rule cycles in parser and realizer.
* Added initial version of greedy fragment assembly in realization when
a complete realization is not found.
* Added case for composition of X/Y Y/Z where Y has arity 2.
* Added option to filter rule apps by observed supercat-rule combos.
* VisCCG: Please see the list of changes in the archives at http://comp.ling.utexas.edu/wiki/doku.php/openccg/dev
* Added LexSemOrigin interface for tracking of origin of lexical
predications back to a sign or unary type changing rule.
* Removed unused LF in DataItem.
* Added supertagger-based filtering to lexical lookup.
* Upgraded to JDOM 1.1.
* Upgraded parser to use ambiguity packing.
* Added scoring and n-best pruning to parser.
* Refactored SignScorer to synsem package, for shared use
by the realizer and parser. NB: This may require minor
refactoring of imports and recompilation of realizer clients.
* Changed realizer to check instantiation of outermost args by
default, thereby improving completeness at minor cost to efficiency.
Accordingly, renamed checkInstantiation flag in EdgeFactory to
debugInstantiation, which now controls whether to report such
cats to System.err.
* Added hypertagger (realizer supertagger) interface and initial
version of beta-best realization using it.
* Changed Family.deriveSupertag to remove the semantic part of a
cat name following a colon.
0.9.1 - New tools: grammardoc, ccg2xml; other misc updates
----------------------------------------------------------
* Changed dateFormatNoYear to "*.MM.dd" to avoid ambiguity with
numbers.
* Changed Grammar.initializeTransformers to set indenting more robustly
by adding try-catch blocks for illegal argument exceptions.
* Refactored RuleGroup to apply unary and binary rules separately.
* Refactored Lexicon and RuleGroup to load lex/morph/rule info incrementally,
using a new XmlScanner utility class. These changes avoid the need to
store large XML docs all in memory at once, while keeping the refactoring
to a minimum.
* Revised LF flattening to propagate the alts, opts & chunks based on
  the expression structure, rather than the graph structure.
This change makes the 'shared' attribute (on nominal references)
more transparent in how it works with disjunctions that operate on
different levels of the tree.
* Revised LF compaction to allow duplicate predications, where an attempt
is made to attach them in different locations if possible.
* Added GrammarDoc, which generates HTML documentation from a source grammar.
See README, under `Generating Grammar Documentation' for more information.
* Added initial version of ccg2xml, for specifying grammars in the
more human-friendly .ccg format.
* Changed build system
- Made separate build files for ccg2xml and documentation
- Made the `release' target of the main build file create a binary for
distribution, instead of just the source
0.9.0 - Disjunctive LFs
-----------------------
* Refactored realizer to put all no-sem edges on the agenda,
which requires making an exception for edges with no indices
in the implementation of the index filter, but otherwise
yields a more uniform approach to creating edges.
* Refactored realizer to use representative edges (one per cat)
instead of edge groups, which ends up being simpler on the whole
and should be easier to explain.
* Refactored categories to allow for equality checks with and
without taking the LFs into consideration.
* Refactored edge equiv classes to use coverage bit vector
and cat sans LF to check equality.
* Refactored lex instantiation to produce all possible instantiations
that respect the alt exclusivity constraints.
* Changed Sign, DerivationHistory to store rule object.
* Changed alt edge construction to create new LF from input signs and rule,
since signs in equiv class of alts can now have different LFs.
* Added active alt tracking and completing of edges with optional bits.
* Changed HyloVar to check for equal types when checking for equality up
to var renaming.
* Refactored generics to avoid type warnings in Eclipse.
* Relaxed LF chunking constraints to allow combinations with edges
(or trackers more generally) that are shared across multiple
alt set options.
* Added "shared" attribute to nominal terms to indicate references
to nodes that are shared across alternatives in a disjunctive LF;
then revamped and reinforced the LF chunking constraints.
* Fixed problem with signMap not pointing to opt-completed edge.
* Improved edge printing from realizer chart to show derivations.
* Updated realizer to keep edges whose signs have the simplest derivation,
among those with the same surface words.
* Added filter for ungrammatical test cases in ccg-test text output.
* Added first draft of realizer manual.
0.8.6 - Java 1.5 switch, n-gram scoring improvements
----------------------------------------------------
* Added propagation of reverse flag on n-gram models.
* Refactored LinearNgramScorerCombo and n-gram models to
support interpolation at the word level.
* Added caching of log probs in NgramScorer, to avoid recomputing
log prob of words for a sign's initial sign.
* Added n-gram diversity pruning strategy.
* Changed SignHash to only keep signs that are unique up to surface words,
thereby ignoring different POS or supertags; also changed it to keep
signs with lower derivational complexity during insertion.
* Added reverse flag for loaded n-gram models with ccg-test, ccg-realize.
* Fixed sentence delimiter text output for reversible standard n-gram models;
made AAnFilter reversible.
* Added Xalan 2.6.0 jars, to support Java 1.5 builds.
* Added support for duration special tokens; note that the implementation has
an unavoidable dependency on Java 1.5.
0.8.5 - "Rough Guide", sem types, command history/completion, and more
----------------------------------------------------------------------
* Added initial core-en/types.xml.
* Generalized feature licensing to allow for selective listing of supertypes
in the also-licensed-by attribute.
* Fixed bug in unifying two vars with simple types.
* Removed useless SignHash.values method; clarified intention to
eventually remove this class.
* Streamlined lexical access for realization.
* Removed superfluous unique stamps in var classes.
* Added support for using simple types (aka sorts) with semantic features
and nominals. During category instantiation, a morph item's class is
assigned to the nominal var(s) for the [*DEFAULT*] proposition, and
the types of all nominal vars are then propagated to all other
nominal vars with the same name, throughout the category.
* Changed tokenizer keep-words-with-sem-classes option in grammar.xsd
to replacement-sem-classes option, where all semantic classes to use
in replacing words with sem classes for language models are listed.
Also changed semantic class replacement routine to uppercase semantic
class names.
* Added initial sem types to core-en, comic, and flights grammars.
* Fixed bug in constructing type hierarchies with multiple inheritance.
* Added ccg-update tool, with initial task to add full words (pre-parsed)
to the testbed file; also updated ccg-test to use the pre-parsed words
when writing training text files.
* Updated ccg-cvr tool to use full words when present.
Also added filter to remove test item duplicates from
cross-validation training sets.
* Added reporting of mean reciprocal rank to ccg-test, as well
as residual mean reciprocal rank, based on the cases that do
not match the target exactly.
* Updated ccg-cvr tool to work with factored language models.
* Fixed null pointer exception in DefaultTokenizer.format, Word.setW methods.
* Added timing of lex lookup to realization metrics.
* Added David's JLine console support to tccg, with command completion and
per grammar history.
* Added handling of coarticulations in the lexicon.
* Added caching of lex lookup during realization.
* Updated to-apml.xsl to handle 'and' in multiword elements.
* Updated visualizer to handle word lists and to ignore coarts.
* Added repetition scorer, for discounting repetitive realizations.
* Added scorer class, pruning strategy class options to ccg-realize.
* Added workaround for saving command history correctly with Java 1.4 on Linux.
* Added 'tiny' grammar.
* Added grammars "rough guide".
* Added supertag as another word attr.
* Revamped LMs to use trie maps, for better speed & scalability.
* Improved handling of nulls in FLMs.
* Cleaned up word representations.
* Added even/odd selection for scoring too in ccg-test.
* Added -reverse and -scorer options to ccg-test.
* Added reverse LM capability.
* Made supertag attrs configurable.
* Switched to JDOM 1.0.
0.8.4 - Factored language models (initial support), packing/unpacking, and more
---------------------------------------------------------------
* Added Alex's latex visualization of derivations
(nb: launch of previewer works better on Windows than Linux)
* Added customizable tokenization and expansion routines for
dates/times/nums/amounts and other named entities.
* Added -2apml option to ccg-test.
* Added Word class and many related changes to tokenization.
* Added -textf|-textfsc options to ccg-test, for writing files in the format
expected by the SRILM toolkit for factored language models.
* Updated copyright notices.
* Changed ngram model to use canonical lists of words as keys,
removing size restriction.
* Added -aanfilter option to ccg-test, with an optional list of
exceptions, which may be culled from bigram counts.
* Added keep-words-with-sem-classes option to grammar.xsd, to
specify exceptional semantic classes where the word form is also
considered relevant for scoring models.
NB: Also changed grammar.xsd to specify a custom tokenizer class name
and/or keep-words-with-sem-classes on a separate
tokenizer element.
* Added support for factored language models with fixed backoff paths,
arranged into families of models for different child variables,
and with the option to have secondary models for shorter available
histories. Also added corresponding -flm|-flmsc options to
ccg-test.
* Added option to do scoring in a second stage, starting from a packed
representation.
* Switch from cached combos to collected combos, making the anytime case
more like the packed case.
* Added compacting of gen forest when unpacking is turned off.
* Added pretty-printing of regex-like gen forest.
0.8.3 - New efficiency methods, Cem-* grammars, and more
---------------------------------------------------------------
* Added grammars from Bozsahin and Steedman (2003).
* Improved instantiation of unary rules, ensuring that the
first pred is used for indexing, and fixing a bug whereby
a rule indexed by a lex pred would be missed.
* Added initial capability to use semantic classes in n-gram scoring,
as shown in ccg-realize.
* Added LF chunking rules, which yields the most dramatic improvement in
efficiency.
* Added systematic feature-based licensing and instantiation.
* Added caching of category combinations.
* Added labeling of the phrase in the XML output headed by the index
associated with the <mark>+ semantic feature.
* Added feature filtering and LF indenting to tccg display options.
* Added XML configuration of LF relation sorting.
* Added :2tb (to testbed) command for adding the current parse to the
testbed.
* Fixed grammar loading so it no longer has to be from the current directory.
* Made it possible to list a stem as a member of an open class family with a
separate pred, without getting an entry with the default pred too.
* Enabled indexRel to be declared at the level of entries or families.
* Added prefs import/export to tccg.
* Added ccg-cvr tool for cross-validating realizer.
* Reconfigured ccg-test with various new switches.
* Put feature licensing on a switch.
* Made pruning strategy configurable.
* Changed representation of coord to work better with
chunking (though less concise).
* Added option to stop realizer after new best time limit (past first
complete realization) is exceeded, via :nbtl N command
0.8.2 - Edge pruning during realization, XML/APML I/O, and more
---------------------------------------------------------------
* Changed build to ccg-build, in bin directory;
also added separate build.xml files to each sample
grammar directory. This way, a call to ccg-build
either builds the system or the current grammar,
depending on what directory you're in.
* Changed realizer to no longer allow unmatched
attr preds (ie sem features). This way, the presence
of certain sem features can be used to control realization
choices, instead of requiring these features to always
be present. To underspecify these choices, the idea
is to eventually allow for their optional inclusion.
* Added more options to turn settings off individually in tccg.
* Enabled realizer to handle type changing rules with
their own semantics in the result category.
* Added configurable edge pruning per category during
realization, which controls the number of edges with
equivalent categories to keep in the chart.
* Fixed unification bug by adding occurs checks to Dollar's fill
method, needed at least in part b/c ArgStack doesn't quite
implement Unifiable.
* Replaced hashString with hashCode and equals up to var names,
yielding a 4-5% improvement in efficiency.
* Switched to grammar.xml file. If none exists, an attempt is
made to load from the default files lexicon.xml, morph.xml and
rules.xml. See grammar.xsd for format.
* Added LF load/save from/to XML via a sequence of transformations
specified in the grammar.xml file.
* Added save-to-xml (:2xml) option for saving LFs to XML
files from tccg.
* Added save-to-apml (:2apml) option for saving last input string to
APML files from tccg.
* Updated parser to apply unary rules repeatedly.
* Various updates to flights grammar, including use of FrameNet roles.
0.8.1 - OpenCCG Release with XML Schemas (!)
----------------------------------------------------
This release adds XML Schema validation to the grammar build
process, where the comments in the XML schemas also
serve as reference documentation for the grammar formats (wahoo!).
The release also contains several bug fixes to the unification
routines, and a more substantial "flights" grammar with
semantic control over pitch accents and boundary tones.
0.8.0 - First OpenNLP CCG Library Release
----------------------------------------------------
Reorganized directories and renamed packages and tools.
Added build target for worldcup sample grammar.
Rewrote scripts for simplicity and parallelism.
Cut out pre-processing components and any classes and
libraries that looked like dead wood. Started removing
unnecessary interfaces.
Grok 0.7.0 - Towards a CCG Realizer
----------------------------------------------------
Mike is taking over Grok development and repurposing it for primary
use as a CCG Realizer in limited domain dialogue systems.
See http://www.iccs.informatics.ed.ac.uk/~mwhite/White-Baldridge-ENLG-2003-to-appear.pdf
for a description of the effort so far.
Version 0.7.0 will be the last Grok release.
After this version, Grok will be split into separately usable
and separately developed OpenNLP components.
Tom Morton will be responsible for further development of the
pre-processing components.
Mike will be responsible for further development of the CCG
parser and realizer.
Grok 0.6.0 - Multi-Modal CCG
----------------------------------------------------
For more information, see Jason's dissertation available at:
http://www.iccs.inf.ed.ac.uk/~jmb/dissertation
See Grok site for further history ...
================================================
FILE: LICENSE
================================================
GNU LESSER GENERAL PUBLIC LICENSE
Version 2.1, February 1999
(The master copy of this license lives on the GNU website.)
Copyright (C) 1991, 1999 Free Software Foundation, Inc. 59 Temple Place, Suite
330, Boston, MA 02111-1307 USA Everyone is permitted to copy and distribute
verbatim copies of this license document, but changing it is not allowed.
[This is the first released version of the Lesser GPL. It also counts as the
successor of the GNU Library Public License, version 2, hence the version
number 2.1.]
Preamble
The licenses for most software are designed to take away your freedom to share
and change it. By contrast, the GNU General Public Licenses are intended to
guarantee your freedom to share and change free software--to make sure the
software is free for all its users.
This license, the Lesser General Public License, applies to some specially
designated software packages--typically libraries--of the Free Software
Foundation and other authors who decide to use it. You can use it too, but we
suggest you first think carefully about whether this license or the ordinary
General Public License is the better strategy to use in any particular case,
based on the explanations below.
When we speak of free software, we are referring to freedom of use, not
price. Our General Public Licenses are designed to make sure that you have the
freedom to distribute copies of free software (and charge for this service if
you wish); that you receive source code or can get it if you want it; that you
can change the software and use pieces of it in new free programs; and that you
are informed that you can do these things.
To protect your rights, we need to make restrictions that forbid distributors to
deny you these rights or to ask you to surrender these rights. These
restrictions translate to certain responsibilities for you if you distribute
copies of the library or if you modify it.
For example, if you distribute copies of the library, whether gratis or for a
fee, you must give the recipients all the rights that we gave you. You must make
sure that they, too, receive or can get the source code. If you link other code
with the library, you must provide complete object files to the recipients, so
that they can relink them with the library after making changes to the library
and recompiling it. And you must show them these terms so they know their
rights.
We protect your rights with a two-step method: (1) we copyright the library, and
(2) we offer you this license, which gives you legal permission to copy,
distribute and/or modify the library.
To protect each distributor, we want to make it very clear that there is no
warranty for the free library. Also, if the library is modified by someone else
and passed on, the recipients should know that what they have is not the
original version, so that the original author's reputation will not be affected
by problems that might be introduced by others.
Finally, software patents pose a constant threat to the existence of any free
program. We wish to make sure that a company cannot effectively restrict the
users of a free program by obtaining a restrictive license from a patent
holder. Therefore, we insist that any patent license obtained for a version of
the library must be consistent with the full freedom of use specified in this
license.
Most GNU software, including some libraries, is covered by the ordinary GNU
General Public License. This license, the GNU Lesser General Public License,
applies to certain designated libraries, and is quite different from the
ordinary General Public License. We use this license for certain libraries in
order to permit linking those libraries into non-free programs.
When a program is linked with a library, whether statically or using a shared
library, the combination of the two is legally speaking a combined work, a
derivative of the original library. The ordinary General Public License
therefore permits such linking only if the entire combination fits its criteria
of freedom. The Lesser General Public License permits more lax criteria for
linking other code with the library.
We call this license the "Lesser" General Public License because it does Less to
protect the user's freedom than the ordinary General Public License. It also
provides other free software developers Less of an advantage over competing
non-free programs. These disadvantages are the reason we use the ordinary
General Public License for many libraries. However, the Lesser license provides
advantages in certain special circumstances.
For example, on rare occasions, there may be a special need to encourage the
widest possible use of a certain library, so that it becomes a de-facto
standard. To achieve this, non-free programs must be allowed to use the
library. A more frequent case is that a free library does the same job as widely
used non-free libraries. In this case, there is little to gain by limiting the
free library to free software only, so we use the Lesser General Public License.
In other cases, permission to use a particular library in non-free programs
enables a greater number of people to use a large body of free software. For
example, permission to use the GNU C Library in non-free programs enables many
more people to use the whole GNU operating system, as well as its variant, the
GNU/Linux operating system.
Although the Lesser General Public License is Less protective of the users'
freedom, it does ensure that the user of a program that is linked with the
Library has the freedom and the wherewithal to run that program using a modified
version of the Library.
The precise terms and conditions for copying, distribution and modification
follow. Pay close attention to the difference between a "work based on the
library" and a "work that uses the library". The former contains code derived
from the library, whereas the latter must be combined with the library in order
to run.
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License Agreement applies to any software library or other program which
contains a notice placed by the copyright holder or other authorized party
saying it may be distributed under the terms of this Lesser General Public
License (also called "this License"). Each licensee is addressed as "you".
A "library" means a collection of software functions and/or data prepared so as
to be conveniently linked with application programs (which use some of those
functions and data) to form executables.
The "Library", below, refers to any such software library or work which has been
distributed under these terms. A "work based on the Library" means either the
Library or any derivative work under copyright law: that is to say, a work
containing the Library or a portion of it, either verbatim or with modifications
and/or translated straightforwardly into another language. (Hereinafter,
translation is included without limitation in the term "modification".)
"Source code" for a work means the preferred form of the work for making
modifications to it. For a library, complete source code means all the source
code for all modules it contains, plus any associated interface definition
files, plus the scripts used to control compilation and installation of the
library.
Activities other than copying, distribution and modification are not covered by
this License; they are outside its scope. The act of running a program using the
Library is not restricted, and output from such a program is covered only if its
contents constitute a work based on the Library (independent of the use of the
Library in a tool for writing it). Whether that is true depends on what the
Library does and what the program that uses the Library does.
1. You may copy and distribute verbatim copies of the Library's complete source
code as you receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice and
disclaimer of warranty; keep intact all the notices that refer to this License
and to the absence of any warranty; and distribute a copy of this License along
with the Library.
You may charge a fee for the physical act of transferring a copy, and you may at
your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Library or any portion of it, thus
forming a work based on the Library, and copy and distribute such modifications
or work under the terms of Section 1 above, provided that you also meet all of
these conditions:
a) The modified work must itself be a software library.
b) You must cause the files modified to carry prominent notices stating
that you changed the files and the date of any change.
c) You must cause the whole of the work to be licensed at no charge to all
third parties under the terms of this License.
d) If a facility in the modified Library refers to a function or a table of
data to be supplied by an application program that uses the facility, other
than as an argument passed when the facility is invoked, then you must make
a good faith effort to ensure that, in the event an application does not
supply such function or table, the facility still operates, and performs
whatever part of its purpose remains meaningful.
(For example, a function in a library to compute square roots has a purpose
that is entirely well-defined independent of the application. Therefore,
Subsection 2d requires that any application-supplied function or table used
by this function must be optional: if the application does not supply it,
the square root function must still compute square roots.)
These requirements apply to the modified work as a whole. If identifiable
sections of that work are not derived from the Library, and can be
reasonably considered independent and separate works in themselves, then
this License, and its terms, do not apply to those sections when you
distribute them as separate works. But when you distribute the same
sections as part of a whole which is a work based on the Library, the
distribution of the whole must be on the terms of this License, whose
permissions for other licensees extend to the entire whole, and thus to
each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest your
rights to work written entirely by you; rather, the intent is to exercise
the right to control the distribution of derivative or collective works
based on the Library.
In addition, mere aggregation of another work not based on the Library with
the Library (or with a work based on the Library) on a volume of a storage
or distribution medium does not bring the other work under the scope of
this License.
3. You may opt to apply the terms of the ordinary GNU General Public License
instead of this License to a given copy of the Library. To do this, you must
alter all the notices that refer to this License, so that they refer to the
ordinary GNU General Public License, version 2, instead of to this License. (If
a newer version than version 2 of the ordinary GNU General Public License has
appeared, then you can specify that version instead if you wish.) Do not make
any other change in these notices.
Once this change is made in a given copy, it is irreversible for that copy, so
the ordinary GNU General Public License applies to all subsequent copies and
derivative works made from that copy.
This option is useful when you wish to copy part of the code of the Library into
a program that is not a library.
4. You may copy and distribute the Library (or a portion or derivative of it,
under Section 2) in object code or executable form under the terms of Sections 1
and 2 above provided that you accompany it with the complete corresponding
machine-readable source code, which must be distributed under the terms of
Sections 1 and 2 above on a medium customarily used for software interchange.
If distribution of object code is made by offering access to copy from a
designated place, then offering equivalent access to copy the source code from
the same place satisfies the requirement to distribute the source code, even
though third parties are not compelled to copy the source along with the object
code.
5. A program that contains no derivative of any portion of the Library, but is
designed to work with the Library by being compiled or linked with it, is called
a "work that uses the Library". Such a work, in isolation, is not a derivative
work of the Library, and therefore falls outside the scope of this License.
However, linking a "work that uses the Library" with the Library creates an
executable that is a derivative of the Library (because it contains portions of
the Library), rather than a "work that uses the library". The executable is
therefore covered by this License. Section 6 states terms for distribution of
such executables.
When a "work that uses the Library" uses material from a header file that is
part of the Library, the object code for the work may be a derivative work of
the Library even though the source code is not. Whether this is true is
especially significant if the work can be linked without the Library, or if the
work is itself a library. The threshold for this to be true is not precisely
defined by law.
If such an object file uses only numerical parameters, data structure layouts
and accessors, and small macros and small inline functions (ten lines or less in
length), then the use of the object file is unrestricted, regardless of whether
it is legally a derivative work. (Executables containing this object code plus
portions of the Library will still fall under Section 6.)
Otherwise, if the work is a derivative of the Library, you may distribute the
object code for the work under the terms of Section 6. Any executables
containing that work also fall under Section 6, whether or not they are linked
directly with the Library itself.
6. As an exception to the Sections above, you may also combine or link a "work
that uses the Library" with the Library to produce a work containing portions of
the Library, and distribute that work under terms of your choice, provided that
the terms permit modification of the work for the customer's own use and reverse
engineering for debugging such modifications.
You must give prominent notice with each copy of the work that the Library is
used in it and that the Library and its use are covered by this License. You
must supply a copy of this License. If the work during execution displays
copyright notices, you must include the copyright notice for the Library among
them, as well as a reference directing the user to the copy of this
License. Also, you must do one of these things:
a) Accompany the work with the complete corresponding machine-readable
source code for the Library including whatever changes were used in the
work (which must be distributed under Sections 1 and 2 above); and, if the
work is an executable linked with the Library, with the complete
machine-readable "work that uses the Library", as object code and/or source
code, so that the user can modify the Library and then relink to produce a
modified executable containing the modified Library. (It is understood that
the user who changes the contents of definitions files in the Library will
not necessarily be able to recompile the application to use the modified
definitions.)
b) Use a suitable shared library mechanism for linking with the Library. A
suitable mechanism is one that (1) uses at run time a copy of the library
already present on the user's computer system, rather than copying library
functions into the executable, and (2) will operate properly with a
modified version of the library, if the user installs one, as long as the
modified version is interface-compatible with the version that the work was
made with.
c) Accompany the work with a written offer, valid for at least three years,
to give the same user the materials specified in Subsection 6a, above, for
a charge no more than the cost of performing this distribution.
d) If distribution of the work is made by offering access to copy from a
designated place, offer equivalent access to copy the above specified
materials from the same place.
e) Verify that the user has already received a copy of these materials or
that you have already sent this user a copy.
For an executable, the required form of the "work that uses the Library" must
include any data and utility programs needed for reproducing the executable from
it. However, as a special exception, the materials to be distributed need not
include anything that is normally distributed (in either source or binary form)
with the major components (compiler, kernel, and so on) of the operating system
on which the executable runs, unless that component itself accompanies the
executable.
It may happen that this requirement contradicts the license restrictions of
other proprietary libraries that do not normally accompany the operating
system. Such a contradiction means you cannot use both them and the Library
together in an executable that you distribute.
7. You may place library facilities that are a work based on the Library
side-by-side in a single library together with other library facilities not
covered by this License, and distribute such a combined library, provided that
the separate distribution of the work based on the Library and of the other
library facilities is otherwise permitted, and provided that you do these two
things:
a) Accompany the combined library with a copy of the same work based on the
Library, uncombined with any other library facilities. This must be
distributed under the terms of the Sections above.
b) Give prominent notice with the combined library of the fact that part of
it is a work based on the Library, and explaining where to find the
accompanying uncombined form of the same work.
8. You may not copy, modify, sublicense, link with, or distribute the Library
except as expressly provided under this License. Any attempt otherwise to copy,
modify, sublicense, link with, or distribute the Library is void, and will
automatically terminate your rights under this License. However, parties who
have received copies, or rights, from you under this License will not have their
licenses terminated so long as such parties remain in full compliance.
9. You are not required to accept this License, since you have not signed
it. However, nothing else grants you permission to modify or distribute the
Library or its derivative works. These actions are prohibited by law if you do
not accept this License. Therefore, by modifying or distributing the Library (or
any work based on the Library), you indicate your acceptance of this License to
do so, and all its terms and conditions for copying, distributing or modifying
the Library or works based on it.
10. Each time you redistribute the Library (or any work based on the Library),
the recipient automatically receives a license from the original licensor to
copy, distribute, link with or modify the Library subject to these terms and
conditions. You may not impose any further restrictions on the recipients'
exercise of the rights granted herein. You are not responsible for enforcing
compliance by third parties with this License.
11. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues), conditions
are imposed on you (whether by court order, agreement or otherwise) that
contradict the conditions of this License, they do not excuse you from the
conditions of this License. If you cannot distribute so as to satisfy
simultaneously your obligations under this License and any other pertinent
obligations, then as a consequence you may not distribute the Library at
all. For example, if a patent license would not permit royalty-free
redistribution of the Library by all those who receive copies directly or
indirectly through you, then the only way you could satisfy both it and this
License would be to refrain entirely from distribution of the Library.
If any portion of this section is held invalid or unenforceable under any
particular circumstance, the balance of the section is intended to apply, and
the section as a whole is intended to apply in other circumstances.
It is not the purpose of this section to induce you to infringe any patents or
other property right claims or to contest validity of any such claims; this
section has the sole purpose of protecting the integrity of the free software
distribution system which is implemented by public license practices. Many
people have made generous contributions to the wide range of software
distributed through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing to
distribute software through any other system and a licensee cannot impose that
choice.
This section is intended to make thoroughly clear what is believed to be a
consequence of the rest of this License.
12. If the distribution and/or use of the Library is restricted in certain
countries either by patents or by copyrighted interfaces, the original copyright
holder who places the Library under this License may add an explicit
geographical distribution limitation excluding those countries, so that
distribution is permitted only in or among countries not thus excluded. In such
case, this License incorporates the limitation as if written in the body of this
License.
13. The Free Software Foundation may publish revised and/or new versions of the
Lesser General Public License from time to time. Such new versions will be
similar in spirit to the present version, but may differ in detail to address
new problems or concerns.
Each version is given a distinguishing version number. If the Library specifies
a version number of this License which applies to it and "any later version",
you have the option of following the terms and conditions either of that version
or of any later version published by the Free Software Foundation. If the
Library does not specify a license version number, you may choose any version
ever published by the Free Software Foundation.
14. If you wish to incorporate parts of the Library into other free programs
whose distribution conditions are incompatible with these, write to the author
to ask for permission. For software which is copyrighted by the Free Software
Foundation, write to the Free Software Foundation; we sometimes make exceptions
for this. Our decision will be guided by the two goals of preserving the free
status of all derivatives of our free software and of promoting the sharing and
reuse of software generally.
NO WARRANTY
15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE
LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED
IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS
IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT
NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL
ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE
LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL,
SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY
TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF
THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER
PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
================================================
FILE: README.md
================================================
# OpenCCG
OpenCCG is a system for parsing and generating text using [combinatory categorial grammar](https://en.wikipedia.org/wiki/Combinatory_categorial_grammar) for syntax and [hybrid logic dependency semantics](https://www.aclweb.org/anthology/P02-1041) for, well, the semantic representation.
If that seems like a mouthful, don't worry too much about the details right now.
You can get started [installing OpenCCG](https://davehowcroft.com/post/installing-openccg/) and [working with OpenCCG using the `tccg` utility](https://davehowcroft.com/post/getting-started-with-openccg/) right now.
If, on the other hand, you want to start understanding what that mouthful means, Johanna Moore at the University of Edinburgh has some [helpful course notes on NLG in general and OpenCCG in particular](https://www.inf.ed.ac.uk/teaching/courses/nlg/).
# Project information
See CHANGES for a description of the project status. Also see the OpenCCG web site and wiki at UT Austin:
* http://openccg.sf.net
* http://www.utcompling.com/wiki/openccg
This `README.md` file contains the configuration and build instructions. Next you'll probably want to look at the tutorial on writing grammars in the human-friendly 'dot ccg' syntax on [the UT Austin OpenCCG wiki](http://www.utcompling.com/wiki/openccg/visccg-tutorial).
After that it may be helpful to look at the "native" grammar specification in "Specifying Grammars for OpenCCG: A Rough Guide" in `docs/grammars-rough-guide.pdf`, as well as the `SAMPLE_GRAMMARS` file for descriptions of the sample grammars that come with the distribution, including ones using the DotCCG syntax. A (somewhat dated) programmer's guide to using the OpenCCG realizer appears in `docs/realizer-manual.pdf`.
This release also includes a broad English coverage grammar from the CCGBank and associated statistical models; see `docs/ccgbank-README` for details.
# Requirements
* Version 1.6 or later of the Java 2 SDK (http://java.sun.com)
* For ccg2xml and other tools, Python version 2.4 to 2.7 (http://www.python.org)
# Libraries
If you're working with the latest source version from GitHub, you'll need to download the external libraries from the latest release, as GitHub discourages including binaries in their repos:
* Download the [latest release of OpenCCG from sourceforge](https://sourceforge.net/projects/openccg/)
* Unpack the archive and copy over the files from `openccg/lib/`, as well as `openccg/ccgbank/bin/ner/NERApp.jar`
* Build the latest source as described further below
# Configuring your environment variables
The easiest thing to do is to set the environment variables `JAVA_HOME` and `OPENCCG_HOME` to the relevant locations on your system. Set `JAVA_HOME` to match the top level directory containing the Java installation you want to use.
For example, on Windows:
```
C:\> set JAVA_HOME=C:\Program Files\jdk1.6.0_04
```
or on Unix:
```
% setenv JAVA_HOME /usr/local/java
(csh)
> export JAVA_HOME=/usr/java
(ksh, bash)
```
On Windows, to get these settings to persist, it's actually easiest to set your environment variables through the System Properties from the Control Panel. For example, under WinXP, go to Control Panel, click on System Properties, choose the Advanced tab, click on Environment Variables, and add your settings in the User variables area.
Next, likewise set `OPENCCG_HOME` to be the top level directory where you unzipped the download. In Unix, type `pwd` in the directory where this file is and use the path given to you by the shell as `OPENCCG_HOME`. You can set this in the same manner as for `JAVA_HOME` above.
Next, add the directory `OPENCCG_HOME/bin` to your path. For example, you can set the path in your `.bashrc` file as follows:
```
> export PATH="$PATH:$OPENCCG_HOME/bin"
```
On Windows, you should also add the python main directory to your path.
Finally, if you are going to use [KenLM](https://kheafield.com/code/kenlm/) with very large language models for realization with CCGbank-extracted grammars on linux, you'll also need to set the library load path:
```
> export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$OPENCCG_HOME/lib
```
Once you have taken care of these things, you should be able to build and use the OpenCCG Library.
**Note**: Spaces are allowed in `JAVA_HOME` but not in `OPENCCG_HOME`. To set an environment variable with spaces in it, you need to put quotes around the value when on Unix, but you must *NOT* do this when under Windows.
# Increasing Java memory limit
If you're working with a broad coverage grammar and statistical parsing or realization models, you'll probably need to increase the default memory limit for running OpenCCG's tools. You can do so by editing `bin/ccg-env[.bat]`, increasing the JAVA_MEM environment variable at the end of this script. For training perceptron models in memory, you may need 16g; for realization with the very large gigaword 5-gram model, you may need 8g; otherwise, for parsing and realization with CCGbank-derived models, 4g or possibly even 2g should suffice; finally, for small grammars 512m or 256m should be ok.
# Trying it out
If you've managed to configure the system, you should be able to change to the directory for the "tiny" sample grammar and run `tccg` (for text ccg), the command-line tool for interactively testing grammars:
```
> cd grammars
> cd tiny
> tccg (Windows/Unix)
```
Provided tccg starts properly, it loads the grammar files, parses them, and shows the command-line interface (at which point you can type `:h` for help or `:q` to quit).
If you have trouble starting up tccg, make sure you have set the environment variables properly, and that the tccg script (located in `openccg/bin`) calls the right shell environment (top-line of the script; to solve the problem, either comment out this line or correct the path).
# Visualizing semantic graphs
Semantic dependency graphs in testbed files can be visualized with the help of Graphviz's dot tool. First, download and install [Graphviz](http://www.graphviz.org/). Then, use tccg to create a testbed file with logical forms in it. For example, you can try some examples in the worldcup sample grammar and save them to a file using the command ':2tb tb.xml'. Then make a directory to store the visualized graphs. Finally, run the ccg-draw-graph tool as shown below:
```
> cd grammars/worldcup
> tccg (parse examples, save using ':2tb tb.xml')
> mkdir graphs
> ccg-draw-graph -i tb.xml -v graphs/g
```
You can also show the semantic classes or word indices using the `-c` or `-w` options, respectively. The graphs can be displayed with any PDF display tool.
Note that the graph visualization requires the logical forms to be stored in an xml node-rel format for graphs, as in the worldcup or routes sample grammars. See `SAMPLE_GRAMMARS` for more information.
# Creating disjunctive logical forms
This release includes a new disjunctivizer package, for creating a disjunctive LF XML structure based on an LF graph difference. An LF graph difference is a characterization of the difference between two Hybrid Logic Dependency Semantics graphs and an alignment between them in terms of the edits needed to make one into the other: inserts, deletes, and substitutions. See the build file for junit tests that illustrate how to use the package.
# Generating grammar documentation
OpenCCG includes a tool for generating HTML documentation of the XML files that specify a grammar. It can be run either from the `ccg-grammardoc` script in the `bin/` directory, or as an Ant task. An example of how to incorporate GrammarDoc into an Ant build file is given in the "tiny" grammar (`grammars/tiny/build.xml`), in a build target called `document`.
# Building the system from source
The OpenCCG build system is based on Apache Ant. Ant is a little but very handy tool that uses a build file written in XML (`build.xml`) as building instructions. Building the Java portion of OpenCCG is accomplished using the script `ccg-build`; this works under Windows and Unix, but requires that you run it from the top-level directory (where the `build.xml` file is located). If everything is right and all the required packages are visible, this action will generate a file called openccg.jar in the `./lib` directory.
Note that you should *not* build from source by invoking 'ant' directly. Instead, you should use `ccg-build` as shown below (Unix), after ensuring that you've set `OPENCCG_HOME`, `JAVA_HOME` and updated your `PATH` (the `ccg-build` script invokes ant with various parameters that aren't set properly if ant is invoked from the command line):
```
> cd $OPENCCG_HOME
> ccg-build
```
# Working with the Eclipse IDE
The Eclipse IDE can be used for editing the Java source code, though setup can be a bit tricky. The most reliable method seems to be as follows. First, follow the instructions above for building the source from the command line. Then, in Eclipse, choose File|New|Java Project to create a new Java Project, and give it a name, such as 'openccg'. Leave the default settings as they are, and click Next. Then choose Link Additional Source and browse to the folder `src/` in the directory where you installed OpenCCG (i.e. `$OPENCCG_HOME/src`). You'll need to give this location a new name, such as 'src2' ('src' is already taken by default). The final step is to Add External JARs under the Libraries tab. From OpenCCG's lib directory (i.e. `$OPENCCG_HOME/lib`), choose all of the `.jar` files. At this point, you should be able to hit Finish and the code should compile in Eclipse.
Note that with Eclipse's default settings, the code will compile in your Eclipse workspace, which is separate from your OpenCCG installation (this is a good thing, as Eclipse uses a `bin/` directory for compiled Java classes, whereas OpenCCG uses `bin/` for command-line scripts). Thus, once you have made a round of changes in Eclipse and are ready to try them out in OpenCCG, go back to the command line in `$OPENCCG_HOME` and invoke `ccg-build` to re-build the `openccg.jar` file. This will make your changes available in OpenCCG's programs, such as `tccg`.
# Bug Reports
Please report bugs by creating [an issue with a description of the problem](https://github.com/OpenCCG/openccg/issues).
================================================
FILE: SAMPLE_GRAMMARS
================================================
This SAMPLE_GRAMMARS file describes the sample grammars that come
with the distribution, and provides an overview of how the grammars
are organized.
Grammars written directly in the XML format used by OpenCCG appear in
separate directories under grammars/. There are currently four small
English grammars -- tiny, worldcup, flights, and comic -- plus a series of
related grammars, mini-*, for Basque, Dyirbal, English, Inuit, Tagalog and
Turkish, which are from Bozsahin and Steedman's (2003) study of
ergativity. The worldcup grammar includes the English examples from
Baldridge (2002). (The Dutch, Turkish, Tagalog, and Toba Batak grammars
have not been updated from Grok version 0.6.) The flights and comic
grammars (used in the FLIGHTS and COMIC systems) make use of a shared
grammar of core English, in the core-en dir, and contain categories for
pitch accents and boundary tones.
Grammars written in the front-end `dot CCG' format, which attempts to provide
a more powerful and easier-to-use format than the raw XML, are in separate
directories under ccg-format-grammars/. There are currently three grammars
here -- tiny, tinytiny, and arabic. `tiny' is a grammar originally based on
the `tiny' English grammar contained in the grammars/ directory and
documented above. It has been significantly expanded so as to demonstrate
the various features of the CCG format. `tinytiny' is a smaller English
grammar extracted from `tiny', which attempts to demonstrate a minimal-size
useful grammar. `arabic' is a grammar of a large chunk of Classical Arabic,
written by Ben Wing. It was created in particular to demonstrate the power
of CCG-format macros in handling complex morphology, and contains a nearly
full grammar of Arabic verbs. Dot CCG grammars are compiled using ccg2xml;
run ccg2xml -h for usage.
The best place to look for more info on the dot CCG format is in
ccg-format-grammars/tiny/tiny.ccg and in src/ccg2xml/README.
This release also includes a broad English coverage grammar from the
CCGBank and associated statistical models; see docs/ccgbank-README for
details.
Note that with all the grammars, there is the option to store logical
forms in an xml node-rel format for graphs. Conversion to this graph
format is done using a couple of XSLT transforms specified in the
grammar.xml file; see grammars/worldcup/grammar.xml for an example.
When using this graph format, it is also possible to visualize the
semantic graphs, as described in the main README file.
At present, ccg2xml does not support writing grammar.xml files with
the XSLT transforms for the node-rel graph format. As a workaround,
you can add these transforms to your own version of the file which you
then copy over the generated grammar.xml file, as shown below.
> ccg2xml --prefix= mygram.ccg
> cp mygram-grammar.xml grammar.xml
================================================
FILE: TODO
================================================
General OpenCCG development:
----------------------------
- Add check for target LF when adding/writing full words (incl. supertags).
- Look into better handling of optional args.
- Extend feature hierarchy biz to work with category types;
would make sense to also add category vars.
- Add final bits to grammars rough guide (esp. feature licensing).
- Add option to update testbed.
- Binding theory?
- Get agreement to work with anaphors in appositives.
- Add more dynamic checks, eg for non-existent indexRel values or licensing attrs.
- Add well-formedness check for unique roles -- ie, that role must occur
only once per semantic head -- and associated method for declaring that
roles must be unique.
- Interface to morph transducers.
- Make UnifyControl etc thread friendly. Could try tying global vars to
current thread.
- Improve unification efficiency. Could try indexing, caching across calls
to parser or realizer, and structure sharing with delayed copying.
Realizer-oriented development:
------------------------------
- Try using coarticulations with pitch accents.
- Add orthographic post-processing (capitalization, spacing of punctuation).
- Look into instantiating outermost args.
TODO prior to Feb 16:
---------------------
Tabs for testbed, lexicon, features
Nice tree graph for features
Get Arabic to compile
Issues of colors, fonts, etc. (Alexis help)
PNG's of the slash modalities
Option menu for different magnification (50%, 100%, 200% ...)
Get pretty buttons from Justin
Make sure all path, etc. issues are working
Figure out what's the deal with __init__.py -- importing from another dir
Possibly:
help-over descriptions of families, provided by Javadoc-style comments in
the source
Alexis -- help with more specific user-interface issues, overall management
involving various people
Sudipta --
-- PNG's of slash modalities; add to editor
-- TeX/Tk font conversion
-- find a Tkinter package for displaying tree graphs; use it to add a graph for features to the editor
-- figure out what's wrong with arabic?
semantics: RED
features: BLUE
categories: BLACK, sans serif font
background: WHITE
Ben TODO!!!!!
-------------
Create professional-looking web page off of comp.ling.utexas.edu
================================================
FILE: bin/ccg-build
================================================
#!/bin/sh
# Builds OpenCCG via Apache Ant using the jars bundled in $OPENCCG_LIB.
# Must be run from $OPENCCG_HOME (where build.xml lives); see README.md.
. ccg-env
ANT_HOME="$OPENCCG_LIB"
PROPS="-Dant.home=$ANT_HOME -Dopenccg.home=$OPENCCG_HOME"
# Cygwin JVMs expect ';' as the classpath separator; Unix JVMs use ':'
case `uname` in
CYGWIN* ) XALAN_JARS="$OPENCCG_LIB/xalan.jar;$OPENCCG_LIB/xercesImpl.jar;$OPENCCG_LIB/xml-apis.jar;$OPENCCG_LIB/xsltc.jar;$OPENCCG_LIB/serializer.jar"
ANT_JARS="$OPENCCG_LIB/ant.jar;$OPENCCG_LIB/ant-launcher.jar;$OPENCCG_LIB/ant-contrib.jar"
ANT_JARS="$ANT_JARS;$OPENCCG_LIB/ant-junit.jar;$OPENCCG_LIB/ant-junit4.jar;$OPENCCG_LIB/junit-4.10.jar"
CP="$JAVA_HOME/lib/tools.jar;$OPENCCG_JAR;$ANT_JARS;$XALAN_JARS;$DIRLIBS;."
PROPS="$PROPS -Dcygwin=true"
;;
* ) XALAN_JARS="$OPENCCG_LIB/xalan.jar:$OPENCCG_LIB/xercesImpl.jar:$OPENCCG_LIB/xml-apis.jar:$OPENCCG_LIB/xsltc.jar:$OPENCCG_LIB/serializer.jar"
ANT_JARS="$OPENCCG_LIB/ant.jar:$OPENCCG_LIB/ant-launcher.jar:$OPENCCG_LIB/ant-contrib.jar"
ANT_JARS="$ANT_JARS:$OPENCCG_LIB/ant-junit.jar:$OPENCCG_LIB/ant-junit4.jar:$OPENCCG_LIB/junit-4.10.jar"
CP="$JAVA_HOME/lib/tools.jar:$OPENCCG_JAR:$ANT_JARS:$XALAN_JARS:$DIRLIBS:."
;;
esac
# Quote "$@" so arguments containing spaces are forwarded intact
# (matches ccg-test; unquoted $@ re-splits arguments on whitespace)
"$JAVA" $JAVA_MEM -classpath "$CP" $PROPS org.apache.tools.ant.launch.Launcher "$@"
================================================
FILE: bin/ccg-build.bat
================================================
@echo off
rem Builds OpenCCG via Apache Ant using the jars bundled in %OPENCCG_LIB%.
rem Must be run from %OPENCCG_HOME% (where build.xml lives); see README.md.
call ccg-env
set ANT_HOME=%OPENCCG_LIB%
set PROPS=-Dant.home=%ANT_HOME% -Dopenccg.home=%OPENCCG_HOME%
rem Windows classpaths use ';' as the separator
set XALAN_JARS=%OPENCCG_LIB%\xalan.jar;%OPENCCG_LIB%\xercesImpl.jar;%OPENCCG_LIB%\xml-apis.jar;%OPENCCG_LIB%\xsltc.jar;%OPENCCG_LIB%\serializer.jar
set ANT_JARS=%OPENCCG_LIB%\ant.jar;%OPENCCG_LIB%\ant-launcher.jar;%OPENCCG_LIB%\ant-contrib.jar
set ANT_JARS=%ANT_JARS%;%OPENCCG_LIB%\ant-junit.jar;%OPENCCG_LIB%\ant-junit4.jar;%OPENCCG_LIB%\junit-4.10.jar
set CP="%JAVA_HOME%\lib\tools.jar";%OPENCCG_JAR%;%ANT_JARS%;%XALAN_JARS%;%DIRLIBS%;.
rem %* forwards all command-line arguments to the Ant launcher
%JAVA% %JAVA_MEM% -classpath %CP% %PROPS% org.apache.tools.ant.launch.Launcher %*
================================================
FILE: bin/ccg-cvr
================================================
#!/bin/sh
# Runs the OpenCCG cross-validating realizer.
# For usage, do: ccg-cvr -h
. ccg-env
# Quote "$@" so arguments containing spaces are forwarded intact
"$JAVA" $JAVA_ARGS opennlp.ccg.test.CrossValidateRealizer "$@"
================================================
FILE: bin/ccg-cvr.bat
================================================
@echo off
rem Runs the OpenCCG cross-validating realizer.
rem For usage, do: ccg-cvr -h
call ccg-env
%JAVA_CMD% opennlp.ccg.test.CrossValidateRealizer %*
================================================
FILE: bin/ccg-draw-graph
================================================
#!/bin/sh
# Visualizes semantic dependency graphs from a testbed file; all
# arguments are forwarded to bin/dlf_parser.py (requires Graphviz).
python "$OPENCCG_HOME/bin/dlf_parser.py" "$@"
================================================
FILE: bin/ccg-draw-graph.bat
================================================
@echo off
rem Visualizes semantic dependency graphs from a testbed file; all
rem arguments are forwarded to bin/dlf_parser.py (requires Graphviz).
python %OPENCCG_HOME%/bin/dlf_parser.py %*
================================================
FILE: bin/ccg-draw-tree
================================================
#!/bin/sh
# Draws derivation trees from CCGbank .auto files; all arguments are
# forwarded to bin/ccg_draw_tree.py.
python "$OPENCCG_HOME/bin/ccg_draw_tree.py" "$@"
================================================
FILE: bin/ccg-draw-tree.bat
================================================
@echo off
rem Draws derivation trees from CCGbank .auto files; all arguments are
rem forwarded to bin/ccg_draw_tree.py.
python %OPENCCG_HOME%/bin/ccg_draw_tree.py %*
================================================
FILE: bin/ccg-env
================================================
#!/bin/sh
# sets OpenCCG environment variables
# Sourced (not executed) by the other bin/ scripts; requires JAVA_HOME
# and OPENCCG_HOME to be set, and defines the classpath (CP), the java
# binary (JAVA), and default JVM arguments (JAVA_MEM, JAVA_ARGS).
if [ "$JAVA_HOME" = "" ] ; then
echo
echo "Error: JAVA_HOME not found in your environment."
echo
echo "Please set the JAVA_HOME variable in your environment to match the"
echo "location of the Java Virtual Machine you want to use."
exit 1
fi
if [ "$OPENCCG_HOME" = "" ] ; then
echo
echo "Error: OPENCCG_HOME not found in your environment."
echo
echo "Please set the OPENCCG_HOME variable in your environment to match the"
echo "location of your OpenNLP CCG Library distribution."
exit 1
fi
# Under Cygwin, convert OPENCCG_HOME to a Windows-style path for the JVM
case `uname` in
CYGWIN* ) OPENCCG_HOME="`cygpath -w $OPENCCG_HOME`"
;;
esac
OPENCCG_LIB="$OPENCCG_HOME/lib"
OPENCCG_SRC="$OPENCCG_HOME/src"
OPENCCG_CLASSES="$OPENCCG_HOME/output/classes"
OPENCCG_JAR="$OPENCCG_HOME/lib/openccg.jar"
# Build the classpath; Cygwin JVMs expect ';' separators, Unix JVMs ':'
case `uname` in
CYGWIN* ) DIRLIBS="$OPENCCG_LIB/trove.jar;$OPENCCG_LIB/jdom.jar;$OPENCCG_LIB/jline.jar;$OPENCCG_LIB/jopt-simple.jar"
CP="${OPENCCG_JAR};${DIRLIBS};."
;;
* ) DIRLIBS="$OPENCCG_LIB/trove.jar:$OPENCCG_LIB/jdom.jar:$OPENCCG_LIB/jline.jar:$OPENCCG_LIB/jopt-simple.jar"
CP="${OPENCCG_JAR}:${DIRLIBS}:."
;;
esac
# variant for use with 'build compile' option, if desired:
#CP="${OPENCCG_CLASSES}:${OPENCCG_SRC}:${DIRLIBS}"
JAVA="$JAVA_HOME/bin/java"
# Default JVM heap limit; raise for broad-coverage grammars and
# statistical models (see "Increasing Java memory limit" in README.md)
JAVA_MEM="-Xmx256m"
#JAVA_MEM="-Xmx2048m"
#JAVA_MEM="-Xmx8g"
#JAVA_MEM="-Xmx16g"
JAVA_ARGS="$JAVA_MEM -classpath $CP -Dfile.encoding=UTF8"
================================================
FILE: bin/ccg-env.bat
================================================
@echo off
rem sets OpenCCG environment variables
rem Called by the other bin\ scripts; requires JAVA_HOME and OPENCCG_HOME
rem to be set, and defines the classpath (CP) and JAVA_CMD invocation.
if not exist "%JAVA_HOME%" goto no_JAVA_HOME
if not exist "%OPENCCG_HOME%" goto no_OPENCCG_HOME
set OPENCCG_LIB=%OPENCCG_HOME%\lib
set DIRLIBS=%OPENCCG_LIB%\trove.jar;%OPENCCG_LIB%\jdom.jar;%OPENCCG_LIB%\jline.jar;%OPENCCG_LIB%\jopt-simple.jar
set XMLLIBS=%OPENCCG_LIB%\xml-apis.jar;%OPENCCG_LIB%\xercesImpl.jar;%OPENCCG_LIB%\xalan.jar
set OPENCCG_SRC=%OPENCCG_HOME%\src
set OPENCCG_CLASSES=%OPENCCG_HOME%\output\classes
set OPENCCG_JAR=%OPENCCG_HOME%\lib\openccg.jar
rem variant without XMLLIBS
rem set CP=%OPENCCG_JAR%;%DIRLIBS%;.
rem variant with XMLLIBS
set CP=%OPENCCG_JAR%;%DIRLIBS%;%XMLLIBS%;.
rem variant for use with 'build compile' option, if desired:
rem set CP=%OPENCCG_CLASSES%;%OPENCCG_SRC%;%DIRLIBS%
set JAVA="%JAVA_HOME%\bin\java"
rem Default JVM heap limit; raise for broad-coverage grammars (see README.md)
set JAVA_MEM=-Xmx256m
rem set JAVA_MEM=-Xmx2048m
set JAVA_CMD=%JAVA% %JAVA_MEM% -classpath %CP% -Dfile.encoding=UTF8
goto end
:no_JAVA_HOME
echo.
echo Error: JAVA_HOME not found in your environment.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of the Java Virtual Machine you want to use.
echo.
exit /b 1
:no_OPENCCG_HOME
echo.
echo Error: OPENCCG_HOME not found in your environment.
echo.
echo Please set the OPENCCG_HOME variable in your environment to match the
echo location of your OpenNLP CCG Library distribution.
echo.
exit /b 1
:end
================================================
FILE: bin/ccg-grammardoc
================================================
#!/bin/sh
#
# $Id: ccg-grammardoc,v 1.2 2006/12/03 17:14:23 mwhite14850 Exp $
# Script to run grammardoc from the command line.
# Author: Scott Martin (http://www.ling.osu.edu/~scott/)
#
# Usage: ccg-grammardoc [-s|--source sourceDir] [-d|--dest destDir]
#
. ccg-env
ANT_HOME="$OPENCCG_HOME/lib"
# Append ant.jar to the classpath using the platform's separator
# (';' under Cygwin, ':' elsewhere)
case `uname` in
CYGWIN* ) CP="$CP;$ANT_HOME/ant.jar"
;;
* ) CP="$CP:$ANT_HOME/ant.jar"
;;
esac
JAVA_ARGS="-Xmx128m -classpath $CP"
# Quote "$@" so arguments containing spaces are forwarded intact
"$JAVA" $JAVA_ARGS opennlp.ccg.grammardoc.GrammarDoc "$@"
================================================
FILE: bin/ccg-grammardoc.bat
================================================
@echo off
rem Runs GrammarDoc to generate HTML documentation for a grammar's XML files.
rem Usage: ccg-grammardoc [-s|--source sourceDir] [-d|--dest destDir]
call ccg-env
set ANT_HOME=%OPENCCG_HOME%\lib
rem ant.jar is appended because GrammarDoc can also run as an Ant task
set CP=%CP%;%ANT_HOME%\ant.jar
set JAVA_ARGS=-Xmx128m -classpath %CP%
%JAVA% %JAVA_ARGS% opennlp.ccg.grammardoc.GrammarDoc %*
================================================
FILE: bin/ccg-gt
================================================
#!/bin/sh
# Runs the generation-targets tool.
# For usage, do: ccg-gt -h
. ccg-env
# Quote "$@" so arguments containing spaces are forwarded intact
"$JAVA" $JAVA_ARGS opennlp.ccg.test.GenTargets "$@"
================================================
FILE: bin/ccg-gt.bat
================================================
@echo off
rem Runs the generation-targets tool.
rem For usage, do: ccg-gt -h
call ccg-env
%JAVA_CMD% opennlp.ccg.test.GenTargets %*
================================================
FILE: bin/ccg-ht-factors
================================================
#!/bin/sh
# Runs the hypertagger's language-model factor extractor.
. ccg-env
#CP=$CP:$OPENCCG_HOME/lib/jopt-simple.jar
#echo $JAVA_ARGS
# Quote "$@" so arguments containing spaces are forwarded intact
"$JAVA" $JAVA_ARGS opennlp.ccg.realize.hypertagger.LMFactorExtractor "$@"
================================================
FILE: bin/ccg-hypertagger
================================================
#!/bin/sh
# Runs the hypertagger tag extractor.
. ccg-env
#CP=$CP:$OPENCCG_HOME/lib/jopt-simple.jar
#echo $JAVA_ARGS
# Quote "$@" so arguments containing spaces are forwarded intact
"$JAVA" $JAVA_ARGS opennlp.ccg.realize.hypertagger.TagExtract "$@"
================================================
FILE: bin/ccg-hypertagger.bat
================================================
@echo off
rem Runs the hypertagger tag extractor.
call ccg-env
%JAVA_CMD% opennlp.ccg.realize.hypertagger.TagExtract %*
================================================
FILE: bin/ccg-parse
================================================
#!/bin/sh
# Runs the OpenCCG parser.
# Usage: ccg-parse -h
. ccg-env
# Quote "$@" so arguments containing spaces are forwarded intact
"$JAVA" $JAVA_ARGS opennlp.ccg.Parse "$@"
================================================
FILE: bin/ccg-parse.bat
================================================
@echo off
rem Runs the OpenCCG parser.
rem Usage: ccg-parse -h
call ccg-env
rem Use %* (all arguments) instead of %1..%9, which silently dropped any
rem arguments beyond the ninth; this matches the other .bat wrappers.
%JAVA_CMD% opennlp.ccg.Parse %*
================================================
FILE: bin/ccg-postagger
================================================
#!/bin/sh
# Runs the basic part-of-speech tagger.
. ccg-env
# Quote "$@" so arguments containing spaces are forwarded intact
"$JAVA" $JAVA_ARGS opennlp.ccg.parse.postagger.BasicPOSTagger "$@"
================================================
FILE: bin/ccg-postagger.bat
================================================
@echo off
rem Runs the basic part-of-speech tagger.
call ccg-env
%JAVA_CMD% opennlp.ccg.parse.postagger.BasicPOSTagger %*
================================================
FILE: bin/ccg-realize
================================================
#!/bin/sh
# Runs the OpenCCG realizer.
# Usage: ccg-realize (-g <grammarfile>) <inputfile> (<outputfile>)
. ccg-env
# Quote "$@" so arguments containing spaces are forwarded intact
"$JAVA" $JAVA_ARGS opennlp.ccg.Realize "$@"
================================================
FILE: bin/ccg-realize.bat
================================================
@echo off
rem Runs the OpenCCG realizer.
rem Usage: ccg-realize (-g <grammarfile>) <inputfile> (<outputfile>)
call ccg-env
rem set HPROF=-Xrunhprof:cpu=times,file=hmm-prof.txt
rem Use %* (all arguments) instead of %1..%9, which silently dropped any
rem arguments beyond the ninth; this matches the other .bat wrappers.
%JAVA_CMD% opennlp.ccg.Realize %*
================================================
FILE: bin/ccg-supertagger
================================================
#!/bin/sh
# Runs the supertagger's word/POS dictionary labelling strategy.
. ccg-env
# Quote "$@" so arguments containing spaces are forwarded intact
"$JAVA" $JAVA_ARGS opennlp.ccg.parse.supertagger.WordAndPOSDictionaryLabellingStrategy "$@"
================================================
FILE: bin/ccg-supertagger.bat
================================================
@echo off
rem Runs the supertagger's word/POS dictionary labelling strategy.
call ccg-env
%JAVA_CMD% opennlp.ccg.parse.supertagger.WordAndPOSDictionaryLabellingStrategy %*
================================================
FILE: bin/ccg-test
================================================
#!/bin/sh
# Runs the regression tester over a grammar's testbed.
# For usage, do: ccg-test -h
. ccg-env
"$JAVA" $JAVA_ARGS opennlp.ccg.test.Regression "$@"
================================================
FILE: bin/ccg-test.bat
================================================
@echo off
rem Runs the regression tester over a grammar's testbed.
rem For usage, do: ccg-test -h
call ccg-env
rem set HPROF=-Xrunhprof:cpu=times,file=hmm-prof.txt
%JAVA_CMD% opennlp.ccg.test.Regression %*
================================================
FILE: bin/ccg-update
================================================
#!/bin/sh
# Updates a grammar's testbed file.
# For usage, do: ccg-update -h
. ccg-env
# Quote "$@" so arguments containing spaces are forwarded intact
"$JAVA" $JAVA_ARGS opennlp.ccg.test.UpdateTestbed "$@"
================================================
FILE: bin/ccg-update.bat
================================================
@echo off
rem Windows wrapper: updates the regression testbed.
rem For usage, do: ccg-update -h
call ccg-env
%JAVA_CMD% opennlp.ccg.test.UpdateTestbed %*
================================================
FILE: bin/ccg2xml
================================================
#!/bin/sh
# Runs the ccg2xml grammar converter; requires OPENCCG_HOME to be set.
python "$OPENCCG_HOME/bin/ccg2xml.py" "$@"
================================================
FILE: bin/ccg2xml.bat
================================================
@echo off
rem Quote the script path so an OPENCCG_HOME containing spaces still works
rem (matches the quoting used by the unix ccg2xml wrapper).
python "%OPENCCG_HOME%/bin/ccg2xml.py" %*
================================================
FILE: bin/ccg_draw_tree.py
================================================
#
# ccg_draw_tree uses nltk.Tree to draw a tree from a CCGbank .auto file,
# or to draw two trees from two .auto files
#
import sys
# Command-line handling: <autofile> <deriv_id>, optionally a second file
# (and optionally a second derivation id) for side-by-side comparison.
if len(sys.argv) == 1 or sys.argv[1] == '-h' or sys.argv[1] == '--help':
    print 'Usage: ccg_draw_tree <autofile> <deriv_id> (<autofile>) (<derivid>)'
    sys.exit(0)
autofile = sys.argv[1]
deriv_id = sys.argv[2]
autofile2 = None
deriv_id2 = None
if len(sys.argv) >= 4:
    autofile2 = sys.argv[3]
    # Default the second derivation id to the first unless given explicitly.
    deriv_id2 = deriv_id
if len(sys.argv) >= 5:
    deriv_id2 = sys.argv[4]
def get_deriv(autofile, deriv_id):
print 'reading ' + deriv_id + ' from ' + autofile
found_it = False
file = open(autofile, 'rU')
for line in file:
if found_it == True:
return line
if line[0:2] == 'ID':
if line.split()[0].split('=')[1] == deriv_id:
found_it = True
raise NameError('could not find ' + deriv_id + '!')
# Load the requested derivation(s) before pulling in nltk.
deriv = get_deriv(autofile, deriv_id)
deriv2 = None
if autofile2 != None:
    deriv2 = get_deriv(autofile2, deriv_id2)
print 'importing nltk.Tree'
from nltk import Tree
from nltk.draw.tree import draw_trees
# Regexes matching CCGbank non-terminal (<T ...>) and leaf (<L ...>) tokens.
ccgbank_node_pattern = r'<T.*?>'
ccgbank_leaf_pattern = r'<L.*?>'
# nb: the parens around leaves ends up creating blank nodes above leaves
def parse_ccgbank_node(s):
    """Extract the category label from a matched CCGbank <T ...> token.

    An empty match produces an empty label (such blank nodes are removed
    again by excise_empty_nodes).
    """
    if not s:
        return ''
    return s.split(' ')[1]
def parse_ccgbank_leaf(s):
    # Turn a matched CCGbank leaf token into a one-child Tree: category over
    # word. Assumes the .auto leaf layout "<L cat modPOS origPOS word ...>",
    # i.e. tokens[1] = category, tokens[4] = word -- TODO confirm for the
    # CCGbank version in use.
    tokens = s.split(' ')
    return Tree(tokens[1], [tokens[4]])
def excise_empty_nodes(t):
    # Recursively remove nodes whose label is empty, splicing in their first
    # (only) child; non-Tree leaves are returned unchanged.
    # NOTE: uses the old nltk Tree API (t.node); newer nltk uses t.label().
    if not isinstance(t,Tree): return t
    if t.node == '': return excise_empty_nodes(t[0])
    return Tree(t.node, [excise_empty_nodes(st) for st in t])
# nb: returns tree with blank nodes excised
def parse_ccgbank_tree(s):
    # Parse one .auto derivation line into an nltk Tree using the custom
    # node/leaf patterns above, then drop the blank nodes the leaf parens
    # introduce. NOTE: Tree.parse is the old nltk API (now Tree.fromstring).
    t = Tree.parse(s,
                   parse_node=parse_ccgbank_node,
                   parse_leaf=parse_ccgbank_leaf,
                   node_pattern=ccgbank_node_pattern,
                   leaf_pattern=ccgbank_leaf_pattern)
    return excise_empty_nodes(t)
# Parse the derivation(s) and show them in the nltk tree viewer.
print
print 'parsing: ' + deriv
t = parse_ccgbank_tree(deriv)
print t
t2 = None
if deriv2 != None:
    print
    print 'parsing: ' + deriv2
    t2 = parse_ccgbank_tree(deriv2)
    print t2
print
if t2 == None:
    print 'drawing tree'
    draw_trees(t)
else:
    print 'drawing trees'
    draw_trees(t,t2)
================================================
FILE: bin/dlf_parser.py
================================================
#
# dlf_parser.py (invoked by ccg-draw-graph) uses graphviz's dot to visualize (D)LF graphs
#
# author: Jonathan Barker (with minor contributions by Michael White)
# license: LGPL
#
from xml.etree.ElementTree import ElementTree
import optparse, sys, codecs, xml, os
from collections import defaultdict
# Parse arguments
op = optparse.OptionParser()
op.add_option("-i", "--input", type="string", help="input source: file or <stdin>(default)", default=sys.stdin)
op.add_option("-m", "--moses", type="string", help="file/directory prefix for moses output", default=None)
op.add_option("-v", "--visualize", type="string", help="file/directory prefix for .pdf output", default=None)
op.add_option("-w", "--wordindices", action="store_true", help="include word indices", default=False)
op.add_option("-c", "--classnames", action="store_true", help="include semantic class names", default=False)
(ops, args) = op.parse_args(sys.argv)
# Parse input file (whole document read up front, then handed to ElementTree)
input_source = ops.input if ops.input is sys.stdin else open(ops.input, "rt")
raw = xml.etree.ElementTree.XML(input_source.read())
# Module-level state shared by the parse functions below.
snum = "None"   # id ("info") of the item currently being parsed, for error messages
att_id = 0      # global counter for generated "att" helper-node ids
# Get word number
def wordNum(wid):
    """Map a node id like "w7" to its word index.

    Ids beginning with "x" denote nodes with no word position and map to -1;
    otherwise the leading letter is dropped and any "f" marker characters
    are stripped before converting to int.
    """
    return -1 if wid.startswith("x") else int(wid[1:].strip("f"))
# Get node span
def span(nid, graph, w):
    """Collect into w the word indices of all nodes reachable from nid.

    Returns [] for an already-visited node, otherwise returns w after
    adding nid's index and traversing its out-edges in graph.
    """
    if wordNum(nid) in w:
        return []
    w.append(wordNum(nid))
    for n, e in graph[nid]:
        # BUGFIX: the original appended wordNum(n) here before recursing,
        # which made the recursive call hit the visited-check and return []
        # immediately -- so the traversal never went beyond direct
        # neighbours. Recursing first restores the full transitive span.
        span(n, graph, w)
    return w
# findall wrapper
def findAll(elem, match):
    """Like elem.findall(match), but always yields a list (possibly empty)."""
    matches = elem.findall(match)
    return matches if matches else []
# Class for representing predicates and attributes
class Pred:
    def __init__(self):
        self.attrib = []   # plain attributes: (key, value) pairs
        self.one_of = []   # alternative attributes collected from <one-of>
        self.opt = []      # optional attributes collected from <opt>
# Class for representing nodes, contains predicate and attribute information
class Node:
    def __init__(self):
        self.id = ""        # node id with any ":class" suffix stripped
        self.className = "" # semantic class parsed from the id, or ""
        self.preds = defaultdict(Pred)  # pred name -> Pred

    def addPred(self, pred, attrib, one_of, opt):
        # Merge the given attribute lists into the (possibly new) Pred entry.
        self.preds[pred].attrib.extend(attrib)
        self.preds[pred].one_of.extend(one_of)
        self.preds[pred].opt.extend(opt)

    def moses(self, graph):
        # Emit a Moses-style <tree label="..." span="n-m"/> line for this node.
        # NOTE(review): self.pred and self.attrib are never assigned anywhere
        # in this class (only self.preds is), so this method looks like it
        # would raise AttributeError if reached -- confirm against -m usage.
        tree = " <tree label=\""
        # label
        pred = self.pred
        if pred is None:
            pred = self.id
        tree += "_".join([pred]+[k.upper()+"_"+v for (k,v) in self.attrib])
        # span
        tree += "\" span=\""
        s = span(self.id, graph, [])
        tree += str(min(s))+"-"+str(max(s))
        tree += "\"/>"
        return tree

    def dot(self):
        # Render this node as a GraphViz node statement with an HTML-like
        # label: [id][:class][:pred-with-attributes ...].
        dot_node = self.id+" [label=<"
        withClassName = ops.classnames and len(self.className) > 0
        if ops.wordindices:
            dot_node += self.id
            if withClassName:
                dot_node += ":"
        if withClassName:
            dot_node += self.className
        if len(self.preds) > 0:
            if ops.wordindices or withClassName:
                dot_node += ":"
            labels = []
            for pname, p in self.preds.items():
                label = ""
                # pred
                label += "<FONT POINT-SIZE=\"20.0\">"+pname+"</FONT>"
                # att
                atts = []
                if len(p.attrib) > 0:
                    atts.append(",".join(["<"+k.upper()+">"+v for (k, v) in p.attrib]))
                if len(p.one_of) > 0:
                    atts.append("|".join(["<"+k.upper()+">"+v for (k, v) in p.one_of]))
                if len(p.opt) > 0:
                    atts.append("("+",".join(["<"+k.upper()+">"+v for (k, v) in p.opt])+")?")
                if len(atts) > 0:
                    label += "<FONT POINT-SIZE=\"8.0\">"+",".join(atts)+"</FONT>"
                labels.append(label)
            dot_node += " | ".join(labels)
        dot_node += ">];\n"
        return dot_node

    def info(self):
        # Debug dump of the node's predicates.
        # NOTE(review): iterating self.preds directly yields keys only, so the
        # tuple unpack below would fail (likely meant self.preds.items());
        # self.pred is also undefined here, as in moses() -- confirm.
        print "Node id:",self.id
        for pname, p in self.preds:
            print "\tPred:",self.pred
            print "\t\tAttrib:",p.attrib
            print "\t\tOne_of:",p.one_of
            print "\t\tOpt:",p.opt
        print "----------------"
# Returns just the id, stripping the class (if any)
def parseId(str):
    """Given an id of the form "w1:CLASS", return "w1"; otherwise the id as-is."""
    idx = str.find(":")
    return str if idx <= 0 else str[:idx]
# Returns the class from the id, or the empty string if none
def parseClass(str):
    """Given an id of the form "w1:CLASS", return "CLASS"; otherwise ""."""
    idx = str.find(":")
    return "" if idx <= 0 else str[idx+1:]
# Method for parsing <node>
def parseNode(node, graph, nodes):
    # Fetch/create the entry keyed by the raw id (which may include ":class"),
    # then fill in the stripped id and class name.
    n = nodes[node.get("id")]
    n.id = parseId(node.get("id"))
    n.className = parseClass(node.get("id"))
    # All XML attributes except id/pred become (key, value) attribute pairs.
    attrib = [(k, v) for (k, v) in node.items() if k not in ["id", "pred"]]
    if node.get("pred") is not None:
        n.addPred(node.get("pred"), attrib, [], [])
    # Re-register under the stripped id so edges (built from stripped ids)
    # find the node. NOTE(review): the entry under the raw "id:class" key is
    # left behind in the defaultdict -- confirm that is harmless for output.
    nodes[n.id] = n
    for elem in list(node):
        if elem.tag == "rel":
            parseRel(elem, n.id, graph, nodes, "")
        elif elem.tag == "one-of":
            parseOneOf(elem, n, attrib, node.get("pred"), graph, nodes)
        elif elem.tag == "opt":
            parseOpt(elem, n, graph, nodes)
        elif elem.tag == "node":
            parseNode(elem, graph, nodes)
        else:
            print snum+": Unexpected tag <"+elem.tag+"> after <node>"
            quit()
# Method for parsing <opt>
def parseOpt(opt, node, graph, nodes):
for elem in list(opt):
if elem.tag == "atts":
for pname, p in node.preds.items():
node.addPred(pname, [], [], [(k, v) for (k, v) in elem.items() if k not in ["id", "pred"]])
elif elem.tag == "rel":
parseRel(elem, node.id, graph, nodes, "style=dotted, ")
else:
print snum+": Unexpected tag <"+elem.tag+"> after <node>"
quit()
# Method for parsing <one-of>
def parseOneOf(oneof, node, attrib, pred, graph, nodes):
    global att_id   # global counter keeps generated helper-node ids unique
    num_att = 0     # 1-based index of the alternative within this <one-of>
    for elem in list(oneof):
        if elem.tag == "atts":
            # Alternative attribute set: attach to the governing pred if the
            # parent <node> had one, else to the alternative's own pred.
            if pred is not None:
                node.addPred(pred, [], [(k, v) for (k, v) in elem.items() if k not in ["id", "pred"]], [])
            else:
                node.addPred(elem.get("pred"), [], [(k, v) for (k, v) in elem.items() if k not in ["id", "pred"]], [])
            if len(list(elem)) > 0:
                # The alternative also carries relations: introduce a numbered
                # helper node (dashed edge) to hang them off.
                num_att += 1
                new_att = Node()
                new_att.id = "att"+str(att_id)
                att_id += 1
                new_att.addPred(str(num_att), [], [], [])
                nodes[new_att.id] = new_att
                graph[node.id].append((new_att.id, " [style=dashed];\n"))
                for rel in list(elem):
                    parseRel(rel, new_att.id, graph, nodes, "")
        elif elem.tag == "rel":
            # A bare relation alternative: same numbered helper-node treatment.
            num_att += 1
            new_att = Node()
            new_att.id = "att"+str(att_id)
            att_id += 1
            new_att.addPred(str(num_att), [], [], [])
            nodes[new_att.id] = new_att
            graph[node.id].append((new_att.id, " [style=dashed];\n"))
            parseRel(elem, new_att.id, graph, nodes, "")
        else:
            print snum+": Unexpected tag <"+elem.tag+"> after <one-of>"
            quit()
# Method for parsing <rel>
def parseRel(rel, nid, graph, nodes, style):
    # <rel>
    for subnode in list(rel):
        if subnode.tag == "node":
            edge_label = " ["+style+"label = \""+rel.get("name")+"\"];\n"
            if subnode.get("id") is None:
                # An idref points at a node defined elsewhere: just add the edge.
                graph[nid].append((parseId(subnode.get("idref")), edge_label))
            else:
                # A fresh node: add the edge and recurse to parse it.
                graph[nid].append((parseId(subnode.get("id")), edge_label))
                parseNode(subnode, graph, nodes)
        elif subnode.tag == "one-of":
            # Propagate the relation name down so the alternatives keep the
            # edge label, and mark the edges dashed.
            subnode.set("name", rel.get("name"))
            parseRel(subnode, nid, graph, nodes, "style=dashed, ")
        else:
            print snum+": Unexpected tag <"+subnode.tag+"> after <rel>"
            quit()
# <item>
# Main loop: for each <item>, parse every <lf> into (graph, nodes) and,
# when -v is given, render it to PDF via GraphViz.
item_no = 0
for item in findAll(raw, "item"):
    item_no += 1
    if item.get("numOfParses") == "0":
        print "Removing "+item.get("info")
    else:
        snum = item.get("info")
        # <lf>
        lf_num = 0
        for lf in findAll(item, "lf"):
            graph = defaultdict(list)   # node id -> [(target id, edge attrs)]
            nodes = defaultdict(Node)   # node id -> Node
            # <node>
            for node in list(lf):
                if node.tag == "node":
                    parseNode(node, graph, nodes)
                else:
                    print snum+": Unexpected tag <"+node.tag+"> after <lf>"
                    quit()
            # Plot the graph with GraphViz
            if ops.visualize != None:
                viz_name = ""
                # Items without a string "info" id fall back to their position.
                if type(item.get("info")) != type("string"):
                    viz_name = ops.visualize+".item"+str(item_no)+"."+str(lf_num)
                else:
                    viz_name = ops.visualize+"."+item.get("info")+"."+str(lf_num)
                viz = codecs.open(viz_name+".dot", "w", "utf-8")
                viz.write("digraph lf {\n")
                for (k, v) in nodes.items():
                    viz.write(v.dot())
                for (left, rights) in graph.items():
                    for right in rights:
                        viz.write(left+"->"+right[0]+right[1])
                viz.write("}\n")
                viz.close()
                # NOTE(review): shells out to graphviz 'dot' and 'rm' (unix
                # only), with viz_name interpolated unquoted -- confirm inputs
                # are trusted/space-free.
                os.system("dot -Tpdf "+viz_name+".dot -o "+viz_name+".pdf")
                os.system("rm "+viz_name+".dot")
            lf_num += 1
================================================
FILE: bin/tccg
================================================
#!/bin/sh
# Runs the interactive text-mode CCG tool (opennlp.ccg.TextCCG).
# For usage, do: tccg -h
. ccg-env
"$JAVA" $JAVA_ARGS opennlp.ccg.TextCCG "$@"
================================================
FILE: bin/tccg.bat
================================================
@echo off
rem Windows wrapper: runs the interactive text-mode CCG tool.
rem For usage, do: tccg -h
call ccg-env
%JAVA_CMD% opennlp.ccg.TextCCG %*
================================================
FILE: bin/visccg
================================================
#!/bin/sh
# Launches the VisCCG grammar editor; requires OPENCCG_HOME to be set.
python "$OPENCCG_HOME/bin/ccg_editor.py" "$@"
================================================
FILE: bin/visccg.bat
================================================
@echo off
rem Quote the script path so an OPENCCG_HOME containing spaces still works
rem (matches the quoting used by the unix visccg wrapper).
python "%OPENCCG_HOME%/bin/ccg_editor.py" %*
================================================
FILE: bin/wccg
================================================
#!/bin/sh
# Runs the web-mode CCG tool (opennlp.ccg.WebCCG).
# For usage, do: wccg -h
. ccg-env
"$JAVA" $JAVA_ARGS opennlp.ccg.WebCCG "$@"
================================================
FILE: build.xml
================================================
<!-- $Id: build.xml,v 1.61 2011/12/14 03:11:05 mwhite14850 Exp $ -->
<!-- Copyright (C) 2003-13 Jason Baldridge, Michael White and Scott Martin -->
<project name="OpenCCG" default="package" basedir=".">
<path id="gen.classpath">
<fileset dir="lib" includes="*.jar"/>
</path>
<!-- =================================================================== -->
<!-- Initialization target -->
<!-- =================================================================== -->
<target name="init">
<tstamp/>
<property name="Name" value="OpenCCG"/>
<property name="name" value="openccg"/>
<property name="year" value="2013"/>
<property name="version" value="0.9.5"/>
<echo message="----------- ${Name} ${version} [${year}] ------------"/>
<property name="debug" value="on"/>
<property name="optimize" value="off"/>
<property name="deprecation" value="on"/>
<property name="top.dir" value="."/>
<property name="src.dir" value="./src"/>
<property name="bin.dir" value="./bin"/>
<property name="lib.dir" value="./lib"/>
<property name="docs.dir" value="./docs"/>
<property name="test.dir" value="./test"/>
<property name="packages" value="opennlp.ccg.*,opennlp.ccgbank.*"/>
<property name="build.dir" value="./output"/>
<property name="build.dest" value="./output/classes"/>
<property name="build.gen" value="${top.dir}/output/gen"/>
<property name="build.javadocs" value="${docs.dir}/api"/>
<path id="build.classpath">
<pathelement location="${lib.dir}/ant-junit.jar"/>
<pathelement location="${lib.dir}/ant-launcher.jar"/>
<pathelement location="${lib.dir}/ant.jar"/>
<pathelement location="${lib.dir}/jdom.jar"/>
<pathelement location="${lib.dir}/trove.jar"/>
<pathelement location="${lib.dir}/javacc.jar"/>
<pathelement location="${lib.dir}/jgrapht-jdk1.6.jar"/>
<pathelement location="${lib.dir}/jline.jar"/>
<pathelement location="${lib.dir}/jopt-simple.jar"/>
<pathelement location="${lib.dir}/junit-4.10.jar"/>
<pathelement location="${lib.dir}/serializer.jar"/>
<pathelement location="${lib.dir}/xalan.jar"/>
<pathelement location="${lib.dir}/xml-apis.jar"/>
<pathelement location="${lib.dir}/xsltc.jar"/>
</path>
<filter token="year" value="${year}"/>
<filter token="version" value="${version}"/>
<filter token="date" value="${TODAY}"/>
<filter token="log" value="true"/>
<filter token="verbose" value="true"/>
</target>
<!-- =================================================================== -->
<!-- Help on usage -->
<!-- =================================================================== -->
<target name="usage">
<echo message=""/>
<echo message=""/>
<echo message="OpenCCG build file"/>
<echo message="-------------------------------------------------------------"/>
<echo message=""/>
<echo message=" Available targets are:"/>
<echo message=""/>
<echo message=" package --> generates the openccg.jar file (default)"/>
<echo message=" compile --> compiles the source code"/>
<echo message=" javadoc --> generates the API documentation"/>
<echo message=" test --> runs JUnit tests"/>
<echo message=" clean --> cleans up the compilation directory"/>
<echo message=""/>
<echo message=" See the comments inside the build.xml file for more details."/>
<echo message="-------------------------------------------------------------"/>
<echo message=""/>
<echo message=""/>
</target>
<!-- =================================================================== -->
<!-- Prepares the build directories -->
<!-- =================================================================== -->
<target name="prepare" depends="init">
<!-- create directories -->
<mkdir dir="${build.dir}"/>
<mkdir dir="${build.dest}"/>
<mkdir dir="${build.gen}"/>
</target>
<!-- Runs JavaCC (parser generator) -->
<!-- =================================================================== -->
<target name="javacc"
depends="prepare"
description="generates parser using javacc">
<mkdir dir="${build.gen}/opennlp/ccgbank/parse"/>
<copy file="${src.dir}/opennlp/ccgbank/parse/SimpleNode.java"
todir="${build.gen}/opennlp/ccgbank/parse"/>
<jjtree target="${src.dir}/opennlp/ccgbank/parse/CCGbankDerivation.jjt"
javacchome="${lib.dir}"
outputdirectory="${build.gen}/opennlp/ccgbank/parse" />
<javacc target="${build.gen}/opennlp/ccgbank/parse/CCGbankDerivation.jj"
outputdirectory="${build.gen}/opennlp/ccgbank/parse"
javacchome="${lib.dir}/" />
</target>
<!-- =================================================================== -->
<!-- Compiles the source directory -->
<!-- =================================================================== -->
<target name="compile"
depends="javacc"
description="compiles the source code">
<!-- compile generated JavaCC classes-->
<javac srcdir="${build.gen}"
destdir="${build.dest}"
debug="${debug}"
deprecation="${deprecation}"
classpathref="build.classpath"
nowarn="true"
includeAntRuntime="false"
excludes="**/.backup.orig/**"
optimize="${optimize}"/>
<javac srcdir="${src.dir}"
destdir="${build.dest}"
debug="${debug}"
deprecation="${deprecation}"
classpathref="build.classpath"
nowarn="true"
includeAntRuntime="false"
excludes="**/.backup.orig/**,srilmbridge/,kenlm/"
optimize="${optimize}">
</javac>
<subant antfile="${src.dir}/ccg2xml/build.xml" buildpath="${basedir}"/>
</target>
<!-- =================================================================== -->
<!-- Creates the jar file -->
<!-- =================================================================== -->
<target name="package"
depends="compile"
description="generates the openccg.jar file (default)">
<jar jarfile="${lib.dir}/${name}.jar">
<fileset dir="${build.dest}">
<include name="**"/>
<exclude name="**/alignment/*Test*.class"/>
<exclude name="**/disjunctivizer/*Test*.class"/>
<exclude name="**/hylo/graph/*Test*.class"/>
<exclude name="**/util/*Test*.class"/>
</fileset>
<fileset dir="${src.dir}" includes="**/*.xsl"/>
<fileset dir="${src.dir}" includes="**/*.properties"/>
<!-- for grammardoc -->
<fileset dir="${src.dir}" includes="**/*.css"/>
<fileset dir="${src.dir}" includes="**/*.js"/>
</jar>
</target>
<!-- =================================================================== -->
<!-- Creates the release file -->
<!-- -->
<!-- Note: to create a release based on a clean openccg source -->
<!-- directory, use the latest version in the repository which -->
<!-- should exclude all the compiled and derived files. -->
<!-- =================================================================== -->
<target name="release" depends="document,package">
<subant antfile="${docs.dir}/build.xml" buildpath="${basedir}"
target="clean"/>
<antcall target="clean"/>
<tar tarfile="${name}-${version}.tar">
<tarfileset mode="755"
dir="../"
includes="${name}/bin/** ${name}/ccgbank/bin/**"/>
<tarfileset dir="../"
includes="${name}/**"
excludes="${name}/.* **/CVS **/bin/ **/.backup.orig/ ${name}/classes/** ${name}/output/** ${name}/src/srilmbridge/** ${name}/grammars/**/test/ ${name}/grammars/**/apml/"/>
</tar>
<gzip src="${name}-${version}.tar"
zipfile="../${name}-${version}.tgz" />
<delete file="${name}-${version}.tar" />
</target>
<!-- =================================================================== -->
<!-- Creates the homepage -->
<!-- NB: We haven't been including the API docs on the home page. -->
<!-- NB: To update the homepage: -->
<!-- 1. go to openccg/docs -->
<!-- 2. sftp username@web.sf.net -->
<!-- 3. cd /home/groups/o/op/openccg/htdocs -->
<!-- 4. put index.html -->
<!-- =================================================================== -->
<target name="homepage"
depends="init,document"
description="generates the API documentation">
<tar tarfile="${name}-homepage.tar"
basedir="./docs/"
includes="**"
excludes="**/CVS **/.backup.orig" />
<gzip src="${name}-homepage.tar"
zipfile="${build.dir}/${name}-homepage.tgz" />
<delete file="${name}-homepage.tar" />
</target>
<!-- =================================================================== -->
<!-- Creates the documentation -->
<!-- =================================================================== -->
<target name="document" depends="prepare,javacc">
<mkdir dir="${build.javadocs}"/>
<javadoc packagenames="${packages}"
destdir="${build.javadocs}"
classpathref="build.classpath"
author="true"
version="true"
use="true"
splitindex="true"
noindex="false"
windowtitle="${name}"
doctitle="The ${Name} API v${version}"
bottom="Copyright © ${year} Jason Baldridge, Gann Bierner, Michael White and additional contributors. All Rights Reserved.">
<sourcepath>
<pathelement path="${src.dir}"/>
<pathelement path="${build.gen}"/>
</sourcepath>
<link href="http://docs.oracle.com/javase/6/docs/api/"/>
<link href="http://jgrapht.org/javadoc/"/>
<link href="http://www.jdom.org/docs/apidocs/"/>
</javadoc>
<subant antfile="${docs.dir}/build.xml" buildpath="${basedir}"/>
</target>
<!-- =================================================================== -->
<!-- Runs all JUnit tests -->
<!-- =================================================================== -->
<target name="test" depends="compile">
<javac srcdir="${test.dir}" destdir="${build.dest}"
classpathref="build.classpath" debug="on" includeAntRuntime="false"/>
<junit haltonerror="true" fork="off" includeantruntime="false">
<classpath>
<pathelement location="${build.dest}"/>
<pathelement path="${java.class.path}"/>
<pathelement location="${lib.dir}/jdom.jar"/>
<pathelement location="${lib.dir}/jgrapht-jdk1.6.jar"/>
<pathelement location="${lib.dir}/junit-4.10.jar"/>
<pathelement location="${lib.dir}/serializer.jar"/>
<pathelement location="${lib.dir}/trove.jar"/>
</classpath>
<formatter type="plain" usefile="false" />
<batchtest>
<fileset dir="${test.dir}">
<include name="**/*Test*.java"/>
</fileset>
</batchtest>
</junit>
</target>
<!-- =================================================================== -->
<!-- Cleans targets -->
<!-- =================================================================== -->
<target name="clean"
depends="init"
description="cleans up the directory">
<delete dir="${build.dir}"/>
<subant antfile="${src.dir}/ccg2xml/build.xml" buildpath="${basedir}"
target="clean"/>
<delete>
<fileset dir="${bin.dir}" includes="*.pyc"/>
</delete>
</target>
<target name="cleandocs" depends="init" description="cleans up the API docs directory, and extra pdf docs">
<delete dir="${build.javadocs}"/>
<subant antfile="${docs.dir}/build.xml" buildpath="${basedir}" target="clean"/>
</target>
</project>
<!-- End of file -->
================================================
FILE: ccg-format-grammars/arabic/arabic.ccg
================================================
#############################################################
# #
# arabic.ccg #
# #
#############################################################
# Author: Ben Wing <ben@666.com>
# Date: April 2006
# This is a grammar for a fragment of Arabic. It's particularly
# useful for demonstrating the extended use of macros to handle
# complicated morphological inflections.
# See the `tiny' grammar (tiny.ccg) for more info about the format
# of this file.
feature {
CASE<2>: nom, acc, gen;
NUM<2>: sg, du, pl;
GEND<2>: m, f;
STATE<2>: cons, non-cons {indef, def};
ANIM<2>: hum, nonhum;
PERS<2>: 1st, 2nd, 3rd;
RESUMPTIVE<2>: nonres, res;
SEM-NUM<X:NUM>: sg-X, du-X, pl-X;
SEM-PERS<X:PERS>: 1st-X, 2nd-X, 3rd-X;
TENSE<E>: past, pres;
MOOD<E>: indic, subj, juss;
# Here's a more complicated hierarchy, from the original tiny grammar.
ontology: sem-obj {
phys-obj {
animate-being {
person
},
thing
},
situation {
change {
action
},
state
}
};
}
rule {
no typeraise;
typeraise +: n => s;
typeraise - $: n => s;
typeraise - $: pp => s;
typeraise - $: pp/n => s;
typechange: s$1 | n[nom] => s$1 ;
typechange: n<~2>[cons] => n<2>[3rd,def] /* n[gen,def] ;
typechange: n<~2>[cons] => n<2>[3rd,indef] /* n[gen,indef] ;
}
##########################################################################
# Morphological entries #
# (morph.xml) #
##########################################################################
word wa:Conj; # "and"
word anna:Comp; # "that", introducing sentential complements
word inna:Comp; # same, but only after the verb qaal "say"
word maa:InterrogPro(thing): 3rd; # "what"
word man:InterrogPro(person): 3rd; # "who"
word li:Prep; # "to, for"
word fii:Prep; # "in"
# This word means "this".
word haadhaa {
*: sg, m;
haadhihi: sg, f;
ha_ulaahi: pl;
# Bizarrely, this word declines for case only in the dual.
haadhaani: du, m, nom;
haadhayni: du, m, acc;
haadhayni: du, m, gen;
haataani: du, f, nom;
haatayni: du, f, acc;
haatayni: du, f, gen;
}
# This word means "that".
word dhaalik {
*: sg, m;
tilka: sg, f;
ulaa_ika: pl;
# Bizarrely, this word declines for case only in the dual.
dhaanika: du, m, nom;
dhaynika: du, m, acc;
dhaynika: du, m, gen;
taanika: du, f, nom;
taynika: du, f, acc;
taynika: du, f, gen;
}
# This is the relative pronoun.
word al-ladhii {
*: sg, m;
al-latii: sg, f;
al-ladhiina: pl, m;
al-laati: pl, f;
# Bizarrely, this word declines for case only in the dual.
al-ladhaani: du, m, nom;
al-ladhayni: du, m, acc;
al-ladhayni: du, m, gen;
al-lataani: du, f, nom;
al-latayni: du, f, acc;
al-latayni: du, f, gen;
}
word pro:Pro {
ana: 1st, 1st-X, sg, sg-X;
anta: 2nd, 2nd-X, sg, sg-X, m;
anti: 2nd, 2nd-X, sg, sg-X, f;
huwa: 3rd, 3rd-X, sg, sg-X, m;
hiya: 3rd, 3rd-X, sg, sg-X, f;
naHnu: 1st, 1st-X, pl, pl-X;
antun: 2nd, 2nd-X, pl, pl-X, m;
antunna: 2nd, 2nd-X, pl, pl-X, f;
hum: 3rd, 3rd-X, pl, pl-X, m;
hunna: 3rd, 3rd-X, pl, pl-X, f;
}
word ii:: 1st, 1st-X, sg, sg-X;
word nii:: 1st, 1st-X, sg, sg-X;
word ka:: 2nd, 2nd-X, sg, sg-X, m;
word ki:: 2nd, 2nd-X, sg, sg-X, f;
word hu:: 3rd, 3rd-X, sg, sg-X, m;
word haa:: 3rd, 3rd-X, sg, sg-X, f;
word naa:: 1st, 1st-X, pl, pl-X;
word kum:: 2nd, 2nd-X, pl, pl-X, m;
word kunna:: 2nd, 2nd-X, pl, pl-X, f;
word hum:: 3rd, 3rd-X, pl, pl-X, m;
word hunna:: 3rd, 3rd-X, pl, pl-X, f;
#############################################
# Nouns #
#############################################
# This shows how a reasonably complicated morphology can be accommodated.
# It is certainly possible that some of this may (and probably should)
# be offloaded into a separate morphology-processing engine. However,
# even in that case there is often a good deal more to the lexicon.
# We show a couple examples of complete paradigms, in order to make it
# easier to understand what's going on below.
# Here is a typical noun (kitaab "book") with a broken plural (kutub "books").
# For nouns with broken plurals, the plural is typically declined like
# the singular. Note that Arabic nouns are conjugated for three numbers
# (singular, dual, plural), three cases (nominative, accusative, genitive),
# and three states (indefinite, definite, construct). (The construct state
# is used for nouns that are modified by other nouns -- e.g. "book" in
# "the book of Mary".)
# Form Nominative Accusative Genitive
# ---------------------------------------------------------------
# sg.indef kitaabun kitaaban kitaabin
# sg.def al-kitaabu al-kitaaba al-kitaabi
# sg.cons kitaabu kitaaba kitaabi
#
# du.indef kitaabaani kitaabayni kitaabayni
# du.def al-kitaabaani al-kitaabayni al-kitaabayni
# du.cons kitaabaa kitaabay kitaabay
#
# pl.indef kutubun kutuban kutubin
# pl.def al-kutubu al-kutuba al-kutubi
# pl.cons kutubu kutuba kutubi
# Here is a typical noun (mudarris "teacher") with a different kind of
# plural, a so-called "strong masculine plural", which has its own declension.
# Form Nominative Accusative Genitive
# ---------------------------------------------------------------
# sg.indef mudarrisun mudarrisan mudarrisin
# sg.def al-mudarrisu al-mudarrisa al-mudarrisi
# sg.cons mudarrisu mudarrisa mudarrisi
#
# du.indef mudarrisaani mudarrisayni mudarrisayni
# du.def al-mudarrisaani al-mudarrisayni al-mudarrisayni
# du.cons mudarrisaa mudarrisay mudarrisay
#
# pl.indef mudarrisuuna mudarrisiina mudarrisiina
# pl.def al-mudarrisuuna al-mudarrisiina al-mudarrisiina
# pl.cons mudarrisuu mudarrisii mudarrisii
# Here, we make heavy use of macros.
# This macro says: Every time an expression of the form
# three-form-decl(...) occurs, replace it with the text that comes after.
# The parameters will be substituted into the text. The braces that
# denote the macro's text do *NOT* form part of the text that is substituted.
# Note that macro substitutions are processed recursively: If the text
# of a macro substitution contains calls to other macros, they will also
# be processed. This makes "inheritance" very easy to implement.
# This macro is used for a particular paradigm corresponding to a
# particular number of a word.
def three-different-form-decl(indef-form, def-form, cons-form,
indef-nom, indef-acc, indef-gen,
def-nom, def-acc, def-gen,
cons-nom, cons-acc, cons-gen,
morph-num, sem-num, gend) {
indef-form.indef-nom: morph-num, sem-num, gend, nom, indef;
indef-form.indef-acc: morph-num, sem-num, gend, acc, indef;
indef-form.indef-gen: morph-num, sem-num, gend, gen, indef;
add-al(def-form.def-nom): morph-num, sem-num, gend, nom, def;
add-al(def-form.def-acc): morph-num, sem-num, gend, acc, def;
add-al(def-form.def-gen): morph-num, sem-num, gend, gen, def;
cons-form.cons-nom: morph-num, sem-num, gend, nom, cons;
cons-form.cons-acc: morph-num, sem-num, gend, acc, cons;
cons-form.cons-gen: morph-num, sem-num, gend, gen, cons;
}
# It's questionable whether we should do this. This assimilates al-
# to a following coronal consonant, e.g. ar-rajul, as-sigaara,
# ath-thalj, an-nuur, aDH-DHuhr, etc.
def add-al(form) regsub('^al-([std]h|DH|[tdszrnTDSZL])', 'a\1-\1', al-.form)
def three-form-decl(form, indef-nom, indef-acc, indef-gen,
def-nom, def-acc, def-gen,
cons-nom, cons-acc, cons-gen,
morph-num, sem-num, gend) {
three-different-form-decl(form, form, form,
indef-nom, indef-acc, indef-gen,
def-nom, def-acc, def-gen,
cons-nom, cons-acc, cons-gen,
morph-num, sem-num, gend)
}
# Using the above macro, we create two more macros to handle two common
# paradigm types: Accusative and genitive are the same, and the
# definite is either the same as the construct (two-form-decl-1) or
# the same as the indefinite (two-form-decl-2).
def two-form-decl-1(form, non-cons-nom, non-cons-obl,
cons-nom, cons-obl, morph-num, sem-num, gend) {
three-form-decl(form, non-cons-nom, non-cons-obl, non-cons-obl,
non-cons-nom, non-cons-obl, non-cons-obl,
cons-nom, cons-obl, cons-obl,
morph-num, sem-num, gend)
}
def two-form-decl-2(form, indef-nom, indef-obl,
non-indef-nom, non-indef-obl, morph-num, sem-num, gend) {
three-form-decl(form, indef-nom, indef-obl, indef-obl,
non-indef-nom, non-indef-obl, non-indef-obl,
non-indef-nom, non-indef-obl, non-indef-obl,
morph-num, sem-num, gend)
}
# In turn we create macros for particular paradigms: strong masculine ("uun"),
# strong feminine ("aat"), dual, and basic triptote (the paradigm for
# "kitaab" above and, in general, most singulars).
# Note that an alternative to using braces is to put the macro text on
# the same line as the `def' part of the macro (backslashes can be used
# to join multiple lines together).
def uun-plural(form) two-form-decl-1(form, uuna, iina, uu, ii, pl, pl-X, m)
def aat-plural(form) \
two-form-decl-2(form, aatun, aatin, aatu, aati, pl, pl-X, f)
def dual(form, gend) \
two-form-decl-1(form, aani, ayni, aa, ay, du, du-X, gend)
def triptote(form, morph-num, sem-num, gend) \
three-form-decl(form, un, an, in, u, a, i, u, a, i, morph-num, sem-num, gend)
# Here we define macros for full paradigms for words. Note how semicolons
# are not used, because they are supplied by the macro text itself.
# (Consult the text for three-form-decl() above, and remember that the
# braces denoting the macro text are not actually part of the text. This
# means that if you really want braces as the outermost thing in some
# macro text, you'll need to supply two levels of braces.)
def thing(sing, plur) {
word sing:N(thing) {
triptote(sing, sg, sg-X, m)
dual(sing, m)
triptote(plur, sg, pl-X, f)
}
}
def fem-thing(sing, plur) {
word sing:N(thing) {
triptote(sing.t, sg, sg-X, f)
dual(sing.t, f)
triptote(plur, sg, pl-X, f)
}
}
def person(sing, plur, gend) {
word sing:N(person) {
triptote(sing, sg, sg-X, gend)
dual(sing, gend)
triptote(plur, pl, pl-X, gend)
}
}
def male(sing, plur) person(sing, plur, m)
def female(sing, plur) person(sing, plur, f)
def strong-male(sing) {
word sing:N(person) {
triptote(sing, sg, sg-X, m)
dual(sing, m)
uun-plural(sing)
}
}
# Here we define the actual words. Note how short these definitions are,
# specifying only what's unpredictable.
thing(kitaab, kutub)
thing(waqt, _awqaat)
thing(Harf, Huruuf)
thing(dars, duruus)
thing(waqt, _awqaat)
fem-thing(sigaara, sagaayir)
fem-thing(madiina, mudun)
male(rajul, rijaal)
male(walad, _awlaad)
male(Taalib, Tullaab)
female(bint, _abnaat)
strong-male(mudarris)
word imra_a:N(person) {
three-different-form-decl(imra_at, mar_at, imra_at,
un, an, in, u, a, i, u, a, i,
sg, sg-X, f)
three-different-form-decl(imra_at, mar_at, imra_at,
aani, ayni, ayni, aani, ayni, ayni, aa, ay, ay,
du, du-X, f)
triptote(nisaa_, pl, pl-X, f)
}
# Nouns like _ax whose singular takes long-vowel endings (note the
# uu/aa/ii slots where triptote has u/a/i) -- cf. the construct-phrase
# tests with _axuu in the testbed below.
def extended_construct_word(stem, plur) {
word stem:N(person) {
three-form-decl(stem, un, an, in, u, a, i, uu, aa, ii, sg, sg-X, m)
dual(stem, m)
triptote(plur, pl, pl-X, m)
}
}
extended_construct_word(_ax, _ixwaan)
extended_construct_word(_ab, _abnaa_)
# Typical paradigms:
# 1sg 'aktaa 'aktaa 'akta |
# 2sg.m taktaa taktaa takta |
# 2sg.f taktayna taktay taktay |
# 3sg.m yaktaa yaktaa yakta |
# 3sg.f taktaa taktaa takta |
# 2du taktayaani taktayaa taktayaa |
# 3du.m yaktayaani yaktayaa yaktayaa |
# 3du.f taktayaani taktayaa taktayaa |
# 1pl naktaa naktaa nakta |
# 2pl.m taktawna taktaw taktaw |
# 2pl.f taktayna taktayna taktayna |
# 3pl.m yaktawna yaktaw yaktaw |
# 3pl.f yaktayna yaktayna yaktayna |
#
# 1sg 'aktuu 'aktuwa 'aktu | 'aktii 'aktiya 'akti
# 2sg.m taktuu taktuwa taktu | taktii taktiya takti
# 2sg.f taktiina taktii taktii | taktiina taktii taktii
# 3sg.m yaktuu yaktuwa yaktu | yaktii yaktiya yakti
# 3sg.f taktuu taktuwa taktu | taktii taktiya takti
# 2du taktuwaani taktuwaa taktuwaa | taktiyaani taktiyaa taktiyaa
# 3du.m yaktuwaani yaktuwaa yaktuwaa | yaktiyaani yaktiyaa yaktiyaa
# 3du.f taktuwaani taktuwaa taktuwaa | taktiyaani taktiyaa taktiyaa
# 1pl naktuu naktuwa naktu | naktii naktiya nakti
# 2pl.m taktuuna taktuu taktuu | taktuuna taktuu taktuu
# 2pl.f taktuuna taktuuna taktuuna | taktiina taktiina taktiina
# 3pl.m yaktuuna yaktuu yaktuu | yaktuuna yaktuu yaktuu
# 3pl.f yaktuuna yaktuuna yaktuuna | yaktiina yaktiina yaktiina
# Past tense built from two stems: FORMV is used before vowel-initial
# suffixes, FORMC before consonant-initial ones. The stems differ for
# e.g. 2nd-weak verbs (kaan/kun below); strong verbs pass the same stem
# twice (see strong-past).
def two-form-past(formv, formc) {
formc.tu: past, 1st, sg;
formc.ta: past, 2nd, m, sg;
formc.ti: past, 2nd, f, sg;
formv.a: past, 3rd, m, sg;
formv.at: past, 3rd, f, sg;
formc.tumaa: past, 2nd, du;
formv.aa: past, 3rd, m, du;
formv.ataa: past, 3rd, f, du;
formc.naa: past, 1st, pl;
formc.tum: past, 2nd, m, pl;
formc.tunna: past, 2nd, f, pl;
formv.uu: past, 3rd, m, pl;
formc.na: past, 3rd, f, pl;
}
# Past of third-weak verbs taking -ay- before consonant-initial suffixes;
# the 3rd-person forms contract (-aa/-at/-ataa, and 3pl.m -aw).
def 3rd-weak-past-ay(form) {
form.ay.tu: past, 1st, sg;
form.ay.ta: past, 2nd, m, sg;
form.ay.ti: past, 2nd, f, sg;
form.aa: past, 3rd, m, sg;
form.at: past, 3rd, f, sg;
form.ay.tumaa: past, 2nd, du;
form.ay.aa: past, 3rd, m, du;
form.ataa: past, 3rd, f, du;
form.ay.naa: past, 1st, pl;
form.ay.tum: past, 2nd, m, pl;
form.ay.tunna: past, 2nd, f, pl;
form.aw: past, 3rd, m, pl;
form.ay.na: past, 3rd, f, pl;
}
# Like 3rd-weak-past-ay but with -aw- before consonant-initial suffixes;
# the contracted 3rd-person forms (-aa/-at/-ataa/-aw) are the same.
def 3rd-weak-past-aw(form) {
form.aw.tu: past, 1st, sg;
form.aw.ta: past, 2nd, m, sg;
form.aw.ti: past, 2nd, f, sg;
form.aa: past, 3rd, m, sg;
form.at: past, 3rd, f, sg;
form.aw.tumaa: past, 2nd, du;
form.aw.aa: past, 3rd, m, du;
form.ataa: past, 3rd, f, du;
form.aw.naa: past, 1st, pl;
form.aw.tum: past, 2nd, m, pl;
form.aw.tunna: past, 2nd, f, pl;
form.aw: past, 3rd, m, pl;
form.aw.na: past, 3rd, f, pl;
}
# Past of third-weak verbs in -ii-: 3rd singular keeps -iya/-iyat,
# 3pl.m contracts to -uu.
def 3rd-weak-past-ii(form) {
form.ii.tu: past, 1st, sg;
form.ii.ta: past, 2nd, m, sg;
form.ii.ti: past, 2nd, f, sg;
form.iya: past, 3rd, m, sg;
form.iyat: past, 3rd, f, sg;
form.ii.tumaa: past, 2nd, du;
form.iy.aa: past, 3rd, m, du;
form.iy.ataa: past, 3rd, f, du;
form.ii.naa: past, 1st, pl;
form.ii.tum: past, 2nd, m, pl;
form.ii.tunna: past, 2nd, f, pl;
form.uu: past, 3rd, m, pl;
form.ii.na: past, 3rd, f, pl;
}
# Strong verbs use one invariant stem for the whole past paradigm.
def strong-past(form) two-form-past(form, form)
# In general, almost all Arabic present-tense verbs of a particular mood
# can be defined using five forms. Verbs with a hamza in the first radical
# have a problem in the first-singular; ideally this should be handled
# automatically using a regexp or something of that sort, but we don't have
# such support currently, so we use an optional param.
# Generic present-tense paradigm for the given MOOD. The person/gender
# prefixes _ (1st sg), t (2nd, and 3rd fem.), y (3rd), n (1st pl) are
# concatenated onto the five supplied stem forms.
def gen-pres(mood, fsing, fsing-fem, fdual, fplur-masc, fplur-fem) {
# This shows how you can use regular expressions if need be.
# regsub(string, regex, repl) is a special built-in that does regular-
# expression substitution on STRING, replacing all occurrences of
# REGEX with REPL. Regular-expression syntax is as in Python.
# In this case, Arabic verbs have a phonetic rule that eliminates
# two glottal stops occurring near each other at the beginning of a
# word. For example, _a_kulu -> _aakulu, and _u_kalu -> _uukalu.
# (That is, the vowel is lengthened.)
# _ . regsub(foo, bar, fsing): pres, mood, 1st, sg;
_ . regsub('^([aiu])_', '\1\1', fsing): pres, mood, 1st, sg;
# _.fsing: pres, mood, 1st, sg;
t.fsing: pres, mood, 2nd, m, sg;
t.fsing-fem: pres, mood, 2nd, f, sg;
y.fsing: pres, mood, 3rd, m, sg;
# 3rd fem. sg is spelled like 2nd masc. sg (both t + FSING).
t.fsing: pres, mood, 3rd, f, sg;
t.fdual: pres, mood, 2nd, du;
y.fdual: pres, mood, 3rd, m, du;
# 3rd fem. du likewise shares the 2nd-person dual spelling.
t.fdual: pres, mood, 3rd, f, du;
n.fsing: pres, mood, 1st, pl;
t.fplur-masc: pres, mood, 2nd, m, pl;
t.fplur-fem: pres, mood, 2nd, f, pl;
y.fplur-masc: pres, mood, 3rd, m, pl;
y.fplur-fem: pres, mood, 3rd, f, pl;
}
# The "two-form" present uses normal (non-3rd-weak) endings but may
# have two forms of the root, one for vocalic endings (almost all of them)
# and one for consonant endings (only the feminine plural). This
# encompasses 2nd-weak verbs and doubled verbs, and (trivially) strong verbs.
# Indicative: -u/-iina/-aani/-uuna endings on the vowel stem.
def two-form-pres-indic(formv, formc) {
gen-pres(indic, formv.u, formv.iina, formv.aani, formv.uuna, formc.na)
}
# Subjunctive: -a base and shortened plural/dual endings.
def two-form-pres-subj(formv, formc) {
gen-pres(subj, formv.a, formv.ii, formv.aa, formv.uu, formc.na)
}
# The jussive is different because the base form (fsing) has no ending.
# This means that it may assume the consonant form instead of the vowel
# form, or may have a number of variants (in particular, for doubled
# verbs). So the base form needs to be given explicitly.
def two-form-pres-juss(base, formv, formc) {
gen-pres(juss, base, formv.ii, formv.aa, formv.uu, formc.na)
}
# Strong verbs: a single stem serves all three moods.
def strong-pres(form) {
two-form-pres-indic(form, form)
two-form-pres-subj(form, form)
two-form-pres-juss(form, form, form)
}
# 2nd-weak verbs: the jussive base falls back to the consonant-suffix
# stem (e.g. akun rather than akuun, cf. kaan below).
def 2nd-weak-pres(formv, formc) {
two-form-pres-indic(formv, formc)
two-form-pres-subj(formv, formc)
two-form-pres-juss(formc, formv, formc)
}
# Doubled verbs: the jussive base has three acceptable variants --
# the consonant stem, or the vowel stem plus -a or -i.
def doubled-pres(formv, formc) {
two-form-pres-indic(formv, formc)
two-form-pres-subj(formv, formc)
two-form-pres-juss(formc, formv, formc)
two-form-pres-juss(formv.a, formv, formc)
two-form-pres-juss(formv.i, formv, formc)
}
# Verbs whose third radical is a /w/ or a /y/ have all manner of exceptional
# forms; easiest just to list them. In general, there are three types,
# depending on whether the base singular forms end in -aa, -ii, or -uu.
# Base singular in -aa (first paradigm table above).
def 3rd-weak-pres-aa(form) {
gen-pres(indic, form.aa, form.ayna, form.ayaani, form.awna, form.ayna)
gen-pres(subj, form.aa, form.ay, form.ayaa, form.aw, form.ayna)
# Note the shortened vowel here.
gen-pres(juss, form.a, form.ay, form.ayaa, form.aw, form.ayna)
}
# Base singular in -ii (second table above, right-hand column).
def 3rd-weak-pres-ii(form) {
gen-pres(indic, form.ii, form.iina, form.iyaani, form.uuna, form.iina)
gen-pres(subj, form.iya, form.ii, form.iyaa, form.uu, form.iina)
# Note the shortened vowel here.
gen-pres(juss, form.i, form.ii, form.iyaa, form.uu, form.iina)
}
# Base singular in -uu (second table above, left-hand column).
def 3rd-weak-pres-uu(form) {
gen-pres(indic, form.uu, form.iina, form.uwaani, form.uuna, form.uuna)
gen-pres(subj, form.uwa, form.ii, form.uwaa, form.uu, form.uuna)
# Note the shortened vowel here.
gen-pres(juss, form.u, form.ii, form.uwaa, form.uu, form.uuna)
}
# Lexeme macro for 2nd-weak verbs: past and present each supply a
# vowel-suffix and a consonant-suffix stem; the headword is PASTV.
def 2nd-weak-verb(pastv, props, pastc, presv, presc) {
word pastv: props {
two-form-past(pastv, pastc)
2nd-weak-pres(presv, presc)
}
}
# Note the way that macro calls can be constructed as well. Here, the
# value of PAST_TYPE is the suffix at the end of the macro name.
# (The headword is PAST_STEM with PAST_TYPE appended, and the right
# 3rd-weak-past-*/3rd-weak-pres-* macros are selected by name.)
def 3rd-weak-verb(past_stem, props, past_type, pres_stem, pres_type) {
word past_stem . past_type: props {
3rd-weak-past- . past_type(past_stem)
3rd-weak-pres- . pres_type(pres_stem)
}
}
# Strong verbs need only one past stem and one present stem.
def strong-verb(past, props, pres) {
word past: props {
strong-past(past)
strong-pres(pres)
}
}
# Individual verbs. Arguments are the stems required by the lexeme
# macros above; PROPS names the verb's family plus its English pred
# (used in the logical forms).
2nd-weak-verb(kaan, TransV(pred=be), kun, akuun, akun)
2nd-weak-verb(naam, IntransV(pred=sleep), nim, anaam, anam)
2nd-weak-verb(qaal, SayV(pred=say), qul, aquul, aqul)
strong-verb(katab, TransV(pred=write), aktub)
strong-verb(dhahab, IntransV(pred=go), adhhab)
# Note that the following verb, which begins with a glottal stop,
# will have a modification made to it in the first-person singular present.
# (See above.)
strong-verb(_akal, IntransV TransV (pred=eat), a_kul)
3rd-weak-verb(ra_, TransV(pred=see), ay, ar, aa)
3rd-weak-verb(_a9T, DitransV(pred=give), ay, u9T, ii)
3rd-weak-verb(laq, TransV(pred=find), ii, alq, aa)
strong-verb(9araf, ThinkV(pred=know), a9rif) # see also 9alam
strong-verb(tafakkar, ThinkV(pred=think), atafakkar)
strong-verb(ta9allam, ThinkV(pred=learn), ata9allam)
# Common noun: a bare n, 3rd person, non-resumptive.
family N {
entry: n<2>[X, 3rd, nonres]: X:sem-obj(*);
}
# Interrogative pronouns. The three entries parallel those of Rel below
# (a gapped argument, and nom/acc resumptive variants).
# NOTE(review): presumed reading of the res/nonres contrast -- confirm.
family InterrogPro(Pro) {
entry: s/*(s/n<2>[res]);
entry: s/*(s|n<2>[nonres,nom]);
entry: s/*(s/n<2>[nonres,acc]);
member: maa, man;
}
family Pro {
entry: n<2>[X, nom, def, nonres]: X:sem-obj(*);
member: pro;
}
# Relativizer: noun modifier taking a sentence with one of the same
# three missing-argument types as InterrogPro above.
family Rel {
entry: (n<~2>[CASE,nonres]\n<~2>[nonres])/*(s/n<2>[res]);
entry: (n<~2>[CASE,nonres]\n<~2>[nonres])/*(s|n<2>[nonres,nom]);
entry: (n<~2>[CASE,nonres]\n<~2>[nonres])/*(s/n<2>[nonres,acc]);
member: al-ladhii;
}
# Coordination: conjoined nouns share CASE/STATE and the result is
# plural; sentences coordinate with matching $1 arguments.
family AndConj(Conj) {
entry: n[pl, CASE, STATE] \* n[CASE, STATE] /* n[CASE, STATE];
entry: s$1 \* s$1 /* s$1;
member: wa;
}
# Demonstratives: yield a definite, non-resumptive noun.
family Det(indexRel=det) {
entry: n<2>[X, def, nonres] /^ n<2>[X]: X:sem-obj(<det>*);
member: haadhaa, dhaalik;
}
# good luck on this one! construct even more complicated ones!
#
#ar-rajulu al-ladhii kataba al-kutuba ra_aa wa _a9Taa li binti hu al-kilaaba al-latii akalat sagaayira mudarrisii al-waladi
#"the man that wrote the books saw and gave to his daughter the dogs that ate the cigarettes of the boy's teachers."
# Works, correctly:
#ar-rajulu al-ladhii kataba al-kutuba ra_aa wa _a9Taa li binti hu as-sagaayira al-latii qultu inna al-waladu tafakkara anna al-mar_atu _a9Tat haa li ha_ulaahi al-mudarrisiina
#Bad
#ar-rajula al-ladhii katabat al-kutuba ra_aa wa _a9Taa li binti hu as-sagaayira al-latii qultu inna al-waladu tafakkara anna al-mar_atu _a9Tat haa li ha_ulaahi al-mudarrisiina
#"the man that wrote the books saw and gave to his daughter the cigarettes that I said that the boy thought that the woman gave them to those teachers"
# Fails, correctly:
#ar-rajulu al-ladhii kataba al-kutuba ra_aa wa _a9Taa li binti hu as-sagaayira al-latii qultu inna al-waladu tafakkara anna al-mar_atu _a9Tat hu li ha-ulaahi al-mudarrisiina
# Works:
# ar-rajulu al-ladhii al-waladu _a9Taa as-sagaayira li binti hu dhahaba
# ar-rajulu _a9Taa li binti hu as-sagaayira
# ar-rajulu al-ladhii waladu hu _a9Taa as-sagaayira li al-binti dhahaba
# Won't work:
# ar-rajulu al-ladhii al-waladu _a9Taa li binti hu as-sagaayira dhahaba
# ar-rajulu al-ladhii waladu hu _a9Taa li al-binti as-sagaayira dhahaba
family PossClitic(Cli, indexRel=poss) {
# Possessive clitics attach to a construct-state noun and yield a
# definite one; the second entry keeps a following res-noun argument.
entry: n<~1>[X, def, nonres] \* n<1>[X, cons]: X:sem-obj(<poss>*);
entry: (n<~1>[X, def, nonres] / n<2>[res]) \* n<1>[X, cons]: X:sem-obj(<poss>*);
member: ii, ka, ki, hu, haa, naa, kum, kunna, hum, hunna;
}
# Object clitics consume a verb's non-resumptive noun argument slot.
# NOTE(review): indexRel=poss here matches PossClitic -- possibly a
# copy-paste leftover rather than intended; confirm.
family ObjClitic(Cli, indexRel=poss) {
entry: (s$1 | n<3>[nonres]) \ (s$1 / n<2>[nonres] | n<3>[nom,nonres]);
entry: (s$1 / n<~2>[res] | n<3>[nonres]) \ (s$1 / n<2>[nonres] | n<3>[nom,nonres]);
member: nii, ka, ki, hu, haa, naa, kum, kunna, hum, hunna;
}
# Adjectives follow the noun (backward slash) and share its
# number/gender/case/state features.
family Adj(indexRel=adj) {
entry: n<2>[X, NUM, GEND, CASE, STATE] \ n<2>[X]:
X:sem-obj(<adj>*);
}
family Prep-Nom(Prep, indexRel="*NoSem*") {
# The pp<~3> notation generates an 'inheritsFrom' tag rather than
# an 'id' tag for the feature structure.
entry: pp<~3>[lex=*] /< n<3>[gen,nonres];
member: li, fii;
}
# Complementizers: turn s into an sbar marked with the complementizer's
# own lex value, which SayV/ThinkV below select for.
family Comp(indexRel="*NoSem*") {
entry: sbar<~1>[lex=*] / s<1>;
member: anna, inna;
}
family IntransV(V) {
entry: s[E] | n<2>[X,NUM,PERS,GEND,non-cons,nonres,nom]:
E:action(* ^ <Actor>X:animate-being);
}
# 'say' selects an inna-clause; 'think'-type verbs an anna-clause.
family SayV(V) {
entry: s[E] / sbar[Z, lex=inna] | n<2>[X,NUM,PERS,GEND,non-cons,nonres,nom]:
E:action(* ^ <Actor>X:animate-being ^ <Situation>Z);
}
family ThinkV(V) {
entry: s[E] / sbar[Z, lex=anna] | n<2>[X,NUM,PERS,GEND,non-cons,nonres,nom]:
E:action(* ^ <Actor>X:animate-being ^ <Situation>Z);
}
family TransV(V) {
entry: s[E] / n[Y,acc,nonres] | n<2>[X,NUM,PERS,GEND,non-cons,nonres,nom]:
E:action(* ^ <Actor>X:animate-being ^ <Patient>Y:sem-obj);
}
family DitransV(V) {
# The first slash (on the pp) is marked with a mode allowing backward xcomp.
entry: s[E] /< pp[Z,lex=li] / n[Y,acc,nonres] | n<2>[X,NUM,PERS,GEND,non-cons,nonres,nom]:
E:action(* ^ <Actor>X:animate-being ^ <Patient>Y:sem-obj ^
<Recipient>Z:animate-being) ;
}
# Regression testbed: each sentence is paired with the number of parses
# it is expected to receive (0 = must be rejected).
testbed {
# different states of subject
rajulun dhahaba: 1;
ar-rajulu dhahaba: 1;
rajulu dhahaba: 0;
# subject-verb agreement
rajulun dhahabuu: 0;
ar-rajulu dhahabuu: 0;
rajulu dhahabuu: 0;
# gender agreement
ar-rajulu dhahaba: 1;
ar-rajulu dhahabat: 0;
al-bintu dhahaba: 0;
al-bintu dhahabat: 1;
# possession
rajulun hu dhahaba: 0;
ar-rajulu hu dhahaba: 0;
rajulu hu dhahaba: 1;
# subject case
ar-rajula dhahaba: 0;
ar-rajuli dhahaba: 0;
# case in construct phrase
_axuu ar-rajuli dhahaba: 1;
_axuu ar-rajula dhahaba: 0;
_axuu ar-rajulu dhahaba: 0;
# construct state in construct phrase
_axun ar-rajuli dhahaba: 0;
al-_axu ar-rajuli dhahaba: 0;
# object case
ar-rajulu ra_aa al-kitaaba: 1;
ar-rajulu ra_aa al-kitaabi: 0;
ar-rajulu ra_aa al-kitaabu: 0;
# preposition case
ar-rajulu _a9Taa al-kitaaba li al-waladi: 1;
ar-rajulu _a9Taa al-kitaaba li al-waladu: 0;
ar-rajulu _a9Taa al-kitaaba li al-walada: 0;
# subcategorization
ar-rajulu ra_aa al-kitaaba li al-waladi: 0;
# backward xcomp
ar-rajulu _a9Taa li al-waladi al-kitaaba: 1;
_a9Taa ar-rajulu li al-waladi al-kitaaba: 1;
# object clitics
ana ra_aytu hu: 1;
ra_aytu hu ana: 1;
ra_aytu ana hu: 0;
hu ra_aytu ana: 0;
hu ana ra_aytu: 0;
huwa ra_aa nii: 1;
huwa ra_aa ii: 0;
huwa ra_aa ana: 0;
ar-rajulu _a9Taa haa li al-waladi: 1;
_a9Taa haa ar-rajulu li al-waladi: 1;
# relative clauses
# "I gave it to the man that the girl saw him"
_a9Taytu haa li ar-rajuli al-ladhii al-bintu ra_at hu: 3;
# "I gave it to the man that the girl saw her"
_a9Taytu haa li ar-rajuli al-ladhii al-bintu ra_at haa: 0;
# "I gave it to the man that the girl saw"
_a9Taytu haa li ar-rajuli al-ladhii al-bintu ra_at: 0;
# "I gave it to the man that the girl saw the boy"
_a9Taytu haa li ar-rajuli al-ladhii al-bintu ra_at al-walada: 0;
}
================================================
FILE: ccg-format-grammars/inherit/inherit.ccg
================================================
# A minimal grammar which shows inheritance.
# Jason Baldridge, September 2007
################## Features #################
feature {
# The only feature in this grammar: nominative vs. accusative case.
case: nom acc;
}
################## Words #################
# Each word names its family; pred supplies its semantic predicate.
# ('to' is given no pred attribute of its own.)
word John:NP (pred=john);
word Fido:NP (pred=fido);
word food:NP (pred=food);
word to:PP;
word sleeps:IntransV (pred=sleep);
word saw:TransV (pred=see);
word gave:DitransV (pred=give);
################## Categories #################
family NP {
entry: np[X]:X(*);
}
family PP {
entry: pp[X]/np[X]:X(<Prep>*);
}
# iv_cat builds verb categories around the common s\np[nom] core:
# POSTSYN is spliced in after the subject argument, MORESEM after <Actor>.
def iv_cat (PostSyn, MoreSem) {
s[E] \ np[X nom] PostSyn: E(* <Actor>X MoreSem)
}
# tv_cat adds an accusative object on top of iv_cat, threading its own
# PRESYN/POSTSYN/MORESEM hooks through -- a simple form of inheritance.
def tv_cat (PreSyn, PostSyn, MoreSem) {
iv_cat(PreSyn / np[Y acc] PostSyn, <Patient>Y MoreSem)
}
family IntransV(V) {
entry: iv_cat(,);
}
family TransV(V) {
entry: tv_cat(,,);
}
# Two frames: double object ("gave Fido food") and to-PP
# ("gave food to Fido") -- see the testbed below.
family DitransV(V) {
entry: tv_cat( , / np[Z acc] , <Addressee>Z);
entry: tv_cat(/ pp[Z acc] , , <Addressee>Z);
}
################## Test sentences #################
# Each sentence is paired with its expected number of parses.
testbed {
John sleeps: 1;
John saw Fido: 1;
John gave Fido food: 1;
John gave food to Fido: 1;
}
================================================
FILE: ccg-format-grammars/tiny/tiny.ccg
================================================
#############################################################
# #
# tiny.ccg #
# #
#############################################################
# Author: Ben Wing <ben@666.com>
# Date: April 2006
# This is derived from Geert-Jan M. Kruijff's 'coordination' grammar,
# which was simplified, revised and extended.
#
# David Reitter, dreitter at inf.ed.ac dot uk, 01/2005
# Michael White, mwhite at inf dot ed.ac.uk, 01/2005
# This is the `tiny' grammar, in .ccg format. There are a bunch
# of comments below to explain the format. See also the `arabic'
# grammar for an example of extended macro use to handle a complex
# morphology.
# To generate a grammar for use with OpenCCG, use `genccg' (or whatever
# it's currently called). This generates the various XML files needed for
# OpenCCG (grammar.xml, lexicon.xml, morph.xml, types.xml, and rules.xml).
# After doing this, you can run `tccg' to load the grammar and use it for
# parsing.
# Some general notes about this format:
# -- The general feel of the syntax is like C, Java or Perl. Indentation
# and whitespace is unimportant. (The only exception is in macro
# definitions, where the text of a macro must either be on the same line
# as the definition -- possibly extended with backslash
# line-continuation markers -- or be enclosed in braces.)
#
# -- The syntax tries to be very forgiving of the usage of commas and
# semicolons, for the benefit of macro definitions. In most lists, in
# fact, commas are optional -- arguments can be separated by no commas,
# one comma, or many commas, and extra commas can occur at the end of
# the list. All of this makes macro definitions and macro calls much
# easier. We usually write such lists below without commas.
#
# The main exception where commas matter is in macro calls. You can
# still put an extra comma at the end of a macro call, but otherwise you
# must have exactly one comma (no more, no less) between arguments. The
# reason is that macro arguments can contain pretty much any text
# whatsoever (including no text at all), so commas are needed to
# indicate where one argument stops and the next one starts.
#
# NOTE: The parser pays attention to parens, brackets, and braces in the
# text of a macro call argument, and will not get confused by commas
# inside of matched delimiters. Thus, a macro call `foo(a, bar(b, c))'
# is correctly interpreted as a call to foo() with two arguments, `a'
# and `bar(b, c)', and *NOT* a call to foo() with three arguments `a',
# `bar(b', and `c)'.)
#
# Note also how the text of macro definitions and calls can optionally
# be surrounded by braces, to clearly delimit the text boundaries (see
# below). It's important to note that the braces do *NOT* form part of
# the text itself; if you really want your text surrounded by braces,
# you need to add a second set around the text.
#
# -- You can use the `-t' option to see what things look like after macro
# substitutions have been applied.
#
# -- The order of declarations in this file is not significant except
# that macros and features must be defined before they can be used.
#
# -- You can surround literal text (word stems, inflections, etc.) with
# single or double quotes in order to insert characters into the text
# that would otherwise be interpreted by the parser, such as spaces,
# colons, etc. Without such quotes, the only characters that can occur
# in a word are letters, numbers, '_', '-', '+' and '%'. Note, however,
# that there is no such thing as "reserved words" in this format; words
# like 'feature', 'word', 'entry', etc. that have a special significance
# in the right place in the syntax can otherwise be freely used as word
# stems and inflections, macro variable names, part of speech tags, etc.
#############################################################
# #
# FEATURES #
# #
#############################################################
# Declaration of features. This is used as follows:
#
# (1) So that you can just specify a feature value inside brackets below,
# and the appropriate feature is substituted. That is, a declaration
# like n[nom] is equivalent to n[CASE=nom].
#
# (2) To specify the types listed in types.xml.
#
# (3) To specify the feature values used in word declarations. These
# compile out into <macro ...> declarations in morph.xml (unrelated to
# our own use of macros). In order for this to happen, however, you
# *must* specify a "macro-tie", inside of parentheses after the feature
# type. This is either a number (for a syntactic macro) or a letter
# (for a semantic macro). See below for exactly how these are used.
#
# (A "strict feature" mode may be implemented at some point, where all
# features that are used in category definitions must be declared.)
feature {
# A number following the feature type, as follows, is used for syntactic
# features that can be attached to a word. The number is a feature-
# structure ID; when an atomic category in a lexical category definition
# has the corresponding ID, these features will be inserted into that
# category. Hence, the <2> here corresponds to the <2> that occurs
# below in the definition of Noun, Det, verbcat(), etc.
CASE<2>: nom acc;
NUM<2>: sg pl;
# You can specify more than one number if you want.
# NUM<2,3>: sg pl;
# A word in place of a number causes semantic macros to be created; if
# used in a word {} declaration, the relevant info will be inserted into
# the hybrid logic part of a lexical category declaration, attached to
# the argument of the given name. A declaration like (X:NUM) means that
# 'NUM' is used in the XML <diamond mode="..."> declaration in place of
# the feature's actual name. (FIXME: I don't know what the significance
# of this is.)
# sg-X/pl-X are the semantic-number values used in the word {}
# declarations further below.
sem-num<X:NUM>: sg-X pl-X;
TENSE<E>: past pres;
# You can create hierarchical values as shown.
PERS<2>: non-3rd {1st 2nd} 3rd;
# Alternatively, you can explicitly list the parent(s) of a feature --
# e.g. to create multiple-inheritance hierarchies.
#alternate-pers<2>: non-3rd 1st[non-3rd] 2nd[non-3rd] 3rd;
# Here's a more complicated hierarchy, from the original tiny grammar.
ontology: sem-obj {
phys-obj {
animate-being {
person
}
thing
}
situation {
change {
action
}
state
}
};
# Here we show how you can mix the two ways of declaring hierarchies,
# if you have a primarily single-inheritance hierarchy but with certain
# multiply-inherited values. In this hierarchy, 'werewolf' gets both
# 'man' and 'wolf' as its parents. 'intersex' will have only 'man' and
# 'woman' as parents -- 'rational-being' will not be specified as it's
# redundant.
entity: being {
# We would call this `thing' but that is used above in
# the `ontology' hierarchy, and causes a warning to be issued
thing2
animate {
irrational-being {
fish
mammal { dog-like {dog wolf} cat horse }
}
rational-being {
man {
centaur[horse]
werewolf[wolf]
}
woman {
mermaid[fish]
}
intersex[man woman]
}
}
}
# NOTE(review): declared with no values or hierarchy -- presumably just
# introduces the bare atom; confirm.
nothingness;
}
# You can also specify that a feature is "distributive" and/or give
# licensing information for the realizer.
#
# Here's the equivalent of the stuff in flights/lexicon.xml and
# flights/types.xml.
#
# A ! before a feature makes it distributive. You can specify
# licensing-related attributes on either a feature or a feature-value, in
# the standard way of specifying attributes in .ccg format.
# Distributive features and licensing attributes; see the comment block
# above for what the leading ! and the parenthesized attributes mean.
feature {
!owner(location=args-only, instantiate=false);
!info;
form: dcl-base {dcl, fronted},
q-base {q(license-marked-cats=true, also-licensed-by=q-base),
wh(license-marked-cats=true, also-licensed-by=q-base)},
base,
emb(location=target-only),
inf(location=target-only),
adj(location=target-only),
ng;
}
# If you really want the feature value as used in the word {} declarations
# to be different from the feature value elsewhere, you can do that.
# This allows you to generate the following XML:
#
# <macro name="@acc0">
# <fs id="0" attr="case" val="p-case"/>
# </macro>
# <macro name="@acc1">
# <fs id="1" attr="case" val="p-case"/>
# </macro>
# <macro name="@acc2">
# <fs id="2" attr="case" val="p-case"/>
# </macro>
# <macro name="@acc3">
# <fs id="3" attr="case" val="p-case"/>
# </macro>
#
#feature {
# case<0>: acc0:p-case;
# case<1>: acc1:p-case;
# case<2>: acc2:p-case;
# case<3>: acc3:p-case;
#}
# NOTE: (1) This doesn't quite work in ccg2xml yet, since only `acc3' gets
# added to the hierarchy in types.xml. (2) The entire motivation for doing
# this kind of thing is kludgy, and will be eliminated by allowing features
# to be explicitly specified for the result of a lexical insertion rule,
# much like the way that type-raising rules currently work.
# Here's how you specify a relation-sorting order, in case you care.
relation-sorting: foo * bar baz; # NOTE(review): '*' presumably stands for all relations not listed explicitly -- confirm
#############################################################
# #
# WORDS #
# #
#############################################################
#
################## Functional words #################
#
# Some examples of words.
#
# The format of word declarations is
#
# word STEM:FAMILY ...(ATTRS): FEATURES;
#
# or
#
# word STEM:FAMILY ...(ATTRS) { INFLECTED-FORM: FEATURES; ...}
#
# where STEM is the word's stem, FAMILY is a list of the families that a
# word is part of, and ATTRS specifies any other attributes associated
# with the word. FEATURES gives the word's features; these come from the
# feature {} declarations above. (NOTE: Only feature values whose features
# specify a "macro-tie" value -- something in <> following the
# feature's name -- can be used. See above.)
#
# ATTRS is a list; each attribute is either a specification ATTRIBUTE=VALUE
# or a single VALUE (equivalent to class=VALUE). The useful attributes are
#
# class Semantic class of a word.
# pred Semantic predicate of a word, used in the logical form;
# if omitted, defaults to the word's stem.
# excluded List of excluded lexical categories.
# coart Boolean indicating that this entry is a coarticulation,
# eg a pitch accent, gesture, or other word-associated element.
#
# Any of FAMILY, ATTRS and/or FEATURES can be omitted.
#
# The second form above, with braces, is used for words with different
# inflections. Instead of specifying the features directly after the word,
# you list the features for each inflection separately. Note that * is
# shorthand for the stem itself.
#
# Note that there can be more than one word {} declaration for a single stem.
#
# The families in FAMILY can be either a family name, from a family {}
# block, or a part of speech. (`genccg' will derive the appropriate parts
# of speech from any families given when creating the XML file.) Note that
# the words associated with a particular family can be specified either by
# tagging each word with its family, by listing a family's words explicitly
# using the `member' declaration inside of a family {} block, or by a
# combination of the two.
word the:Det;
word some:Det;
# 'a' carries the sg feature, so it only combines with singulars.
word a:Det: sg;
# Declared with no family at all.
word for;
# Pronoun paradigms: each inflected form lists its person/number/case
# and semantic-number features (declared in the feature block above).
word pro1:Pro(animate-being) {
I: 1st sg nom sg-X;
me: 1st sg acc sg-X;
we: 1st pl nom pl-X;
us: 1st pl acc pl-X;
}
# 'you' is left unspecified for number and case.
word pro2:Pro(animate-being) {
you: 2nd;
}
word pro3f:Pro(animate-being) {
she: 3rd sg nom sg-X;
her: 3rd sg acc sg-X;
}
word pro3m:Pro(animate-being) {
he: 3rd sg nom sg-X;
him: 3rd sg acc sg-X;
}
word pro3n:Pro(thing) {
it: 3rd sg sg-X;
}
# If we want the CLASS associated with only some of the inflections,
# we can use the same stem in more than one decl. (Assigning neuter to
# "they" is strange but that's how the original morph.xml did it!)
word pro3n:Pro {
they: 3rd pl nom pl-X;
them: 3rd pl acc pl-X;
}
#
################## Nouns #################
#
# Here, we make use of macros. The basic idea behind macros is simple:
# They simply do string substitution. However, parameters can be given,
# making them very powerful.
# This macro says: Every time an expression of the form basic-noun(...)
# occurs, replace it with the text that comes after. The parameters will
# be substituted into the text. The braces that denote the macro's text do
# *NOT* form part of the text that is substituted. (Alternatively, you
# can put the entire replacement text on the same line as the macro
# definition, using a backslash at the end of the line if needed in order
# to continue the definition on the next line.) Likewise, braces can be
# used to surround text in an argument to a macro call and will not form part of
# the argument's text. This is useful when the argument's text contains
# commas. In either case, if you really want the text itself to have
# braces around it, you need to put two sets of braces around it.
# Note that macro substitutions are processed recursively: If the
# text of a macro substitution contains calls to other macros, they will
# also be processed. This makes "inheritance" very easy to implement.
# Inside of a macro definition's text, the . operator can be used; this
# concatenates two words together into a single word. See the definition
# of normal-noun() below for a simple example.
# Substitution of arguments does not occur inside of quoted text.
# A noun with explicitly-given singular and plural forms.
# ('*' is shorthand for the stem itself -- see the notes above.)
def basic-noun(sing, plur, class) {
word sing:N(class) {
*: sg sg-X;
plur: pl pl-X;
}
}
#def normal-noun(stem, class) {
# word stem:N(class) {
# *: sg sg-X;
# stem . s: pl pl-X;
# }
#}
# or equivalently, using our definition of basic-noun():
# def normal-noun(stem, class) basic-noun(stem, stem . s, class)
# But in fact, we do something more clever to handle pluralization.
# Here we make use of some built-in macros:
#
# regsub(PATTERN, REPLACEMENT, TEXT):
# Replace all occurrences of regexp PATTERN with REPLACEMENT in TEXT.
# This follows normal Python conventions for regular expression
# substitution.
#
# ifmatch(PATTERN, TEXT, IF-TEXT, ELSE-TEXT):
# If TEXT matches PATTERN at its beginning, substitute IF-TEXT; else,
# substitute ELSE-TEXT.
#
# ifmatch-nocase(PATTERN, TEXT, IF-TEXT, ELSE-TEXT):
# Just like ifmatch() but its pattern-matching is case-insensitive.
# pluralize(WORD): expands to the regular English plural of WORD.
def pluralize(word) {
# This shows a complicated expression involving the built-ins
# 'ifmatch' and 'regsub'. If the word ends in a vowel + o or y,
# we add s. Else, if the word ends in (consonant) + o or y, or s, sh, ch,
# or x, we change y to i and add es. Finally, in all other cases,
# just add s. So buy -> buys, boy-> boys, but try -> tries, lady -> ladies.
# Similarly, go -> goes but goo -> goos. For words like volcano -> volcanos
# you have to put the forms in manually (or change the rule here, and put
# forms in -o + es in manually, e.g. does, goes, tomatoes, potatoes).
#
ifmatch('^.*[aeiou][oy]$', word, word . s,
ifmatch('^.*([sxoy]|sh|ch)$', word, regsub('^(.*)y$', '\1i', word) . es,
word . s))
}
# Regular nouns: the plural is derived by pluralize() above.
def noun(sing, class) basic-noun(sing, pluralize(sing), class)
noun(book, thing)
noun(DVD, thing)
noun(glass, thing)
noun(church, thing)
noun(flower, thing)
noun(bath, thing)
noun(teacher, person)
noun(lady, person) # Pluralized (correctly) to 'ladies'
noun(boy, person) # Pluralized (correctly) to 'boys'
# An irregular noun.
basic-noun(policeman, policemen, person)
#
################## Verbs #################
#
# The "props" parameter specifies families or attributes.
# The "props" parameter specifies families or attributes.
def basic-verb(stem, props, 3sing, pasttense) {
word stem:props {
*: pres non-3rd sg;
3sing: pres 3rd sg;
*: pres pl;
pasttense: past;
}
}
# The 3rd-singular present follows the same spelling rules as noun
# pluralization, so verb() reuses pluralize() for it.
def verb(stem, props, pasttense) \
basic-verb(stem, props, pluralize(stem), pasttense)
verb(buy, TransV DitransBenV, bought)
verb(rent, TransV DitransBenV, rented)
verb(go, IntransV, went)
verb(sleep, IntransV, slept)
# Here we show how you can specify a predicate or other attribute.
# Admittedly this is not too useful here, but it can be much more so in
# the case of a foreign language, where we want the semantic predicates
# to be in English. (See arabic.ccg.)
verb(eat, TransV IntransV (pred=manjar), ate)
#############################################################
# #
# RULES #
# #
#############################################################
# This declaration specifies the contents of rules.xml. Each statement
# specifies a single rule; it is also possible for statements to cancel
# some or all rules.
#
# Note that some rules are enabled by default; this includes application,
# composition and crossed composition (forward and backward in each case),
# as well as forward type-raising from np to s/(s\np) and backward
# type-raising from np to s$1\(s$1/np).
rule {
# turn off forward cross-composition
no xcomp +;
# this is how we could turn off all type-raising rules.
# no typeraise;
# Declare a backward type-raising rule from pp to s$1\(s$1/pp).
# The $ causes a dollar-sign raise category to be created, as shown;
# without it, we'd just get s\(s/pp).
typeraise - $: pp => s;
# Declare a type-changing rule to enable pro-drop (not useful in English!)
# typechange: s[finite]\np[nom]$1 => s[finite]$1 ;
}
# This shows how you can turn off all defaults and specify your own
# properties from scratch, if you want.
# rule {
# no; # remove all defaults
# app +-;
# comp +-; # +- means both forward and backward
# xcomp -;
# sub +-;
# xsub +-;
# # Defaults for typeraising are np => s, if omitted.
# typeraise +;
# typeraise - $;
# }
#
#############################################################
# #
# CATEGORIES #
# #
#############################################################
# Specify lexical families and the lexical insertion rules for each
# family. Properties of the family can be given in parens after the
# family name. The family name should either be the same as a part
# of speech, or the part of speech should be given in parens after
# the family name.
# Categories can be specified in an expected form, e.g. s\np[nom]/np[acc].
# The notation np<3>[acc] corresponds to XML code like this:
# <atomcat type="np">
# <fs id="3">
# <feat attr="CASE" val="acc"/>
# </fs>
# </atomcat>
# i.e. the <3> specifies the feature-structure ID, and the [acc]
# specifies a <feat> constraint. In this case, a constraint of the form
# [CASE=acc] is generated because of the feature {} declaration above.
# If an unknown value is given, e.g. [foo], it's assumed to be a feature,
# rather than a feature value, and you get code like
# <feat attr="foo"> <featvar name="FOO"/> </feat>
# <feat attr="CASE" val="acc"/>
# You can also write [foo=bar] to explicitly give a feature and value.
# And an entry like [X] corresponds to
# <feat attr="index"> <lf> <nomvar name="X"/> </lf> </feat>
# Note also that slashes can be followed by a slash mode, e.g. /<, or
# the mode can be omitted; in this case, a default mode is generated:
# \<, />, |.
family Det(indexRel=det) {
entry: np<2>[X PERS=3rd] /^ n<2>[X]:
X:sem-obj(<det>*);
}
family Prep-Nom(Prep, indexRel="*NoSem*") {
# You can name your entries, as shown, although it's not clear there's
# much point.
#
# The pp<~3> notation generates an 'inheritsFrom' tag rather than an 'id'
# tag for the feature structure. This unifies only the properties not
# explicitly given in the category, i.e. everything but 'lex' will unify.
#
# The entry [lex=*] corresponds to <feat attr="lex" val="[*DEFAULT*]"/>
# and means that a feature 'lex' will be attached, whose value is the
# word stem.
entry Nominal: pp<~3>[lex=*] /< np<3>[acc];
# If members are specified, the family defaults to 'closed'.
member: for;
}
family Conj {
entry: np[NUM, X0] \* np[X1] / np[X2]:
X0(and ^ <First>(L1 ^ elem ^ <Item>X1 ^
<Next>(L2 ^ elem ^ <Item>X2)));
}
family DollarTest {
entry: s\np$1\*(s\np);
entry: s\(np$1)\*(s\np);
}
family N {
entry: n<2>[X NUM]: X:sem-obj(*);
}
family Pro {
entry: np<2>[X NUM PERS CASE]:
X:sem-obj(*);
}
# Here we create a macro to describe the category for a verb, with transitive
# arguments ARGS (possibly empty) and corresponding semantics SEM.
# Don't call this macro just "verb" since that's used above already!
def verbcat(args, sem) {
# A * here corresponds to <prop name="[*DEFAULT*]"/>.
# Similar use of * appears above in [lex=*] in prepositions,
# and <det>* in determiners.
s<1>[E] \ np<2>[X NUM PERS nom] args:
E:action(* <Actor>X:animate-being sem)
}
family IntransV(V) {
entry: verbcat(,);
}
# This shows how we could extend verbcat() to handle transitive verbs.
# Since the arguments for ditransitive verbs could potentially either
# before or after the transitive argument, we need two different macros.
# Or, we could create one macro, with two different parameters for "before"
# and "after" arguments.
def before_transverbcat(args, sem) {
verbcat(args / np<3>[Y acc], <Patient>Y:sem-obj sem)
}
def after_transverbcat(args, sem) {
verbcat(/ np<3>[Y acc] args, <Patient>Y:sem-obj sem)
}
family TransV(V) {
entry: after_transverbcat(,);
}
family DitransBenV(V) {
# Careful here! Remember that the arguments in a CCG category will be
# listed in backwards order compared to how they appear in the surface
# syntax. Hence SUBJ VERB X Y Z corresponds to s\np/z/y/x.
entry DTV:
after_transverbcat(/< np[Z acc], <Beneficiary>Z:animate-being);
entry NP-PPfor:
before_transverbcat(/ pp[Z lex=for], <Beneficiary>Z:animate-being);
# This shows how we could define the previous entry directly in
# terms of verbcat().
#entry NP-PPfor: verbcat(/ pp<4>[Z lex=for] /< np<3>[Y acc],
# <Patient>Y:sem-obj <Beneficiary>Z:animate-being);
}
#############################################################
# #
# TESTBED #
# #
#############################################################
# Statements to put in testbed.xml. If you omit the number, it will omit
# the corresponding numOfParses item in the XML; I think this currently
# is equivalent to specifying 1 as the number. If you put a ! before a
# line, this indicates a "known failure" (known="true" in the XML).
testbed {
the teacher buys the policeman a book: 1;
the teacher buys the policemen some flowers: 1;
the teachers buy a book: 3;
she buys it: 3;
she buy it: 0;
# Possible example of a known failure, in case we know we don't correctly
# reject "she buy buy":
! she buy buy: 0;
they buys it: 0;
she buys the flower for him: 1;
she buys the flower for he: 0;
her buys the flower for him: 0;
he rented her a DVD: 1;
he rented a DVD for her: 1;
he rented a DVD her: 0;
}
================================================
FILE: ccg-format-grammars/tinytiny/tinytiny.ccg
================================================
# A truly minimal grammar for CCG.
# Ben Wing, May 2006
################## Features #################
feature {
CASE<2>: nom acc;
NUM<2>: sg pl;
PERS<2>: non-3rd {1st 2nd} 3rd;
TENSE<E>: past pres;
SEM-NUM<X:NUM>: sg-X pl-X;
# Some alternate code that appeared from somewhere, I'm not sure anymore.
# Enable the following three statements and disable the statement above
# beginning `NUM<2>: ...'. It looks like it tries to implement something to
# do with number agreement. (Note that number agreement is already taken care
# of in the normal system. I'm not sure what the code is trying to do.)
# num<2,X:num>: sg pl;
# num<1,2>: sg-agr:sg pl-agr:pl;
# num<2>: sg-2:sg pl-2:pl;
ontology: sem-obj {
phys-obj {
animate-being {
person
}
thing
}
situation {
change {
action
}
state
}
};
}
################## Words #################
# Example of how to have punctuation and other non-word characters in a
# lexical item.
word '.':Punc;
word ',':Punc;
word '?':Punc;
word the:Det;
word a:Det: sg;
word pro1:Pro(animate-being) {
I: 1st sg nom sg-X;
me: 1st sg acc sg-X;
we: 1st pl nom pl-X;
us: 1st pl acc pl-X;
}
def noun(sing, plur, class) {
word sing:N(class) {
*: sg sg-X;
plur: pl pl-X;
}
}
noun(book, books, thing)
noun(peach, peaches, thing)
noun(boy, boys, person)
noun(policeman, policemen, person)
def verb(stem, props, 3sing, pasttense) {
word stem:props {
*: pres non-3rd sg;
3sing: pres 3rd sg;
*: pres pl;
pasttense: past;
}
}
verb(go, IntransV, goes, went)
verb(sleep, IntransV, sleeps, slept)
verb(eat, TransV IntransV, eats, ate)
verb(see, TransV, sees, saw)
# This is a test of set arguments. Not clear it's useful or should be here;
# maybe move to tiny.ccg?
verb(setverb, SetargV, setverbs, setverbed)
################## Categories #################
family Punc {
entry: punc;
}
family Det(indexRel=det) {
entry: np<2>[X PERS=3rd]/^ n<2>[X]:
X:sem-obj(<det>*);
}
family N {
entry: n<2>[X]:
X:sem-obj(*);
}
family Pro {
entry: np<2>[X]:
X:sem-obj(*);
}
family IntransV(V) {
entry: s<1>[E] \ np<2>[X nom]:
E:action(* <Actor>X:animate-being);
}
family TransV(V) {
entry: s<1>[E] \ np<2>[X nom] / np<3>[Y acc]:
E:action(* <Actor>X:animate-being <Patient>Y:sem-obj);
}
family SetargV(V) {
entry: s<1>[E] {\np<2>[X nom], / np<3>[Y acc]}:
E:action(* <Actor>X:animate-being <Patient>Y:sem-obj);
}
################## Test sentences #################
testbed {
the policemen eat: 2;
the boys eat: 2;
the boys eat the peaches: 1;
the policeman sleeps: 1;
the policemen sleeps: 0;
the policemen sleep: 1;
the policeman sleeps the peach: 0;
the policeman saw me: 1;
the policeman saw I: 0;
I see the book: 1;
I sees the book: 0;
I see a book: 1;
I see a books: 0;
}
================================================
FILE: ccgbank/bin/american-to-logical-quotes.py
================================================
"""
Copryright (c) 2011 Dennis N. Mehay
Assumes tokenized, PTB3-normalized UTF-8 text, one sentence per line.
<stdin> => <stdout>
Turns 'American'-style quotations into 'British'/'logical'-style quotations.
So, e.g.,
`` Hello , '' said John .
becomes:
`` Hello '' , said John .
[Insert LGPL here]
"""
import sys, codecs, os
streamReader = codecs.lookup("utf-8")[2]
streamWriter = codecs.lookup("utf-8")[-1]
sys.stdin = streamReader(sys.stdin)
sys.stdout = streamWriter(sys.stdout)
for ln in sys.stdin:
# trim off extra whitespace and replace double spaces with single spaces.
ln = ln.strip().replace(u" ", u" ")
# now replace
# <space>, ''
# with
# <space>''
# and
# <space>. ''
# with
# <space>'' .
ln = ln.replace(u" , ''", " '' ,").replace(u" . ''", " '' .")
# now fix any double-punct messes this might have created.
ln = ln.replace(u" '' . ?", u" . '' ?").replace(u" '' . !", u" . '' !")
sys.stdout.write(ln + '\n')
================================================
FILE: ccgbank/bin/convert-mtc-systems.py
================================================
"""
This program takes a set of documents (all streamed from stdin at once) and formats them in a way suitable for
use with the NIST-distributed mteval script. The output is in UTF-8.
Usage: cat [MTC_DIR_FOR_SYSTEM] | python convert-mtc-systems.py [doctype-string {'source', 'target', 'reference'} (default='target')] | [NEW_XML_DOC_TO_STDOUT]
"""
import sys, os, re, codecs, xml.sax.saxutils, my_unicode
try:
import chardet
except:
chardet = None
from xml.etree.ElementTree import *
from collections import defaultdict
def tokenize(t):
"""very simple text tokenization:
<blah>n't => <blah> n't
<blah>'s => <blah> 's
<blah>' => <blah> '
where '<blah>' is not whitespace.
"""
t = t.replace("n't", " n't").replace("'s", " 's").replace("' ", " ' ")
if t[-1] == "'":
t = t[:-1] + " " + t[-1]
return t.replace(" ", " ")
def decode_line(ln, encoding):
res = None
try:
res = ln.decode(encoding)
except:
try:
res = ln.decode("iso-8859-2")
except:
try:
res = ln.decode("iso-8859-1")
except:
try:
res = ln.decode("utf-8")
except:
try:
res = ln.decode("GB2312")
except:
try:
res = ln.decode("Big5")
except:
try:
res = ln.decode("EUC-TW")
except:
res = ln
return res
doc_pattern = re.compile(u"<[Dd][Oo][Cc] docid=\"(.*)\" sysid=\"(.*)\">(.*)")
doc_pattern_source = re.compile(u"<[Dd][Oo][Cc] docid=\"(.*)\">(.*)")
seg_pattern = re.compile(u"<seg id=\"?(.*)\"?>(.*)</seg>$")
doc_type = "target"
if len(sys.argv) >= 2:
doc_type = sys.argv[1].lower()
if not doc_type in ["target", "source", "reference"]:
doc_type = "target"
mtc_in = sys.stdin.readlines()
mtc_all = (os.linesep).join(mtc_in)
if not chardet is None:
encoding = chardet.detect(mtc_all)['encoding']
else:
encoding = "ISO-8859-2"
# turn stdout into a UTF-8 converting writer.
streamWriter = codecs.lookup("UTF-8")[-1]
sys.stdout = streamWriter(sys.stdout)
output = sys.stdout
# map from auto-assigned ID to MTC ID.
autoid2mtcid = {}
mtc = defaultdict(lambda: [])
sys = None
for l in mtc_in:
l = decode_line(l, encoding).strip()
if l.startswith("<DOC"):
match = doc_pattern.findall(l)[0] if (doc_type == "target") else doc_pattern_source.findall(l)[0]
if doc_type == "target":
(docid, sysid) = (match[0], match[1])
else:
(docid, sysid) = (match[0], None)
docid = docid.replace("_",".")
curr_doc = docid
curr_sys = sysid
sys = curr_sys
elif l.startswith("<seg"):
match = seg_pattern.findall(l)[0]
(segid, text) = (match[0], match[1])
mtc[(curr_sys, curr_doc)].append((segid, text.strip()))
output.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + \
os.linesep + "<!DOCTYPE mteval SYSTEM \"ftp://jaguar.ncsl.nist.gov/mt/resources/mteval-xml-v1.3.dtd\">" + os.linesep +\
"<mteval>" + os.linesep)
if doc_type == "target":
output.write(" <tstset setid=\"multiple_translation_set\" srclang=\"Chinese\" trglang=\"English\" sysid=\"%s\">" % sys + os.linesep)
elif doc_type == "source":
output.write(" <srcset setid=\"multiple_translation_set\" srclang=\"Chinese\">" + os.linesep)
else:
output.write(" <refset setid=\"multiple_translation_set\" srclang=\"Chinese\" trglang=\"English\" refid=\"1\">" + os.linesep)
docs = mtc.keys()
docs.sort()
for (sy,doc) in docs:
output.write(" <doc docid=\"%s\" genre=\"nw\">" % doc + os.linesep)
segids_and_texts = mtc[(sy,doc)]
segids_and_texts.sort(lambda a,b: cmp(int(a[0]),int(b[0])))
for (segid,text) in segids_and_texts:
output.write(" <seg id=\"%s\"> %s </seg>" % (segid,xml.sax.saxutils.escape(my_unicode.removeInvalidChars(tokenize(text)))) + os.linesep)
output.write(" </doc>" + os.linesep)
if doc_type == "target":
output.write(" </tstset>" + os.linesep + "</mteval>")
elif doc_type == "source":
output.write(" </srcset>" + os.linesep + "</mteval>")
else:
output.write(" </refset>" + os.linesep + "</mteval>")
================================================
FILE: ccgbank/bin/convert-spaces-to-newlines.py
================================================
#
# converts spaces to newlines, and newlines to special <eol> chars,
# from stdin to stdout
#
import sys, re;
[sys.stdout.write(re.sub(' ','\n',re.sub('\n','<eol>',line))) for line in sys.stdin]
================================================
FILE: ccgbank/bin/convert_all
================================================
#!/bin/bash
for i in 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
do
echo "Starting conversion for section $i"
nice ccg-build -Dsect=$i -Dfile=* convert-puncts-aux-bbn >& logs/log.convert.$i &
done
================================================
FILE: ccgbank/bin/correlate-to-judgments.py
================================================
"""
Correlate human judgments (streamed into sys.stdin -- e.g., from the MTC)
to the BLEU/NIST scores in the given directories (sys.argv[1] and sys.argv[2]).
Depends on rpy2 having been installed.
"""
import sys, os
try:
import rpy2
import rpy2.robjects as robjs
except ImportError:
print >> sys.stderr, "please install rpy2. exiting..."
sys.exit(-1)
def num2string(n):
"""
E.g., 0 => '00', 1 => '01' and 10 => '10'.
"""
try:
numm = int(n)
except:
numm = n
if numm < 10:
return str(numm)
else:
return str(numm)
human_judgments = []
human_judgments_fluency = [] # fluency.
human_judgments_acc = [] # accuracy.
human_judgments_ave = [] # average of both.
for l in sys.stdin:
l = l.strip()
if l.startswith("#"):
continue
lparts = l.split(",")
# appending (sys,doc,judge,ref_sys,segment,fluency,accuracy)
human_judgments.append(tuple(lparts[0:7]))
human_judgments_fluency.append(int(lparts[5]))
human_judgments_acc.append(int(lparts[6]))
human_judgments_ave.append((float(lparts[5]) + float(lparts[6]))/2.0)
human_judgments_fluency = robjs.FloatVector([float(i) for i in human_judgments_fluency])
human_judgments_acc = robjs.FloatVector([float(i) for i in human_judgments_acc])
human_judgments_ave = robjs.FloatVector([float(i) for i in human_judgments_ave])
bleu_nist_dir1 = sys.argv[1]
bleu_nist_dir2 = sys.argv[2]
# the next two are maps from: (sys,doc,ref,segID) => score
bleu_scores = {}
nist_scores = {}
for f in [fl for fl in os.listdir(bleu_nist_dir1) if ("BLEU" in fl or "NIST" in fl)]:
for l in open(bleu_nist_dir1 + os.sep + f, "rb").readlines():
if l.strip() == "":
continue
(sys,ref_sys,doc,seg,bleu_or_nist_score) = l.split(",")
if "BLEU" in f:
bleu_scores[(sys,doc,ref_sys,"S"+num2string(seg))] = bleu_or_nist_score
else:
nist_scores[(sys,doc,ref_sys,"S"+num2string(seg))] = bleu_or_nist_score
for f in [fl for fl in os.listdir(bleu_nist_dir2) if ("BLEU" in fl or "NIST" in fl)]:
for l in open(bleu_nist_dir2 + os.sep + f, "rb").readlines():
if l.strip() == "":
continue
(sys,ref_sys,doc,seg,bleu_or_nist_score) = l.split(",")
if "BLEU" in f:
bleu_scores[(sys,doc,ref_sys,"S"+num2string(seg))] = float(bleu_or_nist_score)
else:
nist_scores[(sys,doc,ref_sys,"S"+num2string(seg))] = float(bleu_or_nist_score)
# for both BLEU and NIST, compute rpy2 vectors that parallel the seqeuence
# of human judgments.
# step through the (sys,doc,judge,ref_sys,segment,fluency,accuracy) tuples.
bleu_lst = []
nist_lst = []
for (s,d,j,rs,sg,f,a) in human_judgments:
if (s,d,rs,sg) in bleu_scores:
bleu_lst.append(bleu_scores.get((s,d,rs,sg)))
else:
print "nope", (s,d,rs,sg)
if (s,d,rs,sg) in nist_scores:
nist_lst.append(nist_scores.get((s,d,rs,sg)))
else:
print "nope", (s,d,rs,sg)
bleu_vec = robjs.FloatVector(bleu_lst)
nist_vec = robjs.FloatVector(nist_lst)
# compute correlations
b_fluency = robjs.r['cor'](bleu_vec, human_judgments_fluency)
b_accuracy = robjs.r['cor'](bleu_vec, human_judgments_acc)
b_average = robjs.r['cor'](bleu_vec, human_judgments_ave)
print "BLEU's Pearson correlation wrt fluency:", b_fluency
print "BLEU's Pearson correlation wrt accuracy:", b_accuracy
print "BLEU's Pearson correlation wrt the average of fluency and accuracy", b_average
nist_fluency = robjs.r['cor'](nist_vec, human_judgments_fluency)
nist_accuracy = robjs.r['cor'](nist_vec, human_judgments_acc)
nist_average = robjs.r['cor'](nist_vec, human_judgments_ave)
print "NIST's Pearson correlation wrt fluency:", nist_fluency
print "NIST's Pearson correlation wrt accuracy:", nist_accuracy
print "NIST's Pearson correlation wrt the average of fluency and accuracy", nist_average
================================================
FILE: ccgbank/bin/filter_feats.py
================================================
#!/usr/bin/env python
"""
(c) 2008 Dennis N. Mehay
Use this file any way you want, just please give the
author credit if it makes it into any research in any
meaningful way. I make no claims whatsoever about the
fitness or merchantability of this code. Use at
your own risk.
"""
import sys, math
from optparse import OptionParser as OP
pr = OP()
pr.add_option("-i","--input",type="string",help="input source [default=<stdin>].",\
default=sys.stdin)
pr.add_option("-o","--output",type="string",help="output location [default=<stdout>].",\
default=sys.stdout)
pr.add_option("-n","--number",type="int",\
help="number of times a category must have been seen to retain [default=5]",\
default=5)
pr.add_option("-f","--feat_freq",type="int",help="feature frequency cutoff\n"+\
"(how frequent must a feature be to retain it? [default=1])",default=1)
(opts,args) = pr.parse_args(sys.argv)
inf = opts.input
if not inf is sys.stdin:
inf = open(inf,'r')
outf = opts.output
if not outf is sys.stdout:
outf = open(outf,'w')
try:
tag_cnt = {}
ft_cnt = {}
lines = {}
ln_cnt = -1
for l in inf:
l = l.strip()
ln_cnt += 1
lines[ln_cnt] = l
l = l.split()
tag = l[0]
tag_cnt[tag] = tag_cnt.get(tag,0) + 1
feats = l[1:]
if ':' in feats[0] and not(feats[-1]==':'):
# real-valued features
feats = map(lambda a: (a[0:a.rfind(':')],a[a.rfind(':')+1:]), l[1:])
for (f,act) in feats:
ft_cnt[f] = ft_cnt.get(f,0) + 1 #math.fabs(float(act))
else:
# boolean
for f in feats:
ft_cnt[f] = ft_cnt.get(f,0) + 1
for i in range(ln_cnt + 1):
l = lines.get(i)
l = l.strip().split()
tag = l[0]
feats = l[1:]
if tag_cnt.get(tag) >= opts.number:
tag_printed = False
if ':' in feats[0] and not(feats[-1]==':'):
# real-valued features
feats = map(lambda a: (a[0:a.rfind(':')],a[a.rfind(':')+1:]), l[1:])
for (f,act) in feats:
if ft_cnt.get(f) >= opts.feat_freq:
if not tag_printed:
print >> outf, tag,
tag_printed = True
print >> outf, f+':'+act,
else:
# boolean
for f in feats:
if not tag_printed:
print >> outf, tag,
tag_printed = True
print >> outf, f,
print >> outf, ''
if i%100==0:
outf.flush()
finally:
outf.flush()
if not inf is sys.stdin:
inf.close()
if not outf is sys.stdout:
outf.close()
================================================
FILE: ccgbank/bin/find-betas-no-gold.py
================================================
"""
Given:
(1) a file supertagged words (OpenCCC file output format
as produced by, e.g., WordAndPOSDictionaryLabellingStrategy),
(2) a list (as a string) of tagging ambiguity levels (e.g.,
"1.4 1.6 1.8...") that represent the desired tag/word levels
(rounded off at the hundredths place to <=1.41, <=1.61, etc.),
(3) possibly tagging dictionaries (if needed),
and
(4) the corresponding 'K' parameters (e.g., "20 150" as in Clark and
Curran (2007)),
produce the list of betas that would produce those ambiguity
levels.
"""
import optparse
import sys
import decimal
import math
"""
A little on-the-fly class creation for iterating through multi-stag format files.
"""
class STIterator:
def __init__(self, f):
self.f = f
def next(self):
l = self.f.readline()
while l.strip()=='' and l:
l = self.f.readline()
if l:
lines = []
# not at eof.
if l.strip() != "<s>":
print >> sys.stderr, "line=", l.strip(), "ill-formed st output file."
raise Exception
else:
l = self.f.readline()
while l.strip() != "</s>" and l:
lines.append(l.strip())
l = self.f.readline()
if lines == []:
print >> sys.stderr, "line=", l.strip(), "ill-formed st output file."
raise Exception
else:
res = []
for ln in lines:
# each line is: word <howmanypos> POS1 ... POSN <howmanysupertags> ST1 ... STM
# we just want the word, first pos and supertags.
parts = ln.split()
wd = parts[0]
pos = parts[2]
how_many_poss = int(parts[1])
stgs = zip(parts[2+(how_many_poss*2)+1::2],map(lambda n: float(n), parts[2+(how_many_poss*2)+2::2]))
res.append((wd,pos,stgs))
return res
else:
raise StopIteration
def __iter__(self): return self
p = optparse.OptionParser()
p.add_option("-i", "--inputf", type="string", help="input source [default=<stdin>]", default=sys.stdin)
p.add_option("-o", "--outputf", type="string", help="output destination [default=<stdout>]", default=sys.stdout)
p.add_option("-a", "--ambiguities", type="string", help="a space delimited string of tagging ambiguity levels [default=\"1.2 1.4 1.6 1.8 2.0 2.5 3.0 3.5\"]",
default="1.2 1.4 1.6 1.8 2.0 2.5 3.0 3.5")
p.add_option("-K", "--Ks", type="string", help="a space delimited string of K values (only two) [default=\"20 150\", optional]", \
default="20 150")
p.add_option("-w", "--wordkeyeddict", type="string", help="word-keyed tagging dict [no default, optional]",\
default=None)
p.add_option("-p", "--poskeyeddict", type="string", help="POS-keyed tagging dict [no default, optional]",\
default=None)
(ops,args) = p.parse_args()
try:
# POS-keyed dict must be there if word-keyed one is.
assert (not (not (ops.wordkeyeddict is None) and (ops.poskeyeddict is None)))
except:
print >> sys.stderr, "need POS-keyed dict if using word-keyed dict."
sys.exit(-1)
inf = ops.inputf
if not inf is sys.stdin:
inf = open(inf, 'r')
outf = ops.outputf
if not outf is sys.stdout:
outf = open(outf, 'w')
input_sents = [s for s in STIterator(inf)]
wdict = {}
if not ops.wordkeyeddict is None:
entries = map(lambda l: l.split(), open(ops.wordkeyeddict, 'r').readlines())
wdict[entries[0]] = (int(entries[1]), set(entries[2:]))
pdict = {}
if not ops.poskeyeddict is None:
entries = map(lambda l: l.split(), open(ops.poskeyeddict, 'r').readlines())
posdict[entries[0]] = set(entries[1:])
try:
ambs = map(lambda a: float(a), ops.ambiguities.split())
betas = []
current_beta = 1.0
last_beta_above = None
last_beta_below = 0.0
total_tags = 0.0
total_words = 0.0
total_right = 0.0
ks = map(lambda kay: int(kay), ops.Ks.split())
for a in ambs:
current_beta = 1.0
last_beta_above = None
last_beta_below = 0.0
k = ks[0] if a!=ambs[-1] else ks[1]
found = False
while not found:
total_tags = 0.0
total_words = 0.0
total_right = 0.0
for insent in input_sents:
for lex in insent:
total_words += 1
w = lex[0]
pos = lex[1]
stags = lex[2]
if len(wdict)>0:
# filter with appropriate dictionary.
(freq,tags) = wdict.get(w,(0,set([])))
if freq >= k:
tags = tags
else:
tags = pdict.get(pos,set([]))
if len(tags)>0:
stags = filter(lambda st: st[0] in tags, stags)
best = stags[0][1]
# how many tags are there that made the beta cut-off?
total_tags += len(filter(lambda st: st[1] >= (current_beta * best), stags))
# round to the nearest hundredth
tags_per_word = (float(total_tags)/total_words)
decimal.getcontext().prec = 4
as_string = str(decimal.Decimal(str(tags_per_word)))
# we're looking for 1.40..., or 1.60..., etc. (as the case may be)
found = tags_per_word == a or ((as_string[:3]==str(a)[:3]) and (as_string[3]=='0'))
if found:
betas.append(current_beta)
current_beta = 1.0
last_beta = None
else:
# decide which direction to loosen the beta.
if tags_per_word > a:
# get more restrictive (i.e., larger beta).
if last_beta_above is None:
print >> sys.stderr, "error"
sys.exit(-1)
else:
tempbeta = current_beta
current_beta += math.fabs(last_beta_above - current_beta)/2.0
last_beta_below = tempbeta
else:
# get less restrictive (i.e., smaller beta)
tempbeta = current_beta
current_beta -= math.fabs(current_beta - last_beta_below)/2.0
last_beta_above = tempbeta
print >> outf, "betas", ' '.join(map(lambda b: str(b), betas))
except:
print "Unexpected error:", sys.exc_info()[0]
raise
finally:
# clean up, clean up...
if not inf is sys.stdin:
inf.close()
if not outf is sys.stdout:
outf.close()
================================================
FILE: ccgbank/bin/gen_parser_events_a
================================================
#!/bin/bash
for i in 02 03 04 05
do
ccg-build -Dsect=$i -f build-ps.xml gen-parser-events &> logs/log.gen.parser.events.$i
done
================================================
FILE: ccgbank/bin/gen_parser_events_b
================================================
#!/bin/bash
for i in 06 07 08 09
do
ccg-build -Dsect=$i -f build-ps.xml gen-parser-events &> logs/log.gen.parser.events.$i
done
================================================
FILE: ccgbank/bin/gen_parser_events_c
================================================
#!/bin/bash
for i in 10 11 12 13
do
ccg-build -Dsect=$i -f build-ps.xml gen-parser-events &> logs/log.gen.parser.events.$i
done
================================================
FILE: ccgbank/bin/gen_parser_events_d
================================================
#!/bin/bash
for i in 14 15 16 17
do
ccg-build -Dsect=$i -f build-ps.xml gen-parser-events &> logs/log.gen.parser.events.$i
done
================================================
FILE: ccgbank/bin/gen_parser_events_e
================================================
#!/bin/bash
for i in 18 19 20 21
do
ccg-build -Dsect=$i -f build-ps.xml gen-parser-events &> logs/log.gen.parser.events.$i
done
================================================
FILE: ccgbank/bin/gen_realizer_events_a
================================================
#!/bin/bash
for i in 02 03 04 05
do
ccg-build -Dsect=$i -f build-rz.xml gen-realizer-events &> logs/log.gen.realizer.events.$i
done
================================================
FILE: ccgbank/bin/gen_realizer_events_b
================================================
#!/bin/bash
for i in 06 07 08 09
do
ccg-build -Dsect=$i -f build-rz.xml gen-realizer-events &> logs/log.gen.realizer.events.$i
done
================================================
FILE: ccgbank/bin/gen_realizer_events_c
================================================
#!/bin/bash
for i in 10 11 12 13
do
ccg-build -Dsect=$i -f build-rz.xml gen-realizer-events &> logs/log.gen.realizer.events.$i
done
================================================
FILE: ccgbank/bin/gen_realizer_events_d
================================================
#!/bin/bash
for i in 14 15 16 17
do
ccg-build -Dsect=$i -f build-rz.xml gen-realizer-events &> logs/log.gen.realizer.events.$i
done
================================================
FILE: ccgbank/bin/gen_realizer_events_e
================================================
#!/bin/bash
for i in 18 19 20 21
do
ccg-build -Dsect=$i -f build-rz.xml gen-realizer-events &> logs/log.gen.realizer.events.$i
done
================================================
FILE: ccgbank/bin/get-text-from-mtc-style.py
================================================
"""
Gets the text from a MTC-style corpus.
Just looks for '<seg id=...> </seg>' segments.
<stdin> => <stdout>
(c) 2011 Dennis Nolan Mehay
[Insert LGPL here]
"""
import re, sys, codecs, os
pattern = re.compile(u"<seg id=[^ ]*>\\s*(.*)\\s*</seg>")
input = sys.stdin.read()
try:
import chardet
encoding = chardet.detect(input)['encoding']
except:
# this is what the original MTC corpus is encoded in.
encoding = "iso-8859-2"
input = input.decode(encoding)
streamWriter = codecs.lookup(encoding)[-1]
sys.stdout = streamWriter(sys.stdout)
for seg in pattern.findall(input):
sys.stdout.write(seg.strip() + os.linesep)
================================================
FILE: ccgbank/bin/get-truecase-list.py
================================================
"""
Requires Python >= 2.6x+ but < 3.0.
Takes in a stream (from stdin) or file of *tokenized* plain text (utf-8),
returns a list of words that occurred more than twice and were in upper-case
more frequently than not.
"""
import optparse, sys, codecs, os
from collections import defaultdict
def isAllUpper(st):
return ( st.upper() == st and st.lower() != st )
op = optparse.OptionParser()
op.add_option("-i", "--input", type="string", help="input file or stream (default = <stdin>)",
              default=sys.stdin)
op.add_option("-o", "--output", type="string", help="output file or stream (default = <stdout>)",
              default=sys.stdout)
op.add_option("-f", "--use_first", action="store_true", help="whether to use the first word of each "+\
              "sentence for counting uppercase vs. lowercase (default = False)", default=False)
(ops, args) = op.parse_args()
inf = ops.input
if not inf is sys.stdin:
    inf = open(inf, "rb")
use_first_word = ops.use_first
outf = ops.output
if not outf is sys.stdout:
    outf = codecs.open(outf, "wb", "utf-8")
else:
    # make stdout encode unicode as utf-8 on the way out
    streamWriter = codecs.lookup("UTF-8")[-1]
    outf = streamWriter(outf)
# map from: lowercased_word_key => specific_cased_form => count
wds2ulcounts = defaultdict(lambda: defaultdict(lambda: 0))
try:
    l = inf.readline()
    while l:
        line = l.strip().decode("utf-8")
        # BUG FIX: advance to the next input line *before* any 'continue'.
        # Previously a blank line skipped the readline at the bottom of the
        # loop body and the script spun forever on the same line.
        l = inf.readline()
        if line == u"":
            continue
        words = line.split()
        if not use_first_word:
            # drop the sentence-initial word: its capitalization is forced by
            # sentence position and says nothing about the word itself
            words = words[1:]
        for w in words:
            wds2ulcounts[w.lower()][w] += 1
    for (wdkey, frms) in wds2ulcounts.items():
        wdforms = frms.items()
        # total occurrences of this word across all of its cased forms
        sum_all = sum([cnt for (wf, cnt) in wdforms])
        if len(wdforms) == 1:
            # a single form: print it if seen more than twice and it carries an
            # uppercase character somewhere (this avoids printing punctuation
            # and always-lowercase words like 'the')
            most_freq = wdforms[0]
            if sum_all > 2 and most_freq[0].lower() != most_freq[0]:
                outf.write(most_freq[0] + os.linesep)
        else:
            if sum_all == 2:
                # two competing forms seen once each: cannot pick a winner
                continue
            # order forms by count, most frequent first. key= replaces the
            # Python-2-only cmp-based comparator; both sorts are stable, so
            # the resulting order is identical.
            wdforms.sort(key=lambda form_count: -form_count[1])
            most_freq = wdforms[0]
            second_most_freq = wdforms[1]
            if most_freq[1] == second_most_freq[1]:
                # tie between the two most frequent forms: no decision
                continue
            # only mention it if the most frequent form is uppercase-initial
            if most_freq[0][0].lower() != most_freq[0][0]:
                outf.write(most_freq[0] + os.linesep)
finally:
    # best-effort cleanup of both streams
    try:
        outf.close()
    except:
        pass
    try:
        inf.close()
    except:
        pass
================================================
FILE: ccgbank/bin/get-uniq-nbest.py
================================================
# Reads an OpenCCG n-best realizer output file (XML, argv[1]) with
# BeautifulSoup and, for every <seg> marked complete="true", prints the
# reference string followed by the set of unique realizations found among its
# <best> and <next> children. Finally reports the average number of unique
# paraphrases per segment (over ALL segments, complete or not).
from BeautifulSoup import BeautifulStoneSoup as BSS
import codecs
import sys, os
# make stdout encode unicode output as utf-8
streamWriter = codecs.lookup('utf-8')[-1]
sys.stdout = streamWriter(sys.stdout)
inf = open(sys.argv[1], "rb").read()
# optional argv[2] is a beta value, defaulting to 0.1
try:
    beta = float(sys.argv[2])
except:
    beta = 0.1
soup = BSS(inf)
segs = soup.findAll(lambda t: t.name == u'seg')
tot_paraphrases = 0.0
tot_segs = 0.0
for seg in segs:
    tot_segs += 1
    if seg.get('complete') == 'true':
        # NOTE(review): 'beta', 'best' and 'eye_dee' are computed but never
        # used below -- possibly leftovers from an earlier version.
        best = seg.find(lambda p: p.name == 'best')
        ref = seg.ref.find(text=True)
        eye_dee = seg.get(u'id')
        # uniquify the realization strings across <best> and all <next> elements
        paraphrases = set([p.find(text=True) for p in seg.findAll(lambda e: e.name in [u'best', u'next'])])
        tot_paraphrases += len(paraphrases)
        sys.stdout.write(ref + u' ||| ' + u' <-> '.join(paraphrases))
        sys.stdout.write(os.linesep)
print "ave paraphrases/seg", tot_paraphrases/tot_segs
================================================
FILE: ccgbank/bin/get_factors_from_parse.py
================================================
#!/usr/bin/env python
"""
(c) 2008 Dennis N. Mehay
Use this file any way you want, just please give the
author credit if it makes it into any research in any
meaningful way. I make no claims whatsoever about the
fitness or merchantability of this code. Use at
your own risk.
Take a file of CCGbank-style parses and get the words,
POSs and lexical cat's from them.
We also insert the word as the 'lemma', just as a placeholder.
So we have the following output form (for each parse
in the input file):
<word1>|<word1AsLemma>|<POS1>|<ccg_lexcat1> ... <wordN>|<wordNAsLemma>|<POSN>|<ccg_lexcatN>
Print out parse IDs (if there) as they are.
"""
import sys, re
import optparse

p = optparse.OptionParser()
p.add_option("-i", "--inputf", type="string", \
             help="Input file to be postprocessed (one parse per line with IDs preceding them) [defaults to stdin]",\
             default=None)
p.add_option("-o", "--outputf", type="string", \
             help="The output location [defaults to stdout]",\
             default=None)
(opts, args) = p.parse_args(sys.argv)

# Resolve streams: fall back to stdin/stdout when no path was supplied.
inf = open(opts.inputf, 'r') if opts.inputf is not None else sys.stdin
outf = open(opts.outputf, 'w') if opts.outputf is not None else sys.stdout

try:
    global lexNodePattern
    lexNodePattern = re.compile(r'(<L\s.*?>)+?')

    def getLexicalNodes(tree):
        """Return all lexical (<L ...>) nodes of a CCGbank-style parse tree
        given in string representation."""
        return re.findall(lexNodePattern, tree)

    for raw in inf:
        stripped = raw.strip()
        if "ID=" in raw:
            # pass parse-ID lines through untouched
            print >> outf, stripped
        elif stripped != '':
            factors = []
            for node in getLexicalNodes(stripped):
                fields = node.split()
                # fields[1]=lexcat, fields[2]=POS, fields[4]=word; the word
                # doubles as a placeholder lemma
                word, pos, supertag = fields[4], fields[2], fields[1]
                factors.append(word + '|' + word + '|' + pos + '|' + supertag)
            print >> outf, ' '.join(factors)
finally:
    if opts.inputf is not None:
        inf.close()
    if opts.outputf is not None:
        outf.close()
================================================
FILE: ccgbank/bin/get_just_words_from_ner_text.py
================================================
"""
Takes NE tagged text from stdin (assuming utf-8) and does just what it says: prints to stdout only the words.
"""
import codecs, sys, os
from optparse import OptionParser as OP
pr = OP()
pr.add_option("-o","--output",type="string",help="output location [default=<stdout>].",\
              default=sys.stdout)
(opts,args) = pr.parse_args(sys.argv)

# Resolve the output destination and wrap it so unicode goes out as UTF-8.
outf = opts.output
if outf is not sys.stdout:
    outf = open(outf,'w')
streamWriter = codecs.lookup("utf-8")[-1]
outw = streamWriter(outf)

for raw in sys.stdin:
    tokens = []
    for tok in raw.decode("utf-8").split():
        if u"_" not in tok:
            tokens.append(tok)
            continue
        pieces = tok.split(u"_")
        # drop the trailing NE tag ("word_TAG" -> "word"); keep the token
        # whole if the split somehow yields a single piece
        tokens.append(u"_".join(pieces[:-1]) if len(pieces) > 1 else tok)
    outw.write(u" ".join(tokens) + '\n')
    outf.flush()
if outf is not sys.stdout:
    outf.close()
================================================
FILE: ccgbank/bin/lowercase_tagged_text.py
================================================
#!/usr/bin/env python
"""
Takes a POS-tagged file and writes out the text with tokens lowercased except for
proper nouns. A file with the list of word-tag pairs can also be written out.
(c) 2010 Michael White
[insert LGPL here]
"""
import sys
from optparse import OptionParser as OP
pr = OP()
pr.add_option("-i","--input",type="string",help="input source [default=<stdin>]",\
              default=sys.stdin)
pr.add_option("-o","--output",type="string",help="output location [default=<stdout>]",\
              default=sys.stdout)
pr.add_option("-p","--pairs",type="string",help="output file for word-tag pairs",\
              default=None)
(opts,args) = pr.parse_args(sys.argv)
# resolve streams: paths from the command line, stdio otherwise
inf = opts.input
if not inf is sys.stdin:
    inf = open(inf,'r')
outf = opts.output
if not outf is sys.stdout:
    outf = open(outf,'w')
pairsf = opts.pairs
if not pairsf is None:
    pairsf = open(pairsf,'w')
try:
    sent = []
    pairs = []
    for l in inf:
        l = l.strip()
        l = l.split()
        # BUG FIX: a blank input line used to raise IndexError on l[0]
        if not l:
            continue
        if l[0] == '<s>':
            # sentence start: reset the accumulators
            sent = []
            pairs = []
        elif l[0] == '</s>':
            # sentence end: emit the text and, if requested, the pairs
            print >> outf, ' '.join(sent)
            if not pairsf is None:
                for (token,tag) in pairs:
                    print >> pairsf, token, tag
        else:
            # token line: <word> <tag> [... <tag2> ...]
            token,tag = l[0],l[1]
            # lowercase unless it is a proper noun or its second character is
            # uppercase (keeps acronyms like 'IBM' intact)
            if tag[:3] != 'NNP' and (len(token) <= 1 or not token[1].isupper()):
                token = token.lower()
            sent.append(token)
            pairs.append((token,tag))
            if len(l) >= 4:
                # a second tag in field 4 also gets a pair entry
                tag2 = l[3]
                pairs.append((token,tag2))
finally:
    if not inf is sys.stdin:
        inf.close()
    if not outf is sys.stdout:
        outf.close()
    if not pairsf is None:
        pairsf.close()
================================================
FILE: ccgbank/bin/merge-mtc-ids.py
================================================
"""
This program re-inserts the MTC unique IDs (sys+DOC+segment) into an auto-number-ID'ed parse of said
MTC (or similar) document produced by OpenCCG's 'ccg-parse'.
Usage: python merge-mtc-ids.py [output-of-OpenCCG-parser] [MTC-like-input-file] > [output-of-OpenCCG-parser-with-original-MTC-ids]
"""
import sys, os, re, codecs
try:
    # chardet is optional: used to sniff the MTC file's encoding if installed
    import chardet
except:
    chardet = None
from xml.etree.ElementTree import *
# <DOC docid="..." sysid="..."> and <seg id="...">...</seg> lines of the MTC file
doc_pattern = re.compile(u"<[Dd][Oo][Cc] docid=\"(.*)\" sysid=\"(.*)\">(.*)")
seg_pattern = re.compile(u"<seg id=\"?(.*)\"?>(.*)</seg>$")
openccg_in = sys.argv[1]
#mtc_in = codecs.open(sys.argv[2], "rb", "utf-8").read()
if not chardet is None:
    encoding = chardet.detect(open(sys.argv[2], "rb").read())['encoding']
else:
    # fallback guess when chardet is unavailable
    encoding = "ISO-8859-2"
mtc_in = codecs.open(sys.argv[2], "rb", encoding).readlines()
# turn stdout into a UTF-8 converting writer.
# NOTE(review): despite the comment above, the writer actually uses the
# *detected input* encoding while the XML declaration below claims UTF-8 --
# confirm which is intended.
streamWriter = codecs.lookup(encoding)[-1]
sys.stdout = streamWriter(sys.stdout)
output = sys.stdout
# map from auto-assigned ID to MTC ID.
autoid2mtcid = {}
# (sys, doc, seg-id, text) tuples collected in document order; consumed below
# in the same order the parser emitted its auto-numbered items
mtc_ids = []
for l in mtc_in:
    l = l.strip()
    if l.startswith("<DOC"):
        # remember the enclosing document/system for subsequent <seg> lines
        match = doc_pattern.findall(l)[0]
        (docid, sysid) = (match[0], match[1])
        curr_doc = docid
        curr_sys = sysid
    elif l.startswith("<seg"):
        match = seg_pattern.findall(l)[0]
        (segid, text) = (match[0], match[1])
        mtc_ids.append((curr_sys, curr_doc, segid, text.strip()))
output.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + os.linesep + "<regression>" + os.linesep)
# stream through the parser output; each <item> element receives the next MTC
# id triple (sys,doc,seg) as its 'info' attribute and is echoed into the
# <regression> wrapper
for event, elem in iterparse(openccg_in):
    if elem.tag.lower() == "item":
        next_mtc_id = mtc_ids.pop(0)
        elem.set("info", u",".join(next_mtc_id[:-1]))
        output.write(u"\t" + tostring(elem).strip() + os.linesep)
output.write("</regression>" + os.linesep)
================================================
FILE: ccgbank/bin/merge-stanford-morpha-with-pos.py
================================================
"""
Given two files: (1) the output of Stanford's re-implementation of 'morpha' and (2) the 'pairs' file of <word><space><POS>(<space><SEMCLASS>),
merge them into a morph.xml file.
"""
import sys, codecs, os
from optparse import OptionParser as OP
from xml.sax import saxutils
pr = OP()
pr.add_option("-m","--morpha_input",type="string",help="morpha input file (required arg)",\
              default=None)
pr.add_option("-p","--pairs_input",type="string",help="pairs input file (required arg)",\
              default=None)
pr.add_option("-o","--output",type="string",help="output location [default=<stdout>]",\
              default=sys.stdout)
(opts,args) = pr.parse_args(sys.argv)
# we do not check that you passed in the files (this is intended for internal use only, not as a
# user-friendly app).
pinf = codecs.open(opts.pairs_input,'rb','utf-8')
minf = codecs.open(opts.morpha_input,'rb','utf-8')
outf = opts.output
if not outf is sys.stdout:
    outf = codecs.open(outf,'wb','utf-8')
else:
    # wrap stdout so unicode is encoded as UTF-8 on the way out
    streamWriter = codecs.lookup("utf-8")[-1]
    outf = streamWriter(sys.stdout)
# the two files are read in lock-step: line i of the pairs file corresponds
# to line i of the morpha output
pl = pinf.readline()
ml = minf.readline()
outf.write('<?xml version="1.0" encoding="UTF-8"?>' + '\n')
outf.write('<morph name="novel">' + '\n')
entries = []
try:
    while pl and ml:
        pl = pl.strip()
        ml = ml.strip()
        pl = pl.split()
        ml = ml.split()
        # skip blank lines from line ending differences
        if len(pl) < 2:
            pl = pinf.readline()
            ml = minf.readline()
            continue
        # build one <entry .../> element as a list of string pieces; the word
        # and optional semantic class are XML-escaped, the POS is verbatim
        s = ['<entry word="']
        s.append(saxutils.escape(pl[0]))
        s.append('" pos="')
        s.append(pl[1])
        s.append('"')
        if len(pl) > 2:
            s.append(' class="')
            s.append(saxutils.escape(pl[2]))
            s.append('"')
        if ml[0].lower() != pl[0].lower() and not ("^" in ml[0] or "*****" in ml[0]): # add stem only if distinct.
            s.append(' stem="')
            s.append(saxutils.escape(ml[0]))
            s.append('"')
        s.append('/>')
        # store a copy of the piece list (s is reused conceptually per line)
        entries.append(s[:])
        pl = pinf.readline()
        ml = minf.readline()
    # sort/uniq
    entries.sort()
    last_one = None
    for e in entries:
        # emit each distinct entry once (entries are sorted, so duplicates
        # are adjacent)
        if last_one is None or e != last_one:
            outf.write(u''.join(e) + '\n')
        last_one = e
    outf.write('</morph>' + '\n')
finally:
    pinf.close()
    minf.close()
    if not outf is sys.stdout:
        outf.close()
================================================
FILE: ccgbank/bin/merge_pos_ne.py
================================================
#!/usr/bin/env python
"""
Takes a POS-tagged file and a file of the original, NE-tagged text and writes out a file of:
...
[word]<whitespace(s)>[POS]<whitespace(s)>[SEM_CLASS](if any)<whitespace(s)>[stem](if any)
...
(c) 2011 Dennis N. Mehay
[insert LGPL here]
"""
class POSOutputIter:
    """Iterates over a POS tagger's output, yielding one sentence at a time as
    the list of token lines found between a '<s>' line and a '</s>' line."""
    def __init__(self, filelikeobj):
        # file-like object positioned at the start of a <s>...</s> stream
        self.f = filelikeobj
    def __iter__(self): return self
    def next(self):
        try:
            nxtLn = self.f.readline().strip()
            nxtSent = []
            if nxtLn != "<s>":
                # EOF (readline returned '') or malformed input: stop
                raise StopIteration
            raw = self.f.readline()
            # BUG FIX: stop at EOF (raw == '') instead of looping forever on a
            # truncated final sentence that has no closing </s>.
            while raw and raw.strip() != "</s>":
                nxtSent.append(raw.strip())
                raw = self.f.readline()
            return nxtSent
        except:
            # NOTE(review): the bare except also hides real I/O errors; kept
            # for backward compatibility with the original best-effort style.
            raise StopIteration
    # BUG FIX: alias so the class is also a valid Python 3 iterator.
    __next__ = next
import sys, codecs, os
from optparse import OptionParser as OP
pr = OP()
pr.add_option("-p","--pos_in",type="string",help="POS-tagged input",\
              default=None)
pr.add_option("-n","--ner_tagged_in",type="string",help="NE-tagged input (no POS tags yet)",\
              default=None)
pr.add_option("-o","--output",type="string",help="output location [default=<stdout>]",\
              default=sys.stdout)
(opts,args) = pr.parse_args(sys.argv)
# BUG FIX: the option defaults are None (not sys.stdin), so the old check
# 'if not pinf is sys.stdin' always succeeded and codecs.open(None, ...)
# crashed when -p/-n were omitted. Fall back to stdin explicitly instead.
pinf = opts.pos_in
if pinf is None:
    pinf = sys.stdin
else:
    pinf = codecs.open(pinf,'r', 'utf-8')
ninf = opts.ner_tagged_in
if ninf is None:
    ninf = sys.stdin
else:
    ninf = codecs.open(ninf,'r', 'utf-8')
outf = opts.output
if not outf is sys.stdout:
    outf = codecs.open(outf,'wb', 'utf-8')
else:
    # encode unicode output as UTF-8 on the way to stdout
    streamWriter = codecs.lookup("utf-8")[-1]
    outf = streamWriter(sys.stdout)
try:
    # walk the POS-tagged sentences and the NE-tagged original in parallel,
    # one sentence per line of the NE-tagged file
    for posSent in POSOutputIter(pinf):
        origSent = ninf.readline()
        for (posTW,NETagW) in zip(posSent, origSent.split()):
            NETagWParts = NETagW.split(u"_")
            if len(NETagWParts) > 1:
                # the piece after the last '_' is the NE tag
                NETag = u"\t" + NETagWParts[-1]
            else:
                NETag = ""
            posTW = posTW.split()
            w = posTW[0]
            # every other field after the word is a tag; keep at most two
            tgs = posTW[1:][::2][:2]
            for t in tgs:
                outf.write(w + u"\t" + t + NETag + '\n')
finally:
    pinf.close()
    ninf.close()
    outf.close()
================================================
FILE: ccgbank/bin/my_unicode.py
================================================
"""
Useful functions for dealing with Unicode messiness that arises from dealing with messy
input (e.g., gibberish from the Multiple Translation Chinese corpus).
"""
import re, doctest
try:
    unichr
except NameError:
    # Python 3 compatibility: unichr was renamed to chr
    unichr = chr
# BUG FIX: removed a no-op eval(r'u"[\u0080-\uffff]+"') whose result was
# discarded and which had no side effects.
# Pattern matching characters illegal in XML: control characters, the
# non-characters U+FFFE/U+FFFF, and unpaired surrogate halves.
RE_XML_ILLEGAL = u'([\u0000-\u0008\u000b-\u000c\u000e-\u001f\ufffe-\uffff])' + \
                 u'|' + \
                 u'([%s-%s][^%s-%s])|([^%s-%s][%s-%s])|([%s-%s]$)|(^[%s-%s])' % \
                 (unichr(0xd800),unichr(0xdbff),unichr(0xdc00),unichr(0xdfff),
                  unichr(0xd800),unichr(0xdbff),unichr(0xdc00),unichr(0xdfff),
                  unichr(0xd800),unichr(0xdbff),unichr(0xdc00),unichr(0xdfff))
# BUG FIX: the pattern was a plain (byte) string, so in Python 2 the \uXXXX
# escapes were never processed; the u prefix makes it the intended negated
# class of valid XML characters.
invalid_char_re = re.compile(u"[^\u0009\u000a\u000d\u0020-\uD7FF\uE000-\uFFFD]")
def removeInvalidChars(text):
    """
    Text is a unicode string. All characters that are not valid XML characters
    are replaced with '?' (note: replaced, not removed, despite the name).
    """
    return re.sub(RE_XML_ILLEGAL, "?", text)

if __name__=="__main__":
    # run any doctests defined in this module
    doctest.testmod()
================================================
FILE: ccgbank/bin/nbest-mtc-to-bleu-nist.py
================================================
"""
This program takes the n-best realizer output as one file (with sys+DOC+segment IDs -- 'info' attributes),
the tb.xml parser output of OpenCCG (for grabbing the strings of those things that did not parse),
and creates an XML form suitable for use as a reference in the NIST-distributed BLEU script.
Usage: python nbest-mtc-to-bleu-nist.py [nbest-from-realizer] [parser-output(tb.xml)] [max-n] | [NIST/BLEU-compatible-multiref-file]
"""
import sys, os, re, codecs, tempfile, xml.sax.saxutils
try:
import chardet
except:
chardet = None
from xml.etree.ElementTree import *
from collections import defaultdict
# hack procedure. remove later.
def remove_ne(txt):
    # Strip the seven NE labels (each preceded by a single space):
    # Location, Person, Money, Percent, Date, Time, Organization.
    for label in (" LOCATION", " PERSON", " MONEY", " PERCENT",
                  " DATE", " TIME", " ORGANIZATION"):
        txt = txt.replace(label, "")
    return txt
doc_pattern = re.compile(u"<[Dd][Oo][Cc] docid=\"(.*)\" sysid=\"(.*)\">(.*)")
seg_pattern = re.compile(u"<seg id=\"?(.*)\"?>(.*)</seg>$")
openccg_all = open(sys.argv[1], "rb").read().replace("_&", "_&").replace(" & ", "& ")
parser_all = open(sys.argv[2], "rb").read().replace("_&", "_&a
gitextract_fqyg89tm/
├── .gitignore
├── AUTHORS
├── CHANGES
├── LICENSE
├── README.md
├── SAMPLE_GRAMMARS
├── TODO
├── bin/
│ ├── ccg-build
│ ├── ccg-build.bat
│ ├── ccg-cvr
│ ├── ccg-cvr.bat
│ ├── ccg-draw-graph
│ ├── ccg-draw-graph.bat
│ ├── ccg-draw-tree
│ ├── ccg-draw-tree.bat
│ ├── ccg-env
│ ├── ccg-env.bat
│ ├── ccg-grammardoc
│ ├── ccg-grammardoc.bat
│ ├── ccg-gt
│ ├── ccg-gt.bat
│ ├── ccg-ht-factors
│ ├── ccg-hypertagger
│ ├── ccg-hypertagger.bat
│ ├── ccg-parse
│ ├── ccg-parse.bat
│ ├── ccg-postagger
│ ├── ccg-postagger.bat
│ ├── ccg-realize
│ ├── ccg-realize.bat
│ ├── ccg-supertagger
│ ├── ccg-supertagger.bat
│ ├── ccg-test
│ ├── ccg-test.bat
│ ├── ccg-update
│ ├── ccg-update.bat
│ ├── ccg2xml
│ ├── ccg2xml.bat
│ ├── ccg_draw_tree.py
│ ├── dlf_parser.py
│ ├── tccg
│ ├── tccg.bat
│ ├── visccg
│ ├── visccg.bat
│ └── wccg
├── build.xml
├── ccg-format-grammars/
│ ├── arabic/
│ │ └── arabic.ccg
│ ├── inherit/
│ │ └── inherit.ccg
│ ├── tiny/
│ │ └── tiny.ccg
│ └── tinytiny/
│ └── tinytiny.ccg
├── ccgbank/
│ ├── bin/
│ │ ├── american-to-logical-quotes.py
│ │ ├── convert-mtc-systems.py
│ │ ├── convert-spaces-to-newlines.py
│ │ ├── convert_all
│ │ ├── correlate-to-judgments.py
│ │ ├── filter_feats.py
│ │ ├── find-betas-no-gold.py
│ │ ├── gen_parser_events_a
│ │ ├── gen_parser_events_b
│ │ ├── gen_parser_events_c
│ │ ├── gen_parser_events_d
│ │ ├── gen_parser_events_e
│ │ ├── gen_realizer_events_a
│ │ ├── gen_realizer_events_b
│ │ ├── gen_realizer_events_c
│ │ ├── gen_realizer_events_d
│ │ ├── gen_realizer_events_e
│ │ ├── get-text-from-mtc-style.py
│ │ ├── get-truecase-list.py
│ │ ├── get-uniq-nbest.py
│ │ ├── get_factors_from_parse.py
│ │ ├── get_just_words_from_ner_text.py
│ │ ├── lowercase_tagged_text.py
│ │ ├── merge-mtc-ids.py
│ │ ├── merge-stanford-morpha-with-pos.py
│ │ ├── merge_pos_ne.py
│ │ ├── my_unicode.py
│ │ ├── nbest-mtc-to-bleu-nist.py
│ │ ├── ner/
│ │ │ ├── NERApp/
│ │ │ │ └── src/
│ │ │ │ └── nerapp/
│ │ │ │ └── NERApp.java
│ │ │ ├── build-ner-api.properties
│ │ │ ├── build-ner-api.xml
│ │ │ ├── ner-tag.sh
│ │ │ ├── ner_word.py
│ │ │ └── post-process-stanford-ner.py
│ │ ├── normalize_text.py
│ │ ├── post-process-metricsmatr.py
│ │ ├── prepare-for-stanford-morpha.py
│ │ ├── reverse-spaces-to-newlines.py
│ │ ├── run-all-bleu.sh
│ │ ├── stem_nns_vbx
│ │ ├── toUTF-8.py
│ │ └── write_morph.py
│ ├── build-ht.properties
│ ├── build-ht.xml
│ ├── build-models.properties
│ ├── build-models.xml
│ ├── build-original.properties
│ ├── build-original.xml
│ ├── build-ps.properties
│ ├── build-ps.xml
│ ├── build-release.xml
│ ├── build-rz.properties
│ ├── build-rz.xml
│ ├── build-st.properties
│ ├── build-st.xml
│ ├── build.properties
│ ├── build.xml
│ ├── data/
│ │ ├── README
│ │ ├── get_wsj_nns_vb
│ │ ├── novel/
│ │ │ └── two-sents
│ │ ├── sample/
│ │ │ └── AUTO/
│ │ │ └── 00/
│ │ │ └── wsj_0001.auto
│ │ ├── stem_wsj_nns_vb
│ │ ├── wsj-nns-vb
│ │ ├── wsj-nns-vb-stems
│ │ └── wsj_0595Corrected.auto
│ ├── extract/
│ │ ├── add-chunks.xsl
│ │ ├── convert-to-graph.xsl
│ │ ├── convert-to-hlds.xsl
│ │ ├── grammar.xml
│ │ └── raise-nodes.xsl
│ ├── models/
│ │ ├── hypertagger/
│ │ │ ├── ht-prior.flm
│ │ │ ├── ht.config
│ │ │ ├── ht2.config
│ │ │ ├── ht2.train.config
│ │ │ ├── pos.config
│ │ │ ├── posprior.flm
│ │ │ └── vocab.flm
│ │ ├── parser/
│ │ │ ├── binary.flm
│ │ │ ├── gen-events.prefs
│ │ │ ├── leaf.flm
│ │ │ ├── model.init
│ │ │ ├── parse.prefs
│ │ │ ├── top.flm
│ │ │ ├── unary.flm
│ │ │ └── vocab.flm
│ │ ├── realizer/
│ │ │ ├── alph.init
│ │ │ ├── gen-events.prefs
│ │ │ ├── model.init
│ │ │ ├── rz-test.prefs
│ │ │ └── stp3.flm
│ │ └── supertagger/
│ │ ├── pos.config
│ │ ├── posprior.flm
│ │ ├── st.config
│ │ ├── st.config.train
│ │ ├── st.noprior.config
│ │ ├── stprior.flm
│ │ └── vocab.flm
│ ├── original/
│ │ └── models/
│ │ ├── postagger/
│ │ │ ├── pos.config
│ │ │ └── posprior.flm
│ │ └── supertagger/
│ │ ├── st.config
│ │ ├── stprior.flm
│ │ └── vocab.flm
│ ├── plugins/
│ │ ├── MyGenSynScorer.java
│ │ ├── MyNgramCombo.java
│ │ ├── MyNgramGenSynProduct.java
│ │ ├── MyNgramPrecisionBaselineGenInterp.java
│ │ ├── MyNgramPrecisionPerceptronInterp.java
│ │ ├── MyParserPerceptronScorer.java
│ │ ├── MyRealizerPerceptronScorer.java
│ │ ├── MySynAgrFeatureExtractor.java
│ │ ├── MySynSemAgrFeatureExtractor.java
│ │ └── MySynSemFeatureExtractor.java
│ ├── stanford-nlp/
│ │ ├── classifiers/
│ │ │ └── stanfordner-README
│ │ └── stanfordnlp-README
│ └── templates/
│ ├── addFilterLexFeats.xsl
│ ├── addStems.xsl
│ ├── adjustAppos.xsl
│ ├── adjustCandCcats1.xsl
│ ├── adjustCats.xsl
│ ├── adjustParenthetical.xsl
│ ├── adjustReportedSpeech.xsl
│ ├── adjustRoles.xsl
│ ├── adv-placement.xsl
│ ├── agr-macroInsert.xsl
│ ├── allotIdLeaf.xsl
│ ├── allotIdTree.xsl
│ ├── allotIndexRel.xsl
│ ├── anim-macroInsert.xsl
│ ├── annotateAppos-Dash.xsl
│ ├── annotateAppos1.xsl
│ ├── annotateAppos2.xsl
│ ├── annotateAppos3.xsl
│ ├── annotateBrackets.xsl
│ ├── annotateColons.xsl
│ ├── annotateDots.xsl
│ ├── annotateExtraposedAppos.xsl
│ ├── annotateNom-AdjConj.xsl
│ ├── annotateParentheticals1.xsl
│ ├── annotateParentheticals2.xsl
│ ├── annotatePlace.xsl
│ ├── annotatePrtConjs.xsl
│ ├── annotateQuotes.xsl
│ ├── annotateReportedSpeech.xsl
│ ├── annotateStrayAppos.xsl
│ ├── annotateVPCommas.xsl
│ ├── balanceAppos.xsl
│ ├── balanceDash-Paren.xsl
│ ├── catCheck.xsl
│ ├── ccgRules.xsl
│ ├── changePunct.xsl
│ ├── closedCatInsert.xsl
│ ├── collapseMWUFull.xsl
│ ├── collapseMWUPart.xsl
│ ├── collapseMWUSharedTask.xsl
│ ├── computeCats.xsl
│ ├── convTags.xsl
│ ├── correctMistakes1.xsl
│ ├── correctPPHeads.xsl
│ ├── exportToAuto.xsl
│ ├── filterLex.xsl
│ ├── find-s-back-n.xsl
│ ├── genchal11-out.xsl
│ ├── inferConjRules.xsl
│ ├── insertLF.xsl
│ ├── insertOrigPunctsLF.xsl
│ ├── insertPTBInfo.xsl
│ ├── insertPunctLF-PosMod.xsl
│ ├── insertPunctLF.xsl
│ ├── insertQuoteSemClassInfo.xsl
│ ├── insertSemFeats.xsl
│ ├── introduceMMExtns.xsl
│ ├── labelAppos.xsl
│ ├── labelConj1.xsl
│ ├── labelConj2.xsl
│ ├── labelConj3.xsl
│ ├── labelPlace1.xsl
│ ├── labelPlace2.xsl
│ ├── labelPuncts.xsl
│ ├── lexExtr.xsl
│ ├── macroInsert.xsl
│ ├── macroLexDef.xsl
│ ├── markMistakes.xsl
│ ├── markUnmatched.xsl
│ ├── mergeMorph.xsl
│ ├── morphExtr.xsl
│ ├── normPTBTags.xsl
│ ├── normPunctPos.xsl
│ ├── origPunctRules.xsl
│ ├── overtWHLexRels.xsl
│ ├── overtWHPronouns.xsl
│ ├── phraseExtractor.xsl
│ ├── preSentAdj.xsl
│ ├── punctLexConjRules.xsl
│ ├── reinsertPTBInfo.xsl
│ ├── repairUnmatched.xsl
│ ├── replaceColons.xsl
│ ├── rulesExtr.xsl
│ ├── sentFinalPuncts.xsl
│ ├── trueCaser.xsl
│ └── uncurryBareParse.xsl
├── devel/
│ ├── BEN.TODO
│ └── schedule.txt
├── docs/
│ ├── build.xml
│ ├── ccgbank-README
│ ├── guide/
│ │ ├── build.xml
│ │ ├── cgloss4e.sty
│ │ ├── gb4e.sty
│ │ ├── guide.tex
│ │ ├── openccg.bib
│ │ └── openccg.sty
│ ├── index.html
│ ├── maxent.cpp.patch
│ ├── realizer/
│ │ ├── build.xml
│ │ ├── cgloss4e.sty
│ │ ├── gb4e.sty
│ │ ├── manual.tex
│ │ ├── openccg.sty
│ │ └── refs.bib
│ ├── style.css
│ └── taggers-README
├── grammars/
│ ├── add-chunks.xsl
│ ├── add-family-members.xsl
│ ├── append.xsl
│ ├── categories.xsd
│ ├── comic/
│ │ ├── build.xml
│ │ ├── dict.xml
│ │ ├── dict.xsl
│ │ ├── grammar.xml
│ │ ├── lexicon-base.xsl
│ │ ├── lexicon.xml
│ │ ├── morph.xml
│ │ ├── rules-base.xml
│ │ ├── rules.xml
│ │ ├── testbed.xml
│ │ ├── types-extras.xml
│ │ └── types.xml
│ ├── convert-lists.xsl
│ ├── convert-to-graph.xsl
│ ├── convert-to-hlds.xsl
│ ├── core-en/
│ │ ├── add-chunks.xsl
│ │ ├── add-intonation-info.xsl
│ │ ├── adj.xsl
│ │ ├── adv.xsl
│ │ ├── auxv.xsl
│ │ ├── cats.xsl
│ │ ├── conj.xsl
│ │ ├── derive-features.xsl
│ │ ├── det.xsl
│ │ ├── dict.xsl
│ │ ├── drop-features.xsl
│ │ ├── lexicon.xsl
│ │ ├── misc.xsl
│ │ ├── np.xsl
│ │ ├── pp.xsl
│ │ ├── punct.xsl
│ │ ├── raise-nodes.xsl
│ │ ├── templates.xsl
│ │ ├── types.xml
│ │ ├── unary-rules.xsl
│ │ └── v.xsl
│ ├── dict.xsd
│ ├── extract-morph.xsl
│ ├── flights/
│ │ ├── build.xml
│ │ ├── dict.xml
│ │ ├── dict.xsl
│ │ ├── flairs.xml
│ │ ├── grammar.xml
│ │ ├── lexicon-base.xsl
│ │ ├── lexicon.xml
│ │ ├── morph.xml
│ │ ├── nina107.xml
│ │ ├── rules-base.xml
│ │ ├── rules.xml
│ │ ├── testbed.xml
│ │ ├── types-extras.xml
│ │ ├── types.xml
│ │ └── vera.xml
│ ├── grammar.xsd
│ ├── hlds.xsd
│ ├── lexicon.xsd
│ ├── mini-basque/
│ │ ├── build.xml
│ │ ├── dict.xml
│ │ ├── grammar.xml
│ │ ├── lexicon-base.xml
│ │ ├── lexicon.xml
│ │ ├── morph.xml
│ │ ├── parameters.xml
│ │ ├── preset-families.xml
│ │ ├── rules.xml
│ │ ├── testbed.out
│ │ ├── testbed.xml
│ │ └── types.xml
│ ├── mini-dyirbal/
│ │ ├── build.xml
│ │ ├── dict.xml
│ │ ├── grammar.xml
│ │ ├── lexicon-base.xml
│ │ ├── lexicon.xml
│ │ ├── morph.xml
│ │ ├── parameters.xml
│ │ ├── preset-families.xml
│ │ ├── rules.xml
│ │ ├── testbed.out
│ │ ├── testbed.xml
│ │ └── types.xml
│ ├── mini-english/
│ │ ├── build.xml
│ │ ├── dict.xml
│ │ ├── grammar.xml
│ │ ├── lexicon-base.xml
│ │ ├── lexicon.xml
│ │ ├── morph.xml
│ │ ├── parameters.xml
│ │ ├── preset-families.xml
│ │ ├── rules.xml
│ │ ├── testbed.out
│ │ ├── testbed.xml
│ │ └── types.xml
│ ├── mini-inuit/
│ │ ├── build.xml
│ │ ├── dict.xml
│ │ ├── grammar.xml
│ │ ├── lexicon-base.xml
│ │ ├── lexicon.xml
│ │ ├── morph.xml
│ │ ├── parameters.xml
│ │ ├── preset-families.xml
│ │ ├── rules.xml
│ │ ├── testbed.out
│ │ ├── testbed.xml
│ │ └── types.xml
│ ├── mini-nezperce/
│ │ ├── build.xml
│ │ ├── parameters.xml
│ │ ├── preset-families.xml
│ │ └── types.xml
│ ├── mini-tagalog/
│ │ ├── build.xml
│ │ ├── dict.xml
│ │ ├── grammar.xml
│ │ ├── lexicon-base.xml
│ │ ├── lexicon.xml
│ │ ├── morph.xml
│ │ ├── parameters.xml
│ │ ├── preset-families.xml
│ │ ├── rules.xml
│ │ ├── testbed.out
│ │ ├── testbed.xml
│ │ └── types.xml
│ ├── mini-turkish/
│ │ ├── build.xml
│ │ ├── dict.xml
│ │ ├── grammar.xml
│ │ ├── lexicon-base.xml
│ │ ├── lexicon.xml
│ │ ├── morph.xml
│ │ ├── parameters.xml
│ │ ├── preset-families.xml
│ │ ├── rules.xml
│ │ ├── testbed.out
│ │ ├── testbed.xml
│ │ └── types.xml
│ ├── morph.xsd
│ ├── parameters.xsd
│ ├── parametric-lexicon.xsl
│ ├── parametric-types.xsl
│ ├── raise-nodes.xsl
│ ├── routes/
│ │ ├── build.xml
│ │ ├── dict.xml
│ │ ├── dlf_test.xml
│ │ ├── grammar.xml
│ │ ├── lexicon-base.xml
│ │ ├── lexicon.xml
│ │ ├── morph.xml
│ │ ├── rules.xml
│ │ ├── testbed.xml
│ │ └── types.xml
│ ├── rules.xsd
│ ├── simplify-lists.xsl
│ ├── tiny/
│ │ ├── build.xml
│ │ ├── grammar.xml
│ │ ├── lexicon.xml
│ │ ├── morph.xml
│ │ ├── rules.xml
│ │ ├── testbed.xml
│ │ └── types.xml
│ ├── tokens.xsd
│ ├── treeify-lists.xsl
│ ├── types.xsd
│ └── worldcup/
│ ├── add-chunks.xsl
│ ├── build.xml
│ ├── dict.xml
│ ├── grammar.xml
│ ├── lexicon-base.xsl
│ ├── lexicon.xml
│ ├── morph.xml
│ ├── raise-nodes.xsl
│ ├── rules.xml
│ └── testbed.xml
├── lib/
│ ├── ASL
│ ├── LGPL
│ ├── LIBNOTES
│ ├── MIT
│ ├── SUN
│ ├── jdom.license
│ └── jline.license
├── pom.xml
├── src/
│ ├── ccg2xml/
│ │ ├── README
│ │ ├── Tree.py
│ │ ├── arabic.ccg
│ │ ├── build.xml
│ │ ├── ccg.ply
│ │ ├── ccg_editor.py
│ │ ├── convert-ply.py
│ │ ├── grammar_template.ccg
│ │ ├── lex.py
│ │ └── yacc.py
│ ├── kenlm/
│ │ ├── COPYING
│ │ ├── COPYING.LESSER
│ │ ├── LICENSE
│ │ ├── README
│ │ ├── build_jnilib.sh
│ │ ├── clean_query_only.sh
│ │ ├── jni/
│ │ │ └── wrap.cc
│ │ ├── lm/
│ │ │ ├── Jamfile
│ │ │ ├── bhiksha.cc
│ │ │ ├── bhiksha.hh
│ │ │ ├── binary_format.cc
│ │ │ ├── binary_format.hh
│ │ │ ├── blank.hh
│ │ │ ├── build_binary_main.cc
│ │ │ ├── config.cc
│ │ │ ├── config.hh
│ │ │ ├── enumerate_vocab.hh
│ │ │ ├── facade.hh
│ │ │ ├── fragment_main.cc
│ │ │ ├── kenlm_max_order_main.cc
│ │ │ ├── left.hh
│ │ │ ├── left_test.cc
│ │ │ ├── lm_exception.cc
│ │ │ ├── lm_exception.hh
│ │ │ ├── max_order.hh
│ │ │ ├── model.cc
│ │ │ ├── model.hh
│ │ │ ├── model_test.cc
│ │ │ ├── model_type.hh
│ │ │ ├── ngram_query.hh
│ │ │ ├── partial.hh
│ │ │ ├── partial_test.cc
│ │ │ ├── quantize.cc
│ │ │ ├── quantize.hh
│ │ │ ├── query_main.cc
│ │ │ ├── read_arpa.cc
│ │ │ ├── read_arpa.hh
│ │ │ ├── return.hh
│ │ │ ├── search_hashed.cc
│ │ │ ├── search_hashed.hh
│ │ │ ├── search_trie.cc
│ │ │ ├── search_trie.hh
│ │ │ ├── sizes.cc
│ │ │ ├── sizes.hh
│ │ │ ├── state.hh
│ │ │ ├── test.arpa
│ │ │ ├── test_nounk.arpa
│ │ │ ├── trie.cc
│ │ │ ├── trie.hh
│ │ │ ├── trie_sort.cc
│ │ │ ├── trie_sort.hh
│ │ │ ├── value.hh
│ │ │ ├── value_build.cc
│ │ │ ├── value_build.hh
│ │ │ ├── virtual_interface.cc
│ │ │ ├── virtual_interface.hh
│ │ │ ├── weights.hh
│ │ │ └── word_index.hh
│ │ └── util/
│ │ ├── Jamfile
│ │ ├── bit_packing.cc
│ │ ├── bit_packing.hh
│ │ ├── bit_packing_test.cc
│ │ ├── double-conversion/
│ │ │ ├── Jamfile
│ │ │ ├── LICENSE
│ │ │ ├── bignum-dtoa.cc
│ │ │ ├── bignum-dtoa.h
│ │ │ ├── bignum.cc
│ │ │ ├── bignum.h
│ │ │ ├── cached-powers.cc
│ │ │ ├── cached-powers.h
│ │ │ ├── diy-fp.cc
│ │ │ ├── diy-fp.h
│ │ │ ├── double-conversion.cc
│ │ │ ├── double-conversion.h
│ │ │ ├── fast-dtoa.cc
│ │ │ ├── fast-dtoa.h
│ │ │ ├── fixed-dtoa.cc
│ │ │ ├── fixed-dtoa.h
│ │ │ ├── ieee.h
│ │ │ ├── strtod.cc
│ │ │ ├── strtod.h
│ │ │ └── utils.h
│ │ ├── ersatz_progress.cc
│ │ ├── ersatz_progress.hh
│ │ ├── exception.cc
│ │ ├── exception.hh
│ │ ├── fake_ofstream.hh
│ │ ├── file.cc
│ │ ├── file.hh
│ │ ├── file_piece.cc
│ │ ├── file_piece.hh
│ │ ├── file_piece_test.cc
│ │ ├── getopt.c
│ │ ├── getopt.hh
│ │ ├── have.hh
│ │ ├── joint_sort.hh
│ │ ├── joint_sort_test.cc
│ │ ├── mmap.cc
│ │ ├── mmap.hh
│ │ ├── multi_intersection.hh
│ │ ├── multi_intersection_test.cc
│ │ ├── murmur_hash.cc
│ │ ├── murmur_hash.hh
│ │ ├── pcqueue.hh
│ │ ├── pool.cc
│ │ ├── pool.hh
│ │ ├── probing_hash_table.hh
│ │ ├── probing_hash_table_test.cc
│ │ ├── proxy_iterator.hh
│ │ ├── read_compressed.cc
│ │ ├── read_compressed.hh
│ │ ├── read_compressed_test.cc
│ │ ├── scoped.cc
│ │ ├── scoped.hh
│ │ ├── sized_iterator.hh
│ │ ├── sorted_uniform.hh
│ │ ├── sorted_uniform_test.cc
│ │ ├── string_piece.cc
│ │ ├── string_piece.hh
│ │ ├── string_piece_hash.hh
│ │ ├── thread_pool.hh
│ │ ├── tokenize_piece.hh
│ │ ├── tokenize_piece_test.cc
│ │ ├── usage.cc
│ │ └── usage.hh
│ ├── opennlp/
│ │ ├── ccg/
│ │ │ ├── Parse.java
│ │ │ ├── Realize.java
│ │ │ ├── TextCCG.java
│ │ │ ├── WebCCG.java
│ │ │ ├── alignment/
│ │ │ │ ├── AbstractEncodingScheme.java
│ │ │ │ ├── Alignment.java
│ │ │ │ ├── Alignments.java
│ │ │ │ ├── EncodingScheme.java
│ │ │ │ ├── IdentifiedPhraseReader.java
│ │ │ │ ├── IdentifiedPhraseWriter.java
│ │ │ │ ├── IndexBase.java
│ │ │ │ ├── Mapping.java
│ │ │ │ ├── MappingFormat.java
│ │ │ │ ├── MappingGroup.java
│ │ │ │ ├── MappingReader.java
│ │ │ │ ├── MappingWriter.java
│ │ │ │ ├── MosesEncodingScheme.java
│ │ │ │ ├── NAACLEncodingScheme.java
│ │ │ │ ├── Phrase.java
│ │ │ │ ├── PhrasePosition.java
│ │ │ │ ├── PhraseReader.java
│ │ │ │ ├── PhraseWriter.java
│ │ │ │ ├── Status.java
│ │ │ │ └── package.html
│ │ │ ├── disjunctivizer/
│ │ │ │ ├── AlignedEdgeFilter.java
│ │ │ │ ├── Disjunctivizer.java
│ │ │ │ ├── EdgeMatchFilter.java
│ │ │ │ ├── FilteredLFEdgeSet.java
│ │ │ │ ├── LFGraphDifference.java
│ │ │ │ ├── LabelMatchFilter.java
│ │ │ │ ├── MatchType.java
│ │ │ │ ├── MatchTypeFilter.java
│ │ │ │ ├── VertexMatchFilter.java
│ │ │ │ └── package.html
│ │ │ ├── grammar/
│ │ │ │ ├── AbstractApplicationRule.java
│ │ │ │ ├── AbstractCompositionRule.java
│ │ │ │ ├── AbstractRule.java
│ │ │ │ ├── AbstractSubstitutionRule.java
│ │ │ │ ├── AbstractTypeRaisingRule.java
│ │ │ │ ├── BackwardApplication.java
│ │ │ │ ├── BackwardComposition.java
│ │ │ │ ├── BackwardSubstitution.java
│ │ │ │ ├── BackwardTypeRaising.java
│ │ │ │ ├── ForwardApplication.java
│ │ │ │ ├── ForwardComposition.java
│ │ │ │ ├── ForwardSubstitution.java
│ │ │ │ ├── ForwardTypeRaising.java
│ │ │ │ ├── FragmentJoining.java
│ │ │ │ ├── GlueRule.java
│ │ │ │ ├── Grammar.java
│ │ │ │ ├── Rule.java
│ │ │ │ ├── RuleGroup.java
│ │ │ │ ├── TypeChangingRule.java
│ │ │ │ ├── Types.java
│ │ │ │ └── to-apml.xsl
│ │ │ ├── grammardoc/
│ │ │ │ ├── AbstractDocumenter.java
│ │ │ │ ├── Documenter.java
│ │ │ │ ├── DocumenterContext.java
│ │ │ │ ├── DocumenterException.java
│ │ │ │ ├── DocumenterFactory.java
│ │ │ │ ├── DocumenterName.java
│ │ │ │ ├── DocumenterNotFoundException.java
│ │ │ │ ├── DocumenterSourceException.java
│ │ │ │ ├── GrammarDoc.java
│ │ │ │ ├── GrammarDocException.java
│ │ │ │ ├── SourceGrammar.java
│ │ │ │ ├── SourceGrammarFile.java
│ │ │ │ ├── SourceGrammarFileType.java
│ │ │ │ └── html/
│ │ │ │ ├── HTMLDocumenter.java
│ │ │ │ ├── base.xsl
│ │ │ │ ├── categories.xsl
│ │ │ │ ├── comments.xsl
│ │ │ │ ├── grammar.xsl
│ │ │ │ ├── grammardoc.css
│ │ │ │ ├── lexicon.js
│ │ │ │ ├── lexicon.xsl
│ │ │ │ ├── morph.xsl
│ │ │ │ ├── navigation.xsl
│ │ │ │ ├── rules.xsl
│ │ │ │ └── types.xsl
│ │ │ ├── hylo/
│ │ │ │ ├── Alt.java
│ │ │ │ ├── Box.java
│ │ │ │ ├── Compacter.java
│ │ │ │ ├── Converter.java
│ │ │ │ ├── Diamond.java
│ │ │ │ ├── EPsScorer.java
│ │ │ │ ├── EnglishAgreementExtractor.java
│ │ │ │ ├── Flattener.java
│ │ │ │ ├── HyloAtom.java
│ │ │ │ ├── HyloFormula.java
│ │ │ │ ├── HyloHelper.java
│ │ │ │ ├── HyloVar.java
│ │ │ │ ├── LexDepFeatureExtractor.java
│ │ │ │ ├── LexDependency.java
│ │ │ │ ├── ModalOp.java
│ │ │ │ ├── Mode.java
│ │ │ │ ├── ModeLabel.java
│ │ │ │ ├── ModeVar.java
│ │ │ │ ├── Nominal.java
│ │ │ │ ├── NominalAtom.java
│ │ │ │ ├── NominalVar.java
│ │ │ │ ├── Op.java
│ │ │ │ ├── Proposition.java
│ │ │ │ ├── SatOp.java
│ │ │ │ └── graph/
│ │ │ │ ├── DefaultLFEdgeFactory.java
│ │ │ │ ├── LFEdge.java
│ │ │ │ ├── LFEdgeFactory.java
│ │ │ │ ├── LFEdgeLabel.java
│ │ │ │ ├── LFGraph.java
│ │ │ │ ├── LFGraphFactory.java
│ │ │ │ ├── LFVertex.java
│ │ │ │ ├── LFVertexType.java
│ │ │ │ └── package.html
│ │ │ ├── lexicon/
│ │ │ │ ├── DataItem.java
│ │ │ │ ├── DefaultTokenizer.java
│ │ │ │ ├── EnglishExpander.java
│ │ │ │ ├── EntriesItem.java
│ │ │ │ ├── FactorChainWord.java
│ │ │ │ ├── FactorKey.java
│ │ │ │ ├── Family.java
│ │ │ │ ├── FullWord.java
│ │ │ │ ├── LexException.java
│ │ │ │ ├── Lexicon.java
│ │ │ │ ├── LicensingFeature.java
│ │ │ │ ├── ListPairWord.java
│ │ │ │ ├── MacroAdder.java
│ │ │ │ ├── MacroItem.java
│ │ │ │ ├── MorphItem.java
│ │ │ │ ├── SimpleWord.java
│ │ │ │ ├── SupertaggerAdapter.java
│ │ │ │ ├── Tokenizer.java
│ │ │ │ ├── TrueCaser.java
│ │ │ │ ├── Word.java
│ │ │ │ └── WordWithPitchAccent.java
│ │ │ ├── ngrams/
│ │ │ │ ├── AAnFilter.java
│ │ │ │ ├── AbstractStandardNgramModel.java
│ │ │ │ ├── ConditionalProbabilityTable.java
│ │ │ │ ├── FactoredNgramModel.java
│ │ │ │ ├── FactoredNgramModelFamily.java
│ │ │ │ ├── KenNgramModel.java
│ │ │ │ ├── LinearNgramScorerCombo.java
│ │ │ │ ├── NgramDiversityPruningStrategy.java
│ │ │ │ ├── NgramFilter.java
│ │ │ │ ├── NgramPrecisionModel.java
│ │ │ │ ├── NgramScorer.java
│ │ │ │ ├── RepetitionScorer.java
│ │ │ │ ├── Reversible.java
│ │ │ │ ├── SRILMNgramModel.java
│ │ │ │ ├── SRILMNgramModelType.java
│ │ │ │ ├── SRILM_FactoredScorerMaker.java
│ │ │ │ ├── SRILM_ScorerMaker.java
│ │ │ │ ├── SelfParaphraseBiaser.java
│ │ │ │ ├── SignScorerInterpolation.java
│ │ │ │ ├── SignScorerProduct.java
│ │ │ │ ├── StandardNgramModel.java
│ │ │ │ └── kenlm/
│ │ │ │ ├── MurmurHash.java
│ │ │ │ └── jni/
│ │ │ │ └── KenLM.java
│ │ │ ├── parse/
│ │ │ │ ├── Chart.java
│ │ │ │ ├── DerivationHistory.java
│ │ │ │ ├── Edge.java
│ │ │ │ ├── EdgeHash.java
│ │ │ │ ├── ParseException.java
│ │ │ │ ├── Parser.java
│ │ │ │ ├── Supertagger.java
│ │ │ │ ├── postagger/
│ │ │ │ │ ├── BasicPOSTagger.java
│ │ │ │ │ ├── DummyPOSTagger.java
│ │ │ │ │ ├── POSTagSequenceGetter.java
│ │ │ │ │ ├── POSTagger.java
│ │ │ │ │ └── ml/
│ │ │ │ │ ├── POSPriorModel.java
│ │ │ │ │ └── POSTagFex.java
│ │ │ │ ├── supertagger/
│ │ │ │ │ ├── JavaSupertaggingApp.java
│ │ │ │ │ ├── LabellingStrategy.java
│ │ │ │ │ ├── WordAndPOSDictionaryLabellingStrategy.java
│ │ │ │ │ ├── io/
│ │ │ │ │ │ ├── XMLPOSDictionaryReader.java
│ │ │ │ │ │ └── XMLWordDictionaryReader.java
│ │ │ │ │ ├── ml/
│ │ │ │ │ │ ├── FeatureExtractor.java
│ │ │ │ │ │ ├── STFex.java
│ │ │ │ │ │ ├── STPriorModel.java
│ │ │ │ │ │ └── ZhangLeTrainingExtractor.java
│ │ │ │ │ └── util/
│ │ │ │ │ ├── PipedTokenizer.java
│ │ │ │ │ ├── ProbPairComparator.java
│ │ │ │ │ ├── STTaggerDictionary.java
│ │ │ │ │ ├── STTaggerPOSDictionary.java
│ │ │ │ │ ├── STTaggerWordDictionary.java
│ │ │ │ │ ├── SupertagSequenceGetter.java
│ │ │ │ │ └── TaggingDictionaryExtractor.java
│ │ │ │ └── tagger/
│ │ │ │ ├── Constants.java
│ │ │ │ ├── ProbIndexPair.java
│ │ │ │ ├── TaggedWord.java
│ │ │ │ ├── io/
│ │ │ │ │ ├── CorpusIterator.java
│ │ │ │ │ ├── PipeDelimitedFactoredBundleCorpusIterator.java
│ │ │ │ │ └── SRILMFactoredBundleCorpusIterator.java
│ │ │ │ ├── ml/
│ │ │ │ │ ├── MaxentModel.java
│ │ │ │ │ ├── TaggerFeature.java
│ │ │ │ │ ├── ZLMEM.java
│ │ │ │ │ └── ZLMaxentModel.java
│ │ │ │ ├── sequencescoring/
│ │ │ │ │ ├── Backpointer.java
│ │ │ │ │ ├── FBNode.java
│ │ │ │ │ ├── SequenceScorer.java
│ │ │ │ │ └── Trellis.java
│ │ │ │ └── util/
│ │ │ │ ├── CCGBankToSRILMFLM.java
│ │ │ │ ├── ConfigFileProcessor.java
│ │ │ │ └── ResultSink.java
│ │ │ ├── perceptron/
│ │ │ │ ├── Alphabet.java
│ │ │ │ ├── ComposedFeatureExtractor.java
│ │ │ │ ├── ComposedFeatureVector.java
│ │ │ │ ├── EventFile.java
│ │ │ │ ├── FeatureExtractor.java
│ │ │ │ ├── FeatureList.java
│ │ │ │ ├── FeatureMap.java
│ │ │ │ ├── FeatureVector.java
│ │ │ │ ├── Model.java
│ │ │ │ ├── PerceptronScorer.java
│ │ │ │ ├── ReRankingPerceptronScorer.java
│ │ │ │ └── Trainer.java
│ │ │ ├── realize/
│ │ │ │ ├── Chart.java
│ │ │ │ ├── DiversityPruningStrategy.java
│ │ │ │ ├── Edge.java
│ │ │ │ ├── EdgeCombos.java
│ │ │ │ ├── EdgeFactory.java
│ │ │ │ ├── EdgeHash.java
│ │ │ │ ├── FeatureLicenser.java
│ │ │ │ ├── Hypertagger.java
│ │ │ │ ├── LexicalDiversityPruningStrategy.java
│ │ │ │ ├── NBestPruningStrategy.java
│ │ │ │ ├── PruningStrategy.java
│ │ │ │ ├── Realizer.java
│ │ │ │ ├── RuleInstance.java
│ │ │ │ ├── StemPruningStrategy.java
│ │ │ │ ├── Tracker.java
│ │ │ │ └── hypertagger/
│ │ │ │ ├── FeatureExtractionException.java
│ │ │ │ ├── LFInfo.java
│ │ │ │ ├── LFLoader.java
│ │ │ │ ├── LMFactorExtractor.java
│ │ │ │ ├── TagExtract.java
│ │ │ │ ├── TagExtractor.java
│ │ │ │ ├── ZLMaxentHypertagger.java
│ │ │ │ ├── ZLMaxentModel.java
│ │ │ │ └── ZLPOSTagger.java
│ │ │ ├── synsem/
│ │ │ │ ├── AbstractCat.java
│ │ │ │ ├── Arg.java
│ │ │ │ ├── ArgStack.java
│ │ │ │ ├── AtomCat.java
│ │ │ │ ├── BasicArg.java
│ │ │ │ ├── CatReader.java
│ │ │ │ ├── Category.java
│ │ │ │ ├── CategoryFcn.java
│ │ │ │ ├── CategoryFcnAdapter.java
│ │ │ │ ├── ComplexCat.java
│ │ │ │ ├── DerivationHandler.java
│ │ │ │ ├── Dollar.java
│ │ │ │ ├── GenerativeSyntacticModel.java
│ │ │ │ ├── LF.java
│ │ │ │ ├── LexLogProbFeatureExtractor.java
│ │ │ │ ├── LexSemOrigin.java
│ │ │ │ ├── Modality.java
│ │ │ │ ├── ReRankingScorer.java
│ │ │ │ ├── SetArg.java
│ │ │ │ ├── Sign.java
│ │ │ │ ├── SignHash.java
│ │ │ │ ├── SignScorer.java
│ │ │ │ ├── Slash.java
│ │ │ │ ├── SlashMode.java
│ │ │ │ ├── SyntacticFeatureExtractor.java
│ │ │ │ ├── TargetCat.java
│ │ │ │ └── VarModality.java
│ │ │ ├── test/
│ │ │ │ ├── CrossValidateRealizer.java
│ │ │ │ ├── DerivMaker.java
│ │ │ │ ├── GenTargets.java
│ │ │ │ ├── Regression.java
│ │ │ │ ├── RegressionInfo.java
│ │ │ │ ├── ScorerMaker.java
│ │ │ │ ├── TimingMap.java
│ │ │ │ ├── UpdateTestbed.java
│ │ │ │ └── Validator.java
│ │ │ ├── unify/
│ │ │ │ ├── EmptySubstitution.java
│ │ │ │ ├── Feature.java
│ │ │ │ ├── FeatureStructure.java
│ │ │ │ ├── GFeatStruc.java
│ │ │ │ ├── GFeatVar.java
│ │ │ │ ├── GSubstitution.java
│ │ │ │ ├── GUnifier.java
│ │ │ │ ├── Indexed.java
│ │ │ │ ├── ModFcn.java
│ │ │ │ ├── Mutable.java
│ │ │ │ ├── SelfCondensingSub.java
│ │ │ │ ├── SimpleSubstitution.java
│ │ │ │ ├── SimpleType.java
│ │ │ │ ├── Substitution.java
│ │ │ │ ├── Unifiable.java
│ │ │ │ ├── Unifier.java
│ │ │ │ ├── UnifyControl.java
│ │ │ │ ├── UnifyFailure.java
│ │ │ │ └── Variable.java
│ │ │ └── util/
│ │ │ ├── ArrayListWithIdentityEquals.java
│ │ │ ├── CompositeFilter.java
│ │ │ ├── DelegatedFilter.java
│ │ │ ├── DisplayPrefs.java
│ │ │ ├── Filter.java
│ │ │ ├── FilteredMap.java
│ │ │ ├── FilteredSet.java
│ │ │ ├── GroupMap.java
│ │ │ ├── IntHashSetMap.java
│ │ │ ├── Interner.java
│ │ │ ├── InverseFilter.java
│ │ │ ├── JLineReader.java
│ │ │ ├── LineReader.java
│ │ │ ├── ListMap.java
│ │ │ ├── MembershipFilter.java
│ │ │ ├── Pair.java
│ │ │ ├── SingletonList.java
│ │ │ ├── StructureSharingList.java
│ │ │ ├── TrieMap.java
│ │ │ ├── VisitedFilter.java
│ │ │ ├── Visualizer.java
│ │ │ └── XmlScanner.java
│ │ └── ccgbank/
│ │ ├── CCGBankConvert.java
│ │ ├── CCGBankExtract.java
│ │ ├── CCGBankTask.java
│ │ ├── CCGBankTaskFileGroup.java
│ │ ├── CCGBankTaskSources.java
│ │ ├── CCGBankTaskTemplates.java
│ │ ├── CCGBankTaskTestbed.java
│ │ ├── InputSourceAdapter.java
│ │ ├── TemplatesProcessor.java
│ │ ├── XMLFilterProcessor.java
│ │ ├── XSLTProcessor.java
│ │ ├── ccgbank.properties
│ │ ├── convert/
│ │ │ ├── ApposTally.java
│ │ │ ├── DiscrCheck.java
│ │ │ ├── GenChal11Adjuster.java
│ │ │ ├── GenConjRule.java
│ │ │ ├── InfoHelper.java
│ │ │ ├── Javafns.java
│ │ │ ├── MWHelper.java
│ │ │ ├── MorphLookup.java
│ │ │ ├── OrigPunctRules.java
│ │ │ ├── PunctHelper.java
│ │ │ ├── RoleAdjuster.java
│ │ │ └── XSLTTrueCaser.java
│ │ ├── extract/
│ │ │ ├── CatNode.java
│ │ │ ├── DebugHelper.java
│ │ │ ├── DefaultLFHelper.java
│ │ │ ├── ExtractGrammar.java
│ │ │ ├── FreqTally.java
│ │ │ ├── InsertLFHelper.java
│ │ │ ├── LexExtract.java
│ │ │ ├── MorphExtrHelper.java
│ │ │ ├── MorphExtract.java
│ │ │ ├── RulesExtract.java
│ │ │ ├── RulesTally.java
│ │ │ └── Testbed.java
│ │ ├── lexicon-base.xsl
│ │ ├── parse/
│ │ │ ├── CCGbankDerivation.jjt
│ │ │ ├── SimpleNode.java
│ │ │ └── grammarInsert
│ │ └── rules-base.xsl
│ ├── pom.xml
│ └── srilmbridge/
│ ├── Makefile
│ └── srilmbridge.cpp
└── test/
├── grammar.xml
├── lexicon.xml
├── morph.xml
├── opennlp/
│ └── ccg/
│ ├── alignment/
│ │ ├── AlignmentTest.java
│ │ ├── IdentifiedPhraseReaderWriterTest.java
│ │ ├── IndexBaseTest.java
│ │ ├── MappingFormatTest.java
│ │ ├── MappingGroupTest.java
│ │ ├── MappingReaderWriterTest.java
│ │ ├── MappingTest.java
│ │ ├── PhraseReaderWriterTest.java
│ │ └── PhraseTest.java
│ ├── disjunctivizer/
│ │ ├── AlignedEdgeFilterTest.java
│ │ ├── DisjunctivizerTest.java
│ │ ├── EdgeMatchFilterTest.java
│ │ ├── FilteredLFEdgeSetTest.java
│ │ ├── LFGraphDifferenceTest.java
│ │ ├── LabelMatchFilterTest.java
│ │ └── VertexMatchFilterTest.java
│ ├── hylo/
│ │ └── graph/
│ │ ├── LFBaseTest.java
│ │ ├── LFEdgeFactoryTest.java
│ │ ├── LFEdgeTest.java
│ │ ├── LFGraphTest.java
│ │ └── LFVertexTest.java
│ └── util/
│ ├── CompositeFilterTest.java
│ ├── DelegatedFilterTest.java
│ ├── FilteredMapTest.java
│ ├── FilteredSetTest.java
│ ├── InverseFilterTest.java
│ ├── MembershipFilterTest.java
│ └── VisitedFilterTest.java
├── output.xml
├── paraphrases.xml
├── rules.xml
└── testlf.xml
Showing preview only (470K chars total). Download the full file or copy to clipboard to get everything.
SYMBOL INDEX (5554 symbols across 505 files)
FILE: bin/ccg_draw_tree.py
function get_deriv (line 24) | def get_deriv(autofile, deriv_id):
function parse_ccgbank_node (line 50) | def parse_ccgbank_node(s):
function parse_ccgbank_leaf (line 54) | def parse_ccgbank_leaf(s):
function excise_empty_nodes (line 58) | def excise_empty_nodes(t):
function parse_ccgbank_tree (line 64) | def parse_ccgbank_tree(s):
FILE: bin/dlf_parser.py
function wordNum (line 34) | def wordNum(wid):
function span (line 41) | def span(nid, graph, w):
function findAll (line 52) | def findAll(elem, match):
class Pred (line 56) | class Pred:
method __init__ (line 57) | def __init__(self):
class Node (line 63) | class Node:
method __init__ (line 64) | def __init__(self):
method addPred (line 69) | def addPred(self, pred, attrib, one_of, opt):
method moses (line 74) | def moses(self, graph):
method dot (line 88) | def dot(self):
method info (line 120) | def info(self):
function parseId (line 130) | def parseId(str):
function parseClass (line 136) | def parseClass(str):
function parseNode (line 142) | def parseNode(node, graph, nodes):
function parseOpt (line 165) | def parseOpt(opt, node, graph, nodes):
function parseOneOf (line 177) | def parseOneOf(oneof, node, attrib, pred, graph, nodes):
function parseRel (line 210) | def parseRel(rel, nid, graph, nodes, style):
FILE: ccgbank/bin/convert-mtc-systems.py
function tokenize (line 15) | def tokenize(t):
function decode_line (line 27) | def decode_line(ln, encoding):
FILE: ccgbank/bin/correlate-to-judgments.py
function num2string (line 18) | def num2string(n):
FILE: ccgbank/bin/find-betas-no-gold.py
class STIterator (line 29) | class STIterator:
method __init__ (line 30) | def __init__(self, f):
method next (line 32) | def next(self):
method __iter__ (line 64) | def __iter__(self): return self
FILE: ccgbank/bin/get-truecase-list.py
function isAllUpper (line 11) | def isAllUpper(st):
FILE: ccgbank/bin/get_factors_from_parse.py
function getLexicalNodes (line 57) | def getLexicalNodes(tree):
FILE: ccgbank/bin/merge_pos_ne.py
class POSOutputIter (line 13) | class POSOutputIter:
method __init__ (line 14) | def __init__(self, filelikeobj):
method __iter__ (line 17) | def __iter__(self): return self
method next (line 19) | def next(self):
FILE: ccgbank/bin/my_unicode.py
function removeInvalidChars (line 15) | def removeInvalidChars(text):
FILE: ccgbank/bin/nbest-mtc-to-bleu-nist.py
function remove_ne (line 17) | def remove_ne(txt):
FILE: ccgbank/bin/ner/NERApp/src/nerapp/NERApp.java
class NERApp (line 29) | public class NERApp {
method main (line 31) | @SuppressWarnings("unchecked")
method classifyToString (line 148) | public static String classifyToString(List<CoreMap> sentence, Document...
class MyWord (line 162) | class MyWord implements HasWord {
method MyWord (line 166) | public MyWord(String wd) {
method word (line 170) | public String word() {
method setWord (line 174) | public void setWord(String string) {
FILE: ccgbank/bin/ner/ner_word.py
class NERWord (line 1) | class NERWord:
method __init__ (line 5) | def __init__(self, wd, label=None, delim="/"):
method parseLineOfWords (line 11) | def parseLineOfWords(ln):
method getLabel (line 55) | def getLabel(self):
method getWord (line 58) | def getWord(self):
method __repr__ (line 61) | def __repr__(self): return self.__str__()
method __str__ (line 63) | def __str__(self): return self.wd + self.delim + (self.getLabel() if n...
FILE: ccgbank/bin/ner/post-process-stanford-ner.py
function fuseNERWords (line 20) | def fuseNERWords(list_of_ner_words):
FILE: ccgbank/plugins/MyGenSynScorer.java
class MyGenSynScorer (line 7) | public class MyGenSynScorer extends GenerativeSyntacticModel
method getModelDir (line 10) | static String getModelDir() {
method MyGenSynScorer (line 16) | public MyGenSynScorer() throws IOException {
FILE: ccgbank/plugins/MyNgramCombo.java
class MyNgramCombo (line 8) | public class MyNgramCombo extends LinearNgramScorerCombo
method bigWordsLM (line 10) | static String bigWordsLM() {
method wordsLM (line 16) | static String wordsLM() { return System.getProperty("words.lm", "model...
method wordsSCLM (line 17) | static String wordsSCLM() { return System.getProperty("words.sc.lm", "...
method stposFLM (line 18) | static String stposFLM() { return System.getProperty("stpos.flm", "mod...
method getBigLM (line 24) | static NgramScorer getBigLM() throws IOException {
method getWordsLM (line 33) | static NgramScorer getWordsLM() throws IOException {
method MyNgramCombo (line 42) | public MyNgramCombo() throws IOException {
FILE: ccgbank/plugins/MyNgramGenSynProduct.java
class MyNgramGenSynProduct (line 7) | public class MyNgramGenSynProduct extends SignScorerProduct
method MyNgramGenSynProduct (line 9) | public MyNgramGenSynProduct() throws IOException {
FILE: ccgbank/plugins/MyNgramPrecisionBaselineGenInterp.java
class MyNgramPrecisionBaselineGenInterp (line 9) | public class MyNgramPrecisionBaselineGenInterp extends SignScorerInterpo...
method MyNgramPrecisionBaselineGenInterp (line 15) | public MyNgramPrecisionBaselineGenInterp() throws IOException {
method setTargets (line 23) | public void setTargets(String[] targets) { selfBiaser.setTargets(targe...
FILE: ccgbank/plugins/MyNgramPrecisionPerceptronInterp.java
class MyNgramPrecisionPerceptronInterp (line 9) | public class MyNgramPrecisionPerceptronInterp extends SignScorerInterpol...
method MyNgramPrecisionPerceptronInterp (line 20) | public MyNgramPrecisionPerceptronInterp() throws IOException {
method setTargets (line 29) | public void setTargets(String[] targets) { selfBiaser.setTargets(targe...
FILE: ccgbank/plugins/MyParserPerceptronScorer.java
class MyParserPerceptronScorer (line 10) | public class MyParserPerceptronScorer extends ReRankingPerceptronScorer
method getModelDir (line 13) | static String getModelDir() {
method getModelName (line 20) | static String getModelName() { return System.getProperty("parser.model...
method MyParserPerceptronScorer (line 22) | public MyParserPerceptronScorer() throws IOException {
method getBaseScorer (line 30) | protected SignScorer getBaseScorer(FeatureExtractor featureExtractor) {
FILE: ccgbank/plugins/MyRealizerPerceptronScorer.java
class MyRealizerPerceptronScorer (line 9) | public class MyRealizerPerceptronScorer extends PerceptronScorer
method getModelDir (line 11) | static String getModelDir() {
method getModelName (line 17) | static String getModelName() { return System.getProperty("realizer.mod...
method getAgrMultiplier (line 19) | static String getAgrMultiplier() { return System.getProperty("realizer...
method calcAgrMultiplier (line 21) | static double calcAgrMultiplier() {
method MyRealizerPerceptronScorer (line 34) | public MyRealizerPerceptronScorer() throws IOException {
method adjustedWeight (line 47) | public double adjustedWeight(String name, double weight) {
method isAdjacentPunctFeat (line 56) | private static boolean isAdjacentPunctFeat(String name) {
method isPunct (line 66) | private static boolean isPunct(String token) {
FILE: ccgbank/plugins/MySynAgrFeatureExtractor.java
class MySynAgrFeatureExtractor (line 8) | public class MySynAgrFeatureExtractor extends ComposedFeatureExtractor
method MySynAgrFeatureExtractor (line 10) | public MySynAgrFeatureExtractor() {
FILE: ccgbank/plugins/MySynSemAgrFeatureExtractor.java
class MySynSemAgrFeatureExtractor (line 8) | public class MySynSemAgrFeatureExtractor extends ComposedFeatureExtractor
method MySynSemAgrFeatureExtractor (line 10) | public MySynSemAgrFeatureExtractor() {
FILE: ccgbank/plugins/MySynSemFeatureExtractor.java
class MySynSemFeatureExtractor (line 8) | public class MySynSemFeatureExtractor extends ComposedFeatureExtractor
method MySynSemFeatureExtractor (line 10) | public MySynSemFeatureExtractor() {
FILE: src/ccg2xml/Tree.py
function report_callback_exception (line 83) | def report_callback_exception():
class Struct (line 92) | class Struct:
method __init__ (line 94) | def __init__(self):
class Node (line 98) | class Node:
method __init__ (line 120) | def __init__(self, parent_node, id, collapsed_icon, x, y,
method set_collapsed_icon (line 172) | def set_collapsed_icon(self, icon):
method set_expanded_icon (line 178) | def set_expanded_icon(self, icon):
method parent (line 184) | def parent(self):
method prev_sib (line 188) | def prev_sib(self):
method next_sib (line 196) | def next_sib(self):
method next_visible (line 204) | def next_visible(self):
method prev_visible (line 220) | def prev_visible(self):
method children (line 233) | def children(self):
method get_label (line 237) | def get_label(self):
method set_label (line 241) | def set_label(self, label):
method expanded (line 245) | def expanded(self):
method expandable (line 249) | def expandable(self):
method full_id (line 253) | def full_id(self):
method expand (line 260) | def expand(self):
method collapse (line 265) | def collapse(self):
method delete (line 270) | def delete(self, me_too=1):
method insert_before (line 305) | def insert_before(self, nodes):
method insert_after (line 311) | def insert_after(self, nodes):
method insert_children (line 317) | def insert_children(self, nodes):
method toggle_state (line 322) | def toggle_state(self):
method PVT_enter (line 331) | def PVT_enter(self, event):
method dnd_end (line 335) | def dnd_end(self, target, event):
method PVT_last (line 348) | def PVT_last(self):
method PVT_find (line 355) | def PVT_find(self, search):
method PVT_insert (line 370) | def PVT_insert(self, nodes, pos, below):
method PVT_set_state (line 413) | def PVT_set_state(self, state):
method PVT_cleanup_lines (line 441) | def PVT_cleanup_lines(self):
method PVT_update_scrollregion (line 452) | def PVT_update_scrollregion(self):
method PVT_delete_subtree (line 457) | def PVT_delete_subtree(self):
method PVT_unbind_all (line 481) | def PVT_unbind_all(self):
method PVT_tag_move (line 490) | def PVT_tag_move(self, dist):
method PVT_click (line 507) | def PVT_click(self, event):
class Tree (line 521) | class Tree(Canvas):
method __init__ (line 523) | def __init__(self, master, root_id, root_label='',
method PVT_mousefocus (line 620) | def PVT_mousefocus(self, event):
method tag_bind (line 625) | def tag_bind(self, tag, seq, *args, **kw_args):
method add_list (line 633) | def add_list(self, list=None, name=None, id=None, flag=0,
method add_node (line 662) | def add_node(self, name=None, id=None, flag=0, expanded_icon=None,
method find_full_id (line 668) | def find_full_id(self, search):
method cursor_node (line 672) | def cursor_node(self, search):
method see (line 676) | def see(self, *items):
method move_cursor (line 702) | def move_cursor(self, node):
method toggle (line 709) | def toggle(self, event=None):
method next (line 713) | def next(self, event=None):
method prev (line 717) | def prev(self, event=None):
method ascend (line 721) | def ascend(self, event=None):
method descend (line 727) | def descend(self, event=None):
method first (line 738) | def first(self, event=None):
method last (line 743) | def last(self, event=None):
method pageup (line 748) | def pageup(self, event=None):
method pagedown (line 757) | def pagedown(self, event=None):
method where (line 767) | def where(self, event):
method dnd_accept (line 779) | def dnd_accept(self, source, event):
method dnd_enter (line 785) | def dnd_enter(self, source, event):
method dnd_motion (line 805) | def dnd_motion(self, source, event):
method dnd_leave (line 813) | def dnd_leave(self, source, event):
method dnd_commit (line 818) | def dnd_commit(self, source, event):
function get_contents (line 849) | def get_contents(node):
FILE: src/ccg2xml/ccg_editor.py
class CTab (line 50) | class CTab(Frame):
method __init__ (line 55) | def __init__(self, parent, cfile, tabname):
method add_menu (line 96) | def add_menu(self, after, menu):
method remove_toolbar_button (line 105) | def remove_toolbar_button(self, name):
method reinit (line 112) | def reinit(self):
method onSave (line 119) | def onSave(self):
method onSaveAs (line 122) | def onSaveAs(self, forcefile=None):
class CEdit (line 136) | class CEdit(CTab):
method __init__ (line 137) | def __init__(self, parent, cfile):
method scrollSet (line 209) | def scrollSet(self, *args):
method reinit (line 213) | def reinit(self):
method showLineNums (line 217) | def showLineNums(self):
method onValidate (line 228) | def onValidate(self, editFrame, cfile):
method debugError (line 244) | def debugError(self, editFrame, cfile):
method onCopy (line 272) | def onCopy(self): # get text selected by mou...
method onDelete (line 280) | def onDelete(self): # delete selected text, no...
method onCut (line 286) | def onCut(self):
method onPaste (line 293) | def onPaste(self):
method onSelectAll (line 304) | def onSelectAll(self):
method onChange (line 313) | def onChange(self):
method onDoFind (line 327) | def onDoFind(self):
method onDoChange (line 330) | def onDoChange(self):
method isEmpty (line 342) | def isEmpty(self):
method getAllText (line 345) | def getAllText(self):
method setAllText (line 348) | def setAllText(self, text):
method clearAllText (line 355) | def clearAllText(self):
class CWords (line 359) | class CWords(CTab):
method __init__ (line 360) | def __init__(self, parent, cfile):
method reinit (line 367) | def reinit(self):
class CLexicon (line 395) | class CLexicon(CTab):
class lexicon_vars (line 396) | class lexicon_vars(object):
method __init__ (line 397) | def __init__(self):
method __init__ (line 407) | def __init__(self, parent, cfile):
method reinit (line 425) | def reinit(self):
method redraw (line 428) | def redraw(self):
class CRules (line 466) | class CRules(CTab):
method __init__ (line 467) | def __init__(self, parent, cfile):
class CFeatures (line 470) | class CFeatures(CTab):
method __init__ (line 471) | def __init__(self, parent, cfile):
method reinit (line 479) | def reinit(self):
method get_treedata (line 557) | def get_treedata(self,node):
method expand_tree (line 568) | def expand_tree(self, node):
method expand_all (line 574) | def expand_all(self):
method contract_all (line 577) | def contract_all(self):
method edit_tree (line 580) | def edit_tree(self, parent):
method save_tree (line 628) | def save_tree(self, parent):
class CTestbed (line 639) | class CTestbed(CTab):
method __init__ (line 640) | def __init__(self, parent, cfile):
method makelab (line 650) | def makelab(self, text, row, col, **props):
method reinit (line 656) | def reinit(self):
method edit_testbed (line 728) | def edit_testbed(self):
method save_testbed (line 780) | def save_testbed(self):
method new_sentence (line 793) | def new_sentence(self):
method editNew (line 815) | def editNew(self, master, sent, nParses):
class CFile (line 864) | class CFile(object):
method __init__ (line 899) | def __init__(self, file=None):
method switch_to (line 948) | def switch_to(self, mode):
method makeMenubar (line 980) | def makeMenubar(self):
method addMenuItems (line 997) | def addMenuItems(self, menu, items):
method makeToolbar (line 1015) | def makeToolbar(self, selected):
method makeCheckbar (line 1031) | def makeCheckbar(self):
method getAllText (line 1045) | def getAllText(self):
method setAllText (line 1048) | def setAllText(self, text):
method _getints (line 1052) | def _getints(self, string):
method edit (line 1061) | def edit(self, *args):
method edit_modified (line 1078) | def edit_modified(self, arg=None):
method onInfo (line 1095) | def onInfo(self):
method onGoto (line 1115) | def onGoto(self, line=None):
method onFind (line 1131) | def onFind(self, lastkey=None):
method onRefind (line 1147) | def onRefind(self):
method onFontList (line 1154) | def onFontList(self):
method onColorList (line 1160) | def onColorList(self):
method onPickFg (line 1166) | def onPickFg(self):
method onPickBg (line 1168) | def onPickBg(self):
method pickColor (line 1170) | def pickColor(self, part):
method getSignature (line 1211) | def getSignature(self, contents):
method my_askopenfilename (line 1214) | def my_askopenfilename(self): # objects remember last result dir/...
method my_asksaveasfilename (line 1220) | def my_asksaveasfilename(self): # objects remember last result dir/...
method onOpen (line 1227) | def onOpen(self):
method onFirstOpen (line 1233) | def onFirstOpen(self, file):
method compile_if_needed (line 1242) | def compile_if_needed(self):
method onDisplay (line 1255) | def onDisplay(self):
method onEdit (line 1258) | def onEdit(self):
method onLexicon (line 1261) | def onLexicon(self):
method onTestbed (line 1264) | def onTestbed(self):
method onRules (line 1267) | def onRules(self):
method onWords (line 1270) | def onWords(self):
method onFeatures (line 1273) | def onFeatures(self):
method onNew (line 1276) | def onNew(self):
method getFileName (line 1279) | def getFileName(self):
method setFileName (line 1282) | def setFileName(self, name):
method help (line 1291) | def help(self):
method onClose (line 1295) | def onClose(self):
method onQuit (line 1305) | def onQuit(self):
function main (line 1316) | def main():
FILE: src/ccg2xml/convert-ply.py
function syntax_error (line 89) | def syntax_error(err, line):
function make_name_python_safe (line 103) | def make_name_python_safe(name):
function replace_dollar_signs (line 110) | def replace_dollar_signs(code, renumber_at=None):
function output_python_cfg_rule (line 159) | def output_python_cfg_rule(fil, lhs, rhs, code):
function output_default_python_cfg_rule (line 218) | def output_default_python_cfg_rule(fil, lhs, rhs):
function finish_any_cfg (line 221) | def finish_any_cfg(fil):
function clear_rule_context (line 231) | def clear_rule_context():
FILE: src/ccg2xml/lex.py
class LexError (line 195) | class LexError(Exception):
method __init__ (line 196) | def __init__(self,message,s):
class LexToken (line 201) | class LexToken:
method __str__ (line 202) | def __str__(self):
method __repr__ (line 204) | def __repr__(self):
method skip (line 206) | def skip(self,n):
class Lexer (line 219) | class Lexer:
method __init__ (line 220) | def __init__(self):
method __copy__ (line 234) | def __copy__(self):
method input (line 253) | def input(self,s):
method errtoken (line 269) | def errtoken(self):
method realtoken (line 279) | def realtoken(self):
function validate_file (line 357) | def validate_file(filename):
function _read_lextab (line 395) | def _read_lextab(lexer, fdict, module):
function lex (line 414) | def lex(module=None,debug=0,optimize=0,lextab="lextab"):
function runmain (line 672) | def runmain(lexer=None,data=None):
FILE: src/ccg2xml/yacc.py
class YaccError (line 71) | class YaccError(Exception): pass
class YaccSymbol (line 88) | class YaccSymbol:
method __str__ (line 89) | def __str__(self): return self.type
method __repr__ (line 90) | def __repr__(self): return str(self)
class YaccProduction (line 100) | class YaccProduction:
method __init__ (line 101) | def __init__(self,s):
method __getitem__ (line 105) | def __getitem__(self,n):
method __setitem__ (line 108) | def __setitem__(self,n,v):
method __len__ (line 111) | def __len__(self):
method lineno (line 114) | def lineno(self,n):
method linespan (line 117) | def linespan(self,n):
method pushback (line 122) | def pushback(self,n):
class Parser (line 135) | class Parser:
method __init__ (line 136) | def __init__(self,magic=None):
method errok (line 152) | def errok(self):
method restart (line 155) | def restart(self):
method parse (line 163) | def parse(self,input=None,lexer=None,debug=0):
function validate_file (line 397) | def validate_file(filename):
function validate_dict (line 427) | def validate_dict(d):
function initialize_vars (line 450) | def initialize_vars():
class Production (line 537) | class Production:
method __init__ (line 538) | def __init__(self,**kw):
method __str__ (line 549) | def __str__(self):
method __repr__ (line 556) | def __repr__(self):
method lr_item (line 560) | def lr_item(self,n):
class MiniProduction (line 586) | class MiniProduction:
function is_identifier (line 590) | def is_identifier(s):
function add_production (line 612) | def add_production(f,file,line,prodname,syms):
function add_function (line 707) | def add_function(f):
function compute_reachable (line 768) | def compute_reachable():
function mark_reachable_from (line 784) | def mark_reachable_from(s, Reachable):
function compute_terminates (line 803) | def compute_terminates():
function verify_productions (line 869) | def verify_productions(cycle_check=1):
function build_lritems (line 948) | def build_lritems():
function add_precedence (line 973) | def add_precedence(plist):
function augment_grammar (line 1003) | def augment_grammar(start=None):
function first (line 1019) | def first(beta):
function compute_follow (line 1053) | def compute_follow(start=None):
function compute_first1 (line 1097) | def compute_first1():
function lr_init_vars (line 1138) | def lr_init_vars():
function lr0_closure (line 1152) | def lr0_closure(I):
function lr0_goto (line 1180) | def lr0_goto(I,x):
function lr0_goto_setnumber (line 1216) | def lr0_goto_setnumber(I_setnumber, x):
function lr0_kernel (line 1244) | def lr0_kernel(I):
function lr0_items (line 1255) | def lr0_items():
function slr_parse_table (line 1289) | def slr_parse_table():
function lr1_closure (line 1479) | def lr1_closure(I, setnumber = 0):
function add_lookaheads (line 1533) | def add_lookaheads(K):
function ReduceNonterminals (line 1602) | def ReduceNonterminals():
function ReduceToTerminals (line 1620) | def ReduceToTerminals(nt):
function ReduceToNonterminals (line 1640) | def ReduceToNonterminals(nt):
function lalr_parse_table (line 1668) | def lalr_parse_table():
function lr_write_tables (line 2026) | def lr_write_tables(modulename=tab_module,outputdir=''):
function lr_read_tables (line 2136) | def lr_read_tables(module=tab_module,optimize=0):
function yacc (line 2159) | def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_...
function yacc_cleanup (line 2390) | def yacc_cleanup():
function parse (line 2409) | def parse(*args,**kwargs):
FILE: src/kenlm/jni/wrap.cc
type __gnu_cxx (line 16) | namespace __gnu_cxx {
type hash<unsigned long long int> (line 18) | struct hash<unsigned long long int>
type StaticCheck (line 32) | struct StaticCheck {}
type StaticCheck<true> (line 34) | struct StaticCheck<true> {
function MapArray (line 42) | void MapArray(const std::vector<lm::WordIndex>& map, jint *begin, jint *...
class VirtualBase (line 57) | class VirtualBase {
method VirtualBase (line 72) | VirtualBase() {
class VirtualImpl (line 78) | class VirtualImpl: public VirtualBase {
method VirtualImpl (line 80) | VirtualImpl(const char *name, float fake_oov_cost) :
method Prob (line 89) | float Prob(jint * const begin, jint * const end) const {
method ProbString (line 102) | float ProbString(jint * const begin, jint * const end, jint start) con...
method Order (line 134) | uint8_t Order() const {
method RegisterWord (line 138) | bool RegisterWord(const StringPiece& word, const int wd_id) {
function VirtualBase (line 155) | VirtualBase *ConstructModel(const char *file_name, float fake_oov_cost) {
method VirtualBase (line 72) | VirtualBase() {
function JNIEXPORT (line 183) | JNIEXPORT jlong JNICALL Java_opennlp_ccg_ngrams_kenlm_jni_KenLM_construct(
function JNIEXPORT (line 199) | JNIEXPORT void JNICALL Java_opennlp_ccg_ngrams_kenlm_jni_KenLM_destroy(
function JNIEXPORT (line 204) | JNIEXPORT jint JNICALL Java_opennlp_ccg_ngrams_kenlm_jni_KenLM_order(
function JNIEXPORT (line 209) | JNIEXPORT jboolean JNICALL Java_opennlp_ccg_ngrams_kenlm_jni_KenLM_regis...
function JNIEXPORT (line 225) | JNIEXPORT jfloat JNICALL Java_opennlp_ccg_ngrams_kenlm_jni_KenLM_prob(
function JNIEXPORT (line 238) | JNIEXPORT jfloat JNICALL Java_opennlp_ccg_ngrams_kenlm_jni_KenLM_probStr...
FILE: src/kenlm/lm/bhiksha.cc
type lm (line 8) | namespace lm {
type ngram (line 9) | namespace ngram {
type trie (line 10) | namespace trie {
function ChopBits (line 30) | uint8_t ChopBits(uint64_t max_offset, uint64_t max_next, const Con...
function ArrayCount (line 46) | std::size_t ArrayCount(uint64_t max_offset, uint64_t max_next, con...
FILE: src/kenlm/lm/bhiksha.hh
type lm (line 24) | namespace lm {
type ngram (line 25) | namespace ngram {
type Config (line 26) | struct Config
type trie (line 28) | namespace trie {
class DontBhiksha (line 30) | class DontBhiksha {
method UpdateConfigFromBinary (line 34) | static void UpdateConfigFromBinary(int /*fd*/, Config &/*config*...
method Size (line 36) | static uint64_t Size(uint64_t /*max_offset*/, uint64_t /*max_nex...
method InlineBits (line 38) | static uint8_t InlineBits(uint64_t /*max_offset*/, uint64_t max_...
method ReadNext (line 44) | void ReadNext(const void *base, uint64_t bit_offset, uint64_t /*...
method WriteNext (line 50) | void WriteNext(void *base, uint64_t bit_offset, uint64_t /*index...
method FinishedLoading (line 54) | void FinishedLoading(const Config &/*config*/) {}
method LoadedBinary (line 56) | void LoadedBinary() {}
method InlineBits (line 58) | uint8_t InlineBits() const { return next_.bits; }
class ArrayBhiksha (line 64) | class ArrayBhiksha {
method ReadNext (line 76) | void ReadNext(const void *base, uint64_t bit_offset, uint64_t in...
method WriteNext (line 88) | void WriteNext(void *base, uint64_t bit_offset, uint64_t index, ...
method InlineBits (line 98) | uint8_t InlineBits() const { return next_inline_.bits; }
FILE: src/kenlm/lm/binary_format.cc
type lm (line 14) | namespace lm {
type ngram (line 15) | namespace ngram {
type OldSanity (line 25) | struct OldSanity {
method SetToReference (line 31) | void SetToReference() {
type Sanity (line 43) | struct Sanity {
method SetToReference (line 49) | void SetToReference() {
function TotalHeaderSize (line 62) | std::size_t TotalHeaderSize(unsigned char order) {
function WriteHeader (line 66) | void WriteHeader(void *to, const Parameters ¶ms) {
function FinishFile (line 128) | void FinishFile(const Config &config, ModelType model_type, unsigned...
type detail (line 158) | namespace detail {
function IsBinaryFormat (line 160) | bool IsBinaryFormat(int fd) {
function ReadHeader (line 192) | void ReadHeader(int fd, Parameters &out) {
function MatchCheck (line 202) | void MatchCheck(ModelType model_type, unsigned int search_version,...
function SeekPastHeader (line 211) | void SeekPastHeader(int fd, const Parameters ¶ms) {
function ComplainAboutARPA (line 232) | void ComplainAboutARPA(const Config &config, ModelType model_type) {
function RecognizeBinary (line 244) | bool RecognizeBinary(const char *file, ModelType &recognized) {
FILE: src/kenlm/lm/binary_format.hh
type lm (line 17) | namespace lm {
type ngram (line 18) | namespace ngram {
type FixedWidthParameters (line 26) | struct FixedWidthParameters {
type Parameters (line 40) | struct Parameters {
type Backing (line 45) | struct Backing {
type detail (line 63) | namespace detail {
function LoadLM (line 79) | void LoadLM(const char *file, const Config &config, To &to) {
FILE: src/kenlm/lm/blank.hh
type lm (line 9) | namespace lm {
type ngram (line 10) | namespace ngram {
function SetExtension (line 28) | inline void SetExtension(float &backoff) {
function HasExtension (line 33) | inline bool HasExtension(const float &backoff) {
FILE: src/kenlm/lm/build_binary_main.cc
type lm (line 22) | namespace lm {
type ngram (line 23) | namespace ngram {
function Usage (line 26) | void Usage(const char *name, const char *default_mem) {
function ParseFloat (line 60) | float ParseFloat(const char *from) {
function ParseUInt (line 66) | unsigned long int ParseUInt(const char *from) {
function ParseBitCount (line 73) | uint8_t ParseBitCount(const char *from) {
function ParseFileList (line 82) | void ParseFileList(const char *from, std::vector<std::string> &to) {
function ProbingQuantizationUnsupported (line 93) | void ProbingQuantizationUnsupported() {
function main (line 102) | int main(int argc, char *argv[]) {
FILE: src/kenlm/lm/config.cc
type lm (line 5) | namespace lm {
type ngram (line 6) | namespace ngram {
FILE: src/kenlm/lm/config.hh
type lm (line 13) | namespace lm {
class EnumerateVocab (line 15) | class EnumerateVocab
type ngram (line 17) | namespace ngram {
type Config (line 19) | struct Config {
type ARPALoadComplain (line 74) | enum ARPALoadComplain {ALL, EXPENSIVE, NONE}
type WriteMethod (line 81) | enum WriteMethod {
type RestFunction (line 92) | enum RestFunction {
FILE: src/kenlm/lm/enumerate_vocab.hh
type lm (line 7) | namespace lm {
class EnumerateVocab (line 15) | class EnumerateVocab {
method EnumerateVocab (line 22) | EnumerateVocab() {}
FILE: src/kenlm/lm/facade.hh
type lm (line 9) | namespace lm {
type base (line 10) | namespace base {
class ModelFacade (line 14) | class ModelFacade : public Model {
method Score (line 20) | float Score(const State &in_state, const WordIndex new_word, State...
method FullScoreReturn (line 25) | FullScoreReturn FullScore(const void *in_state, const WordIndex ne...
method Score (line 31) | float Score(const void *in_state, const WordIndex new_word, void *...
method State (line 38) | const State &BeginSentenceState() const { return begin_sentence_; }
method State (line 39) | const State &NullContextState() const { return null_context_; }
method Vocabulary (line 40) | const Vocabulary &GetVocabulary() const { return *static_cast<cons...
method ModelFacade (line 43) | ModelFacade() : Model(sizeof(State)) {}
method Init (line 48) | void Init(const State &begin_sentence, const State &null_context, ...
FILE: src/kenlm/lm/fragment_main.cc
function Query (line 6) | void Query(const char *name) {
function main (line 19) | int main(int argc, char *argv[]) {
FILE: src/kenlm/lm/kenlm_max_order_main.cc
function main (line 4) | int main(int argc, char *argv[]) {
FILE: src/kenlm/lm/left.hh
type lm (line 49) | namespace lm {
type ngram (line 50) | namespace ngram {
class RuleScore (line 52) | class RuleScore {
method RuleScore (line 54) | explicit RuleScore(const M &model, ChartState &out) : model_(model...
method BeginSentence (line 59) | void BeginSentence() {
method Terminal (line 65) | void Terminal(WordIndex word) {
method BeginNonTerminal (line 81) | void BeginNonTerminal(const ChartState &in, float prob = 0.0) {
method NonTerminal (line 87) | void NonTerminal(const ChartState &in, float prob = 0.0) {
method Finish (line 152) | float Finish() {
method Reset (line 158) | void Reset() {
method Reset (line 164) | void Reset(ChartState &replacement) {
method ExtendLeft (line 170) | bool ExtendLeft(const ChartState &in, unsigned char &next_use, uns...
method ProcessRet (line 190) | void ProcessRet(const FullScoreReturn &ret) {
FILE: src/kenlm/lm/left_test.cc
type lm (line 12) | namespace lm {
type ngram (line 13) | namespace ngram {
function Short (line 22) | void Short(const M &m) {
function Charge (line 61) | void Charge(const M &m) {
function LeftToRight (line 97) | float LeftToRight(const M &m, const std::vector<WordIndex> &words, b...
function RightToLeft (line 107) | float RightToLeft(const M &m, const std::vector<WordIndex> &words, b...
function TreeMiddle (line 130) | float TreeMiddle(const M &m, const std::vector<WordIndex> &words, bo...
function LookupVocab (line 165) | void LookupVocab(const M &m, const StringPiece &str, std::vector<Wor...
function GrowBig (line 179) | void GrowBig(const M &m, bool rest = false) {
function GrowSmall (line 195) | void GrowSmall(const M &m, bool rest = false) {
function AlsoWouldConsiderHigher (line 203) | void AlsoWouldConsiderHigher(const M &m) {
function FullGrow (line 286) | void FullGrow(const M &m) {
function Everything (line 359) | void Everything() {
function BOOST_AUTO_TEST_CASE (line 372) | BOOST_AUTO_TEST_CASE(ProbingAll) {
function BOOST_AUTO_TEST_CASE (line 375) | BOOST_AUTO_TEST_CASE(TrieAll) {
function BOOST_AUTO_TEST_CASE (line 378) | BOOST_AUTO_TEST_CASE(QuantTrieAll) {
function BOOST_AUTO_TEST_CASE (line 381) | BOOST_AUTO_TEST_CASE(ArrayQuantTrieAll) {
function BOOST_AUTO_TEST_CASE (line 384) | BOOST_AUTO_TEST_CASE(ArrayTrieAll) {
function BOOST_AUTO_TEST_CASE (line 388) | BOOST_AUTO_TEST_CASE(RestProbing) {
FILE: src/kenlm/lm/lm_exception.cc
type lm (line 6) | namespace lm {
FILE: src/kenlm/lm/lm_exception.hh
type lm (line 12) | namespace lm {
class ConfigException (line 16) | class ConfigException : public util::Exception {
class LoadException (line 22) | class LoadException : public util::Exception {
class FormatLoadException (line 30) | class FormatLoadException : public LoadException {
class VocabLoadException (line 36) | class VocabLoadException : public LoadException {
class SpecialWordMissingException (line 42) | class SpecialWordMissingException : public VocabLoadException {
FILE: src/kenlm/lm/model.cc
type lm (line 17) | namespace lm {
type ngram (line 18) | namespace ngram {
type detail (line 19) | namespace detail {
function CheckCounts (line 54) | void CheckCounts(const std::vector<uint64_t> &counts) {
function FullScoreReturn (line 114) | FullScoreReturn GenericModel<Search, VocabularyT>::FullScore(const...
function FullScoreReturn (line 122) | FullScoreReturn GenericModel<Search, VocabularyT>::FullScoreForgot...
function FullScoreReturn (line 175) | FullScoreReturn GenericModel<Search, VocabularyT>::ExtendLeft(
function CopyRemainingHistory (line 213) | void CopyRemainingHistory(const WordIndex *from, State &out_state) {
function FullScoreReturn (line 225) | FullScoreReturn GenericModel<Search, VocabularyT>::ScoreExceptBack...
class GenericModel<HashedSearch<BackoffValue>, ProbingVocabulary> (line 299) | class GenericModel<HashedSearch<BackoffValue>, ProbingVocabulary>
class GenericModel<HashedSearch<RestValue>, ProbingVocabulary> (line 300) | class GenericModel<HashedSearch<RestValue>, ProbingVocabulary>
class GenericModel<trie::TrieSearch<DontQuantize, trie::DontBhiksha>, SortedVocabulary> (line 301) | class GenericModel<trie::TrieSearch<DontQuantize, trie::DontBhiksh...
class GenericModel<trie::TrieSearch<DontQuantize, trie::ArrayBhiksha>, SortedVocabulary> (line 302) | class GenericModel<trie::TrieSearch<DontQuantize, trie::ArrayBhiks...
class GenericModel<trie::TrieSearch<SeparatelyQuantize, trie::DontBhiksha>, SortedVocabulary> (line 303) | class GenericModel<trie::TrieSearch<SeparatelyQuantize, trie::Dont...
class GenericModel<trie::TrieSearch<SeparatelyQuantize, trie::ArrayBhiksha>, SortedVocabulary> (line 304) | class GenericModel<trie::TrieSearch<SeparatelyQuantize, trie::Arra...
FILE: src/kenlm/lm/model.hh
type util (line 23) | namespace util { class FilePiece; }
class FilePiece (line 23) | class FilePiece
type lm (line 25) | namespace lm {
type ngram (line 26) | namespace ngram {
type detail (line 27) | namespace detail {
class GenericModel (line 31) | class GenericModel : public base::ModelFacade<GenericModel<Search,...
method UnRest (line 101) | float UnRest(const uint64_t *pointers_begin, const uint64_t *poi...
method Backing (line 125) | Backing &MutableBacking() { return backing_; }
FILE: src/kenlm/lm/model_test.cc
type lm (line 13) | namespace lm {
type ngram (line 14) | namespace ngram {
function State (line 42) | State GetState(const Model &model, const char *word, const State &in) {
function Starters (line 66) | void Starters(const M &model) {
function Continuation (line 79) | void Continuation(const M &model) {
function Blanks (line 104) | void Blanks(const M &model) {
function Unknowns (line 144) | void Unknowns(const M &model) {
function MinimalState (line 159) | void MinimalState(const M &model) {
function ExtendLeftTest (line 182) | void ExtendLeftTest(const M &model) {
function Stateless (line 236) | void Stateless(const M &model) {
function NoUnkCheck (line 281) | void NoUnkCheck(const M &model) {
function Everything (line 289) | void Everything(const M &m) {
class ExpectEnumerateVocab (line 299) | class ExpectEnumerateVocab : public EnumerateVocab {
method ExpectEnumerateVocab (line 301) | ExpectEnumerateVocab() {}
method Add (line 303) | void Add(WordIndex index, const StringPiece &str) {
method Check (line 308) | void Check(const base::Vocabulary &vocab) {
method Clear (line 317) | void Clear() {
function LoadingTest (line 324) | void LoadingTest() {
function BOOST_AUTO_TEST_CASE (line 347) | BOOST_AUTO_TEST_CASE(probing) {
function BOOST_AUTO_TEST_CASE (line 350) | BOOST_AUTO_TEST_CASE(trie) {
function BOOST_AUTO_TEST_CASE (line 353) | BOOST_AUTO_TEST_CASE(quant_trie) {
function BOOST_AUTO_TEST_CASE (line 356) | BOOST_AUTO_TEST_CASE(bhiksha_trie) {
function BOOST_AUTO_TEST_CASE (line 359) | BOOST_AUTO_TEST_CASE(quant_bhiksha_trie) {
function BinaryTest (line 363) | void BinaryTest() {
function BOOST_AUTO_TEST_CASE (line 409) | BOOST_AUTO_TEST_CASE(write_and_read_probing) {
function BOOST_AUTO_TEST_CASE (line 412) | BOOST_AUTO_TEST_CASE(write_and_read_rest_probing) {
function BOOST_AUTO_TEST_CASE (line 415) | BOOST_AUTO_TEST_CASE(write_and_read_trie) {
function BOOST_AUTO_TEST_CASE (line 418) | BOOST_AUTO_TEST_CASE(write_and_read_quant_trie) {
function BOOST_AUTO_TEST_CASE (line 421) | BOOST_AUTO_TEST_CASE(write_and_read_array_trie) {
function BOOST_AUTO_TEST_CASE (line 424) | BOOST_AUTO_TEST_CASE(write_and_read_quant_array_trie) {
function BOOST_AUTO_TEST_CASE (line 428) | BOOST_AUTO_TEST_CASE(rest_max) {
FILE: src/kenlm/lm/model_type.hh
type lm (line 4) | namespace lm {
type ngram (line 5) | namespace ngram {
FILE: src/kenlm/lm/ngram_query.hh
type lm (line 14) | namespace lm {
type ngram (line 15) | namespace ngram {
function Query (line 17) | void Query(const Model &model, bool sentence_context, std::istream &...
function Query (line 61) | void Query(const char *file, bool sentence_context, std::istream &in...
FILE: src/kenlm/lm/partial.hh
type lm (line 11) | namespace lm {
type ngram (line 12) | namespace ngram {
type ExtendReturn (line 14) | struct ExtendReturn {
function ExtendReturn (line 20) | ExtendReturn ExtendLoop(
function RevealBefore (line 84) | float RevealBefore(const Model &model, const Right &reveal, const un...
function RevealAfter (line 112) | float RevealAfter(const Model &model, Left &left, Right &right, cons...
function Subsume (line 136) | float Subsume(const Model &model, Left &first_left, const Right &fir...
FILE: src/kenlm/lm/partial_test.cc
type lm (line 11) | namespace lm {
type ngram (line 12) | namespace ngram {
function Config (line 22) | Config SilentConfig() {
type ModelFixture (line 29) | struct ModelFixture {
method ModelFixture (line 30) | ModelFixture() : m(TestLocation(), SilentConfig()) {}
function BOOST_AUTO_TEST_CASE (line 37) | BOOST_AUTO_TEST_CASE(SimpleBefore) {
function BOOST_AUTO_TEST_CASE (line 71) | BOOST_AUTO_TEST_CASE(AlsoWouldConsider) {
function BOOST_AUTO_TEST_CASE (line 110) | BOOST_AUTO_TEST_CASE(EndSentence) {
function ScoreFragment (line 132) | float ScoreFragment(const RestProbingModel &model, unsigned int *beg...
function CheckAdjustment (line 140) | void CheckAdjustment(const RestProbingModel &model, float expect, co...
function FullDivide (line 166) | void FullDivide(const RestProbingModel &model, StringPiece str) {
function BOOST_AUTO_TEST_CASE (line 190) | BOOST_AUTO_TEST_CASE(Strings) {
FILE: src/kenlm/lm/quantize.cc
type lm (line 18) | namespace lm {
type ngram (line 19) | namespace ngram {
function MakeBins (line 23) | void MakeBins(std::vector<float> &values, float *centers, uint32_t b...
FILE: src/kenlm/lm/quantize.hh
type lm (line 17) | namespace lm {
type ngram (line 18) | namespace ngram {
type Config (line 20) | struct Config
class DontQuantize (line 23) | class DontQuantize {
method UpdateConfigFromBinary (line 26) | static void UpdateConfigFromBinary(int, const std::vector<uint64_t...
method Size (line 27) | static uint64_t Size(uint8_t /*order*/, const Config &/*config*/) ...
method MiddleBits (line 28) | static uint8_t MiddleBits(const Config &/*config*/) { return 63; }
method LongestBits (line 29) | static uint8_t LongestBits(const Config &/*config*/) { return 31; }
class MiddlePointer (line 31) | class MiddlePointer {
method MiddlePointer (line 33) | MiddlePointer(const DontQuantize & /*quant*/, unsigned char /*or...
method MiddlePointer (line 35) | MiddlePointer() : address_(NULL, 0) {}
method Found (line 37) | bool Found() const {
method Prob (line 41) | float Prob() const {
method Backoff (line 45) | float Backoff() const {
method Rest (line 49) | float Rest() const { return Prob(); }
method Write (line 51) | void Write(float prob, float backoff) {
class LongestPointer (line 60) | class LongestPointer {
method LongestPointer (line 62) | explicit LongestPointer(const DontQuantize &/*quant*/, util::Bit...
method LongestPointer (line 64) | LongestPointer() : address_(NULL, 0) {}
method Found (line 66) | bool Found() const {
method Prob (line 70) | float Prob() const {
method Write (line 74) | void Write(float prob) {
method DontQuantize (line 82) | DontQuantize() {}
method SetupMemory (line 84) | void SetupMemory(void * /*start*/, unsigned char /*order*/, const ...
method Train (line 88) | void Train(uint8_t /*order*/, std::vector<float> &/*prob*/, std::v...
method TrainProb (line 89) | void TrainProb(uint8_t, std::vector<float> &/*prob*/) {}
method FinishedLoading (line 91) | void FinishedLoading(const Config &) {}
class SeparatelyQuantize (line 94) | class SeparatelyQuantize {
class Bins (line 96) | class Bins {
method Bins (line 99) | Bins() {}
method Bins (line 101) | Bins(uint8_t bits, float *begin) : begin_(begin), end_(begin_ + ...
method EncodeProb (line 105) | uint64_t EncodeProb(float value) const {
method EncodeBackoff (line 109) | uint64_t EncodeBackoff(float value) const {
method Decode (line 116) | float Decode(std::size_t off) const { return begin_[off]; }
method Bits (line 118) | uint8_t Bits() const { return bits_; }
method Mask (line 120) | uint64_t Mask() const { return mask_; }
method Encode (line 123) | uint64_t Encode(float value, size_t reserved) const {
method Size (line 141) | static uint64_t Size(uint8_t order, const Config &config) {
method MiddleBits (line 148) | static uint8_t MiddleBits(const Config &config) { return config.pr...
method LongestBits (line 149) | static uint8_t LongestBits(const Config &config) { return config.p...
class MiddlePointer (line 151) | class MiddlePointer {
method MiddlePointer (line 153) | MiddlePointer(const SeparatelyQuantize &quant, unsigned char ord...
method MiddlePointer (line 155) | MiddlePointer() : address_(NULL, 0) {}
method Found (line 157) | bool Found() const { return address_.base != NULL; }
method Prob (line 159) | float Prob() const {
method Backoff (line 163) | float Backoff() const {
method Rest (line 167) | float Rest() const { return Prob(); }
method Write (line 169) | void Write(float prob, float backoff) const {
method Bins (line 175) | const Bins &ProbBins() const { return bins_[0]; }
method Bins (line 176) | const Bins &BackoffBins() const { return bins_[1]; }
class LongestPointer (line 182) | class LongestPointer {
method LongestPointer (line 184) | LongestPointer(const SeparatelyQuantize &quant, const util::BitA...
method LongestPointer (line 186) | LongestPointer() : address_(NULL, 0) {}
method Found (line 188) | bool Found() const { return address_.base != NULL; }
method Write (line 190) | void Write(float prob) const {
method Prob (line 194) | float Prob() const {
method SeparatelyQuantize (line 203) | SeparatelyQuantize() {}
method Bins (line 215) | const Bins *GetTables(unsigned char order_minus_2) const { return ...
method Bins (line 99) | Bins() {}
method Bins (line 101) | Bins(uint8_t bits, float *begin) : begin_(begin), end_(begin_ + ...
method EncodeProb (line 105) | uint64_t EncodeProb(float value) const {
method EncodeBackoff (line 109) | uint64_t EncodeBackoff(float value) const {
method Decode (line 116) | float Decode(std::size_t off) const { return begin_[off]; }
method Bits (line 118) | uint8_t Bits() const { return bits_; }
method Mask (line 120) | uint64_t Mask() const { return mask_; }
method Encode (line 123) | uint64_t Encode(float value, size_t reserved) const {
method Bins (line 217) | const Bins &LongestTable() const { return longest_; }
method Bins (line 99) | Bins() {}
method Bins (line 101) | Bins(uint8_t bits, float *begin) : begin_(begin), end_(begin_ + ...
method EncodeProb (line 105) | uint64_t EncodeProb(float value) const {
method EncodeBackoff (line 109) | uint64_t EncodeBackoff(float value) const {
method Decode (line 116) | float Decode(std::size_t off) const { return begin_[off]; }
method Bits (line 118) | uint8_t Bits() const { return bits_; }
method Mask (line 120) | uint64_t Mask() const { return mask_; }
method Encode (line 123) | uint64_t Encode(float value, size_t reserved) const {
FILE: src/kenlm/lm/query_main.cc
function main (line 3) | int main(int argc, char *argv[]) {
FILE: src/kenlm/lm/read_arpa.cc
type lm (line 20) | namespace lm {
function IsEntirelyWhiteSpace (line 27) | bool IsEntirelyWhiteSpace(const StringPiece &line) {
function ReadCount (line 37) | uint64_t ReadCount(const std::string &from) {
function ReadARPACounts (line 47) | void ReadARPACounts(util::FilePiece &in, std::vector<uint64_t> &number) {
function ReadNGramHeader (line 80) | void ReadNGramHeader(util::FilePiece &in, unsigned int length) {
function ReadBackoff (line 88) | void ReadBackoff(util::FilePiece &in, Prob &/*weights*/) {
function ReadBackoff (line 104) | void ReadBackoff(util::FilePiece &in, float &backoff) {
function ReadEnd (line 133) | void ReadEnd(util::FilePiece &in) {
FILE: src/kenlm/lm/read_arpa.hh
type lm (line 13) | namespace lm {
function ReadBackoff (line 20) | inline void ReadBackoff(util::FilePiece &in, ProbBackoff &weights) {
function ReadBackoff (line 23) | inline void ReadBackoff(util::FilePiece &in, RestWeights &weights) {
class PositiveProbWarn (line 32) | class PositiveProbWarn {
method PositiveProbWarn (line 34) | PositiveProbWarn() : action_(THROW_UP) {}
method PositiveProbWarn (line 36) | explicit PositiveProbWarn(WarningAction action) : action_(action) {}
function Read1Gram (line 44) | void Read1Gram(util::FilePiece &f, Voc &vocab, Weights *unigrams, Posi...
function Read1Grams (line 62) | void Read1Grams(util::FilePiece &f, std::size_t count, Voc &vocab, Wei...
function ReadNGram (line 71) | void ReadNGram(util::FilePiece &f, const unsigned char n, const Voc &v...
FILE: src/kenlm/lm/return.hh
type lm (line 6) | namespace lm {
type FullScoreReturn (line 8) | struct FullScoreReturn {
FILE: src/kenlm/lm/search_hashed.cc
type lm (line 16) | namespace lm {
type ngram (line 17) | namespace ngram {
class ProbingModel (line 19) | class ProbingModel
class ActivateLowerMiddle (line 24) | class ActivateLowerMiddle {
method ActivateLowerMiddle (line 26) | explicit ActivateLowerMiddle(Middle &middle) : modify_(middle) {}
class ActivateUnigram (line 44) | class ActivateUnigram {
method ActivateUnigram (line 46) | explicit ActivateUnigram(Weights *unigram) : modify_(unigram) {}
function FindLower (line 58) | void FindLower(
function AdjustLower (line 81) | void AdjustLower(
function MarkLower (line 139) | void MarkLower(
function ReadNGrams (line 159) | void ReadNGrams(
type detail (line 205) | namespace detail {
class HashedSearch<BackoffValue> (line 289) | class HashedSearch<BackoffValue>
class HashedSearch<RestValue> (line 290) | class HashedSearch<RestValue>
FILE: src/kenlm/lm/search_hashed.hh
type util (line 17) | namespace util { class FilePiece; }
class FilePiece (line 17) | class FilePiece
type lm (line 19) | namespace lm {
type ngram (line 20) | namespace ngram {
type Backing (line 21) | struct Backing
class ProbingVocabulary (line 22) | class ProbingVocabulary
type detail (line 23) | namespace detail {
function CombineWordHash (line 25) | inline uint64_t CombineWordHash(uint64_t current, const WordIndex ...
type ProbEntry (line 32) | struct ProbEntry {
method GetKey (line 37) | uint64_t GetKey() const {
class LongestPointer (line 44) | class LongestPointer {
method LongestPointer (line 46) | explicit LongestPointer(const float &to) : to_(&to) {}
method LongestPointer (line 48) | LongestPointer() : to_(NULL) {}
method Found (line 50) | bool Found() const {
method Prob (line 54) | float Prob() const {
class HashedSearch (line 62) | class HashedSearch {
method UpdateConfigFromBinary (line 75) | static void UpdateConfigFromBinary(int, const std::vector<uint64...
method Size (line 77) | static uint64_t Size(const std::vector<uint64_t> &counts, const ...
method Order (line 91) | unsigned char Order() const {
method UnigramPointer (line 97) | UnigramPointer LookupUnigram(WordIndex word, Node &next, bool &i...
method MiddlePointer (line 106) | MiddlePointer Unpack(uint64_t extend_pointer, unsigned char exte...
method MiddlePointer (line 115) | MiddlePointer LookupMiddle(unsigned char order_minus_2, WordInde...
method LongestPointer (line 128) | LongestPointer LookupLongest(WordIndex word, const Node &node) c...
method FastMakeNode (line 137) | bool FastMakeNode(const WordIndex *begin, const WordIndex *end, ...
class Unigram (line 152) | class Unigram {
method Unigram (line 154) | Unigram() {}
method Unigram (line 156) | Unigram(void *start, uint64_t count, std::size_t /*allocated*/) :
method Size (line 163) | static uint64_t Size(uint64_t count) {
method LoadedBinary (line 176) | void LoadedBinary() {}
FILE: src/kenlm/lm/search_trie.cc
type lm (line 34) | namespace lm {
type ngram (line 35) | namespace ngram {
type trie (line 36) | namespace trie {
function ReadOrThrow (line 39) | void ReadOrThrow(FILE *from, void *data, size_t size) {
function Compare (line 43) | int Compare(unsigned char order, const void *first_void, const voi...
type ProbPointer (line 53) | struct ProbPointer {
class BackoffMessages (line 59) | class BackoffMessages {
method Init (line 61) | void Init(std::size_t entry_size) {
method Add (line 67) | void Add(const WordIndex *to, ProbPointer index) {
method Apply (line 77) | void Apply(float *const *const base, FILE *unigrams) {
method Apply (line 100) | void Apply(float *const *const base, RecordReader &reader) {
method Extends (line 136) | bool Extends(unsigned char order, const WordIndex *words) {
method FinishedAdding (line 154) | void FinishedAdding() {
method Resize (line 164) | void Resize(std::size_t to) {
class SRISucks (line 180) | class SRISucks {
method SRISucks (line 182) | SRISucks() {
method Send (line 187) | void Send(unsigned char begin, unsigned char order, const WordIn...
method ObtainBackoffs (line 198) | void ObtainBackoffs(unsigned char total_order, FILE *unigram_fil...
method ProbBackoff (line 210) | ProbBackoff GetBlank(unsigned char total_order, unsigned char or...
class FindBlanks (line 230) | class FindBlanks {
method FindBlanks (line 232) | FindBlanks(unsigned char order, const ProbBackoff *unigrams, SRI...
method UnigramProb (line 235) | float UnigramProb(WordIndex index) const {
method Unigram (line 239) | void Unigram(WordIndex /*index*/) {
method MiddleBlank (line 243) | void MiddleBlank(const unsigned char order, const WordIndex *ind...
method Middle (line 248) | void Middle(const unsigned char order, const void * /*data*/) {
method Longest (line 252) | void Longest(const void * /*data*/) {
method Cleanup (line 257) | void Cleanup() {
class WriteEntries (line 274) | class WriteEntries {
method WriteEntries (line 276) | WriteEntries(RecordReader *contexts, const Quant &quant, Unigram...
method UnigramProb (line 286) | float UnigramProb(WordIndex index) const { return unigrams_[inde...
method Unigram (line 288) | void Unigram(WordIndex word) {
method MiddleBlank (line 292) | void MiddleBlank(const unsigned char order, const WordIndex *ind...
method Middle (line 297) | void Middle(const unsigned char order, const void *data) {
method Longest (line 308) | void Longest(const void *data) {
method Cleanup (line 313) | void Cleanup() {}
type Gram (line 326) | struct Gram {
method Gram (line 327) | Gram(const WordIndex *in_begin, unsigned char order) : begin(in_...
class BlankManager (line 337) | class BlankManager {
method BlankManager (line 339) | BlankManager(unsigned char total_order, Doing &doing) : total_or...
method Visit (line 343) | void Visit(const WordIndex *to, unsigned char length, float prob) {
function RecursiveInsert (line 384) | void RecursiveInsert(const unsigned char total_order, const WordIn...
function SanityCheckCounts (line 421) | void SanityCheckCounts(const std::vector<uint64_t> &initial, const...
function TrainQuantizer (line 429) | void TrainQuantizer(uint8_t order, uint64_t count, const std::vect...
function TrainProbQuantizer (line 442) | void TrainProbQuantizer(uint8_t order, uint64_t count, RecordReade...
function PopulateUnigramWeights (line 453) | void PopulateUnigramWeights(FILE *file, WordIndex unigram_count, R...
function BuildTrie (line 472) | void BuildTrie(SortedFiles &files, std::vector<uint64_t> &counts, ...
class TrieSearch<DontQuantize, DontBhiksha> (line 605) | class TrieSearch<DontQuantize, DontBhiksha>
class TrieSearch<DontQuantize, ArrayBhiksha> (line 606) | class TrieSearch<DontQuantize, ArrayBhiksha>
class TrieSearch<SeparatelyQuantize, DontBhiksha> (line 607) | class TrieSearch<SeparatelyQuantize, DontBhiksha>
class TrieSearch<SeparatelyQuantize, ArrayBhiksha> (line 608) | class TrieSearch<SeparatelyQuantize, ArrayBhiksha>
FILE: src/kenlm/lm/search_trie.hh
type lm (line 17) | namespace lm {
type ngram (line 18) | namespace ngram {
type Backing (line 19) | struct Backing
class SortedVocabulary (line 20) | class SortedVocabulary
type trie (line 21) | namespace trie {
class TrieSearch (line 23) | class TrieSearch
method UpdateConfigFromBinary (line 41) | static void UpdateConfigFromBinary(int fd, const std::vector<uin...
method Size (line 47) | static uint64_t Size(const std::vector<uint64_t> &counts, const ...
method TrieSearch (line 55) | TrieSearch() : middle_begin_(NULL), middle_end_(NULL) {}
method Order (line 65) | unsigned char Order() const {
method ProbBackoff (line 69) | ProbBackoff &UnknownUnigram() { return unigram_.Unknown(); }
method UnigramPointer (line 71) | UnigramPointer LookupUnigram(WordIndex word, Node &next, bool &i...
method MiddlePointer (line 78) | MiddlePointer Unpack(uint64_t extend_pointer, unsigned char exte...
method MiddlePointer (line 82) | MiddlePointer LookupMiddle(unsigned char order_minus_2, WordInde...
method LongestPointer (line 88) | LongestPointer LookupLongest(WordIndex word, const Node &node) c...
method FastMakeNode (line 92) | bool FastMakeNode(const WordIndex *begin, const WordIndex *end, ...
method FreeMiddles (line 107) | void FreeMiddles() {
class SortedFiles (line 24) | class SortedFiles
class TrieSearch (line 27) | class TrieSearch {
method UpdateConfigFromBinary (line 41) | static void UpdateConfigFromBinary(int fd, const std::vector<uin...
method Size (line 47) | static uint64_t Size(const std::vector<uint64_t> &counts, const ...
method TrieSearch (line 55) | TrieSearch() : middle_begin_(NULL), middle_end_(NULL) {}
method Order (line 65) | unsigned char Order() const {
method ProbBackoff (line 69) | ProbBackoff &UnknownUnigram() { return unigram_.Unknown(); }
method UnigramPointer (line 71) | UnigramPointer LookupUnigram(WordIndex word, Node &next, bool &i...
method MiddlePointer (line 78) | MiddlePointer Unpack(uint64_t extend_pointer, unsigned char exte...
method MiddlePointer (line 82) | MiddlePointer LookupMiddle(unsigned char order_minus_2, WordInde...
method LongestPointer (line 88) | LongestPointer LookupLongest(WordIndex word, const Node &node) c...
method FastMakeNode (line 92) | bool FastMakeNode(const WordIndex *begin, const WordIndex *end, ...
method FreeMiddles (line 107) | void FreeMiddles() {
FILE: src/kenlm/lm/sizes.cc
type lm (line 8) | namespace lm {
type ngram (line 9) | namespace ngram {
function ShowSizes (line 11) | void ShowSizes(const std::vector<uint64_t> &counts, const lm::ngram:...
function ShowSizes (line 51) | void ShowSizes(const std::vector<uint64_t> &counts) {
function ShowSizes (line 56) | void ShowSizes(const char *file, const lm::ngram::Config &config) {
FILE: src/kenlm/lm/sizes.hh
type lm (line 8) | namespace lm { namespace ngram {
type ngram (line 8) | namespace ngram {
type Config (line 10) | struct Config
FILE: src/kenlm/lm/state.hh
type lm (line 10) | namespace lm {
type ngram (line 11) | namespace ngram {
class State (line 15) | class State {
method Compare (line 23) | int Compare(const State &other) const {
method ZeroRemaining (line 34) | void ZeroRemaining() {
method Length (line 41) | unsigned char Length() const { return length; }
function hash_value (line 52) | inline uint64_t hash_value(const State &state, uint64_t seed = 0) {
type Left (line 56) | struct Left {
method Compare (line 63) | int Compare(const Left &other) const {
method ZeroRemaining (line 76) | void ZeroRemaining() {
function hash_value (line 86) | inline uint64_t hash_value(const Left &left) {
type ChartState (line 93) | struct ChartState {
method Compare (line 98) | int Compare(const ChartState &other) const {
method ZeroRemaining (line 108) | void ZeroRemaining() {
function hash_value (line 117) | inline uint64_t hash_value(const ChartState &state) {
FILE: src/kenlm/lm/trie.cc
type lm (line 10) | namespace lm {
type ngram (line 11) | namespace ngram {
type trie (line 12) | namespace trie {
class KeyAccessor (line 15) | class KeyAccessor {
method KeyAccessor (line 17) | KeyAccessor(const void *base, uint64_t key_mask, uint8_t key_bit...
method Key (line 22) | Key operator()(uint64_t index) const {
function FindBitPacked (line 32) | bool FindBitPacked(const void *base, uint64_t key_mask, uint8_t ke...
class BitPackedMiddle<DontBhiksha> (line 123) | class BitPackedMiddle<DontBhiksha>
class BitPackedMiddle<ArrayBhiksha> (line 124) | class BitPackedMiddle<ArrayBhiksha>
FILE: src/kenlm/lm/trie.hh
type lm (line 12) | namespace lm {
type ngram (line 13) | namespace ngram {
type Config (line 14) | struct Config
type trie (line 15) | namespace trie {
type NodeRange (line 17) | struct NodeRange {
type UnigramValue (line 22) | struct UnigramValue {
method Next (line 25) | uint64_t Next() const { return next; }
class UnigramPointer (line 28) | class UnigramPointer {
method UnigramPointer (line 30) | explicit UnigramPointer(const ProbBackoff &to) : to_(&to) {}
method UnigramPointer (line 32) | UnigramPointer() : to_(NULL) {}
method Found (line 34) | bool Found() const { return to_ != NULL; }
method Prob (line 36) | float Prob() const { return to_->prob; }
method Backoff (line 37) | float Backoff() const { return to_->backoff; }
method Rest (line 38) | float Rest() const { return Prob(); }
class Unigram (line 44) | class Unigram {
method Unigram (line 46) | Unigram() {}
method Init (line 48) | void Init(void *start) {
method Size (line 52) | static uint64_t Size(uint64_t count) {
method ProbBackoff (line 57) | const ProbBackoff &Lookup(WordIndex index) const { return unigra...
method ProbBackoff (line 59) | ProbBackoff &Unknown() { return unigram_[0].weights; }
method UnigramValue (line 61) | UnigramValue *Raw() {
method LoadedBinary (line 65) | void LoadedBinary() {}
method UnigramPointer (line 67) | UnigramPointer Find(WordIndex word, NodeRange &next) const {
class BitPacked (line 78) | class BitPacked {
method BitPacked (line 80) | BitPacked() {}
method InsertIndex (line 82) | uint64_t InsertIndex() const {
class BitPackedMiddle (line 100) | class BitPackedMiddle : public BitPacked {
method LoadedBinary (line 111) | void LoadedBinary() { bhiksha_.LoadedBinary(); }
method ReadEntry (line 115) | util::BitAddress ReadEntry(uint64_t pointer, NodeRange &range) {
class BitPackedLongest (line 129) | class BitPackedLongest : public BitPacked {
method Size (line 131) | static uint64_t Size(uint8_t quant_bits, uint64_t entries, uint6...
method BitPackedLongest (line 135) | BitPackedLongest() {}
method Init (line 137) | void Init(void *base, uint8_t quant_bits, uint64_t max_vocab) {
method LoadedBinary (line 141) | void LoadedBinary() {}
FILE: src/kenlm/lm/trie_sort.cc
type lm (line 22) | namespace lm {
type ngram (line 23) | namespace ngram {
type trie (line 24) | namespace trie {
class PartialViewProxy (line 30) | class PartialViewProxy {
method PartialViewProxy (line 32) | PartialViewProxy() : attention_size_(0), inner_() {}
method PartialViewProxy (line 34) | PartialViewProxy(void *ptr, std::size_t block_size, std::size_t ...
method PartialViewProxy (line 40) | PartialViewProxy &operator=(const PartialViewProxy &from) {
method PartialViewProxy (line 45) | PartialViewProxy &operator=(const std::string &from) {
method InnerIterator (line 61) | InnerIterator &Inner() { return inner_; }
method InnerIterator (line 62) | const InnerIterator &Inner() const { return inner_; }
function FILE (line 68) | FILE *DiskFlush(const void *mem_begin, const void *mem_end, const ...
function FILE (line 74) | FILE *WriteContextFile(uint8_t *begin, uint8_t *end, const std::st...
class Closer (line 210) | class Closer {
method Closer (line 212) | explicit Closer(std::deque<FILE*> &files) : files_(files) {}
method PopFront (line 220) | void PopFront() {
FILE: src/kenlm/lm/trie_sort.hh
type util (line 19) | namespace util {
class FilePiece (line 20) | class FilePiece
type lm (line 23) | namespace lm {
class PositiveProbWarn (line 24) | class PositiveProbWarn
type ngram (line 25) | namespace ngram {
class SortedVocabulary (line 26) | class SortedVocabulary
type Config (line 27) | struct Config
type trie (line 29) | namespace trie {
class EntryCompare (line 31) | class EntryCompare : public std::binary_function<const void*, cons...
method EntryCompare (line 33) | explicit EntryCompare(unsigned char order) : order_(order) {}
class RecordReader (line 49) | class RecordReader {
method RecordReader (line 51) | RecordReader() : remains_(true) {}
method RecordReader (line 58) | RecordReader &operator++() {
method EntrySize (line 71) | std::size_t EntrySize() const { return entry_size_; }
class SortedFiles (line 85) | class SortedFiles {
method StealUnigram (line 90) | int StealUnigram() {
method FILE (line 94) | FILE *Full(unsigned char order) {
method FILE (line 98) | FILE *Context(unsigned char of_order) {
FILE: src/kenlm/lm/value.hh
type lm (line 11) | namespace lm {
type ngram (line 12) | namespace ngram {
class GenericProbingProxy (line 15) | class GenericProbingProxy {
method GenericProbingProxy (line 17) | explicit GenericProbingProxy(const Weights &to) : to_(&to) {}
method GenericProbingProxy (line 19) | GenericProbingProxy() : to_(0) {}
method Found (line 21) | bool Found() const { return to_ != 0; }
method Prob (line 23) | float Prob() const {
method Backoff (line 30) | float Backoff() const { return to_->backoff; }
method IndependentLeft (line 32) | bool IndependentLeft() const {
class GenericTrieUnigramProxy (line 43) | class GenericTrieUnigramProxy {
method GenericTrieUnigramProxy (line 45) | explicit GenericTrieUnigramProxy(const Weights &to) : to_(&to) {}
method GenericTrieUnigramProxy (line 47) | GenericTrieUnigramProxy() : to_(0) {}
method Found (line 49) | bool Found() const { return to_ != 0; }
method Prob (line 50) | float Prob() const { return to_->prob; }
method Backoff (line 51) | float Backoff() const { return to_->backoff; }
method Rest (line 52) | float Rest() const { return Prob(); }
type BackoffValue (line 58) | struct BackoffValue {
class ProbingProxy (line 62) | class ProbingProxy : public GenericProbingProxy<Weights> {
method ProbingProxy (line 64) | explicit ProbingProxy(const Weights &to) : GenericProbingProxy<W...
method ProbingProxy (line 65) | ProbingProxy() {}
method Rest (line 66) | float Rest() const { return Prob(); }
class TrieUnigramProxy (line 69) | class TrieUnigramProxy : public GenericTrieUnigramProxy<Weights> {
method TrieUnigramProxy (line 71) | explicit TrieUnigramProxy(const Weights &to) : GenericTrieUnigra...
method TrieUnigramProxy (line 72) | TrieUnigramProxy() {}
method Rest (line 73) | float Rest() const { return Prob(); }
type ProbingEntry (line 76) | struct ProbingEntry {
method GetKey (line 81) | uint64_t GetKey() const { return key; }
type TrieUnigramValue (line 84) | struct TrieUnigramValue {
method Next (line 87) | uint64_t Next() const { return next; }
method Callback (line 92) | void Callback(const Config &, unsigned int, typename Model::Vocabu...
type RestValue (line 98) | struct RestValue {
class ProbingProxy (line 102) | class ProbingProxy : public GenericProbingProxy<RestWeights> {
method ProbingProxy (line 104) | explicit ProbingProxy(const Weights &to) : GenericProbingProxy<R...
method ProbingProxy (line 105) | ProbingProxy() {}
method Rest (line 106) | float Rest() const { return to_->rest; }
class TrieUnigramProxy (line 109) | class TrieUnigramProxy : public GenericTrieUnigramProxy<Weights> {
method TrieUnigramProxy (line 111) | explicit TrieUnigramProxy(const Weights &to) : GenericTrieUnigra...
method TrieUnigramProxy (line 112) | TrieUnigramProxy() {}
method Rest (line 113) | float Rest() const { return to_->rest; }
type ProbingEntry (line 119) | struct ProbingEntry {
method Key (line 124) | Key GetKey() const { return key; }
type TrieUnigramValue (line 127) | struct TrieUnigramValue {
method Next (line 130) | uint64_t Next() const { return next; }
method Callback (line 136) | void Callback(const Config &config, unsigned int order, typename M...
FILE: src/kenlm/lm/value_build.cc
type lm (line 6) | namespace lm {
type ngram (line 7) | namespace ngram {
class LowerRestBuild<ProbingModel> (line 55) | class LowerRestBuild<ProbingModel>
FILE: src/kenlm/lm/value_build.hh
type lm (line 10) | namespace lm {
type ngram (line 11) | namespace ngram {
type Config (line 13) | struct Config
type BackoffValue (line 14) | struct BackoffValue
type RestValue (line 15) | struct RestValue
class NoRestBuild (line 17) | class NoRestBuild {
method NoRestBuild (line 21) | NoRestBuild() {}
method SetRest (line 23) | void SetRest(const WordIndex *, unsigned int, const Prob &/*prob*/...
method SetRest (line 24) | void SetRest(const WordIndex *, unsigned int, const ProbBackoff &)...
method MarkExtends (line 26) | bool MarkExtends(ProbBackoff &weights, const Second &) const {
class MaxRestBuild (line 35) | class MaxRestBuild {
method MaxRestBuild (line 39) | MaxRestBuild() {}
method SetRest (line 41) | void SetRest(const WordIndex *, unsigned int, const Prob &/*prob*/...
method SetRest (line 42) | void SetRest(const WordIndex *, unsigned int, RestWeights &weights...
method MarkExtends (line 47) | bool MarkExtends(RestWeights &weights, const RestWeights &to) const {
method MarkExtends (line 53) | bool MarkExtends(RestWeights &weights, const Prob &to) const {
class LowerRestBuild (line 64) | class LowerRestBuild {
method SetRest (line 72) | void SetRest(const WordIndex *, unsigned int, const Prob &/*prob*/...
method SetRest (line 73) | void SetRest(const WordIndex *vocab_ids, unsigned int n, RestWeigh...
method MarkExtends (line 82) | bool MarkExtends(RestWeights &weights, const Second &) const {
FILE: src/kenlm/lm/virtual_interface.cc
type lm (line 5) | namespace lm {
type base (line 6) | namespace base {
FILE: src/kenlm/lm/virtual_interface.hh
type lm (line 10) | namespace lm {
type base (line 11) | namespace base {
class ModelFacade (line 13) | class ModelFacade
class Vocabulary (line 28) | class Vocabulary {
method WordIndex (line 32) | WordIndex BeginSentence() const { return begin_sentence_; }
method WordIndex (line 33) | WordIndex EndSentence() const { return end_sentence_; }
method WordIndex (line 34) | WordIndex NotFound() const { return not_found_; }
method WordIndex (line 41) | virtual WordIndex Index(const std::string &str) const {
method WordIndex (line 44) | virtual WordIndex Index(const char *str) const {
method Vocabulary (line 50) | Vocabulary() {}
method Vocabulary (line 52) | Vocabulary(WordIndex begin_sentence, WordIndex end_sentence, WordI...
class Model (line 116) | class Model {
method StateSize (line 120) | size_t StateSize() const { return state_size_; }
method Order (line 130) | unsigned char Order() const { return order_; }
method Vocabulary (line 132) | const Vocabulary &BaseVocabulary() const { return *base_vocab_; }
method Model (line 136) | explicit Model(size_t state_size) : state_size_(state_size) {}
FILE: src/kenlm/lm/weights.hh
type lm (line 6) | namespace lm {
type Prob (line 7) | struct Prob {
type ProbBackoff (line 11) | struct ProbBackoff {
type RestWeights (line 15) | struct RestWeights {
FILE: src/kenlm/lm/word_index.hh
type lm (line 7) | namespace lm {
FILE: src/kenlm/util/bit_packing.cc
type util (line 6) | namespace util {
type StaticCheck (line 9) | struct StaticCheck {}
type StaticCheck<true> (line 10) | struct StaticCheck<true> { typedef bool StaticAssertionPassed; }
function RequiredBits (line 17) | uint8_t RequiredBits(uint64_t max_value) {
function BitPackingSanity (line 24) | void BitPackingSanity() {
FILE: src/kenlm/util/bit_packing.hh
type util (line 34) | namespace util {
function BitPackShift (line 38) | inline uint8_t BitPackShift(uint8_t bit, uint8_t /*length*/) {
function BitPackShift (line 42) | inline uint8_t BitPackShift(uint8_t bit, uint8_t length) {
function ReadOff (line 49) | inline uint64_t ReadOff(const void *base, uint64_t bit_off) {
function ReadInt57 (line 64) | inline uint64_t ReadInt57(const void *base, uint64_t bit_off, uint8_t ...
function WriteInt57 (line 70) | inline void WriteInt57(void *base, uint64_t bit_off, uint8_t length, u...
function ReadInt25 (line 84) | inline uint32_t ReadInt25(const void *base, uint64_t bit_off, uint8_t ...
function WriteInt25 (line 95) | inline void WriteInt25(void *base, uint64_t bit_off, uint8_t length, u...
function ReadFloat32 (line 110) | inline float ReadFloat32(const void *base, uint64_t bit_off) {
function WriteFloat32 (line 115) | inline void WriteFloat32(void *base, uint64_t bit_off, float value) {
function SetSign (line 123) | inline void SetSign(float &to) {
function UnsetSign (line 130) | inline void UnsetSign(float &to) {
function ReadNonPositiveFloat31 (line 137) | inline float ReadNonPositiveFloat31(const void *base, uint64_t bit_off) {
function WriteNonPositiveFloat31 (line 144) | inline void WriteNonPositiveFloat31(void *base, uint64_t bit_off, floa...
type BitsMask (line 157) | struct BitsMask {
method BitsMask (line 158) | static BitsMask ByMax(uint64_t max_value) {
method BitsMask (line 163) | static BitsMask ByBits(uint8_t bits) {
method FromMax (line 169) | void FromMax(uint64_t max_value) {
type BitAddress (line 177) | struct BitAddress {
method BitAddress (line 178) | BitAddress(void *in_base, uint64_t in_offset) : base(in_base), offse...
FILE: src/kenlm/util/bit_packing_test.cc
type util (line 8) | namespace util {
function BOOST_AUTO_TEST_CASE (line 14) | BOOST_AUTO_TEST_CASE(ZeroBit57) {
function BOOST_AUTO_TEST_CASE (line 21) | BOOST_AUTO_TEST_CASE(EachBit57) {
function BOOST_AUTO_TEST_CASE (line 30) | BOOST_AUTO_TEST_CASE(Consecutive57) {
function BOOST_AUTO_TEST_CASE (line 42) | BOOST_AUTO_TEST_CASE(Consecutive25) {
function BOOST_AUTO_TEST_CASE (line 54) | BOOST_AUTO_TEST_CASE(Sanity) {
FILE: src/kenlm/util/double-conversion/bignum-dtoa.cc
type double_conversion (line 35) | namespace double_conversion {
function NormalizedExponent (line 37) | static int NormalizedExponent(uint64_t significand, int exponent) {
function BignumDtoa (line 89) | void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits,
function GenerateShortestDigits (line 185) | static void GenerateShortestDigits(Bignum* numerator, Bignum* denomina...
function GenerateCountedDigits (line 283) | static void GenerateCountedDigits(int count, int* decimal_point,
function BignumToFixed (line 325) | static void BignumToFixed(int requested_digits, int* decimal_point,
function EstimatePower (line 384) | static int EstimatePower(int exponent) {
function InitialScaledStartValuesPositiveExponent (line 416) | static void InitialScaledStartValuesPositiveExponent(
function InitialScaledStartValuesNegativeExponentPositivePower (line 449) | static void InitialScaledStartValuesNegativeExponentPositivePower(
function InitialScaledStartValuesNegativeExponentNegativePower (line 483) | static void InitialScaledStartValuesNegativeExponentNegativePower(
function InitialScaledStartValues (line 567) | static void InitialScaledStartValues(uint64_t significand,
function FixupMultiply10 (line 611) | static void FixupMultiply10(int estimated_power, bool is_even,
FILE: src/kenlm/util/double-conversion/bignum-dtoa.h
function namespace (line 33) | namespace double_conversion {
FILE: src/kenlm/util/double-conversion/bignum.cc
type double_conversion (line 31) | namespace double_conversion {
function BitSize (line 42) | static int BitSize(S value) {
function ReadUInt64 (line 88) | static uint64_t ReadUInt64(Vector<const char> buffer,
function HexCharValue (line 122) | static int HexCharValue(char c) {
function SizeInHexChars (line 550) | static int SizeInHexChars(S number) {
function HexCharOfValue (line 561) | static char HexCharOfValue(int value) {
FILE: src/kenlm/util/double-conversion/bignum.h
function namespace (line 33) | namespace double_conversion {
FILE: src/kenlm/util/double-conversion/cached-powers.cc
type double_conversion (line 36) | namespace double_conversion {
type CachedPower (line 38) | struct CachedPower {
FILE: src/kenlm/util/double-conversion/cached-powers.h
function namespace (line 33) | namespace double_conversion {
FILE: src/kenlm/util/double-conversion/diy-fp.cc
type double_conversion (line 32) | namespace double_conversion {
FILE: src/kenlm/util/double-conversion/diy-fp.h
function namespace (line 33) | namespace double_conversion {
FILE: src/kenlm/util/double-conversion/double-conversion.cc
type double_conversion (line 40) | namespace double_conversion {
function DoubleToStringConverter (line 42) | const DoubleToStringConverter& DoubleToStringConverter::EcmaScriptConv...
function BignumDtoaMode (line 341) | static BignumDtoaMode DtoaToBignumDtoaMode(
function ConsumeSubString (line 420) | static bool ConsumeSubString(const char** current,
function AdvanceToNonspace (line 444) | static inline bool AdvanceToNonspace(const char** current, const char*...
function isDigit (line 453) | static bool isDigit(int x, int radix) {
function SignedZero (line 460) | static double SignedZero(bool sign) {
function RadixStringToIeee (line 467) | static double RadixStringToIeee(const char* current,
FILE: src/kenlm/util/double-conversion/double-conversion.h
function namespace (line 33) | namespace double_conversion {
function class (line 381) | class StringToDoubleConverter {
function StringToFloat (line 512) | float StringToFloat(const char* buffer,
FILE: src/kenlm/util/double-conversion/fast-dtoa.cc
type double_conversion (line 34) | namespace double_conversion {
function RoundWeed (line 61) | static bool RoundWeed(Vector<char> buffer,
function RoundWeedCounted (line 181) | static bool RoundWeedCounted(Vector<char> buffer,
function BiggestPowerTen (line 240) | static void BiggestPowerTen(uint32_t number,
function DigitGen (line 303) | static bool DigitGen(DiyFp low,
function DigitGenCounted (line 429) | static bool DigitGenCounted(DiyFp w,
function Grisu3 (line 518) | static bool Grisu3(double v,
function Grisu3Counted (line 590) | static bool Grisu3Counted(double v,
function FastDtoa (line 634) | bool FastDtoa(double v,
FILE: src/kenlm/util/double-conversion/fast-dtoa.h
function namespace (line 33) | namespace double_conversion {
FILE: src/kenlm/util/double-conversion/fixed-dtoa.cc
type double_conversion (line 33) | namespace double_conversion {
class UInt128 (line 37) | class UInt128 {
method UInt128 (line 39) | UInt128() : high_bits_(0), low_bits_(0) { }
method UInt128 (line 40) | UInt128(uint64_t high, uint64_t low) : high_bits_(high), low_bits_(l...
method Multiply (line 42) | void Multiply(uint32_t multiplicand) {
method Shift (line 59) | void Shift(int shift_amount) {
method DivModPowerOf2 (line 82) | int DivModPowerOf2(int power) {
method IsZero (line 97) | bool IsZero() const {
method BitAt (line 101) | int BitAt(int position) {
function FillDigits32FixedLength (line 120) | static void FillDigits32FixedLength(uint32_t number, int requested_len...
function FillDigits32 (line 130) | static void FillDigits32(uint32_t number, Vector<char> buffer, int* le...
function FillDigits64FixedLength (line 153) | static void FillDigits64FixedLength(uint64_t number, int requested_len...
function FillDigits64 (line 168) | static void FillDigits64(uint64_t number, Vector<char> buffer, int* le...
function RoundUp (line 189) | static void RoundUp(Vector<char> buffer, int* length, int* decimal_poi...
function FillFractionals (line 230) | static void FillFractionals(uint64_t fractionals, int exponent,
function TrimZeros (line 289) | static void TrimZeros(Vector<char> buffer, int* length, int* decimal_p...
function FastFixedDtoa (line 307) | bool FastFixedDtoa(double v,
FILE: src/kenlm/util/double-conversion/fixed-dtoa.h
function namespace (line 33) | namespace double_conversion {
FILE: src/kenlm/util/double-conversion/ieee.h
function namespace (line 33) | namespace double_conversion {
function IsDenormal (line 131) | bool IsDenormal() const {
function IsNan (line 143) | bool IsNan() const {
function DiyFp (line 162) | DiyFp UpperBoundary() const {
function SignificandSizeForOrderOfMagnitude (line 208) | static int SignificandSizeForOrderOfMagnitude(int order) {
function Infinity (line 216) | static double Infinity() {
function NaN (line 220) | static double NaN() {
function DiyFpToUint64 (line 233) | static uint64_t DiyFpToUint64(DiyFp diy_fp) {
function class (line 261) | class Single {
function IsDenormal (line 307) | bool IsDenormal() const {
function IsNan (line 319) | bool IsNan() const {
function NormalizedBoundaries (line 340) | void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const {
function DiyFp (line 358) | DiyFp UpperBoundary() const {
function Infinity (line 378) | static float Infinity() {
function NaN (line 382) | static float NaN() {
FILE: src/kenlm/util/double-conversion/strtod.cc
type double_conversion (line 36) | namespace double_conversion {
function TrimLeadingZeros (line 91) | static Vector<const char> TrimLeadingZeros(Vector<const char> buffer) {
function TrimTrailingZeros (line 101) | static Vector<const char> TrimTrailingZeros(Vector<const char> buffer) {
function CutToMaxSignificantDigits (line 111) | static void CutToMaxSignificantDigits(Vector<const char> buffer,
function TrimAndCut (line 133) | static void TrimAndCut(Vector<const char> buffer, int exponent,
function ReadUint64 (line 157) | static uint64_t ReadUint64(Vector<const char> buffer,
function ReadDiyFp (line 175) | static void ReadDiyFp(Vector<const char> buffer,
function DoubleStrtod (line 196) | static bool DoubleStrtod(Vector<const char> trimmed,
function DiyFp (line 250) | static DiyFp AdjustmentPowerOfTen(int exponent) {
function DiyFpStrtod (line 274) | static bool DiyFpStrtod(Vector<const char> buffer,
function CompareBufferWithDiyFp (line 392) | static int CompareBufferWithDiyFp(Vector<const char> buffer,
function ComputeGuess (line 423) | static bool ComputeGuess(Vector<const char> trimmed, int exponent,
function Strtod (line 448) | double Strtod(Vector<const char> buffer, int exponent) {
function Strtof (line 474) | float Strtof(Vector<const char> buffer, int exponent) {
FILE: src/kenlm/util/double-conversion/strtod.h
function namespace (line 33) | namespace double_conversion {
FILE: src/kenlm/util/double-conversion/utils.h
type __int64 (line 83) | typedef __int64 int64_t;
function namespace (line 129) | namespace double_conversion {
FILE: src/kenlm/util/ersatz_progress.cc
type util (line 8) | namespace util {
FILE: src/kenlm/util/ersatz_progress.hh
type util (line 12) | namespace util {
class ErsatzProgress (line 16) | class ErsatzProgress {
method ErsatzProgress (line 26) | ErsatzProgress &operator++() {
method ErsatzProgress (line 31) | ErsatzProgress &operator+=(uint64_t amount) {
method Set (line 36) | void Set(uint64_t to) {
method Finished (line 40) | void Finished() {
FILE: src/kenlm/util/exception.cc
type util (line 10) | namespace util {
function Exception (line 19) | Exception &Exception::operator=(const Exception &from) {
FILE: src/kenlm/util/exception.hh
type util (line 11) | namespace util {
class Exception (line 15) | class Exception : public std::exception {
type ExceptionTag (line 38) | struct ExceptionTag {
class ErrnoException (line 102) | class ErrnoException : public Exception {
method Error (line 108) | int Error() const throw() { return errno_; }
class OverflowException (line 115) | class OverflowException : public Exception {
function CheckOverflowInternal (line 121) | inline std::size_t CheckOverflowInternal(uint64_t value) {
function CheckOverflow (line 130) | inline std::size_t CheckOverflow(uint64_t value) {
FILE: src/kenlm/util/fake_ofstream.hh
type util (line 14) | namespace util {
class FakeOFStream (line 15) | class FakeOFStream {
method FakeOFStream (line 20) | explicit FakeOFStream(int out)
method FakeOFStream (line 31) | FakeOFStream &operator<<(float value) {
method FakeOFStream (line 38) | FakeOFStream &operator<<(double value) {
method FakeOFStream (line 44) | FakeOFStream &operator<<(StringPiece str) {
method FakeOFStream (line 56) | FakeOFStream &operator<<(unsigned value) {
method FakeOFStream (line 60) | FakeOFStream &operator<<(char c) {
method Flush (line 67) | void Flush() {
method EnsureRemaining (line 73) | void EnsureRemaining(std::size_t amount) {
FILE: src/kenlm/util/file.cc
type util (line 30) | namespace util {
function OpenReadOrThrow (line 58) | int OpenReadOrThrow(const char *name) {
function CreateOrThrow (line 68) | int CreateOrThrow(const char *name) {
function SizeFile (line 78) | uint64_t SizeFile(int fd) {
function SizeOrThrow (line 96) | uint64_t SizeOrThrow(int fd) {
function ResizeOrThrow (line 102) | void ResizeOrThrow(int fd, uint64_t to) {
function PartialRead (line 114) | std::size_t PartialRead(int fd, void *to, std::size_t amount) {
function ReadOrThrow (line 129) | void ReadOrThrow(int fd, void *to_void, std::size_t amount) {
function ReadOrEOF (line 139) | std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount) {
function PReadOrThrow (line 151) | void PReadOrThrow(int fd, void *to_void, std::size_t size, uint64_t of...
function WriteOrThrow (line 189) | void WriteOrThrow(int fd, const void *data_void, std::size_t size) {
function WriteOrThrow (line 207) | void WriteOrThrow(FILE *to, const void *data, std::size_t size) {
function FSyncOrThrow (line 212) | void FSyncOrThrow(int fd) {
type CheckOffT (line 223) | struct CheckOffT
type CheckOffT<8> (line 224) | struct CheckOffT<8> {
type True (line 225) | struct True {}
function InternalSeek (line 233) | void InternalSeek(int fd, int64_t off, int whence) {
function SeekOrThrow (line 246) | void SeekOrThrow(int fd, uint64_t off) {
function AdvanceOrThrow (line 250) | void AdvanceOrThrow(int fd, int64_t off) {
function SeekEnd (line 254) | void SeekEnd(int fd) {
function mkstemp_and_unlink (line 292) | int
function mkstemp_and_unlink (line 384) | int
function NormalizeTempPrefix (line 396) | void NormalizeTempPrefix(std::string &base) {
FILE: src/kenlm/util/file.hh
type util (line 12) | namespace util {
class scoped_fd (line 14) | class scoped_fd {
method scoped_fd (line 16) | scoped_fd() : fd_(-1) {}
method scoped_fd (line 18) | explicit scoped_fd(int fd) : fd_(fd) {}
method reset (line 22) | void reset(int to = -1) {
method get (line 27) | int get() const { return fd_; }
method release (line 31) | int release() {
class scoped_FILE (line 44) | class scoped_FILE {
method scoped_FILE (line 46) | explicit scoped_FILE(std::FILE *file = NULL) : file_(file) {}
method reset (line 53) | void reset(std::FILE *to = NULL) {
class FDException (line 69) | class FDException : public ErrnoException {
method FD (line 76) | int FD() const { return fd_; }
class EndOfFileException (line 88) | class EndOfFileException : public Exception {
FILE: src/kenlm/util/file_piece.cc
type util (line 24) | namespace util {
function NamePossiblyFind (line 40) | std::string NamePossiblyFind(int fd, const char *name) {
function StringPiece (line 66) | StringPiece FilePiece::ReadLine(char delim) {
function ParseNumber (line 140) | void ParseNumber(const char *begin, const char *&end, float &out) {
function ParseNumber (line 145) | void ParseNumber(const char *begin, const char *&end, double &out) {
function ParseNumber (line 150) | void ParseNumber(const char *begin, const char *&end, long int &out) {
function ParseNumber (line 155) | void ParseNumber(const char *begin, const char *&end, unsigned long in...
function T (line 162) | T FilePiece::ReadNumber() {
FILE: src/kenlm/util/file_piece.hh
type util (line 17) | namespace util {
class ParseNumberException (line 19) | class ParseNumberException : public Exception {
class FilePiece (line 28) | class FilePiece {
method get (line 44) | char get() {
method StringPiece (line 53) | StringPiece ReadDelimited(const bool *delim = kSpaces) {
method SkipSpaces (line 68) | void SkipSpaces(const bool *delim = kSpaces) {
method Offset (line 75) | uint64_t Offset() const {
method StringPiece (line 88) | StringPiece Consume(const char *to) {
FILE: src/kenlm/util/file_piece_test.cc
type util (line 16) | namespace util {
function FileLocation (line 19) | std::string FileLocation() {
function BOOST_AUTO_TEST_CASE (line 28) | BOOST_AUTO_TEST_CASE(IStream) {
function BOOST_AUTO_TEST_CASE (line 42) | BOOST_AUTO_TEST_CASE(MMapReadLine) {
function BOOST_AUTO_TEST_CASE (line 61) | BOOST_AUTO_TEST_CASE(StreamReadLine) {
function BOOST_AUTO_TEST_CASE (line 88) | BOOST_AUTO_TEST_CASE(PlainZipReadLine) {
function BOOST_AUTO_TEST_CASE (line 112) | BOOST_AUTO_TEST_CASE(StreamZipReadLine) {
FILE: src/kenlm/util/getopt.c
function getopt (line 32) | int
FILE: src/kenlm/util/joint_sort.hh
type util (line 14) | namespace util {
type detail (line 16) | namespace detail {
class JointProxy (line 18) | class JointProxy
method JointProxy (line 60) | JointProxy(const KeyIter &key_iter, const ValueIter &value_iter) :...
method JointProxy (line 61) | JointProxy(const JointProxy<KeyIter, ValueIter> &other) : inner_(o...
method JointProxy (line 70) | JointProxy &operator=(const JointProxy &other) {
method JointProxy (line 76) | JointProxy &operator=(const value_type &other) {
method GetKey (line 82) | typename std::iterator_traits<KeyIter>::reference GetKey() const {
method swap (line 86) | void swap(JointProxy<KeyIter, ValueIter> &other) {
method InnerIterator (line 94) | InnerIterator &Inner() { return inner_; }
method InnerIterator (line 95) | const InnerIterator &Inner() const { return inner_; }
class JointIter (line 20) | class JointIter {
method JointIter (line 22) | JointIter() {}
method JointIter (line 24) | JointIter(const KeyIter &key_iter, const ValueIter &value_iter) : ...
method swap (line 38) | void swap(const JointIter &other) {
class JointProxy (line 49) | class JointProxy {
method JointProxy (line 60) | JointProxy(const KeyIter &key_iter, const ValueIter &value_iter) :...
method JointProxy (line 61) | JointProxy(const JointProxy<KeyIter, ValueIter> &other) : inner_(o...
method JointProxy (line 70) | JointProxy &operator=(const JointProxy &other) {
method JointProxy (line 76) | JointProxy &operator=(const value_type &other) {
method GetKey (line 82) | typename std::iterator_traits<KeyIter>::reference GetKey() const {
method swap (line 86) | void swap(JointProxy<KeyIter, ValueIter> &other) {
method InnerIterator (line 94) | InnerIterator &Inner() { return inner_; }
method InnerIterator (line 95) | const InnerIterator &Inner() const { return inner_; }
class LessWrapper (line 99) | class LessWrapper : public std::binary_function<const typename Proxy...
method LessWrapper (line 101) | explicit LessWrapper(const Less &less) : less_(less) {}
class PairedIterator (line 122) | class PairedIterator : public ProxyIterator<detail::JointProxy<KeyIter...
method PairedIterator (line 124) | PairedIterator(const KeyIter &key, const ValueIter &value) :
function JointSort (line 128) | void JointSort(const KeyIter &key_begin, const KeyIter &key_end, const...
function JointSort (line 135) | void JointSort(const KeyIter &key_begin, const KeyIter &key_end, const...
type std (line 141) | namespace std {
function swap (line 142) | void swap(util::detail::JointIter<KeyIter, ValueIter> &left, util::det...
function swap (line 146) | void swap(util::detail::JointProxy<KeyIter, ValueIter> &left, util::de...
FILE: src/kenlm/util/joint_sort_test.cc
type util (line 6) | namespace util { namespace {
function BOOST_AUTO_TEST_CASE (line 8) | BOOST_AUTO_TEST_CASE(just_flip) {
function BOOST_AUTO_TEST_CASE (line 20) | BOOST_AUTO_TEST_CASE(three) {
function BOOST_AUTO_TEST_CASE (line 32) | BOOST_AUTO_TEST_CASE(char_int) {
FILE: src/kenlm/util/mmap.cc
type util (line 27) | namespace util {
function SizePage (line 29) | long SizePage() {
function SyncOrThrow (line 39) | void SyncOrThrow(void *start, size_t length) {
function UnmapOrThrow (line 47) | void UnmapOrThrow(void *start, size_t length) {
function MapRead (line 136) | void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t s...
function MapAnonymous (line 159) | void MapAnonymous(std::size_t size, util::scoped_memory &to) {
FILE: src/kenlm/util/mmap.hh
type util (line 10) | namespace util {
class scoped_fd (line 12) | class scoped_fd
class scoped_mmap (line 17) | class scoped_mmap {
method scoped_mmap (line 19) | scoped_mmap() : data_((void*)-1), size_(0) {}
method scoped_mmap (line 20) | scoped_mmap(void *data, std::size_t size) : data_(data), size_(size) {}
method size (line 27) | std::size_t size() const { return size_; }
method reset (line 29) | void reset(void *data, std::size_t size) {
method reset (line 35) | void reset() {
class scoped_memory (line 51) | class scoped_memory {
method scoped_memory (line 55) | scoped_memory() : data_(NULL), size_(0), source_(NONE_ALLOCATED) {}
method size (line 62) | std::size_t size() const { return size_; }
method Alloc (line 64) | Alloc source() const { return source_; }
method reset (line 66) | void reset() { reset(NULL, 0, NONE_ALLOCATED); }
FILE: src/kenlm/util/multi_intersection.hh
type util (line 11) | namespace util {
type detail (line 13) | namespace detail {
type RangeLessBySize (line 14) | struct RangeLessBySize : public std::binary_function<const Range &, ...
function FirstIntersectionSorted (line 27) | boost::optional<typename std::iterator_traits<Iterator>::value_type>...
function FirstIntersection (line 52) | boost::optional<typename std::iterator_traits<Iterator>::value_type> F...
function FirstIntersection (line 59) | boost::optional<typename std::iterator_traits<Iterator>::value_type> F...
function AllIntersection (line 63) | void AllIntersection(std::vector<boost::iterator_range<Iterator> > &se...
function AllIntersection (line 74) | void AllIntersection(std::vector<boost::iterator_range<Iterator> > &se...
FILE: src/kenlm/util/multi_intersection_test.cc
type util (line 6) | namespace util {
function BOOST_AUTO_TEST_CASE (line 9) | BOOST_AUTO_TEST_CASE(Empty) {
function BOOST_AUTO_TEST_CASE (line 16) | BOOST_AUTO_TEST_CASE(Single) {
function RangeFromArray (line 30) | boost::iterator_range<const T*> RangeFromArray(const T (&arr)[len]) {
function BOOST_AUTO_TEST_CASE (line 34) | BOOST_AUTO_TEST_CASE(MultiNone) {
function BOOST_AUTO_TEST_CASE (line 47) | BOOST_AUTO_TEST_CASE(MultiOne) {
FILE: src/kenlm/util/murmur_hash.cc
type util (line 16) | namespace util {
function MurmurHash64A (line 26) | uint64_t MurmurHash64A ( const void * key, std::size_t len, uint64_t s...
function MurmurHash64B (line 84) | uint64_t MurmurHash64B ( const void * key, std::size_t len, uint64_t s...
function MurmurHashNativeBackend (line 156) | inline uint64_t MurmurHashNativeBackend(const void * key, std::size_t ...
function MurmurHashNative (line 164) | uint64_t MurmurHashNative(const void * key, std::size_t len, uint64_t ...
FILE: src/kenlm/util/murmur_hash.hh
type util (line 6) | namespace util {
FILE: src/kenlm/util/pcqueue.hh
type util (line 11) | namespace util {
function WaitSemaphore (line 13) | inline void WaitSemaphore (boost::interprocess::interprocess_semaphore...
class PCQueue (line 31) | class PCQueue : boost::noncopyable {
method PCQueue (line 33) | explicit PCQueue(size_t size)
method Produce (line 41) | void Produce(const T &val) {
method T (line 58) | T& Consume(T &out) {
method T (line 77) | T Consume() {
FILE: src/kenlm/util/pool.cc
type util (line 7) | namespace util {
FILE: src/kenlm/util/pool.hh
type util (line 11) | namespace util {
class Pool (line 13) | class Pool {
FILE: src/kenlm/util/probing_hash_table.hh
type util (line 14) | namespace util {
class ProbingSizeException (line 17) | class ProbingSizeException : public Exception {
method ProbingSizeException (line 19) | ProbingSizeException() throw() {}
type IdentityHash (line 24) | struct IdentityHash {
method T (line 25) | T operator()(T arg) const { return arg; }
class ProbingHashTable (line 37) | class ProbingHashTable {
method Size (line 47) | static uint64_t Size(uint64_t entries, float multiplier) {
method ProbingHashTable (line 53) | ProbingHashTable() : entries_(0)
method ProbingHashTable (line 59) | ProbingHashTable(void *start, std::size_t allocated, const Key &inva...
method MutableIterator (line 72) | MutableIterator Insert(const T &t) {
method FindOrInsert (line 81) | bool FindOrInsert(const T &t, MutableIterator &out) {
method FinishedInserting (line 98) | void FinishedInserting() {}
method LoadedBinary (line 100) | void LoadedBinary() {}
method UnsafeMutableFind (line 103) | bool UnsafeMutableFind(const Key key, MutableIterator &out) {
method Find (line 115) | bool Find(const Key key, ConstIterator &out) const {
method Clear (line 127) | void Clear() {
method SizeNoSerialization (line 135) | std::size_t SizeNoSerialization() const {
method DoubleTo (line 140) | std::size_t DoubleTo() const {
method Double (line 147) | void Double(void *new_base, bool clear_new = true) {
method CheckConsistency (line 184) | void CheckConsistency() {
method MutableIterator (line 206) | MutableIterator Ideal(const T &t) {
method MutableIterator (line 210) | MutableIterator UncheckedInsert(const T &t) {
FILE: src/kenlm/util/probing_hash_table_test.cc
type util (line 15) | namespace util {
type Entry (line 18) | struct Entry {
method GetKey (line 22) | unsigned char GetKey() const {
method SetKey (line 26) | void SetKey(unsigned char to) {
method GetValue (line 30) | uint64_t GetValue() const {
function BOOST_AUTO_TEST_CASE (line 39) | BOOST_AUTO_TEST_CASE(simple) {
type Entry64 (line 57) | struct Entry64 {
method Entry64 (line 61) | Entry64() {}
method Entry64 (line 63) | explicit Entry64(uint64_t key_in) {
method Key (line 67) | Key GetKey() const { return key; }
method SetKey (line 68) | void SetKey(uint64_t to) { key = to; }
type MurmurHashEntry64 (line 71) | struct MurmurHashEntry64 {
function BOOST_AUTO_TEST_CASE (line 79) | BOOST_AUTO_TEST_CASE(Double) {
FILE: src/kenlm/util/proxy_iterator.hh
type util (line 30) | namespace util {
class ProxyIterator (line 31) | class ProxyIterator {
method ProxyIterator (line 44) | ProxyIterator() {}
method ProxyIterator (line 47) | ProxyIterator(const ProxyIterator<AlternateProxy> &in) : p_(*in) {}
method ProxyIterator (line 48) | explicit ProxyIterator(const Proxy &p) : p_(p) {}
method S (line 51) | S &operator=(const S &other) {
method S (line 63) | S &operator++() { return *this += 1; }
method S (line 64) | S operator++(int) { S ret(*this); ++*this; return ret; }
method S (line 65) | S &operator+=(std::ptrdiff_t amount) { I() += amount; return *this; }
method S (line 66) | S operator+(std::ptrdiff_t amount) const { S ret(*this); ret += amou...
method S (line 68) | S &operator--() { return *this -= 1; }
method S (line 69) | S operator--(int) { S ret(*this); --*this; return ret; }
method S (line 70) | S &operator-=(std::ptrdiff_t amount) { I() += (-amount); return *thi...
method S (line 71) | S operator-(std::ptrdiff_t amount) const { S ret(*this); ret -= amou...
method Proxy (line 75) | Proxy operator*() { return p_; }
method Proxy (line 76) | const Proxy operator*() const { return p_; }
method Proxy (line 77) | Proxy *operator->() { return &p_; }
method Proxy (line 78) | const Proxy *operator->() const { return &p_; }
method Proxy (line 79) | Proxy operator[](std::ptrdiff_t amount) const { return *(*this + amo...
method InnerIterator (line 81) | const InnerIterator &Inner() { return p_.Inner(); }
method InnerIterator (line 84) | InnerIterator &I() { return p_.Inner(); }
method InnerIterator (line 85) | const InnerIterator &I() const { return p_.Inner(); }
FILE: src/kenlm/util/read_compressed.cc
type util (line 27) | namespace util {
class ReadBase (line 41) | class ReadBase {
method ReplaceThis (line 48) | static void ReplaceThis(ReadBase *with, ReadCompressed &thunk) {
class Complete (line 60) | class Complete : public ReadBase {
method Read (line 62) | std::size_t Read(void *, std::size_t, ReadCompressed &) {
class Uncompressed (line 67) | class Uncompressed : public ReadBase {
method Uncompressed (line 69) | explicit Uncompressed(int fd) : fd_(fd) {}
method Read (line 71) | std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
class UncompressedWithHeader (line 81) | class UncompressedWithHeader : public ReadBase {
method UncompressedWithHeader (line 83) | UncompressedWithHeader(int fd, void *already_data, std::size_t alrea...
method Read (line 92) | std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
class GZip (line 112) | class GZip : public ReadBase {
method GZip (line 116) | GZip(int fd, void *already_data, std::size_t already_size)
method Read (line 144) | std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
method ReadInput (line 170) | void ReadInput(ReadCompressed &thunk) {
class BZip (line 184) | class BZip : public ReadBase {
method BZip (line 186) | explicit BZip(int fd, void *already_data, std::size_t already_size) {
method Read (line 214) | std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
class XZip (line 240) | class XZip : public ReadBase {
method XZip (line 244) | XZip(int fd, void *already_data, std::size_t already_size)
method Read (line 272) | std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
method ReadInput (line 307) | void ReadInput(ReadCompressed &thunk) {
class IStreamReader (line 323) | class IStreamReader : public ReadBase {
method IStreamReader (line 325) | explicit IStreamReader(std::istream &stream) : stream_(stream) {}
method Read (line 327) | std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
type MagicResult (line 340) | enum MagicResult {
function MagicResult (line 344) | MagicResult DetectMagic(const void *from_void) {
function ReadBase (line 359) | ReadBase *ReadFactory(int fd, uint64_t &raw_amount) {
method ReplaceThis (line 48) | static void ReplaceThis(ReadBase *with, ReadCompressed &thunk) {
FILE: src/kenlm/util/read_compressed.hh
type util (line 11) | namespace util {
class CompressedException (line 13) | class CompressedException : public Exception {
class GZException (line 19) | class GZException : public CompressedException {
class BZException (line 25) | class BZException : public CompressedException {
class XZException (line 31) | class XZException : public CompressedException {
class ReadBase (line 37) | class ReadBase
class ReadCompressed (line 39) | class ReadCompressed {
method RawAmount (line 65) | uint64_t RawAmount() const { return raw_amount_; }
FILE: src/kenlm/util/read_compressed_test.cc
type util (line 15) | namespace util {
function ReadLoop (line 18) | void ReadLoop(ReadCompressed &reader, void *to_void, std::size_t amoun...
function WriteRandom (line 30) | std::string WriteRandom() {
function VerifyRead (line 40) | void VerifyRead(ReadCompressed &reader) {
function TestRandom (line 53) | void TestRandom(const char *compressor) {
function BOOST_AUTO_TEST_CASE (line 77) | BOOST_AUTO_TEST_CASE(Uncompressed) {
function BOOST_AUTO_TEST_CASE (line 82) | BOOST_AUTO_TEST_CASE(ReadGZ) {
function BOOST_AUTO_TEST_CASE (line 88) | BOOST_AUTO_TEST_CASE(ReadBZ) {
function BOOST_AUTO_TEST_CASE (line 94) | BOOST_AUTO_TEST_CASE(ReadXZ) {
function BOOST_AUTO_TEST_CASE (line 99) | BOOST_AUTO_TEST_CASE(IStream) {
FILE: src/kenlm/util/scoped.cc
type util (line 8) | namespace util {
FILE: src/kenlm/util/scoped.hh
type util (line 8) | namespace util {
class MallocException (line 10) | class MallocException : public ErrnoException {
class scoped_malloc (line 19) | class scoped_malloc {
method scoped_malloc (line 21) | scoped_malloc() : p_(NULL) {}
method scoped_malloc (line 23) | scoped_malloc(void *p) : p_(p) {}
method reset (line 27) | void reset(void *p = NULL) {
class scoped_array (line 45) | class scoped_array {
method scoped_array (line 47) | explicit scoped_array(T *content = NULL) : c_(content) {}
method T (line 51) | T *get() { return c_; }
method T (line 52) | const T* get() const { return c_; }
method T (line 54) | T &operator*() { return *c_; }
method T (line 55) | const T&operator*() const { return *c_; }
method T (line 57) | T &operator[](std::size_t idx) { return c_[idx]; }
method T (line 58) | const T &operator[](std::size_t idx) const { return c_[idx]; }
method reset (line 60) | void reset(T *to = NULL) {
class scoped_ptr (line 72) | class scoped_ptr {
method scoped_ptr (line 74) | explicit scoped_ptr(T *content = NULL) : c_(content) {}
method T (line 78) | T *get() { return c_; }
method T (line 79) | const T* get() const { return c_; }
method T (line 81) | T &operator*() { return *c_; }
method T (line 82) | const T&operator*() const { return *c_; }
method T (line 84) | T *operator->() { return c_; }
method T (line 85) | const T*operator->() const { return c_; }
method T (line 87) | T &operator[](std::size_t idx) { return c_[idx]; }
method T (line 88) | const T &operator[](std::size_t idx) const { return c_[idx]; }
method reset (line 90) | void reset(T *to = NULL) {
FILE: src/kenlm/util/sized_iterator.hh
type util (line 12) | namespace util {
class SizedInnerIterator (line 14) | class SizedInnerIterator {
method SizedInnerIterator (line 16) | SizedInnerIterator() {}
method SizedInnerIterator (line 18) | SizedInnerIterator(void *ptr, std::size_t size) : ptr_(static_cast<u...
method SizedInnerIterator (line 26) | SizedInnerIterator &operator+=(std::ptrdiff_t amount) {
method EntrySize (line 36) | std::size_t EntrySize() const { return size_; }
class SizedProxy (line 43) | class SizedProxy {
method SizedProxy (line 45) | SizedProxy() {}
method SizedProxy (line 47) | SizedProxy(void *ptr, std::size_t size) : inner_(ptr, size) {}
method SizedProxy (line 53) | SizedProxy &operator=(const SizedProxy &from) {
method SizedProxy (line 58) | SizedProxy &operator=(const std::string &from) {
method InnerIterator (line 73) | InnerIterator &Inner() { return inner_; }
method InnerIterator (line 74) | const InnerIterator &Inner() const { return inner_; }
function SizedIterator (line 80) | inline SizedIterator SizedIt(void *ptr, std::size_t size) { return Siz...
class SizedCompare (line 83) | class SizedCompare : public std::binary_function<const Proxy &, const ...
method SizedCompare (line 85) | explicit SizedCompare(const Delegate &delegate = Delegate()) : deleg...
method Delegate (line 100) | const Delegate &GetDelegate() const { return delegate_; }
FILE: src/kenlm/util/sorted_uniform.hh
type util (line 10) | namespace util {
class IdentityAccessor (line 12) | class IdentityAccessor {
method T (line 15) | T operator()(const T *in) const { return *in; }
type Pivot64 (line 18) | struct Pivot64 {
method Calc (line 19) | static inline std::size_t Calc(uint64_t off, uint64_t range, std::si...
type Pivot32 (line 27) | struct Pivot32 {
method Calc (line 28) | static inline std::size_t Calc(uint64_t off, uint64_t range, uint64_...
type PivotSelect (line 34) | struct PivotSelect
type PivotSelect<8> (line 35) | struct PivotSelect<8> { typedef Pivot64 T; }
type PivotSelect<4> (line 36) | struct PivotSelect<4> { typedef Pivot32 T; }
type PivotSelect<2> (line 37) | struct PivotSelect<2> { typedef Pivot32 T; }
function BinaryFind (line 40) | bool BinaryFind(
function BoundedSortedUniformFind (line 65) | bool BoundedSortedUniformFind(
function SortedUniformFind (line 87) | bool SortedUniformFind(const Accessor &accessor, Iterator begin, Itera...
function Iterator (line 105) | Iterator BinaryBelow(
FILE: src/kenlm/util/sorted_uniform_test.cc
type util (line 16) | namespace util {
type Entry (line 19) | struct Entry {
method Key (line 26) | Key GetKey() const {
method Value (line 30) | Value GetValue() const {
type Accessor (line 39) | struct Accessor {
method Key (line 41) | Key operator()(const Entry<Key, Value> *entry) const {
function Check (line 46) | void Check(const Entry<Key, Value> *begin, const Entry<Key, Value> *en...
function BOOST_AUTO_TEST_CASE (line 60) | BOOST_AUTO_TEST_CASE(empty) {
function RandomTest (line 67) | void RandomTest(Key upper, size_t entries, size_t queries) {
function BOOST_AUTO_TEST_CASE (line 102) | BOOST_AUTO_TEST_CASE(basic) {
function BOOST_AUTO_TEST_CASE (line 106) | BOOST_AUTO_TEST_CASE(tiny_dense_random) {
function BOOST_AUTO_TEST_CASE (line 110) | BOOST_AUTO_TEST_CASE(small_dense_random) {
function BOOST_AUTO_TEST_CASE (line 114) | BOOST_AUTO_TEST_CASE(small_sparse_random) {
function BOOST_AUTO_TEST_CASE (line 118) | BOOST_AUTO_TEST_CASE(medium_sparse_random) {
function BOOST_AUTO_TEST_CASE (line 122) | BOOST_AUTO_TEST_CASE(sparse_random) {
FILE: src/kenlm/util/string_piece.cc
function size_type (line 19) | size_type StringPiece::find(const StringPiece& s, size_type pos) const {
function size_type (line 30) | size_type StringPiece::find(char c, size_type pos) const {
function size_type (line 38) | size_type StringPiece::rfind(const StringPiece& s, size_type pos) const {
function size_type (line 48) | size_type StringPiece::rfind(char c, size_type pos) const {
function BuildLookupTable (line 67) | static inline void BuildLookupTable(const StringPiece& characters_wanted,
function size_type (line 76) | size_type StringPiece::find_first_of(const StringPiece& s,
function size_type (line 95) | size_type StringPiece::find_first_not_of(const StringPiece& s,
function size_type (line 117) | size_type StringPiece::find_first_not_of(char c, size_type pos) const {
function size_type (line 129) | size_type StringPiece::find_last_of(const StringPiece& s, size_type pos)...
function size_type (line 148) | size_type StringPiece::find_last_not_of(const StringPiece& s,
function size_type (line 172) | size_type StringPiece::find_last_not_of(char c, size_type pos) const {
function StringPiece (line 185) | StringPiece StringPiece::substr(size_type pos, size_type n) const {
FILE: src/kenlm/util/string_piece.hh
class StringPiece (line 89) | class StringPiece {
method StringPiece (line 101) | StringPiece() : ptr_(NULL), length_(0) { }
method StringPiece (line 102) | StringPiece(const char* str)
method StringPiece (line 104) | StringPiece(const std::string& str)
method StringPiece (line 106) | StringPiece(const char* offset, size_type len)
method size_type (line 114) | size_type size() const { return length_; }
method size_type (line 115) | size_type length() const { return length_; }
method empty (line 116) | bool empty() const { return length_ == 0; }
method clear (line 118) | void clear() { ptr_ = NULL; length_ = 0; }
method set (line 119) | void set(const char* data, size_type len) { ptr_ = data; length_ = len; }
method set (line 120) | void set(const char* str) {
method set (line 124) | void set(const void* data, size_type len) {
method remove_prefix (line 131) | void remove_prefix(size_type n) {
method remove_suffix (line 136) | void remove_suffix(size_type n) {
method compare (line 140) | int compare(const StringPiece& x) const {
method as_string (line 149) | std::string as_string() const {
method starts_with (line 158) | bool starts_with(const StringPiece& x) const {
method ends_with (line 164) | bool ends_with(const StringPiece& x) const {
method iterator (line 180) | iterator begin() const { return ptr_; }
method iterator (line 181) | iterator end() const { return ptr_ + length_; }
method const_reverse_iterator (line 182) | const_reverse_iterator rbegin() const {
method const_reverse_iterator (line 185) | const_reverse_iterator rend() const {
method size_type (line 189) | size_type max_size() const { return length_; }
method size_type (line 190) | size_type capacity() const { return length_; }
method size_type (line 200) | size_type find_first_of(char c, size_type pos = 0) const {
method size_type (line 206) | size_type find_last_of(char c, size_type pos = npos) const {
method wordmemcmp (line 214) | static int wordmemcmp(const char* p, const char* p2, size_type N) {
FILE: src/kenlm/util/string_piece_hash.hh
function hash_value (line 9) | inline size_t hash_value(const StringPiece &str) {
type StringPieceCompatibleHash (line 14) | struct StringPieceCompatibleHash : public std::unary_function<const Stri...
type StringPieceCompatibleEquals (line 20) | struct StringPieceCompatibleEquals : public std::binary_function<const S...
function FindStringPiece (line 25) | typename T::const_iterator FindStringPiece(const T &t, const StringPiece...
function FindStringPiece (line 34) | typename T::iterator FindStringPiece(T &t, const StringPiece &key) {
FILE: src/kenlm/util/thread_pool.hh
type util (line 14) | namespace util {
class Worker (line 16) | class Worker : boost::noncopyable {
method Worker (line 21) | Worker(PCQueue<Request> &in, Construct &construct, Request &poison)
method Join (line 44) | void Join() {
class ThreadPool (line 58) | class ThreadPool : boost::noncopyable {
method ThreadPool (line 63) | ThreadPool(size_t queue_length, size_t workers, Construct handler_co...
method Produce (line 78) | void Produce(const Request &request) {
FILE: src/kenlm/util/tokenize_piece.hh
type util (line 12) | namespace util {
class OutOfTokens (line 15) | class OutOfTokens : public Exception {
method OutOfTokens (line 17) | OutOfTokens() throw() {}
class SingleCharacter (line 21) | class SingleCharacter {
method SingleCharacter (line 23) | SingleCharacter() {}
method SingleCharacter (line 24) | explicit SingleCharacter(char delim) : delim_(delim) {}
method StringPiece (line 26) | StringPiece Find(const StringPiece &in) const {
class MultiCharacter (line 34) | class MultiCharacter {
method MultiCharacter (line 36) | MultiCharacter() {}
method MultiCharacter (line 38) | explicit MultiCharacter(const StringPiece &delimiter) : delimiter_(d...
method StringPiece (line 40) | StringPiece Find(const StringPiece &in) const {
class AnyCharacter (line 48) | class AnyCharacter {
method AnyCharacter (line 50) | AnyCharacter() {}
method AnyCharacter (line 51) | explicit AnyCharacter(const StringPiece &chars) : chars_(chars) {}
method StringPiece (line 53) | StringPiece Find(const StringPiece &in) const {
class AnyCharacterLast (line 61) | class AnyCharacterLast {
method AnyCharacterLast (line 63) | AnyCharacterLast() {}
method AnyCharacterLast (line 65) | explicit AnyCharacterLast(const StringPiece &chars) : chars_(chars) {}
method StringPiece (line 67) | StringPiece Find(const StringPiece &in) const {
class TokenIter (line 75) | class TokenIter : public boost::iterator_facade<TokenIter<Find, SkipEm...
method TokenIter (line 77) | TokenIter() {}
method TokenIter (line 79) | TokenIter(const StringPiece &str, const Construct &construct) : afte...
method end (line 90) | static TokenIter<Find, SkipEmpty> end() {
method increment (line 97) | void increment() {
method equal (line 109) | bool equal(const TokenIter<Find, SkipEmpty> &other) const {
method StringPiece (line 113) | const StringPiece &dereference() const {
FILE: src/kenlm/util/tokenize_piece_test.cc
type util (line 9) | namespace util {
function BOOST_AUTO_TEST_CASE (line 12) | BOOST_AUTO_TEST_CASE(pipe_pipe_none) {
function BOOST_AUTO_TEST_CASE (line 20) | BOOST_AUTO_TEST_CASE(pipe_pipe_two) {
function BOOST_AUTO_TEST_CASE (line 32) | BOOST_AUTO_TEST_CASE(remove_empty) {
function BOOST_AUTO_TEST_CASE (line 38) | BOOST_AUTO_TEST_CASE(remove_empty_keep) {
FILE: src/kenlm/util/usage.cc
type util (line 17) | namespace util {
function FloatSec (line 21) | float FloatSec(const struct timeval &tv) {
function PrintUsage (line 27) | void PrintUsage(std::ostream &out) {
function GuessPhysicalMemory (line 50) | uint64_t GuessPhysicalMemory() {
class SizeParseError (line 65) | class SizeParseError : public Exception {
method SizeParseError (line 67) | explicit SizeParseError(const std::string &str) throw() {
function ParseNum (line 72) | uint64_t ParseNum(const std::string &arg) {
function ParseSize (line 102) | uint64_t ParseSize(const std::string &arg) {
FILE: src/kenlm/util/usage.hh
type util (line 9) | namespace util {
FILE: src/opennlp/ccg/Parse.java
class Parse (line 54) | public class Parse {
method main (line 56) | public static void main(String[] args) throws IOException {
FILE: src/opennlp/ccg/Realize.java
class Realize (line 41) | public class Realize
method main (line 45) | @SuppressWarnings("unchecked")
FILE: src/opennlp/ccg/TextCCG.java
class TextCCG (line 52) | public class TextCCG {
method main (line 83) | @SuppressWarnings("unchecked")
method readFilename (line 614) | private static String readFilename(String s) throws IOException {
method showHelp (line 623) | public static void showHelp() {
method showSettings (line 699) | public static void showSettings(Preferences prefs) {
FILE: src/opennlp/ccg/WebCCG.java
class WebCCG (line 53) | public class WebCCG {
method main (line 55) | public static void main(String[] args) throws IOException, LexException {
FILE: src/opennlp/ccg/alignment/AbstractEncodingScheme.java
class AbstractEncodingScheme (line 32) | public abstract class AbstractEncodingScheme implements EncodingScheme {
method AbstractEncodingScheme (line 76) | protected AbstractEncodingScheme(Character fieldDelimiter, Character m...
method isLineSeparator (line 95) | static boolean isLineSeparator(Character c) {
method isFieldDelimiter (line 105) | public boolean isFieldDelimiter(Character c) {
method isMappingDelimiter (line 114) | public boolean isMappingDelimiter(Character c) {
method isGroupDelimiter (line 123) | public boolean isGroupDelimiter(Character c) {
method getFieldDelimiter (line 130) | public Character getFieldDelimiter() {
method getMappingDelimiter (line 137) | public Character getMappingDelimiter() {
method getGroupDelimiter (line 144) | public Character getGroupDelimiter() {
method getPhraseNumberBase (line 151) | public IndexBase getPhraseNumberBase() {
method getIndexBase (line 158) | public IndexBase getIndexBase() {
method getOrder (line 165) | public List<MappingFormat.Field> getOrder() {
method getRequired (line 172) | public Set<MappingFormat.Field> getRequired() {
method getDefaults (line 179) | public Set<MappingFormat.Field> getDefaults() {
method hashCode (line 186) | @Override
method equals (line 196) | @Override
FILE: src/opennlp/ccg/alignment/Alignment.java
class Alignment (line 64) | public class Alignment extends AbstractSet<Mapping> implements Comparabl...
method Alignment (line 88) | public Alignment(Phrase a, Phrase b, Collection<? extends Mapping> map...
method fromMap (line 117) | public static Alignment fromMap(Phrase a, Phrase b, Map<Integer, Set<I...
method reverse (line 139) | public Alignment reverse() {
method getNumber (line 155) | public Integer getNumber() {
method getA (line 162) | public Phrase getA() {
method getB (line 169) | public Phrase getB() {
method get (line 178) | public Phrase get(PhrasePosition pos) {
method add (line 187) | @Override
method iterator (line 196) | @Override
method size (line 204) | @Override
method compareTo (line 215) | @Override
method equals (line 224) | @Override
method hashCode (line 237) | @Override
method toString (line 245) | @Override
method getTargets (line 269) | public Set<Integer> getTargets(Integer source) {
method getTargets (line 299) | public Set<Integer> getTargets(Integer source, PhrasePosition sourcePo...
method getIndices (line 311) | public Set<Integer> getIndices(PhrasePosition position) {
method asMap (line 320) | public Map<Integer, Set<Integer>> asMap() {
method asMap (line 375) | public Map<Integer, Set<Integer>> asMap(PhrasePosition keyPosition) {
method checkPhrases (line 379) | void checkPhrases(Phrase ap, Phrase bp) {
method checkMapping (line 393) | void checkMapping(Mapping m) {
method checkIndex (line 408) | void checkIndex(Integer index, PhrasePosition intendedPosition) {
class MapView (line 419) | class MapView extends AbstractMap<Integer, Set<Integer>> {
method MapView (line 422) | MapView(PhrasePosition keyPosition) {
method entrySet (line 426) | @Override
class IndexView (line 465) | abstract class IndexView extends AbstractSet<Integer> {
method IndexView (line 472) | IndexView(PhrasePosition indexPosition, Filter<Mapping> indexFilter) {
method indices (line 477) | Set<Mapping> indices() {
method size (line 483) | @Override
method iterator (line 488) | @Override
class KeyView (line 511) | class KeyView extends IndexView {
method KeyView (line 512) | KeyView(final PhrasePosition keyPosition) {
class ValueView (line 522) | class ValueView extends IndexView {
method ValueView (line 523) | ValueView(final Integer key, final PhrasePosition keyPosition) {
FILE: src/opennlp/ccg/alignment/Alignments.java
class Alignments (line 83) | public final class Alignments {
method Alignments (line 209) | private Alignments() {
method tokenize (line 216) | public static String[] tokenize(String s) {
method tokenize (line 224) | public static String[] tokenize(String s, Pattern wordDelimiter) {
method untokenize (line 234) | public static String untokenize(List<String> tokens) {
method untokenize (line 242) | public static String untokenize(List<String> tokens, String delimiter) {
method untokenize (line 253) | public static String untokenize(String[] tokens) {
method untokenize (line 261) | public static String untokenize(String[] tokens, String delimiter) {
method readPhrases (line 279) | public static List<Phrase> readPhrases(File f) throws IOException {
method readPhrases (line 287) | public static List<Phrase> readPhrases(Reader r) throws IOException {
method readPhrases (line 297) | public static List<Phrase> readPhrases(Reader r, IndexBase phraseNumbe...
method readIdentifiedPhrases (line 314) | public static List<Phrase> readIdentifiedPhrases(File f) throws IOExce...
method readIdentifiedPhrases (line 326) | public static List<Phrase> readIdentifiedPhrases(Reader r) throws IOEx...
method readIdentifiedPhrases (line 342) | public static List<Phrase> readIdentifiedPhrases(Reader r, IndexBase p...
method readPhrases (line 363) | public static List<Phrase> readPhrases(PhraseReader reader) throws IOE...
method writeIdentifiedPhrases (line 379) | public static void writeIdentifiedPhrases(List<Phrase> phrases, File f...
method writeIdentifiedPhrases (line 392) | public static void writeIdentifiedPhrases(List<Phrase> phrases, Writer...
method writeIdentifiedPhrases (line 409) | public static void writeIdentifiedPhrases(List<Phrase> phrases, Writer...
method writePhrases (line 427) | public static void writePhrases(List<Phrase> phrases, File f) throws I...
method writePhrases (line 437) | public static void writePhrases(List<Phrase> phrases, Writer w) throws...
method writePhrases (line 446) | public static void writePhrases(List<Phrase> phrases, Writer w, String...
method writePhrases (line 466) | public static void writePhrases(List<Phrase> phrases, PhraseWriter wri...
method readMappings (line 477) | public static Map<Integer, Set<Mapping>> readMappings(File f, MappingF...
method readMappings (line 487) | public static Map<Integer, Set<Mapping>> readMappings(Reader r, Mappin...
method readMappings (line 513) | public static Map<Integer, Set<Mapping>> readMappings(MappingReader re...
method readSortedMappings (line 545) | public static SortedMap<Integer, SortedSet<Mapping>> readSortedMapping...
method readSortedMappings (line 556) | public static SortedMap<Integer, SortedSet<Mapping>> readSortedMapping...
method readSortedMappings (line 579) | public static SortedMap<Integer, SortedSet<Mapping>> readSortedMapping...
method writeMappings (line 597) | public static void writeMappings(Map<Integer, Set<Mapping>> map, File ...
method writeMappings (line 607) | public static void writeMappings(Map<Integer, Set<Mapping>> map, Write...
method writeMappings (line 622) | public static void writeMappings(Map<Integer, Set<Mapping>> map, Mappi...
method readAlignments (line 644) | public static List<Alignment> readAlignments(File phraseA, File phraseB,
method readAlignments (line 655) | public static List<Alignment> readAlignments(Reader phraseA, Reader ph...
method readIdentifiedAlignments (line 668) | public static List<Alignment> readIdentifiedAlignments(File phraseA, F...
method readIdentifiedAlignments (line 681) | public static List<Alignment> readIdentifiedAlignments(Reader phraseA,
method readAlignments (line 710) | public static List<Alignment> readAlignments(PhraseReader phraseA, Phr...
method writeAlignments (line 741) | public static void writeAlignments(List<Alignment> alignments,
method writeAlignments (line 764) | public static void writeAlignments(List<Alignment> alignments,
FILE: src/opennlp/ccg/alignment/EncodingScheme.java
type EncodingScheme (line 46) | public interface EncodingScheme {
method isFieldDelimiter (line 53) | public boolean isFieldDelimiter(Character c);
method isMappingDelimiter (line 60) | public boolean isMappingDelimiter(Character c);
method isGroupDelimiter (line 67) | public boolean isGroupDelimiter(Character c);
method getFieldDelimiter (line 72) | public Character getFieldDelimiter();
method getMappingDelimiter (line 77) | public Character getMappingDelimiter();
method getGroupDelimiter (line 82) | public Character getGroupDelimiter();
method getPhraseNumberBase (line 87) | public IndexBase getPhraseNumberBase();
method getIndexBase (line 92) | public IndexBase getIndexBase();
method getOrder (line 97) | public List<MappingFormat.Field> getOrder();
method getRequired (line 102) | public Set<MappingFormat.Field> getRequired();
method getDefaults (line 107) | public Set<MappingFormat.Field> getDefaults();
FILE: src/opennlp/ccg/alignment/IdentifiedPhraseReader.java
class IdentifiedPhraseReader (line 45) | public class IdentifiedPhraseReader extends PhraseReader {
method IdentifiedPhraseReader (line 61) | public IdentifiedPhraseReader(Reader in) {
method IdentifiedPhraseReader (line 73) | public IdentifiedPhraseReader(Reader in, IndexBase numberBase) {
method IdentifiedPhraseReader (line 84) | public IdentifiedPhraseReader(Reader in, IndexBase numberBase, String ...
method getLastId (line 102) | public String getLastId() {
method getPhraseTag (line 109) | public String getPhraseTag() {
method getPhraseIdentifierAttribute (line 116) | public String getPhraseIdentifierAttribute() {
method readPhrase (line 128) | @Override
FILE: src/opennlp/ccg/alignment/IdentifiedPhraseWriter.java
class IdentifiedPhraseWriter (line 36) | public class IdentifiedPhraseWriter extends PhraseWriter {
method IdentifiedPhraseWriter (line 46) | public IdentifiedPhraseWriter(Writer out) {
method IdentifiedPhraseWriter (line 56) | public IdentifiedPhraseWriter(Writer out, String wordSeparator) {
method IdentifiedPhraseWriter (line 66) | public IdentifiedPhraseWriter(Writer out, String wordSeparator, String...
method IdentifiedPhraseWriter (line 80) | public IdentifiedPhraseWriter(Writer out, String wordSeparator, String...
method getPhraseTag (line 99) | public String getPhraseTag() {
method getPhraseIdentifierAttribute (line 106) | public String getPhraseIdentifierAttribute() {
method isPadding (line 114) | public boolean isPadding() {
method preWritePhrase (line 126) | @Override
method postWritePhrase (line 151) | @Override
FILE: src/opennlp/ccg/alignment/IndexBase.java
type IndexBase (line 33) | public enum IndexBase {
method IndexBase (line 47) | private IndexBase() {
method getStart (line 55) | public Integer getStart() {
method getNullValue (line 62) | public Integer getNullValue() {
method isValidIndex (line 71) | public boolean isValidIndex(Integer index) {
method translate (line 87) | public Integer translate(Integer index, IndexBase target) {
FILE: src/opennlp/ccg/alignment/Mapping.java
class Mapping (line 52) | public class Mapping implements Comparable<Mapping> {
method Mapping (line 62) | public Mapping(Integer a, Integer b) {
method Mapping (line 71) | public Mapping(Integer phraseNumber, Integer a, Integer b) {
method Mapping (line 80) | public Mapping(Integer phraseNumber, Integer a, Integer b, Status stat...
method Mapping (line 100) | public Mapping(Integer phraseNumber, Integer a, Integer b, Status stat...
method copyWithPhraseNumber (line 128) | public Mapping copyWithPhraseNumber(Integer phraseNumber) {
method mappingByPosition (line 142) | public static Mapping mappingByPosition(Integer phraseNumber, Integer ...
method mappingByPosition (line 165) | public static Mapping mappingByPosition(Integer phraseNumber, Integer ...
method reverse (line 178) | public Mapping reverse() {
method getPhraseNumber (line 185) | public Integer getPhraseNumber() {
method getA (line 193) | public Integer getA() {
method getB (line 201) | public Integer getB() {
method get (line 210) | public Integer get(PhrasePosition pos) {
method getStatus (line 217) | public Status getStatus() {
method setStatus (line 224) | public void setStatus(Status status) {
method getConfidence (line 232) | public Double getConfidence() {
method setConfidence (line 241) | public void setConfidence(Double confidence) {
method compareTo (line 255) | @Override
method hashCode (line 280) | @Override
method equals (line 293) | @Override
method checkPhraseNumber (line 305) | void checkPhraseNumber(Integer phraseNumber) throws IndexOutOfBoundsEx...
method checkIndex (line 311) | void checkIndex(Integer index) throws IndexOutOfBoundsException {
method checkField (line 321) | void checkField(Object obj, String name) throws IllegalArgumentExcepti...
method toString (line 333) | @Override
FILE: src/opennlp/ccg/alignment/MappingFormat.java
class MappingFormat (line 67) | public class MappingFormat extends Format {
class Field (line 83) | public static class Field extends java.text.Format.Field {
method Field (line 120) | protected Field(String name, Object defaultValue) {
method hasDefaultValue (line 134) | public boolean hasDefaultValue() {
method getDefaultValue (line 142) | public Object getDefaultValue() {
method toString (line 149) | @Override
method MappingFormat (line 167) | protected MappingFormat(EncodingScheme scheme, Set<Field> fields, bool...
method getInstance (line 195) | public static MappingFormat getInstance(EncodingScheme scheme) {
method getInstance (line 204) | public static MappingFormat getInstance(EncodingScheme scheme, Set<Fie...
method getInstance (line 219) | public static MappingFormat getInstance(EncodingScheme scheme, Set<Fie...
method getEncodingScheme (line 261) | public EncodingScheme getEncodingScheme() {
method getFields (line 269) | public Set<Field> getFields() {
method isStrict (line 279) | public boolean isStrict() {
method formatMapping (line 289) | public String formatMapping(Mapping mapping) {
method format (line 311) | @Override
method format (line 343) | public StringBuffer format(Mapping mapping, StringBuffer toAppendTo, F...
method fieldAtIndex (line 431) | Field fieldAtIndex(int i) throws IndexOutOfBoundsException {
method parseMapping (line 450) | public Mapping parseMapping(String source) throws ParseException {
method parseObject (line 465) | @Override
FILE: src/opennlp/ccg/alignment/MappingGroup.java
class MappingGroup (line 29) | public class MappingGroup implements Comparable<MappingGroup> {
method MappingGroup (line 39) | public MappingGroup(Integer phraseNumber, int length) {
method getPhraseNumber (line 54) | public Integer getPhraseNumber() {
method getLength (line 61) | public int getLength() {
method equals (line 69) | @Override
method compareTo (line 82) | public int compareTo(MappingGroup ag) {
method hashCode (line 95) | @Override
method toString (line 106) | @Override
FILE: src/opennlp/ccg/alignment/MappingReader.java
class MappingReader (line 48) | public class MappingReader extends FilterReader {
method MappingReader (line 63) | public MappingReader(Reader r, MappingFormat format) {
method getFormat (line 77) | public MappingFormat getFormat() {
method nextGroup (line 89) | public MappingGroup nextGroup() throws IOException {
method canRead (line 185) | public boolean canRead() {
method read (line 194) | @Override
method read (line 213) | @Override
method ready (line 241) | @Override
method readMapping (line 252) | public Mapping readMapping() throws IOException {
method close (line 268) | @Override
method checkRead (line 278) | void checkRead() throws IOException {
method checkMappingCount (line 284) | void checkMappingCount() throws IOException {
FILE: src/opennlp/ccg/alignment/MappingWriter.java
class MappingWriter (line 51) | public class MappingWriter extends FilterWriter {
method MappingWriter (line 66) | public MappingWriter(Writer out, MappingFormat format) {
method getFormat (line 79) | public MappingFormat getFormat() {
method getCurrentGroup (line 86) | public MappingGroup getCurrentGroup() {
method checkWrite (line 90) | void checkWrite() throws IOException {
method checkMappingCount (line 96) | void checkMappingCount() throws IOException {
method startGroup (line 114) | public void startGroup(MappingGroup mappingGroup) throws IOException {
method endGroup (line 129) | public void endGroup() throws IOException {
method write (line 155) | @Override
method write (line 165) | @Override
method write (line 175) | @Override
method write (line 185) | @Override
method write (line 195) | @Override
method canWrite (line 210) | public boolean canWrite() {
method writeMapping (line 225) | public void writeMapping(Mapping mapping) throws IOException {
method close (line 254) | @Override
FILE: src/opennlp/ccg/alignment/MosesEncodingScheme.java
class MosesEncodingScheme (line 42) | public class MosesEncodingScheme extends AbstractEncodingScheme {
method MosesEncodingScheme (line 48) | public MosesEncodingScheme() {
FILE: src/opennlp/ccg/alignment/NAACLEncodingScheme.java
class NAACLEncodingScheme (line 46) | public class NAACLEncodingScheme extends AbstractEncodingScheme {
method NAACLEncodingScheme (line 52) | public NAACLEncodingScheme() {
FILE: src/opennlp/ccg/alignment/Phrase.java
class Phrase (line 41) | public class Phrase extends AbstractList<String> implements Comparable<P...
method Phrase (line 51) | public Phrase(Integer number, List<String> words) {
method Phrase (line 59) | public Phrase(Integer number, String... words) {
method Phrase (line 67) | public Phrase(String id, Integer number, List<String> words) {
method Phrase (line 78) | public Phrase(String id, Integer number, String... words) {
method checkObject (line 91) | void checkObject(Object obj, String name) {
method getId (line 101) | public String getId() {
method getNumber (line 108) | public Integer getNumber() {
method get (line 115) | @Override
method size (line 123) | @Override
method compareTo (line 134) | @Override
method equals (line 143) | @Override
method hashCode (line 157) | @Override
method toString (line 170) | @Override
FILE: src/opennlp/ccg/alignment/PhrasePosition.java
type PhrasePosition (line 27) | public enum PhrasePosition {
method opposite (line 44) | public PhrasePosition opposite() {
FILE: src/opennlp/ccg/alignment/PhraseReader.java
class PhraseReader (line 30) | public class PhraseReader extends LineNumberReader {
method PhraseReader (line 45) | public PhraseReader(Reader in) {
method PhraseReader (line 57) | public PhraseReader(Reader in, IndexBase numberBase) {
method getNumberBase (line 71) | public IndexBase getNumberBase() {
method getPhraseNumber (line 81) | public Integer getPhraseNumber() {
method readPhrase (line 91) | public Phrase readPhrase() throws IOException {
FILE: src/opennlp/ccg/alignment/PhraseWriter.java
class PhraseWriter (line 33) | public class PhraseWriter extends FilterWriter {
method PhraseWriter (line 44) | public PhraseWriter(Writer out) {
method PhraseWriter (line 56) | public PhraseWriter(Writer out, String wordSeparator) {
method getWordSeparator (line 70) | public String getWordSeparator() {
method writePhrase (line 84) | public void writePhrase(Phrase phrase) throws IOException {
method preWritePhrase (line 94) | protected void preWritePhrase(Phrase phrase) throws IOException {
method postWritePhrase (line 103) | protected void postWritePhrase(Phrase phrase) throws IOException {
FILE: src/opennlp/ccg/alignment/Status.java
type Status (line 31) | public enum Status {
method Status (line 49) | private Status() {
method getAbbreviation (line 57) | public String getAbbreviation() {
method forAbbreviation (line 67) | public static Status forAbbreviation(String abbreviation) {
FILE: src/opennlp/ccg/disjunctivizer/AlignedEdgeFilter.java
class AlignedEdgeFilter (line 49) | public class AlignedEdgeFilter extends MatchTypeFilter {
method AlignedEdgeFilter (line 60) | public AlignedEdgeFilter(Set<Integer> alignmentIndices, MatchType... m...
method AlignedEdgeFilter (line 74) | public AlignedEdgeFilter(Set<Integer> alignmentIndices, Collection<Mat...
method checkAlignmentIndices (line 81) | private void checkAlignmentIndices(Set<Integer> alignmentIndices) {
method getAlignmentIndices (line 90) | public Set<Integer> getAlignmentIndices() {
method setAlignmentIndices (line 98) | public void setAlignmentIndices(Set<Integer> alignmentIndices) {
method allows (line 123) | @Override
FILE: src/opennlp/ccg/disjunctivizer/Disjunctivizer.java
class Disjunctivizer (line 77) | public class Disjunctivizer {
method Disjunctivizer (line 153) | public Disjunctivizer() throws ParserConfigurationException {
method Disjunctivizer (line 162) | public Disjunctivizer(Document document) {
method Disjunctivizer (line 176) | public Disjunctivizer(Document document,
method getDocument (line 192) | public Document getDocument() {
method setDocument (line 200) | public void setDocument(Document document) {
method isProcessingInserts (line 207) | public boolean isProcessingInserts() {
method setProcessingInserts (line 214) | public void setProcessingInserts(boolean processingInserts) {
method isProcessingDeletes (line 224) | public boolean isProcessingDeletes() {
method setProcessingDeletes (line 231) | public void setProcessingDeletes(boolean processingDeletes) {
method isProcessingSubstitutions (line 241) | public boolean isProcessingSubstitutions() {
method setProcessingSubstitutions (line 248) | public void setProcessingSubstitutions(boolean processingSubstitutions) {
method resetDisjunctiveLF (line 255) | private void resetDisjunctiveLF() {
method buildDisjunctiveLFFor (line 267) | public Element buildDisjunctiveLFFor(LFGraphDifference graphDifference) {
method findForeignAlignedSubgraphRoots (line 304) | private void findForeignAlignedSubgraphRoots() {
method createDisjunctiveElement (line 325) | private Element createDisjunctiveElement(DLFContext context) {
method nameFor (line 395) | private String nameFor(LFVertex vertex) {
method processInserts (line 400) | private void processInserts(DLFContext context) {
method processDeletes (line 424) | private void processDeletes(DLFContext context) {
method doInsertDelete (line 438) | private void doInsertDelete(DLFContext context, LFEdge edge) {
method processSubstitutions (line 448) | private void processSubstitutions(DLFContext context) {
method processNonsubstitutedEdge (line 459) | private void processNonsubstitutedEdge(DLFContext context, LFEdge outg...
method processSubstitutedEdge (line 482) | private void processSubstitutedEdge(DLFContext context, LFEdge outgoin...
method processDifferentPredicates (line 539) | private void processDifferentPredicates(DLFContext context, LFEdge out...
method processSingletonDifferentPredicate (line 571) | private void processSingletonDifferentPredicate(DLFContext context, LF...
method processMultipleDifferentPredicates (line 603) | private void processMultipleDifferentPredicates(DLFContext context, LF...
method processSubstitutedEdges (line 636) | private void processSubstitutedEdges(DLFContext context, LFEdge outgoi...
method processSubstitutedSimilarTarget (line 690) | private void processSubstitutedSimilarTarget(DLFContext context, LFEdg...
method processSubstitutedPredicates (line 723) | private void processSubstitutedPredicates(DLFContext context, LFEdge o...
method addRelation (line 757) | private Element addRelation(DLFContext context, LFEdgeLabel label) {
method addOptional (line 764) | private Element addOptional(DLFContext context) {
method addChoice (line 768) | private Element addChoice(DLFContext context) {
method addElement (line 772) | private Element addElement(DLFContext context, String elementName) {
method addAttributes (line 779) | private Element addAttributes(DLFContext context, String name, String ...
method addAttributes (line 788) | private Element addAttributes(DLFContext context, Map<Mode,Proposition...
method fixLabelReferences (line 802) | private void fixLabelReferences(Element newNode) {
method fixOptions (line 836) | private void fixOptions(DLFContext context, LFEdgeLabel label) {
method setPredicateName (line 867) | private void setPredicateName(DLFContext context) {
method addNonPredAttributes (line 875) | private void addNonPredAttributes(DLFContext context) {
method assimilateAttributes (line 884) | private void assimilateAttributes(DLFContext context, LFVertex one, LF...
class SimilarTargetVertexFilter (line 923) | static class SimilarTargetVertexFilter implements Filter<LFVertex> {
method SimilarTargetVertexFilter (line 926) | SimilarTargetVertexFilter(LFVertex vertex) {
method allows (line 930) | @Override
class SimilarTargetEdgeFilter (line 937) | static class SimilarTargetEdgeFilter extends DelegatedFilter<LFEdge, L...
method SimilarTargetEdgeFilter (line 941) | SimilarTargetEdgeFilter(LFVertex vertex, LFEdgeLabel label) {
method allows (line 946) | @Override
method delegateValueFor (line 951) | @Override
class TerminalFilter (line 957) | class TerminalFilter implements Filter<LFVertex> {
method TerminalFilter (line 960) | TerminalFilter(LFGraph graph) {
method allows (line 964) | @Override
type VertexType (line 970) | static enum VertexType {
class DLFContext (line 974) | class DLFContext {
method DLFContext (line 981) | DLFContext(PhrasePosition graphPosition, LFVertex vertex, Element pa...
method DLFContext (line 985) | DLFContext(PhrasePosition graphPosition, LFVertex vertex, Element pa...
method getGraph (line 993) | LFGraph getGraph() {
method copy (line 997) | DLFContext copy() {
method copy (line 1001) | DLFContext copy(boolean copyVertices) {
method copyWithVertexMask (line 1006) | DLFContext copyWithVertexMask(VertexType... vertexType) {
method copyVertices (line 1010) | Map<VertexType, Set<LFVertex>> copyVertices() {
method copyVertices (line 1014) | Map<VertexType, Set<LFVertex>> copyVertices(VertexType... vertexType) {
method getVertices (line 1021) | Set<LFVertex> getVertices(VertexType vertexType) {
method addVertex (line 1032) | boolean addVertex(LFVertex vertex, VertexType vertexType) {
class LFGraphIterator (line 1037) | static class LFGraphIterator extends DepthFirstIterator<LFVertex, LFEd...
method LFGraphIterator (line 1038) | LFGraphIterator(LFGraph graph) {
method LFGraphIterator (line 1042) | LFGraphIterator(LFGraph graph, LFVertex startVertex) {
FILE: src/opennlp/ccg/disjunctivizer/EdgeMatchFilter.java
class EdgeMatchFilter (line 56) | public class EdgeMatchFilter extends CompositeFilter<LFEdge> {
method EdgeMatchFilter (line 65) | public EdgeMatchFilter(LFEdge basis, MatchType... matchTypes) {
method EdgeMatchFilter (line 80) | public EdgeMatchFilter(LFEdge basis, Collection<MatchType> matchTypes) {
method checkBasis (line 111) | private void checkBasis(LFEdge basis) {
method getBasis (line 120) | public LFEdge getBasis() {
method setBasis (line 128) | public void setBasis(LFEdge basis) {
method getMatchTypes (line 136) | public EnumSet<MatchType> getMatchTypes() {
FILE: src/opennlp/ccg/disjunctivizer/FilteredLFEdgeSet.java
class FilteredLFEdgeSet (line 42) | public class FilteredLFEdgeSet extends FilteredSet<LFEdge> {
method FilteredLFEdgeSet (line 50) | public FilteredLFEdgeSet(Set<? extends LFEdge> edges, Filter<? super L...
method sourceView (line 60) | public Collection<LFVertex> sourceView() {
method targetView (line 70) | public Collection<LFVertex> targetView() {
method labelView (line 80) | public Collection<LFEdgeLabel> labelView() {
class ComponentView (line 84) | abstract class ComponentView<T> extends AbstractCollection<T> {
method componentOf (line 86) | abstract T componentOf(LFEdge edge);
method iterator (line 88) | @Override
method size (line 111) | @Override
class VertexView (line 117) | class VertexView extends ComponentView<LFVertex> {
method VertexView (line 121) | VertexView(boolean source) {
method componentOf (line 125) | @Override
class LabelView (line 131) | class LabelView extends ComponentView<LFEdgeLabel> {
method componentOf (line 133) | @Override
FILE: src/opennlp/ccg/disjunctivizer/LFGraphDifference.java
class LFGraphDifference (line 64) | public class LFGraphDifference {
method LFGraphDifference (line 82) | public LFGraphDifference(LFGraph a, LFGraph b, Alignment alignment) {
method checkGraph (line 95) | private void checkGraph(LFGraph g, PhrasePosition pos) {
method getA (line 104) | public LFGraph getA() {
method getB (line 111) | public LFGraph getB() {
method get (line 121) | public LFGraph get(PhrasePosition position) {
method getAlignment (line 128) | public Alignment getAlignment() {
method hashCode (line 136) | @Override
method equals (line 145) | @Override
method toString (line 158) | @Override
method reverse (line 182) | public LFGraphDifference reverse() {
method deletes (line 194) | public Set<LFEdge> deletes() {
method inserts (line 206) | public Set<LFEdge> inserts() {
method doDeletes (line 210) | Set<LFEdge> doDeletes(PhrasePosition keyPosition) {
method insertsFor (line 225) | @SuppressWarnings("unchecked")
method deletesFor (line 241) | public Set<LFEdge> deletesFor(LFVertex vertex) {
method substitutions (line 257) | public Set<LFEdge> substitutions() {
method substitutionsFor (line 296) | @SuppressWarnings("unchecked")
method substitutionsBySource (line 319) | @SuppressWarnings("unchecked")
method substitutionsBySourceFor (line 339) | public Map<LFVertex, Set<LFEdge>> substitutionsBySourceFor(LFEdge edge) {
class SourceView (line 344) | class SourceView extends AbstractMap<LFVertex, FilteredLFEdgeSet> {
method entrySet (line 346) | @Override
class SubstitutedSourceView (line 392) | class SubstitutedSourceView extends AbstractMap<LFVertex, Set<LFEdge>> {
method SubstitutedSourceView (line 399) | SubstitutedSourceView(Map<LFVertex, Set<LFEdge>> sourceView, LFEdge ...
method entrySet (line 404) | @Override
class EntrySet (line 409) | class EntrySet extends AbstractSet<Entry<LFVertex, Set<LFEdge>>> {
method entries (line 414) | Set<Entry<LFVertex, Set<LFEdge>>> entries() {
method size (line 438) | @Override
method iterator (line 443) | @Override
FILE: src/opennlp/ccg/disjunctivizer/LabelMatchFilter.java
class LabelMatchFilter (line 33) | public class LabelMatchFilter implements Filter<LFEdge> {
method LabelMatchFilter (line 42) | public LabelMatchFilter(LFEdgeLabel basis) {
method checkBasis (line 47) | private void checkBasis(LFEdgeLabel basis) {
method getBasis (line 59) | public LFEdgeLabel getBasis() {
method setBasis (line 67) | public void setBasis(LFEdgeLabel basis) {
method allows (line 79) | @Override
FILE: src/opennlp/ccg/disjunctivizer/MatchType.java
type MatchType (line 33) | public enum MatchType {
FILE: src/opennlp/ccg/disjunctivizer/MatchTypeFilter.java
class MatchTypeFilter (line 35) | public abstract class MatchTypeFilter implements Filter<LFEdge> {
method MatchTypeFilter (line 46) | protected MatchTypeFilter(MatchType... matchTypes) {
method MatchTypeFilter (line 55) | protected MatchTypeFilter(Collection<MatchType> matchTypes) {
method getMatchTypes (line 62) | public EnumSet<MatchType> getMatchTypes() {
FILE: src/opennlp/ccg/disjunctivizer/VertexMatchFilter.java
class VertexMatchFilter (line 46) | public class VertexMatchFilter extends MatchTypeFilter {
method VertexMatchFilter (line 56) | public VertexMatchFilter(LFVertex basis, MatchType... matchTypes) {
method VertexMatchFilter (line 70) | public VertexMatchFilter(LFVertex basis, Collection<MatchType> matchTy...
method checkBasis (line 77) | private void checkBasis(LFVertex basis) {
method getBasis (line 86) | public LFVertex getBasis() {
method setBasis (line 94) | public void setBasis(LFVertex basis) {
method allows (line 120) | @Override
FILE: src/opennlp/ccg/grammar/AbstractApplicationRule.java
class AbstractApplicationRule (line 35) | public abstract class AbstractApplicationRule extends AbstractRule {
method toXml (line 42) | public Element toXml(String dir) {
method arity (line 48) | public int arity() {
method apply (line 52) | protected List<Category> apply(Category xyCat, Category yCat) throws U...
FILE: src/opennlp/ccg/grammar/AbstractCompositionRule.java
class AbstractCompositionRule (line 37) | public abstract class AbstractCompositionRule extends AbstractApplicatio...
method initEisnerConstraints (line 51) | private static boolean initEisnerConstraints() {
method toXml (line 61) | public Element toXml(String dir) {
method eisner (line 68) | protected boolean eisner() { return useEisnerConstraints && _isHarmoni...
method apply (line 70) | protected List<Category> apply(Category xyCat, Category yzCat) throws ...
method createResult (line 163) | private Category createResult(Category result, ArgStack zStack,
method modify (line 188) | public void modify(Mutable m) {
method composeComplexY (line 195) | private ArgStack composeComplexY(ComplexCat xyOuterCC, Slash xySlash,
FILE: src/opennlp/ccg/grammar/AbstractRule.java
class AbstractRule (line 37) | public abstract class AbstractRule implements Rule, Serializable {
method toXml (line 51) | abstract public Element toXml();
method applyRule (line 54) | public void applyRule(Sign[] inputs, List<Sign> results) {
method distributeTargetFeatures (line 86) | protected void distributeTargetFeatures(Category cat) {
class DistributeTargetFeaturesFcn (line 103) | private class DistributeTargetFeaturesFcn extends CategoryFcnAdapter i...
method forall (line 105) | public void forall(Category c) {
method arity (line 127) | public abstract int arity();
method applyRule (line 137) | public abstract List<Category> applyRule(Category[] inputs) throws Uni...
method showApplyInstance (line 141) | protected void showApplyInstance(Category[] inputs) {
method showApplyInstance (line 153) | protected void showApplyInstance(Category first, Category second) {
method name (line 162) | public String name() {
method getRuleGroup (line 169) | public RuleGroup getRuleGroup() { return _ruleGroup; }
method setRuleGroup (line 174) | public void setRuleGroup(RuleGroup ruleGroup) { _ruleGroup = ruleGroup; }
method appendLFs (line 178) | protected void appendLFs(Category cat1, Category cat2, Category result...
FILE: src/opennlp/ccg/grammar/AbstractSubstitutionRule.java
class AbstractSubstitutionRule (line 35) | public abstract class AbstractSubstitutionRule extends AbstractApplicati...
method toXml (line 43) | public Element toXml(String dir) {
method apply (line 50) | protected List<Category> apply (Category xyzCat, Category yzCat)
FILE: src/opennlp/ccg/grammar/AbstractTypeRaisingRule.java
class AbstractTypeRaisingRule (line 37) | public abstract class AbstractTypeRaisingRule extends AbstractRule {
method AbstractTypeRaisingRule (line 63) | protected AbstractTypeRaisingRule(
method toXml (line 96) | public Element toXml(String dir) {
method arity (line 115) | public int arity() {
method applyRule (line 120) | public List<Category> applyRule(Category[] inputs) throws UnifyFailure {
method apply (line 128) | protected List<Category> apply(Category input) throws UnifyFailure {
FILE: src/opennlp/ccg/grammar/BackwardApplication.java
class BackwardApplication (line 34) | public class BackwardApplication extends AbstractApplicationRule {
method BackwardApplication (line 38) | public BackwardApplication () {
method toXml (line 45) | public Element toXml() { return super.toXml("backward"); }
method applyRule (line 47) | public List<Category> applyRule(Category[] inputs) throws UnifyFailure {
method toString (line 54) | public String toString() {
FILE: src/opennlp/ccg/grammar/BackwardComposition.java
class BackwardComposition (line 34) | public class BackwardComposition extends AbstractCompositionRule {
method BackwardComposition (line 38) | public BackwardComposition() {
method BackwardComposition (line 42) | public BackwardComposition(boolean isHarmonic) {
method toXml (line 57) | public Element toXml() { return super.toXml("backward"); }
method applyRule (line 59) | public List<Category> applyRule(Category[] inputs) throws UnifyFailure {
method toString (line 67) | public String toString() {
FILE: src/opennlp/ccg/grammar/BackwardSubstitution.java
class BackwardSubstitution (line 34) | public class BackwardSubstitution extends AbstractSubstitutionRule {
method BackwardSubstitution (line 38) | public BackwardSubstitution() {
method BackwardSubstitution (line 42) | public BackwardSubstitution(boolean isHarmonic) {
method toXml (line 57) | public Element toXml() { return super.toXml("backward"); }
method applyRule (line 59) | public List<Category> applyRule(Category[] inputs) throws UnifyFailure {
method toString (line 67) | public String toString() {
FILE: src/opennlp/ccg/grammar/BackwardTypeRaising.java
class BackwardTypeRaising (line 32) | public class BackwardTypeRaising extends AbstractTypeRaisingRule {
method BackwardTypeRaising (line 37) | public BackwardTypeRaising(boolean useDollar, Category arg, Category r...
method toXml (line 43) | public Element toXml() {
method toString (line 47) | public String toString() {
FILE: src/opennlp/ccg/grammar/ForwardApplication.java
class ForwardApplication (line 34) | public class ForwardApplication extends AbstractApplicationRule {
method ForwardApplication (line 38) | public ForwardApplication() {
method toXml (line 45) | public Element toXml() { return super.toXml("forward"); }
method applyRule (line 47) | public List<Category> applyRule(Category[] inputs) throws UnifyFailure {
method toString (line 55) | public String toString() {
FILE: src/opennlp/ccg/grammar/ForwardComposition.java
class ForwardComposition (line 34) | public class ForwardComposition extends AbstractCompositionRule {
method ForwardComposition (line 38) | public ForwardComposition() {
method ForwardComposition (line 42) | public ForwardComposition(boolean isHarmonic) {
method toXml (line 57) | public Element toXml() { return super.toXml("forward"); }
method applyRule (line 59) | public List<Category> applyRule(Category[] inputs) throws UnifyFailure {
method toString (line 67) | public String toString() {
FILE: src/opennlp/ccg/grammar/ForwardSubstitution.java
class ForwardSubstitution (line 34) | public class ForwardSubstitution extends AbstractSubstitutionRule {
method ForwardSubstitution (line 38) | public ForwardSubstitution() {
method ForwardSubstitution (line 42) | public ForwardSubstitution(boolean isHarmonic) {
method toXml (line 57) | public Element toXml() { return super.toXml("forward"); }
method applyRule (line 59) | public List<Category> applyRule(Category[] inputs) throws UnifyFailure {
method toString (line 67) | public String toString() {
FILE: src/opennlp/ccg/grammar/ForwardTypeRaising.java
class ForwardTypeRaising (line 32) | public class ForwardTypeRaising extends AbstractTypeRaisingRule {
method ForwardTypeRaising (line 37) | public ForwardTypeRaising (boolean useDollar, Category arg, Category r...
method toXml (line 43) | public Element toXml() {
method toString (line 47) | public String toString() {
FILE: src/opennlp/ccg/grammar/FragmentJoining.java
class FragmentJoining (line 35) | public class FragmentJoining extends AbstractRule {
method FragmentJoining (line 40) | public FragmentJoining() { _name = "*"; }
method toXml (line 43) | public Element toXml() { throw new RuntimeException("toXml not support...
method applyRule (line 48) | public Sign applyRule(Sign sign1, Sign sign2) {
method applyRule (line 59) | public List<Category> applyRule(Category[] inputs) throws UnifyFailure {
method arity (line 80) | public int arity() { return 2; }
method toString (line 83) | public String toString() {
FILE: src/opennlp/ccg/grammar/GlueRule.java
class GlueRule (line 37) | public class GlueRule extends AbstractRule {
method GlueRule (line 48) | public GlueRule() { _name = "glue"; }
method toXml (line 51) | public Element toXml() { throw new RuntimeException("toXml not support...
method arity (line 54) | public int arity() { return 2; }
method applyRule (line 57) | public List<Category> applyRule(Category[] inputs) throws UnifyFailure {
method isModifier (line 89) | private static boolean isModifier(Category cat) {
method toString (line 101) | public String toString() {
FILE: src/opennlp/ccg/grammar/Grammar.java
class Grammar (line 52) | public class Grammar {
method Grammar (line 116) | public Grammar(String filename) throws IOException {
method Grammar (line 121) | public Grammar(URL url) throws IOException {
method Grammar (line 126) | @SuppressWarnings("unchecked")
method convertToFileUrl (line 241) | public static String convertToFileUrl(String filename) {
method initializeTransformers (line 253) | private void initializeTransformers() throws TransformerConfigurationE...
method fromXmlSetup (line 289) | private SAXSource fromXmlSetup(InputStream istream) throws IOException {
method loadFromXml (line 330) | public synchronized Document loadFromXml(InputStream istream) throws I...
method loadFromXml (line 348) | public synchronized Document loadFromXml(String filename) throws IOExc...
method toXmlSetup (line 358) | private SAXSource toXmlSetup(Source source) throws IOException {
method saveToXml (line 402) | public synchronized void saveToXml(LF lf, String target, String filena...
method saveToXml (line 417) | public synchronized void saveToXml(LF lf, String target, OutputStream ...
method transformLF (line 443) | public synchronized LF transformLF(LF lf) throws IOException {
method loadLF (line 455) | public synchronized LF loadLF(Document doc) throws IOException {
method serializeXml (line 467) | public synchronized void serializeXml(Document doc, OutputStream out) ...
method makeLfElt (line 484) | public synchronized Element makeLfElt(LF lf) throws IOException {
method isPitchAccent (line 506) | public static boolean isPitchAccent(String s) {
method isBoundaryTone (line 519) | public static boolean isBoundaryTone(String s) {
method saveToApml (line 534) | public synchronized void saveToApml(Sign sign, String filename) throws...
method saveToApml (line 552) | public synchronized void saveToApml(Sign sign, Writer writer) throws I...
method getParsedWords (line 573) | public List<Word> getParsedWords(String s) {
method getName (line 593) | public final String getName() {
method toMorphXml (line 602) | public void toMorphXml(List<Word> words, String filename) throws IOExc...
method toLexiconXml (line 627) | public void toLexiconXml(List<Category> cats, List<String> POSs, Strin...
FILE: src/opennlp/ccg/grammar/Rule.java
type Rule (line 37) | public interface Rule {
method applyRule (line 47) | public List<Category> applyRule(Category[] inputs) throws UnifyFailure;
method arity (line 55) | public int arity();
method name (line 60) | public String name();
method getRuleGroup (line 65) | public RuleGroup getRuleGroup();
method setRuleGroup (line 70) | public void setRuleGroup(RuleGroup ruleGroup);
method toXml (line 73) | public Element toXml();
FILE: src/opennlp/ccg/grammar/RuleGroup.java
class RuleGroup (line 47) | public class RuleGroup implements Serializable {
class SupercatRuleCombo (line 69) | private class SupercatRuleCombo {
method SupercatRuleCombo (line 75) | public SupercatRuleCombo(String supercat, String rule) {
method SupercatRuleCombo (line 79) | public SupercatRuleCombo(String supercat, String supercat2, String r...
method setCombo (line 84) | public void setCombo(String supercat, String rule) {
method setCombo (line 87) | public void setCombo(String supercat, String supercat2, String rule) {
method hashCode (line 91) | public int hashCode() {
method equals (line 95) | public boolean equals(Object obj) {
method supercatHashCode (line 101) | public int supercatHashCode() {
method supercatEquals (line 105) | public boolean supercatEquals(Object obj) {
method toString (line 111) | public String toString() {
class SupercatComboSet (line 121) | private static class SupercatComboSet extends THashSet {
method SupercatComboSet (line 123) | SupercatComboSet() {
method get (line 137) | SupercatRuleCombo get(SupercatRuleCombo combo) {
method RuleGroup (line 159) | public RuleGroup(Grammar grammar) {
method RuleGroup (line 168) | public RuleGroup(URL url, Grammar grammar) throws IOException {
method readObject (line 190) | private void readObject(java.io.ObjectInputStream in) throws IOExcepti...
method readRule (line 198) | private Rule readRule(Element ruleEl) {
method readTypeChangingRule (line 254) | private Rule readTypeChangingRule(Element ruleEl) {
method toXml (line 278) | public void toXml(String filename) throws IOException {
method setDynamicCombos (line 298) | public void setDynamicCombos(boolean dynamic) {
method getDynamicCombos (line 310) | public boolean getDynamicCombos() { return dynamicCombos; }
method loadSupercatRuleCombos (line 317) | public void loadSupercatRuleCombos(URL url) throws IOException {
method borrowSupercatRuleCombos (line 345) | public void borrowSupercatRuleCombos(RuleGroup ruleGroup) {
method addRule (line 352) | public void addRule(Rule r) {
method index (line 367) | private void index(TypeChangingRule rule) {
method getUnaryRules (line 383) | public List<Rule> getUnaryRules() { return unaryRules; }
method getBinaryRules (line 386) | public List<Rule> getBinaryRules() { return binaryRules; }
method getTypeChangingRule (line 389) | public TypeChangingRule getTypeChangingRule(String name) {
method getRulesForPred (line 404) | public Collection<TypeChangingRule> getRulesForPred(String pred) {
method getRulesForRel (line 412) | public Collection<TypeChangingRule> getRulesForRel(String rel) {
method applyUnaryRules (line 418) | public List<Sign> applyUnaryRules(Sign input) {
method applyBinaryRules (line 474) | public List<Sign> applyBinaryRules(Sign input1, Sign input2) {
method applyGlueRule (line 532) | public List<Sign> applyGlueRule(Sign input1, Sign input2) {
method applyCoart (line 541) | public void applyCoart(Sign lexSign, Sign coartSign, List<Sign> result...
FILE: src/opennlp/ccg/grammar/TypeChangingRule.java
class TypeChangingRule (line 36) | public class TypeChangingRule extends AbstractRule implements LexSemOrig...
method TypeChangingRule (line 59) | public TypeChangingRule(Category arg, Category result, String name, LF...
method toXml (line 65) | public Element toXml() {
method arity (line 78) | public int arity() { return 1; }
method getArg (line 81) | public Category getArg() { return _arg; }
method getResult (line 84) | public Category getResult() { return _result; }
method getFirstEP (line 87) | public LF getFirstEP() { return _firstEP; }
method applyRule (line 91) | public List<Category> applyRule(Category[] inputs) throws UnifyFailure {
method apply (line 100) | protected List<Category> apply(Category input) throws UnifyFailure {
method toString (line 131) | public String toString() {
method getSupertag (line 140) | public String getSupertag() {
method getPOS (line 149) | public String getPOS() { return POS_STRING; }
method setOrigin (line 154) | public void setOrigin() { HyloHelper.setOrigin(_result.getLF(), this); }
FILE: src/opennlp/ccg/grammar/Types.java
class Types (line 40) | public class Types {
method Types (line 50) | public Types(Grammar grammar) {
method Types (line 59) | @SuppressWarnings("unchecked")
method getSimpleType (line 78) | public SimpleType getSimpleType(String typeName) {
method containsSimpleType (line 93) | public boolean containsSimpleType(String typeName) {
method getIndexMap (line 98) | public ArrayList<SimpleType> getIndexMap() {
method readTypes (line 104) | private void readTypes(List<Element> _types) {
method computeDepth (line 148) | private static int computeDepth(String type, GroupMap<String,String> p...
method findAllSubtypes (line 168) | private Collection<String> findAllSubtypes(GroupMap<String,String> hie...
method createSimpleTypes (line 191) | private void createSimpleTypes(GroupMap<String,String> hierarchy, TObj...
method printTypes (line 236) | public void printTypes() {
method debugSerialization (line 246) | public void debugSerialization() throws IOException, ClassNotFoundExce...
FILE: src/opennlp/ccg/grammardoc/AbstractDocumenter.java
class AbstractDocumenter (line 12) | public abstract class AbstractDocumenter implements Documenter {
method AbstractDocumenter (line 20) | protected AbstractDocumenter() {}
method AbstractDocumenter (line 25) | protected AbstractDocumenter(String name) {
method setDocumenterContext (line 32) | public void setDocumenterContext(DocumenterContext documenterContext) {
method getName (line 39) | public String getName() {
FILE: src/opennlp/ccg/grammardoc/Documenter.java
type Documenter (line 12) | public interface Documenter {
method getName (line 19) | public String getName();
method setDocumenterContext (line 26) | public void setDocumenterContext(DocumenterContext documenterContext);
method document (line 38) | public void document(SourceGrammar grammar) throws DocumenterException;
FILE: src/opennlp/ccg/grammardoc/DocumenterContext.java
type DocumenterContext (line 16) | public interface DocumenterContext {
method log (line 21) | public void log(String message);
method getDestinationDirectory (line 29) | public File getDestinationDirectory();
FILE: src/opennlp/ccg/grammardoc/DocumenterException.java
class DocumenterException (line 12) | public class DocumenterException extends GrammarDocException {
method DocumenterException (line 19) | public DocumenterException() {
method DocumenterException (line 26) | public DocumenterException(String message) {
method DocumenterException (line 33) | public DocumenterException(String message, Throwable cause) {
method DocumenterException (line 40) | public DocumenterException(Throwable cause) {
FILE: src/opennlp/ccg/grammardoc/DocumenterFactory.java
class DocumenterFactory (line 15) | public class DocumenterFactory {
method DocumenterFactory (line 21) | private DocumenterFactory() {}
method newInstance (line 26) | public static DocumenterFactory newInstance() {
method newDocumenter (line 39) | public Documenter newDocumenter() throws DocumenterNotFoundException {
method newDocumenter (line 51) | public Documenter newDocumenter(String name)
method newDocumenter (line 70) | public synchronized Documenter newDocumenter(DocumenterName name)
FILE: src/opennlp/ccg/grammardoc/DocumenterName.java
type DocumenterName (line 17) | public enum DocumenterName {
method DocumenterName (line 30) | private DocumenterName(Class<? extends Documenter> documenterClass) {
FILE: src/opennlp/ccg/grammardoc/DocumenterNotFoundException.java
class DocumenterNotFoundException (line 12) | public class DocumenterNotFoundException extends DocumenterException {
method DocumenterNotFoundException (line 20) | public DocumenterNotFoundException(DocumenterName name) {
method DocumenterNotFoundException (line 28) | public DocumenterNotFoundException(String name) {
method DocumenterNotFoundException (line 36) | DocumenterNotFoundException(DocumenterName name, Throwable cause) {
FILE: src/opennlp/ccg/grammardoc/DocumenterSourceException.java
class DocumenterSourceException (line 14) | public class DocumenterSourceException extends DocumenterException {
method DocumenterSourceException (line 24) | public DocumenterSourceException(SourceGrammarFile sourceGrammarFile) {
method DocumenterSourceException (line 32) | public DocumenterSourceException(String message,
method DocumenterSourceException (line 42) | public DocumenterSourceException(String message, Throwable cause,
method DocumenterSourceException (line 52) | public DocumenterSourceException(Throwable cause,
method getSourceGrammarFile (line 61) | public SourceGrammarFile getSourceGrammarFile() {
FILE: src/opennlp/ccg/grammardoc/GrammarDoc.java
class GrammarDoc (line 31) | public class GrammarDoc extends Task implements DocumenterContext {
method execute (line 43) | @Override
method loadSourceGrammar (line 92) | SourceGrammar loadSourceGrammar() throws GrammarDocException {
method loadGrammarFile (line 145) | SourceGrammarFile loadGrammarFile(SourceGrammarFileType fileType, File...
method getDestinationDirectory (line 171) | public File getDestinationDirectory() {
method getDestDir (line 178) | public File getDestDir() {
method setDestDir (line 185) | public void setDestDir(File destDir) {
method getSrcDir (line 192) | public File getSrcDir() {
method setSrcDir (line 199) | public void setSrcDir(File srcDir) {
method main (line 203) | public static void main(String[] args) {
class CommandGrammarDoc (line 255) | static final class CommandGrammarDoc extends GrammarDoc {
method CommandGrammarDoc (line 259) | CommandGrammarDoc(PrintStream out) {
method log (line 266) | @Override
FILE: src/opennlp/ccg/grammardoc/GrammarDocException.java
class GrammarDocException (line 12) | public class GrammarDocException extends Exception {
method GrammarDocException (line 19) | public GrammarDocException() {
method GrammarDocException (line 26) | public GrammarDocException(String message) {
method GrammarDocException (line 33) | public GrammarDocException(String message, Throwable cause) {
method GrammarDocException (line 40) | public GrammarDocException(Throwable cause) {
FILE: src/opennlp/ccg/grammardoc/SourceGrammar.java
class SourceGrammar (line 17) | public class SourceGrammar {
method SourceGrammar (line 22) | SourceGrammar(File sourceDirectory) {
method getSourceDirectory (line 31) | public File getSourceDirectory() {
method getSourceGrammarFileTypes (line 39) | public Set<SourceGrammarFileType> getSourceGrammarFileTypes() {
method getSourceGrammarFile (line 50) | public SourceGrammarFile getSourceGrammarFile(
method addSourceGrammarFile (line 55) | void addSourceGrammarFile(SourceGrammarFileType fileType,
FILE: src/opennlp/ccg/grammardoc/SourceGrammarFile.java
class SourceGrammarFile (line 17) | public class SourceGrammarFile {
method SourceGrammarFile (line 25) | SourceGrammarFile(SourceGrammarFileType fileName, File sourceFile) {
method getFileType (line 33) | public SourceGrammarFileType getFileType() {
method getSourceFile (line 40) | public File getSourceFile() {
method toString (line 50) | @Override
FILE: src/opennlp/ccg/grammardoc/SourceGrammarFileType.java
type SourceGrammarFileType (line 12) | public enum SourceGrammarFileType {
method SourceGrammarFileType (line 23) | private SourceGrammarFileType(String fileName) {
method SourceGrammarFileType (line 27) | private SourceGrammarFileType(String fileName, boolean required) {
method getFileName (line 35) | public String getFileName() {
method isRequired (line 44) | public boolean isRequired() {
FILE: src/opennlp/ccg/grammardoc/html/HTMLDocumenter.java
class HTMLDocumenter (line 38) | public class HTMLDocumenter extends AbstractDocumenter implements URIRes...
type FileName (line 46) | static enum FileName {
method FileName (line 51) | private FileName(String name) {
method HTMLDocumenter (line 59) | public HTMLDocumenter() {
method document (line 69) | public void document(SourceGrammar grammar) throws DocumenterException {
method resolve (line 132) | public Source resolve(String href, String base) throws TransformerExce...
method loadTemplates (line 157) | private Templates loadTemplates(String baseName)
method copyFiles (line 183) | private void copyFiles(File destDir) throws DocumenterException {
method doCopyFile (line 189) | private void doCopyFile(FileName fileName, File destDir)
method getResource (line 218) | private InputStream getResource(String resourceName) {
FILE: src/opennlp/ccg/grammardoc/html/lexicon.js
function toggleFeatures (line 5) | function toggleFeatures(elem) {
FILE: src/opennlp/ccg/hylo/Alt.java
class Alt (line 37) | public final class Alt implements Comparable<Alt>, Serializable {
method Alt (line 49) | public Alt(int altSet, int numInSet) {
method equals (line 54) | public boolean equals(Object o) {
method compareTo (line 60) | public int compareTo(Alt a) {
FILE: src/opennlp/ccg/hylo/Box.java
class Box (line 32) | public final class Box extends ModalOp {
method Box (line 36) | public Box(Element e) {
method Box (line 40) | private Box(Mode mode, LF arg) {
method copy (line 44) | public LF copy() {
method equals (line 48) | public boolean equals(Object o) {
method unifyCheck (line 56) | public void unifyCheck(Object u) throws UnifyFailure {
method fill (line 64) | public Object fill(Substitution sub) throws UnifyFailure {
method modalOpString (line 69) | public String modalOpString() {
method toXml (line 78) | public Element toXml() {
FILE: src/opennlp/ccg/hylo/Compacter.java
class Compacter (line 32) | public class Compacter {
method compact (line 41) | static LF compact(LF lf, Nominal root) {
method combine (line 252) | private static void combine(SatOp satOp1, SatOp satOp2) {
method subst (line 273) | private static boolean subst(LF lf, SatOp satOp2, Nominal nom2, Nomina...
method subst (line 279) | private static boolean subst(LF lf, Nominal currentParent, SatOp satOp...
method findDupParent (line 327) | private static Nominal findDupParent(LF lf, SatOp dup, Nominal dupNom) {
method findDupParent (line 332) | private static Nominal findDupParent(LF lf, Nominal currentParent, Sat...
FILE: src/opennlp/ccg/hylo/Converter.java
class Converter (line 32) | public class Converter {
method convertNominals (line 47) | static void convertNominals(LF lf) {
method convertNominals (line 56) | static Nominal convertNominals(LF lf, Sign root, Nominal nominalRoot) {
method convertNoms (line 75) | private void convertNoms(LF lf, Sign root) {
method convertNominal (line 143) | private Nominal convertNominal(Nominal oldNom, Proposition prop, int w...
method convertNominal (line 171) | private Nominal convertNominal(Nominal oldNom, String name) {
method lexDominated (line 183) | private static boolean lexDominated(String lexPred, Sign lexSign) {
method dominates (line 214) | private static boolean dominates(Nominal a, Nominal b, List<SatOp> pre...
method convertNominalsToVars (line 243) | static void convertNominalsToVars(List<SatOp> preds) {
method convertNominalsToVars (line 250) | static Nominal convertNominalsToVars(List<SatOp> preds, Nominal nomina...
method convertNominalToVar (line 271) | static Nominal convertNominalToVar(Nominal nom) {
FILE: src/opennlp/ccg/hylo/Diamond.java
class Diamond (line 32) | public final class Diamond extends ModalOp {
method Diamond (line 36) | public Diamond(Element e) {
method Diamond (line 40) | public Diamond(Mode mode, LF arg) {
method copy (line 44) | public LF copy() {
method equals (line 48) | public boolean equals(Object o) {
method unifyCheck (line 56) | public void unifyCheck(Object u) throws UnifyFailure {
method unify (line 64) | public Object unify(Object u, Substitution sub) throws UnifyFailure {
method fill (line 77) | public Object fill(Substitution sub) throws UnifyFailure {
method modalOpString (line 82) | public String modalOpString() {
method toXml (line 89) | public Element toXml() {
FILE: src/opennlp/ccg/hylo/EPsScorer.java
class EPsScorer (line 34) | public class EPsScorer {
class Results (line 39) | public static class Results {
method toString (line 51) | public String toString() {
method initNF (line 64) | private static NumberFormat initNF() {
method score (line 76) | public static Results score(LF lf, LF goldLF) {
method getDep (line 131) | private static Pair<Nominal,Nominal> getDep(SatOp ep) {
method fscore (line 144) | public static double fscore(double recall, double precision) {
FILE: src/opennlp/ccg/hylo/EnglishAgreementExtractor.java
class EnglishAgreementExtractor (line 57) | public class EnglishAgreementExtractor implements FeatureExtractor{
class FeatureMapWrapper (line 60) | public static class FeatureMapWrapper {
method FeatureMapWrapper (line 62) | public FeatureMapWrapper(FeatureMap featureMap) { this.featureMap = ...
class SignProps (line 66) | private class SignProps{
method SignProps (line 72) | public SignProps(String unbalPunct){
method getUnbalancedPunct (line 76) | public String getUnbalancedPunct(){
method EnglishAgreementExtractor (line 112) | public EnglishAgreementExtractor() {
method EnglishAgreementExtractor (line 119) | public EnglishAgreementExtractor(String sentId) {
method setAlphabet (line 128) | public void setAlphabet(Alphabet alphabet) {
method init (line 133) | public void init() {
method extractFeatures (line 169) | public FeatureVector extractFeatures(Sign sign, boolean complete) {
method addFeatures (line 176) | protected void addFeatures(Sign sign, boolean complete) {
method getOfComplSign (line 338) | public Sign getOfComplSign(){
method storeFeatureMap (line 346) | protected void storeFeatureMap(Sign sign) {
method getFeatureMap (line 351) | public FeatureMap getFeatureMap(Sign sign) {
method inc (line 360) | protected void inc(List<List<TrieMap.KeyExtractor<String>>> extractors) {
method getSubjectFeature (line 371) | public String getSubjectFeature(Category cat){
method isPunct (line 393) | private boolean isPunct(Word w) {
method getLowerSiblingDeps (line 404) | public Hashtable<LexDependency,Sign> getLowerSiblingDeps(Sign[] inputs...
method getSibling (line 424) | private Sign getSibling(List<LexDependency> sdeps,String rel){
method adjustWord (line 440) | private String adjustWord(String word) {
method adjustPOS (line 455) | private String adjustPOS(String word,String pos,String semClass) {
method adjustSemClass (line 474) | private String adjustSemClass(String semClass) {
method add_prefix_main1 (line 484) | private void add_prefix_main1(List<TrieMap.KeyExtractor<String>> retva...
method add_prefix_main2 (line 487) | private void add_prefix_main2(List<TrieMap.KeyExtractor<String>> retva...
method add_prefix_main3 (line 490) | private void add_prefix_main3(List<TrieMap.KeyExtractor<String>> retva...
method add_prefix_main4 (line 493) | private void add_prefix_main4(List<TrieMap.KeyExtractor<String>> retva...
method add_prefix_main5 (line 496) | private void add_prefix_main5(List<TrieMap.KeyExtractor<String>> retva...
method add_instance_num (line 501) | private void add_instance_num(List<TrieMap.KeyExtractor<String>> retva...
method add_prefix_sub1 (line 506) | private void add_prefix_sub1(List<TrieMap.KeyExtractor<String>> retval) {
method add_prefix_sub2 (line 509) | private void add_prefix_sub2(List<TrieMap.KeyExtractor<String>> retval) {
method add_prefix_sub3 (line 512) | private void add_prefix_sub3(List<TrieMap.KeyExtractor<String>> retval) {
method add_prefix_sub4 (line 515) | private void add_prefix_sub4(List<TrieMap.KeyExtractor<String>> retval) {
method add_prefix_sub5 (line 518) | private void add_prefix_sub5(List<TrieMap.KeyExtractor<String>> retval) {
method add_prefix_sub6 (line 521) | private void add_prefix_sub6(List<TrieMap.KeyExtractor<String>> retval) {
method add_prefix (line 526) | private void add_prefix(int prefix,List<TrieMap.KeyExtractor<String>> ...
method dep_word_head_word (line 540) | private List<TrieMap.KeyExtractor<String>> dep_word_head_word(int pref...
method dep_word_head_pos (line 551) | private List<TrieMap.KeyExtractor<String>> dep_word_head_pos(int prefi...
method dep_pos_head_word (line 562) | private List<TrieMap.KeyExtractor<String>> dep_pos_head_word(int prefi...
method dep_pos_head_pos (line 573) | private List<TrieMap.KeyExtractor<String>> dep_pos_head_pos(int prefix) {
method dep_word_head_stem (line 584) | private List<TrieMap.KeyExtractor<String>> dep_word_head_stem(int pref...
method dep_word_head_class (line 595) | private List<TrieMap.KeyExtractor<String>> dep_word_head_class(int pre...
method unbal_punct (line 606) | private List<TrieMap.KeyExtractor<String>> unbal_punct() {
method add_head_word (line 613) | private void add_head_word(List<TrieMap.KeyExtractor<String>> retval) {
method add_head_stem (line 618) | private void add_head_stem(List<TrieMap.KeyExtractor<String>> retval) {
method add_head_class (line 623) | private void add_head_class(List<TrieMap.KeyExtractor<String>> retval) {
method add_head_pos (line 628) | private void add_head_pos(List<TrieMap.KeyExtractor<String>> retval) {
method add_dep_word (line 633) | private void add_dep_word(List<TrieMap.KeyExtractor<String>> retval) {
method add_dep_pos (line 638) | private void add_dep_pos(List<TrieMap.KeyExtractor<String>> retval) {
FILE: src/opennlp/ccg/hylo/Flattener.java
class Flattener (line 35) | public class Flattener {
method getHighestParentMap (line 72) | public Map<Nominal,Nominal> getHighestParentMap() {
method flatten (line 83) | public List<SatOp> flatten(LF lf) {
method flatten (line 90) | private void flatten(
method makeDummySatOp (line 218) | private static SatOp makeDummySatOp(Nominal nom) {
method addSatOp (line 223) | private void addSatOp(SatOp satOp, SatOp parent, int depth, Stack<Alt>...
method propAltsOptsChunks (line 244) | private void propAltsOptsChunks() {
method propAltsOptsChunks (line 255) | private void propAltsOptsChunks(SatOp satOp, List<Alt> alts, TIntArray...
FILE: src/opennlp/ccg/hylo/HyloAtom.java
class HyloAtom (line 32) | public abstract class HyloAtom extends HyloFormula {
method HyloAtom (line 39) | protected HyloAtom(String name) {
method HyloAtom (line 43) | protected HyloAtom(String name, SimpleType st) {
method setAtomName (line 47) | public void setAtomName(String name) {
method getName (line 51) | public String getName() { return _name; }
method getType (line 53) | public SimpleType getType() { return type; }
method occurs (line 55) | public boolean occurs(Variable var) {
method toString (line 59) | public String toString() {
method prettyPrint (line 66) | public String prettyPrint(String indent) {
method compareTo (line 70) | public int compareTo(HyloAtom ha) {
method hashCode (line 75) | public int hashCode() {
method equals (line 83) | public boolean equals(Object obj) {
method hashCode (line 93) | public int hashCode(TObjectIntHashMap varMap) { return hashCode(); }
method equals (line 99) | public boolean equals(Object obj, TObjectIntHashMap varMap, TObjectInt...
FILE: src/opennlp/ccg/hylo/HyloFormula.java
class HyloFormula (line 36) | public abstract class HyloFormula implements LF, Serializable {
method setChunks (line 54) | public void setChunks(TIntArrayList chunks) { this.chunks = chunks; }
method getChunks (line 59) | public TIntArrayList getChunks() { return chunks; }
method getType (line 63) | public SimpleType getType() { return null; }
method copy (line 70) | public abstract LF copy();
method deepMap (line 78) | public void deepMap(ModFcn mf) {
method unify (line 98) | public Object unify(Object u, Substitution s) throws UnifyFailure {
method unifyCheck (line 122) | public void unifyCheck(Object u) throws UnifyFailure {}
method fill (line 133) | public Object fill(Substitution s) throws UnifyFailure {
method hashCode (line 140) | public abstract int hashCode(TObjectIntHashMap varMap);
method equals (line 146) | public abstract boolean equals(Object obj, TObjectIntHashMap varMap, T...
method toXml (line 151) | public abstract Element toXml();
method prettyPrint (line 156) | public abstract String prettyPrint(String indent);
FILE: src/opennlp/ccg/hylo/HyloHelper.java
class HyloHelper (line 39) | public class HyloHelper {
method getLF (line 48) | public static LF getLF(Element e) {
method getName (line 96) | private static String getName(Element e) {
method existingType (line 103) | private static SimpleType existingType(String name) {
method prefix (line 110) | protected static String prefix(String name) {
method type (line 117) | protected static SimpleType type(String name) {
method getLF_FromChildren (line 129) | @SuppressWarnings("unchecked")
method toXml (line 148) | public static Element toXml(LF lf) {
method processChunks (line 164) | public static void processChunks(Element e) {
method processChunks (line 170) | @SuppressWarnings("unchecked")
method convertChunks (line 193) | private static TIntArrayList convertChunks(String chunks) {
class ElementTest (line 205) | private static abstract class ElementTest {
method test (line 206) | abstract boolean test(Element elt);
method removeElts (line 210) | @SuppressWarnings("unchecked")
method removeConjOps (line 234) | private static void removeConjOps(Element lfElt) {
method removeChunkElts (line 247) | private static void removeChunkElts(Element lfElt) {
method isElementaryPredication (line 266) | public static boolean isElementaryPredication(LF lf) {
method isLexPred (line 274) | public static boolean isLexPred(LF pred) {
method isRelPred (line 285) | public static boolean isRelPred(LF pred) {
method isAttrPred (line 299) | public static boolean isAttrPred(LF pred) {
method isAttr (line 311) | public static boolean isAttr(LF arg) {
method getLexPred (line 323) | public static String getLexPred(LF lf) {
method getRel (line 333) | public static String getRel(LF lf) {
method getVal (line 343) | public static String getVal(LF lf) {
method getPrincipalNominal (line 355) | public static Nominal getPrincipalNominal(LF lf) {
method getSecondaryNominal (line 364) | public static Nominal getSecondaryNominal(LF lf) {
method flattenLF (line 381) | @SuppressWarnings("unchecked")
method getPreds (line 396) | public static List<SatOp> getPreds(LF lf) {
method getFirstPred (line 415) | public static SatOp getFirstPred(LF lf) {
method flatten (line 432) | public static List<SatOp> flatten(LF lf) {
method firstEP (line 442) | public static LF firstEP(LF lf) {
method setOrigin (line 450) | public static void setOrigin(LF lf, LexSemOrigin origin) {
method nomIndex (line 466) | public static Map<Nominal,Integer> nomIndex(List<SatOp> preds) {
method isRoot (line 479) | public static boolean isRoot(Nominal nom, List<SatOp> preds) {
method getUnfilledLexDeps (line 492) | public static List<LexDependency> getUnfilledLexDeps(LF lf) {
method getFilledLexDeps (line 502) | public static List<LexDependency> getFilledLexDeps(List<LexDependency>...
method getSemFeatsForHead (line 511) | public static List<SatOp> getSemFeatsForHead(Nominal nominal, LF lf) {
method compactAndConvertNominals (line 526) | public static LF compactAndConvertNominals(LF lf, Nominal root) {
method compactAndConvertNominals (line 533) | public static LF compactAndConvertNominals(LF lf, Nominal root, Sign r...
method compact (line 546) | public static LF compact(LF lf, Nominal root) {
method convertNominals (line 555) | public static void convertNominals(LF lf) {
method convertNominals (line 564) | public static Nominal convertNominals(LF lf, Sign root, Nominal nomina...
method convertNominalsToVars (line 572) | public static Nominal convertNominalsToVars(LF lf, Nominal nominalRoot) {
method append (line 587) | public static LF append(LF lf1, LF lf2) {
method sort (line 633) | public static void sort(LF lf) {
method sort (line 642) | public static void sort(List<? extends LF> preds) {
method compare (line 648) | public int compare(LF lf1, LF lf2){
method epType (line 679) | private static Integer epType(LF lf) {
method check (line 703) | public static void check(LF lf) throws UnifyFailure {
method check (line 709) | private static void check(List<LF> preds) throws UnifyFailure {
FILE: src/opennlp/ccg/hylo/HyloVar.java
class HyloVar (line 35) | public class HyloVar extends HyloFormula implements Variable, Indexed {
method HyloVar (line 44) | public HyloVar(String name) {
method HyloVar (line 48) | public HyloVar(String name, SimpleType st) {
method HyloVar (line 52) | protected HyloVar(String name, int index, SimpleType st) {
method name (line 59) | public String name() {
method copy (line 63) | public LF copy() {
method getIndex (line 68) | public int getIndex() {
method setIndex (line 72) | public void setIndex(int index) {
method getType (line 77) | public SimpleType getType() {
method occurs (line 81) | public boolean occurs(Variable var) {
method equals (line 85) | public boolean equals(Object o) {
method compareTo (line 92) | public int compareTo(HyloVar hv) {
method unify (line 101) | public Object unify(Object u, Substitution sub) throws UnifyFailure {
method fill (line 147) | public Object fill(Substitution sub) throws UnifyFailure {
method toString (line 156) | public String toString() {
method nameWithType (line 163) | public String nameWithType() {
method prettyPrint (line 172) | public String prettyPrint(String indent) {
method hashCode (line 177) | public int hashCode() {
method hashCode (line 184) | public int hashCode(TObjectIntHashMap varMap) {
method equals (line 199) | public boolean equals(Object obj, TObjectIntHashMap varMap, TObjectInt...
method toXml (line 210) | public Element toXml() {
FILE: src/opennlp/ccg/hylo/LexDepFeatureExtractor.java
class LexDepFeatureExtractor (line 52) | public class LexDepFeatureExtractor implements FeatureExtractor {
method sibPrecedenceForDep (line 70) | public static String sibPrecedenceForDep(String depConst) {
class ConditionalLazyExtractor (line 126) | public static abstract class ConditionalLazyExtractor {
method test (line 127) | abstract boolean test();
class ConditionalLazyEvaluator (line 132) | public static abstract class ConditionalLazyEvaluator extends Conditio...
method eval (line 133) | abstract float eval();
class FeatureMapWrapper (line 137) | public static class FeatureMapWrapper {
method FeatureMapWrapper (line 139) | public FeatureMapWrapper(FeatureMap featureMap) { this.featureMap = ...
method LexDepFeatureExtractor (line 221) | public LexDepFeatureExtractor() {
method setAlphabet (line 246) | public void setAlphabet(Alphabet alphabet) {
method extractFeatures (line 252) | public FeatureVector extractFeatures(Sign sign, boolean complete) {
method addFeatures (line 258) | protected void addFeatures(Sign sign, boolean complete) {
method storeFeatureMap (line 319) | protected void storeFeatureMap(Sign sign) {
method getFeatureMap (line 324) | protected FeatureMap getFeatureMap(Sign sign) {
method inc (line 332) | protected void inc(List<List<TrieMap.KeyExtractor<String>>> extractors) {
method incCond (line 342) | protected void incCond(List<ConditionalLazyExtractor> condExtractors) {
method addCond (line 354) | protected void addCond(List<ConditionalLazyEvaluator> condEvaluators) {
method getHeadBroadPOS (line 368) | private String getHeadBroadPOS(LexDependency dep) {
method setDepIndexes (line 377) | private void setDepIndexes(LexDependency dep) {
method getHeadDepOrder (line 384) | private String getHeadDepOrder() {
method setSibIndex (line 389) | private void setSibIndex(LexDependency sib) {
method getHeadSibOrder (line 395) | private String getHeadSibOrder() {
method depPrecedesSib (line 400) | private boolean depPrecedesSib() {
class DefiniteNP
Condensed preview — 981 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (9,077K chars).
[
{
"path": ".gitignore",
"chars": 886,
"preview": "*.sw?\n*~\n*.jar\n*.so\n*.class\n*.pyc\n*.tgz\n*.gz\n*.lm\n*.3bo\n*.mod\nvocab.*\n!vocab.flm\n.project\n.classpath\nbin/ccg2xml.py\nbin/"
},
{
"path": "AUTHORS",
"chars": 1576,
"preview": "Main Authors:\n Core Java Code:\n Jason Baldridge <jmb -at- cogsci.ed.ac.uk>\n Gann Bierner <gbierner -"
},
{
"path": "CHANGES",
"chars": 20276,
"preview": "0.9.6 - ...\r\n-----------\r\n\r\n* Updated .gitignore, CHANGES and docs/index.html for transition to\r\n GitHub\r\n\r\n\r\n0.9.5 - d"
},
{
"path": "LICENSE",
"chars": 24384,
"preview": "\nGNU LESSER GENERAL PUBLIC LICENSE\n\nVersion 2.1, February 1999\n\n(The master copy of this license lives on the GNU websit"
},
{
"path": "README.md",
"chars": 10294,
"preview": "# OpenCCG\n\nOpenCCG is a system for parsing and generating text using [combinatory categorial grammar](https://en.wikiped"
},
{
"path": "SAMPLE_GRAMMARS",
"chars": 2836,
"preview": " \nThis SAMPLE_GRAMMARS file describes the sample grammars that come \nwith the distribution, and provides an overview of"
},
{
"path": "TODO",
"chars": 2277,
"preview": "\nGeneral OpenCCG development:\n----------------------------\n\n- Add check for target LF when adding/writing full words (in"
},
{
"path": "bin/ccg-build",
"chars": 1152,
"preview": "#!/bin/sh\n. ccg-env\nANT_HOME=\"$OPENCCG_LIB\"\nPROPS=\"-Dant.home=$ANT_HOME -Dopenccg.home=$OPENCCG_HOME\"\ncase `uname` in\n "
},
{
"path": "bin/ccg-build.bat",
"chars": 642,
"preview": "@echo off\r\ncall ccg-env\r\nset ANT_HOME=%OPENCCG_LIB%\r\nset PROPS=-Dant.home=%ANT_HOME% -Dopenccg.home=%OPENCCG_HOME%\r\nset "
},
{
"path": "bin/ccg-cvr",
"chars": 110,
"preview": "#!/bin/sh\n# For usage, do: ccg-cvr -h\n. ccg-env\n\"$JAVA\" $JAVA_ARGS opennlp.ccg.test.CrossValidateRealizer $@\n\n"
},
{
"path": "bin/ccg-cvr.bat",
"chars": 113,
"preview": "@echo off\r\nrem For usage, do: ccg-cvr -h\r\ncall ccg-env\r\n%JAVA_CMD% opennlp.ccg.test.CrossValidateRealizer %* \r\n\r\n"
},
{
"path": "bin/ccg-draw-graph",
"chars": 57,
"preview": "#!/bin/sh\n\npython \"$OPENCCG_HOME/bin/dlf_parser.py\" \"$@\"\n"
},
{
"path": "bin/ccg-draw-graph.bat",
"chars": 53,
"preview": "@echo off\r\npython %OPENCCG_HOME%/bin/dlf_parser.py %*"
},
{
"path": "bin/ccg-draw-tree",
"chars": 60,
"preview": "#!/bin/sh\n\npython \"$OPENCCG_HOME/bin/ccg_draw_tree.py\" \"$@\"\n"
},
{
"path": "bin/ccg-draw-tree.bat",
"chars": 56,
"preview": "@echo off\r\npython %OPENCCG_HOME%/bin/ccg_draw_tree.py %*"
},
{
"path": "bin/ccg-env",
"chars": 1423,
"preview": "#!/bin/sh\n# sets OpenCCG environment variables\n\nif [ \"$JAVA_HOME\" = \"\" ] ; then\n echo\n echo \"Error: JAVA_HOME not foun"
},
{
"path": "bin/ccg-env.bat",
"chars": 1435,
"preview": "@echo off\r\nrem sets OpenCCG environment variables\r\n\r\nif not exist \"%JAVA_HOME%\" goto no_JAVA_HOME\r\nif not exist \"%OPENCC"
},
{
"path": "bin/ccg-grammardoc",
"chars": 492,
"preview": "#!/bin/sh\n#\n# $Id: ccg-grammardoc,v 1.2 2006/12/03 17:14:23 mwhite14850 Exp $\n# Script to run grammardoc from the comman"
},
{
"path": "bin/ccg-grammardoc.bat",
"chars": 260,
"preview": "@echo off\r\nrem Usage: ccg-grammardoc [-s|--source sourceDir] [-d|--dest destDir]\r\ncall ccg-env\r\nset ANT_HOME=%OPENCCG_HO"
},
{
"path": "bin/ccg-gt",
"chars": 98,
"preview": "#!/bin/sh\n# For usage, do: ccg-gt -h\n. ccg-env\n\"$JAVA\" $JAVA_ARGS opennlp.ccg.test.GenTargets $@\n\n"
},
{
"path": "bin/ccg-gt.bat",
"chars": 100,
"preview": "@echo off\r\nrem For usage, do: ccg-gt -h\r\ncall ccg-env\r\n%JAVA_CMD% opennlp.ccg.test.GenTargets %*\r\n\r\n"
},
{
"path": "bin/ccg-ht-factors",
"chars": 152,
"preview": "#!/bin/sh\n. ccg-env\n#CP=$CP:$OPENCCG_HOME/lib/jopt-simple.jar\n#echo $JAVA_ARGS\n\"$JAVA\" $JAVA_ARGS opennlp.ccg.realize.hy"
},
{
"path": "bin/ccg-hypertagger",
"chars": 145,
"preview": "#!/bin/sh\n. ccg-env\n#CP=$CP:$OPENCCG_HOME/lib/jopt-simple.jar\n#echo $JAVA_ARGS\n\"$JAVA\" $JAVA_ARGS opennlp.ccg.realize.hy"
},
{
"path": "bin/ccg-hypertagger.bat",
"chars": 85,
"preview": "@echo off\r\ncall ccg-env\r\n%JAVA_CMD% opennlp.ccg.realize.hypertagger.TagExtract %*\r\n\r\n"
},
{
"path": "bin/ccg-parse",
"chars": 83,
"preview": "#!/bin/sh\n# Usage: ccg-parse -h\n. ccg-env\n\"$JAVA\" $JAVA_ARGS opennlp.ccg.Parse $@\n\n"
},
{
"path": "bin/ccg-parse.bat",
"chars": 110,
"preview": "@echo off\r\nrem Usage: ccg-parse -h\r\ncall ccg-env\r\n%JAVA_CMD% opennlp.ccg.Parse %1 %2 %3 %4 %5 %6 %7 %8 %9 \r\n\r\n"
},
{
"path": "bin/ccg-postagger",
"chars": 86,
"preview": "#!/bin/sh\n. ccg-env\n\"$JAVA\" $JAVA_ARGS opennlp.ccg.parse.postagger.BasicPOSTagger $@\n\n"
},
{
"path": "bin/ccg-postagger.bat",
"chars": 85,
"preview": "@echo off\r\ncall ccg-env\r\n%JAVA_CMD% opennlp.ccg.parse.postagger.BasicPOSTagger %*\r\n\r\n"
},
{
"path": "bin/ccg-realize",
"chars": 130,
"preview": "#!/bin/sh\n# Usage: ccg-realize (-g <grammarfile>) <inputfile> (<outputfile>)\n. ccg-env\n\"$JAVA\" $JAVA_ARGS opennlp.ccg.Re"
},
{
"path": "bin/ccg-realize.bat",
"chars": 211,
"preview": "@echo off\r\nrem Usage: ccg-realize (-g <grammarfile>) <inputfile> (<outputfile>)\r\ncall ccg-env\r\nrem set HPROF=-Xrunhprof:"
},
{
"path": "bin/ccg-supertagger",
"chars": 111,
"preview": "#!/bin/sh\n. ccg-env\n\"$JAVA\" $JAVA_ARGS opennlp.ccg.parse.supertagger.WordAndPOSDictionaryLabellingStrategy $@\n\n"
},
{
"path": "bin/ccg-supertagger.bat",
"chars": 110,
"preview": "@echo off\r\ncall ccg-env\r\n%JAVA_CMD% opennlp.ccg.parse.supertagger.WordAndPOSDictionaryLabellingStrategy %*\r\n\r\n"
},
{
"path": "bin/ccg-test",
"chars": 101,
"preview": "#!/bin/sh\n# For usage, do: ccg-test -h\n. ccg-env\n\"$JAVA\" $JAVA_ARGS opennlp.ccg.test.Regression \"$@\"\n"
},
{
"path": "bin/ccg-test.bat",
"chars": 156,
"preview": "@echo off\r\nrem For usage, do: ccg-test -h\r\ncall ccg-env\r\nrem set HPROF=-Xrunhprof:cpu=times,file=hmm-prof.txt\r\n%JAVA_CMD"
},
{
"path": "bin/ccg-update",
"chars": 105,
"preview": "#!/bin/sh\n# For usage, do: ccg-update -h\n. ccg-env\n\"$JAVA\" $JAVA_ARGS opennlp.ccg.test.UpdateTestbed $@\n\n"
},
{
"path": "bin/ccg-update.bat",
"chars": 107,
"preview": "@echo off\r\nrem For usage, do: ccg-update -h\r\ncall ccg-env\r\n%JAVA_CMD% opennlp.ccg.test.UpdateTestbed %*\r\n\r\n"
},
{
"path": "bin/ccg2xml",
"chars": 54,
"preview": "#!/bin/sh\n\npython \"$OPENCCG_HOME/bin/ccg2xml.py\" \"$@\"\n"
},
{
"path": "bin/ccg2xml.bat",
"chars": 50,
"preview": "@echo off\r\npython %OPENCCG_HOME%/bin/ccg2xml.py %*"
},
{
"path": "bin/ccg_draw_tree.py",
"chars": 2362,
"preview": "#\r\n# ccg_draw_tree uses nltk.Tree to draw a tree from a CCGbank .auto file, \r\n# or to draw two trees from two .auto file"
},
{
"path": "bin/dlf_parser.py",
"chars": 9487,
"preview": "#\n# dlf_parser.py (invoked by ccg-draw-graph) uses graphviz's dot to visualize (D)LF graphs\n#\n# author: Jonathan Barker "
},
{
"path": "bin/tccg",
"chars": 89,
"preview": "#!/bin/sh\n# For usage, do: tccg -h\n. ccg-env\n\"$JAVA\" $JAVA_ARGS opennlp.ccg.TextCCG \"$@\"\n"
},
{
"path": "bin/tccg.bat",
"chars": 90,
"preview": "@echo off\r\nrem For usage, do: tccg -h\r\ncall ccg-env\r\n%JAVA_CMD% opennlp.ccg.TextCCG %*\r\n\r\n"
},
{
"path": "bin/visccg",
"chars": 57,
"preview": "#!/bin/sh\n\npython \"$OPENCCG_HOME/bin/ccg_editor.py\" \"$@\"\n"
},
{
"path": "bin/visccg.bat",
"chars": 53,
"preview": "@echo off\npython %OPENCCG_HOME%/bin/ccg_editor.py %*\n"
},
{
"path": "bin/wccg",
"chars": 88,
"preview": "#!/bin/sh\n# For usage, do: tccg -h\n. ccg-env\n\"$JAVA\" $JAVA_ARGS opennlp.ccg.WebCCG \"$@\"\n"
},
{
"path": "build.xml",
"chars": 12634,
"preview": "<!-- $Id: build.xml,v 1.61 2011/12/14 03:11:05 mwhite14850 Exp $ -->\n<!-- Copyright (C) 2003-13 Jason Baldridge, Michael"
},
{
"path": "ccg-format-grammars/arabic/arabic.ccg",
"chars": 26364,
"preview": "#############################################################\n# "
},
{
"path": "ccg-format-grammars/inherit/inherit.ccg",
"chars": 1106,
"preview": "# A minimal grammar which shows inheritance.\n# Jason Baldridge, September 2007\n\n################## Features ############"
},
{
"path": "ccg-format-grammars/tiny/tiny.ccg",
"chars": 24325,
"preview": "#############################################################\n# "
},
{
"path": "ccg-format-grammars/tinytiny/tinytiny.ccg",
"chars": 3048,
"preview": "# A truly minimal grammar for CCG.\n# Ben Wing, May 2006\n\n################## Features #################\n\n\n\nfeature {\n CA"
},
{
"path": "ccgbank/bin/american-to-logical-quotes.py",
"chars": 992,
"preview": "\"\"\"\nCopryright (c) 2011 Dennis N. Mehay\n\nAssumes tokenized, PTB3-normalized UTF-8 text, one sentence per line.\n\n<stdin> "
},
{
"path": "ccgbank/bin/convert-mtc-systems.py",
"chars": 4455,
"preview": "\"\"\"\nThis program takes a set of documents (all streamed from stdin at once) and formats them in a way suitable for\nuse w"
},
{
"path": "ccgbank/bin/convert-spaces-to-newlines.py",
"chars": 196,
"preview": "#\n# converts spaces to newlines, and newlines to special <eol> chars,\n# from stdin to stdout\n#\nimport sys, re;\n[sys.stdo"
},
{
"path": "ccgbank/bin/convert_all",
"chars": 234,
"preview": "#!/bin/bash\nfor i in 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24\ndo\n echo \"Starting con"
},
{
"path": "ccgbank/bin/correlate-to-judgments.py",
"chars": 3917,
"preview": "\"\"\"\nCorrelate human judgments (streamed into sys.stdin -- e.g., from the MTC)\nto the BLEU/NIST scores in the given direc"
},
{
"path": "ccgbank/bin/filter_feats.py",
"chars": 2884,
"preview": "#!/usr/bin/env python\n\"\"\"\n(c) 2008 Dennis N. Mehay\nUse this file any way you want, just please give the\nauthor credit if"
},
{
"path": "ccgbank/bin/find-betas-no-gold.py",
"chars": 6925,
"preview": "\"\"\"\nGiven:\n\n(1) a file supertagged words (OpenCCC file output format\nas produced by, e.g., WordAndPOSDictionaryLabelling"
},
{
"path": "ccgbank/bin/gen_parser_events_a",
"chars": 132,
"preview": "#!/bin/bash\nfor i in 02 03 04 05\ndo\n ccg-build -Dsect=$i -f build-ps.xml gen-parser-events &> logs/log.gen.parser.even"
},
{
"path": "ccgbank/bin/gen_parser_events_b",
"chars": 132,
"preview": "#!/bin/bash\nfor i in 06 07 08 09\ndo\n ccg-build -Dsect=$i -f build-ps.xml gen-parser-events &> logs/log.gen.parser.even"
},
{
"path": "ccgbank/bin/gen_parser_events_c",
"chars": 132,
"preview": "#!/bin/bash\nfor i in 10 11 12 13\ndo\n ccg-build -Dsect=$i -f build-ps.xml gen-parser-events &> logs/log.gen.parser.even"
},
{
"path": "ccgbank/bin/gen_parser_events_d",
"chars": 132,
"preview": "#!/bin/bash\nfor i in 14 15 16 17\ndo\n ccg-build -Dsect=$i -f build-ps.xml gen-parser-events &> logs/log.gen.parser.even"
},
{
"path": "ccgbank/bin/gen_parser_events_e",
"chars": 132,
"preview": "#!/bin/bash\nfor i in 18 19 20 21\ndo\n ccg-build -Dsect=$i -f build-ps.xml gen-parser-events &> logs/log.gen.parser.even"
},
{
"path": "ccgbank/bin/gen_realizer_events_a",
"chars": 136,
"preview": "#!/bin/bash\nfor i in 02 03 04 05\ndo\n ccg-build -Dsect=$i -f build-rz.xml gen-realizer-events &> logs/log.gen.realizer."
},
{
"path": "ccgbank/bin/gen_realizer_events_b",
"chars": 136,
"preview": "#!/bin/bash\nfor i in 06 07 08 09\ndo\n ccg-build -Dsect=$i -f build-rz.xml gen-realizer-events &> logs/log.gen.realizer."
},
{
"path": "ccgbank/bin/gen_realizer_events_c",
"chars": 136,
"preview": "#!/bin/bash\nfor i in 10 11 12 13\ndo\n ccg-build -Dsect=$i -f build-rz.xml gen-realizer-events &> logs/log.gen.realizer."
},
{
"path": "ccgbank/bin/gen_realizer_events_d",
"chars": 136,
"preview": "#!/bin/bash\nfor i in 14 15 16 17\ndo\n ccg-build -Dsect=$i -f build-rz.xml gen-realizer-events &> logs/log.gen.realizer."
},
{
"path": "ccgbank/bin/gen_realizer_events_e",
"chars": 136,
"preview": "#!/bin/bash\nfor i in 18 19 20 21\ndo\n ccg-build -Dsect=$i -f build-rz.xml gen-realizer-events &> logs/log.gen.realizer."
},
{
"path": "ccgbank/bin/get-text-from-mtc-style.py",
"chars": 644,
"preview": "\"\"\"\nGets the text from a MTC-style corpus.\nJust looks for '<seg id=...> </seg>' segments.\n\n<stdin> => <stdout>\n\n(c) 2011"
},
{
"path": "ccgbank/bin/get-truecase-list.py",
"chars": 3640,
"preview": "\"\"\"\nRequires Python >= 2.6x+ but < 3.0.\n\nTakes in a stream (from stdin) or file of *tokenized* plain text (utf-8),\nretur"
},
{
"path": "ccgbank/bin/get-uniq-nbest.py",
"chars": 891,
"preview": "from BeautifulSoup import BeautifulStoneSoup as BSS\nimport codecs\nimport sys, os\nstreamWriter = codecs.lookup('utf-8')[-"
},
{
"path": "ccgbank/bin/get_factors_from_parse.py",
"chars": 2137,
"preview": "#!/usr/bin/env python\n\n\"\"\"\n(c) 2008 Dennis N. Mehay\nUse this file any way you want, just please give the\nauthor credit i"
},
{
"path": "ccgbank/bin/get_just_words_from_ner_text.py",
"chars": 942,
"preview": "\"\"\"\nTakes NE tagged text from stdin (assuming utf-8) and does just what it says: prints to stdout only the words.\n\"\"\"\nim"
},
{
"path": "ccgbank/bin/lowercase_tagged_text.py",
"chars": 1774,
"preview": "#!/usr/bin/env python\n\"\"\"\nTakes a POS-tagged file and writes out the text with tokens lowercased except for\nproper nouns"
},
{
"path": "ccgbank/bin/merge-mtc-ids.py",
"chars": 1824,
"preview": "\"\"\"\nThis program re-inserts the MTC unique IDs (sys+DOC+segment) into an auto-number-ID'ed parse of said\nMTC (or similar"
},
{
"path": "ccgbank/bin/merge-stanford-morpha-with-pos.py",
"chars": 2509,
"preview": "\"\"\"\nGiven two files: (1) the output of Stanford's re-implementation of 'morpha' and (2) the 'pairs' file of <word><space"
},
{
"path": "ccgbank/bin/merge_pos_ne.py",
"chars": 2243,
"preview": "#!/usr/bin/env python\n\"\"\"\nTakes a POS-tagged file and a file of the original, NE-tagged text and writes out a file of:\n\n"
},
{
"path": "ccgbank/bin/my_unicode.py",
"chars": 956,
"preview": "\"\"\"\nUseful functions for dealing with Unicode messiness that arises from dealing with messy\ninput (e.g., gibberish from "
},
{
"path": "ccgbank/bin/nbest-mtc-to-bleu-nist.py",
"chars": 4866,
"preview": "\"\"\"\nThis program takes the n-best realizer output as one file (with sys+DOC+segment IDs -- 'info' attributes),\nthe tb.xm"
},
{
"path": "ccgbank/bin/ner/NERApp/src/nerapp/NERApp.java",
"chars": 7045,
"preview": "package nerapp;\n\nimport java.io.*;\n//import edu.stanford.nlp.ie.crf.*;\nimport edu.stanford.nlp.ie.AbstractSequenceClassi"
},
{
"path": "ccgbank/bin/ner/build-ner-api.properties",
"chars": 100,
"preview": "# For compiling against the Stanford API.\nstanford.core.nlp=../../stanford-nlp/stanford-core-nlp.jar"
},
{
"path": "ccgbank/bin/ner/build-ner-api.xml",
"chars": 2154,
"preview": "<!--\n Copyright (C) 2012 Dennis N. Mehay\n \n This library is free software; you can redistribute it and/or\n modify it und"
},
{
"path": "ccgbank/bin/ner/ner-tag.sh",
"chars": 435,
"preview": "#!/bin/bash\n#\n# Tag a file $1 using Stanford NER located in base directory $2 (first arg, e.g., \"/home/me/stanford-ner-2"
},
{
"path": "ccgbank/bin/ner/ner_word.py",
"chars": 2090,
"preview": "class NERWord:\n \"\"\"\n A simple wrapper for NER-labelled words.\n \"\"\"\n def __init__(self, wd, label=None, delim"
},
{
"path": "ccgbank/bin/ner/post-process-stanford-ner.py",
"chars": 2233,
"preview": "import sys, optparse, os, codecs\nfrom ner_word import NERWord\nimport math\n\n\"\"\"\nTake Stanford 'inlineXML' output from std"
},
{
"path": "ccgbank/bin/normalize_text.py",
"chars": 1619,
"preview": "\"\"\"\nAssuming UTF-8 input (defaults to stdin, otherwise, supply a file), normalize plain text in\nvarious ways -- e.g., no"
},
{
"path": "ccgbank/bin/post-process-metricsmatr.py",
"chars": 513,
"preview": "\"\"\"\nPipe in a MetricsMATR-style mteval script output and, given (as sys.argv[1]) the name of the\nreference system, produ"
},
{
"path": "ccgbank/bin/prepare-for-stanford-morpha.py",
"chars": 724,
"preview": "\"\"\"\nTake in a space-delimited file of <word>...<POS>...<SEM-CLASS> and turn it into \na form that the Stanford NE recogni"
},
{
"path": "ccgbank/bin/reverse-spaces-to-newlines.py",
"chars": 216,
"preview": "#\n# reverses the conversion from spaces to newlines, and newlines to special <eol> chars,\n# from stdin to stdout\n#\nimpor"
},
{
"path": "ccgbank/bin/run-all-bleu.sh",
"chars": 1045,
"preview": "#!/bin/bash\n# run Bleu/NIST on all systems sys directory (passed in) with all references in the ref directory\n# (also pa"
},
{
"path": "ccgbank/bin/stem_nns_vbx",
"chars": 711,
"preview": "#/bin/bash\n#\n# produces a file <novelfile>.dir/morph from <novelfile>.dir/pairs\n# that lists the words, pos tags and, fo"
},
{
"path": "ccgbank/bin/toUTF-8.py",
"chars": 1415,
"preview": "\"\"\"\nCopryright (c) 2011 Dennis N. Mehay\n\nAssumes that 'chardet' is installed.\n\nRe-encodes most known Unicode encodings a"
},
{
"path": "ccgbank/bin/write_morph.py",
"chars": 1653,
"preview": "#!/usr/bin/env python\n\"\"\"\nTakes a file of word-tag pairs or word-tag-stem triples and writes an xml morph file.\n\n(c) 201"
},
{
"path": "ccgbank/build-ht.properties",
"chars": 1100,
"preview": "\n# nb: info.dir is specified in build.properties\npreds.train=${info.dir}/preds-train\npreds.dev=${info.dir}/preds-00-all\n"
},
{
"path": "ccgbank/build-ht.xml",
"chars": 13389,
"preview": "<!--\n Copyright (C) 2010-11 Dominic Espinosa and Michael White\n \n This library is free software; you can redistribute it"
},
{
"path": "ccgbank/build-models.properties",
"chars": 436,
"preview": "\n# nb: info.dir is specified in build.properties\nfactors.train=${info.dir}/factors-train\nfactors.dev=${info.dir}/factors"
},
{
"path": "ccgbank/build-models.xml",
"chars": 3365,
"preview": "<!--\n Copyright (C) 2010-11 Michael White\n \n This library is free software; you can redistribute it and/or\n modify it un"
},
{
"path": "ccgbank/build-original.properties",
"chars": 1264,
"preview": "\n# nb: original.ccgbank.dir and data.dir are specified in build.properties\n\noriginal.dir=./original\n\ncorpus.dir=${origin"
},
{
"path": "ccgbank/build-original.xml",
"chars": 14096,
"preview": "<!--\n Copyright (C) 2010 Michael White and Dennis Mehay\n \n This library is free software; you can redistribute it and/or"
},
{
"path": "ccgbank/build-ps.properties",
"chars": 1916,
"preview": "# nb: feats.dir and models.dir are specified in build-models.properties\n\nparser.feats.dir=${feats.dir}/parser\nparser.mod"
},
{
"path": "ccgbank/build-ps.xml",
"chars": 26345,
"preview": "<!--\n Copyright (C) 2011-12 Michael White, Dennis N. Mehay\n \n This library is free software; you can redistribute it and"
},
{
"path": "ccgbank/build-release.xml",
"chars": 8516,
"preview": "<!--\n Copyright (C) 2011 Michael White\n \n This library is free software; you can redistribute it and/or\n modify it under"
},
{
"path": "ccgbank/build-rz.properties",
"chars": 871,
"preview": "\n# nb: feats.dir and models.dir is specified in build-models.properties\nrealizer.feats.dir=${feats.dir}/realizer\nrealize"
},
{
"path": "ccgbank/build-rz.xml",
"chars": 18175,
"preview": "<!--\n Copyright (C) 2010-11 Michael White\n \n This library is free software; you can redistribute it and/or\n modify it un"
},
{
"path": "ccgbank/build-st.properties",
"chars": 810,
"preview": "\n# nb: feats.dir and models.dir are specified in build-models.properties\n\nsupertagger.feats.dir=${feats.dir}/supertagger"
},
{
"path": "ccgbank/build-st.xml",
"chars": 11774,
"preview": "<!--\n Copyright (C) 2010 Michael White and Dennis Mehay\n \n This library is free software; you can redistribute it and/or"
},
{
"path": "ccgbank/build.properties",
"chars": 857,
"preview": "\ndata.dir=./data\ntemplates.dir=./templates\nconvert.dir=./convert\nextract.dir=./extract\ninfo.dir=${extract.dir}/info\ntest"
},
{
"path": "ccgbank/build.xml",
"chars": 18577,
"preview": "<!--\n Copyright (C) 2005-2011 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you"
},
{
"path": "ccgbank/data/README",
"chars": 491,
"preview": "\nThe files wsj-nns-vb and wsj-nns-vb-stems contain the plural nouns & verbs and\ntheir stems, respectively, from the WSJ "
},
{
"path": "ccgbank/data/get_wsj_nns_vb",
"chars": 101,
"preview": "cat /home/corpora/EN/penn_treebank_3/ims-cwb/wsj/wsj.cqp | egrep \"NNS|VB\" | sort | uniq > wsj-nns-vb\n"
},
{
"path": "ccgbank/data/novel/two-sents",
"chars": 139,
"preview": "Google announced today that it would offer free texting on its Google Voice app for the iPhone.\nThe press release was gr"
},
{
"path": "ccgbank/data/sample/AUTO/00/wsj_0001.auto",
"chars": 1894,
"preview": "ID=wsj_0001.1 PARSER=GOLD NUMPARSE=1\n(<T S[dcl] 0 2> (<T S[dcl] 1 2> (<T NP 0 2 {join.01:ARG0}> (<T NP 0 2> (<T NP 0 2> "
},
{
"path": "ccgbank/data/stem_wsj_nns_vb",
"chars": 73,
"preview": "cat wsj-nns-vb | tr [:blank:] _ | ./morpha.ix86_linux > wsj-nns-vb-stems\n"
},
{
"path": "ccgbank/data/wsj-nns-vb",
"chars": 212745,
"preview": "12-inches\tNNS\n15\tVBN\n16-year-olds\tNNS\n1850s\tNNS\n1890s\tNNS\n18-year-olds\tNNS\n1900s\tNNS\n1920s\tNNS\n1930s\tNNS\n1940s\tNNS\n1950s"
},
{
"path": "ccgbank/data/wsj-nns-vb-stems",
"chars": 127054,
"preview": "12-inch\n15\n16-year-old\n1850\n1890\n18-year-old\n1900\n1920\n1930\n1940\n1950\n1960\n1970\n1980\n1990\n19-year-old\n204\n20\n20-stock\n30"
},
{
"path": "ccgbank/data/wsj_0595Corrected.auto",
"chars": 57036,
"preview": "ID=wsj_0595.1 PARSER=GOLD NUMPARSE=1\n(<T S[dcl] 1 2> (<T NP 0 1> (<L N NNS NNS Wives N>) ) (<T S[dcl]\\NP 0 2> (<T (S[dcl"
},
{
"path": "ccgbank/extract/add-chunks.xsl",
"chars": 2682,
"preview": "<?xml version=\"1.0\"?>\r\n<!-- \r\nCopyright (C) 2003-5 University of Edinburgh (Michael White)\r\n$Revision: 1.1 $, $Date: 200"
},
{
"path": "ccgbank/extract/convert-to-graph.xsl",
"chars": 2535,
"preview": "<?xml version=\"1.0\"?>\r\n<!-- \r\nCopyright (C) 2003-5 University of Edinburgh (Michael White)\r\n$Revision: 1.1 $, $Date: 200"
},
{
"path": "ccgbank/extract/convert-to-hlds.xsl",
"chars": 5054,
"preview": "<?xml version=\"1.0\"?>\r\n<!-- \r\nCopyright (C) 2003-5 University of Edinburgh (Michael White)\r\n$Revision: 1.1 $, $Date: 200"
},
{
"path": "ccgbank/extract/grammar.xml",
"chars": 648,
"preview": "<?xml version=\"1.0\"?>\n<grammar name=\"openccgbank\"\n xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n xsi:noNamesp"
},
{
"path": "ccgbank/extract/raise-nodes.xsl",
"chars": 3230,
"preview": "<?xml version=\"1.0\"?>\r\n<!-- \r\nCopyright (C) 2003-7 University of Edinburgh (Michael White)\r\n$Revision: 1.1 $, $Date: 200"
},
{
"path": "ccgbank/models/hypertagger/ht-prior.flm",
"chars": 322,
"preview": "\n## A prior probability model that estimates p(supertag | word, pos)\n## with smoothed back-off (a \"soft tagging dictiona"
},
{
"path": "ccgbank/models/hypertagger/ht.config",
"chars": 786,
"preview": "# location of FLM file and vocab file for prior model\n#priorModel=ht-prior.flm\n#priorModelVocab=vocab.st\n\n# maxent model"
},
{
"path": "ccgbank/models/hypertagger/ht2.config",
"chars": 878,
"preview": "# location of FLM file and vocab file for prior model\n#priorModel=stprior.flm\n#priorModelVocab=vocab.st\n\n# maxent model "
},
{
"path": "ccgbank/models/hypertagger/ht2.train.config",
"chars": 518,
"preview": "# config file for generating events during realizer training\n#\n# a single beta level (not the most strict one) is used, "
},
{
"path": "ccgbank/models/hypertagger/pos.config",
"chars": 220,
"preview": "# this model gives priors on POS tags.\n#priorModel=posprior.flm\n#priorModelVocab=vocab.pos\n\n# this is a Zhang Le-style M"
},
{
"path": "ccgbank/models/hypertagger/posprior.flm",
"chars": 298,
"preview": "\n## A prior probability model that estimates p(pos | word)\n## with smoothed back-off (a \"soft tagging dictionary\" if you"
},
{
"path": "ccgbank/models/hypertagger/vocab.flm",
"chars": 200,
"preview": "\n## flm file for determining the vocab\n\n3\n \n## word (W) unigram\n\nW : 0 w.count w.lm 1\n 0 0\n\n## supertag (T) unigram\n\nT "
},
{
"path": "ccgbank/models/parser/binary.flm",
"chars": 2199,
"preview": "\n## binary step of Hockenmaier's HWDep generative syntactic model\n## with added pos tags\n\n6\n \n## expansion (E) given par"
},
{
"path": "ccgbank/models/parser/gen-events.prefs",
"chars": 608,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE preferences SYSTEM \"http://java.sun.com/dtd/preferences.dtd\">\n<preferenc"
},
{
"path": "ccgbank/models/parser/leaf.flm",
"chars": 387,
"preview": "\n## leaf step of Hockenmaier's HWDep generative syntactic model\n## with added pos tags\n\n1\n \n## expansion (E) given paren"
},
{
"path": "ccgbank/models/parser/model.init",
"chars": 15,
"preview": "1\ngenlogprob 1\n"
},
{
"path": "ccgbank/models/parser/parse.prefs",
"chars": 608,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE preferences SYSTEM \"http://java.sun.com/dtd/preferences.dtd\">\n<preferenc"
},
{
"path": "ccgbank/models/parser/top.flm",
"chars": 788,
"preview": "\n## top step of Hockenmaier's HWDep generative syntactic model\n## with added pos tags\n\n4\n \n## head (H) given expansion ("
},
{
"path": "ccgbank/models/parser/unary.flm",
"chars": 598,
"preview": "\n## unary step of Hockenmaier's HWDep generative syntactic model\n## with added pos tags\n\n2\n \n## expansion (E) given pare"
},
{
"path": "ccgbank/models/parser/vocab.flm",
"chars": 727,
"preview": "\n## flm file for determining the vocab\n\n13\n \n## headword (W) unigram\n\nW : 0 w.count w.lm 1\n 0 0\n\n## headword top (WT) \n"
},
{
"path": "ccgbank/models/realizer/alph.init",
"chars": 65,
"preview": "6\ngenlogprob 1\n$ngram0 1\n$ngram1 1\n$ngram2 1\n$ngram3 1\n$deplen 1\n"
},
{
"path": "ccgbank/models/realizer/gen-events.prefs",
"chars": 1045,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE preferences SYSTEM \"http://java.sun.com/dtd/preferences.dtd\">\n<preferenc"
},
{
"path": "ccgbank/models/realizer/model.init",
"chars": 25,
"preview": "2\ngenlogprob 1\n$ngram0 1\n"
},
{
"path": "ccgbank/models/realizer/rz-test.prefs",
"chars": 1045,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE preferences SYSTEM \"http://java.sun.com/dtd/preferences.dtd\">\n<preferenc"
},
{
"path": "ccgbank/models/realizer/stp3.flm",
"chars": 359,
"preview": "\n## Supertags FLM\n\n## Supertag (T) based on POS tags (P), plus POS trigram\n\n2\n\n## POS trigram\nP : 2 P(-1) P(-2) p_p1p2.c"
},
{
"path": "ccgbank/models/supertagger/pos.config",
"chars": 939,
"preview": "# an example POS tagger config file (D.N. Mehay)\n# change to suit your needs (e.g., replace the following paths\n# with p"
},
{
"path": "ccgbank/models/supertagger/posprior.flm",
"chars": 298,
"preview": "\n## A prior probability model that estimates p(pos | word)\n## with smoothed back-off (a \"soft tagging dictionary\" if you"
},
{
"path": "ccgbank/models/supertagger/st.config",
"chars": 2099,
"preview": "# an example supertagger config file (D.N. Mehay)\n# change to suit your needs (e.g., replace the following paths\n# with "
},
{
"path": "ccgbank/models/supertagger/st.config.train",
"chars": 2028,
"preview": "# an example supertagger config file (D.N. Mehay)\n# change to suit your needs (e.g., replace the following paths\n# with "
},
{
"path": "ccgbank/models/supertagger/st.noprior.config",
"chars": 2177,
"preview": "# an example supertagger config file (D.N. Mehay)\n# change to suit your needs (e.g., replace the following paths\n# with "
},
{
"path": "ccgbank/models/supertagger/stprior.flm",
"chars": 322,
"preview": "\n## A prior probability model that estimates p(supertag | word, pos)\n## with smoothed back-off (a \"soft tagging dictiona"
},
{
"path": "ccgbank/models/supertagger/vocab.flm",
"chars": 200,
"preview": "\n## flm file for determining the vocab\n\n3\n \n## word (W) unigram\n\nW : 0 w.count w.lm 1\n 0 0\n\n## supertag (T) unigram\n\nT "
},
{
"path": "ccgbank/original/models/postagger/pos.config",
"chars": 939,
"preview": "# an example POS tagger config file (D.N. Mehay)\n# change to suit your needs (e.g., replace the following paths\n# with p"
},
{
"path": "ccgbank/original/models/postagger/posprior.flm",
"chars": 298,
"preview": "\n## A prior probability model that estimates p(pos | word)\n## with smoothed back-off (a \"soft tagging dictionary\" if you"
},
{
"path": "ccgbank/original/models/supertagger/st.config",
"chars": 1888,
"preview": "# an example supertagger config file (D.N. Mehay)\n# change to suit your needs (e.g., replace the following paths\n# with "
},
{
"path": "ccgbank/original/models/supertagger/stprior.flm",
"chars": 322,
"preview": "\n## A prior probability model that estimates p(supertag | word, pos)\n## with smoothed back-off (a \"soft tagging dictiona"
},
{
"path": "ccgbank/original/models/supertagger/vocab.flm",
"chars": 200,
"preview": "\n## flm file for determining the vocab\n\n3\n \n## word (W) unigram\n\nW : 0 w.count w.lm 1\n 0 0\n\n## supertag (T) unigram\n\nT "
},
{
"path": "ccgbank/plugins/MyGenSynScorer.java",
"chars": 505,
"preview": "\npackage plugins;\n\nimport opennlp.ccg.synsem.*;\nimport java.io.*;\n\npublic class MyGenSynScorer extends GenerativeSyntact"
},
{
"path": "ccgbank/plugins/MyNgramCombo.java",
"chars": 1847,
"preview": "package plugins;\r\n\r\nimport opennlp.ccg.ngrams.*;\r\nimport java.io.*;\r\nimport java.util.*;\r\nimport java.lang.Thread;\r\n\r\npu"
},
{
"path": "ccgbank/plugins/MyNgramGenSynProduct.java",
"chars": 304,
"preview": "package plugins;\r\n\r\nimport opennlp.ccg.ngrams.*;\r\nimport opennlp.ccg.synsem.*;\r\nimport java.io.*;\r\n\r\npublic class MyNgra"
},
{
"path": "ccgbank/plugins/MyNgramPrecisionBaselineGenInterp.java",
"chars": 649,
"preview": "\npackage plugins;\n\nimport opennlp.ccg.ngrams.*;\nimport opennlp.ccg.synsem.*;\n\nimport java.io.*;\n\npublic class MyNgramPre"
},
{
"path": "ccgbank/plugins/MyNgramPrecisionPerceptronInterp.java",
"chars": 1026,
"preview": "\npackage plugins;\n\nimport opennlp.ccg.ngrams.*;\nimport opennlp.ccg.synsem.*;\n\nimport java.io.*;\n\npublic class MyNgramPre"
},
{
"path": "ccgbank/plugins/MyParserPerceptronScorer.java",
"chars": 1047,
"preview": "\npackage plugins;\n\nimport opennlp.ccg.synsem.*;\nimport opennlp.ccg.perceptron.*;\nimport opennlp.ccg.hylo.*;\n\nimport java"
},
{
"path": "ccgbank/plugins/MyRealizerPerceptronScorer.java",
"chars": 2791,
"preview": "\npackage plugins;\n\nimport opennlp.ccg.synsem.*;\nimport opennlp.ccg.perceptron.*;\n\nimport java.io.*;\n\npublic class MyReal"
},
{
"path": "ccgbank/plugins/MySynAgrFeatureExtractor.java",
"chars": 310,
"preview": "\npackage plugins;\n\nimport opennlp.ccg.synsem.*;\nimport opennlp.ccg.perceptron.*;\nimport opennlp.ccg.hylo.*;\n\npublic clas"
},
{
"path": "ccgbank/plugins/MySynSemAgrFeatureExtractor.java",
"chars": 315,
"preview": "\npackage plugins;\n\nimport opennlp.ccg.synsem.*;\nimport opennlp.ccg.perceptron.*;\nimport opennlp.ccg.hylo.*;\n\npublic clas"
},
{
"path": "ccgbank/plugins/MySynSemFeatureExtractor.java",
"chars": 307,
"preview": "\npackage plugins;\n\nimport opennlp.ccg.synsem.*;\nimport opennlp.ccg.perceptron.*;\nimport opennlp.ccg.hylo.*;\n\npublic clas"
},
{
"path": "ccgbank/stanford-nlp/classifiers/stanfordner-README",
"chars": 43,
"preview": "Place your Stanford NE tagging models here."
},
{
"path": "ccgbank/stanford-nlp/stanfordnlp-README",
"chars": 162,
"preview": "Place your Stanford core NLP jar file here (preferably renaming it to 'stanford-core-nlp.jar') and\nthen also place, e.g."
},
{
"path": "ccgbank/templates/addFilterLexFeats.xsl",
"chars": 2935,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/addStems.xsl",
"chars": 3279,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/adjustAppos.xsl",
"chars": 2059,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/adjustCandCcats1.xsl",
"chars": 2956,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/adjustCats.xsl",
"chars": 6481,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/adjustParenthetical.xsl",
"chars": 2658,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/adjustReportedSpeech.xsl",
"chars": 1940,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/adjustRoles.xsl",
"chars": 2615,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/adv-placement.xsl",
"chars": 2373,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/agr-macroInsert.xsl",
"chars": 4408,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/allotIdLeaf.xsl",
"chars": 2383,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/allotIdTree.xsl",
"chars": 3936,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/allotIndexRel.xsl",
"chars": 6246,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/anim-macroInsert.xsl",
"chars": 3423,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/annotateAppos-Dash.xsl",
"chars": 4023,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/annotateAppos1.xsl",
"chars": 5359,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/annotateAppos2.xsl",
"chars": 5620,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/annotateAppos3.xsl",
"chars": 5802,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/annotateBrackets.xsl",
"chars": 5689,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/annotateColons.xsl",
"chars": 4344,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/annotateDots.xsl",
"chars": 3756,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/annotateExtraposedAppos.xsl",
"chars": 3213,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/annotateNom-AdjConj.xsl",
"chars": 2745,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/annotateParentheticals1.xsl",
"chars": 8185,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/annotateParentheticals2.xsl",
"chars": 7784,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/annotatePlace.xsl",
"chars": 6973,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/annotatePrtConjs.xsl",
"chars": 6437,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/annotateQuotes.xsl",
"chars": 4263,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/annotateReportedSpeech.xsl",
"chars": 8010,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/annotateStrayAppos.xsl",
"chars": 4369,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/annotateVPCommas.xsl",
"chars": 6410,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/balanceAppos.xsl",
"chars": 5178,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/balanceDash-Paren.xsl",
"chars": 4151,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/catCheck.xsl",
"chars": 2084,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/ccgRules.xsl",
"chars": 25475,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
},
{
"path": "ccgbank/templates/changePunct.xsl",
"chars": 2748,
"preview": "<!--Copyright (C) 2005-2009 Scott Martin, Rajakrishan Rajkumar and Michael White\n \n This library is free software; you c"
}
]
// ... and 781 more files (download for full content)
About this extraction
This page contains the full source code of the OpenCCG/openccg GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 981 files (8.0 MB), approximately 2.1M tokens, and a symbol index with 5554 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.