gitextract_z9hqe0ws/

├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug_report.md
│   │   ├── feature_request.md
│   │   └── question.md
│   ├── pull_request_template.md
│   ├── stale.yml
│   └── workflows/
│       └── stanza-tests.yaml
├── .gitignore
├── .travis.yml
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── demo/
│   ├── CONLL_Dependency_Visualizer_Example.ipynb
│   ├── Dependency_Visualization_Testing.ipynb
│   ├── NER_Visualization.ipynb
│   ├── Stanza_Beginners_Guide.ipynb
│   ├── Stanza_CoreNLP_Interface.ipynb
│   ├── arabic_test.conllu.txt
│   ├── corenlp.py
│   ├── en_test.conllu.txt
│   ├── japanese_test.conllu.txt
│   ├── pipeline_demo.py
│   ├── scenegraph.py
│   ├── semgrex visualization.ipynb
│   ├── semgrex.py
│   └── ssurgeon_script.txt
├── doc/
│   └── CoreNLP.proto
├── scripts/
│   ├── config.sh
│   └── download_vectors.sh
├── setup.py
└── stanza/
    ├── __init__.py
    ├── _version.py
    ├── models/
    │   ├── __init__.py
    │   ├── _training_logging.py
    │   ├── charlm.py
    │   ├── classifier.py
    │   ├── classifiers/
    │   │   ├── __init__.py
    │   │   ├── base_classifier.py
    │   │   ├── cnn_classifier.py
    │   │   ├── config.py
    │   │   ├── constituency_classifier.py
    │   │   ├── data.py
    │   │   ├── iterate_test.py
    │   │   ├── trainer.py
    │   │   └── utils.py
    │   ├── common/
    │   │   ├── __init__.py
    │   │   ├── beam.py
    │   │   ├── bert_embedding.py
    │   │   ├── biaffine.py
    │   │   ├── build_short_name_to_treebank.py
    │   │   ├── char_model.py
    │   │   ├── chuliu_edmonds.py
    │   │   ├── constant.py
    │   │   ├── convert_pretrain.py
    │   │   ├── count_ner_coverage.py
    │   │   ├── count_pretrain_coverage.py
    │   │   ├── crf.py
    │   │   ├── data.py
    │   │   ├── doc.py
    │   │   ├── dropout.py
    │   │   ├── exceptions.py
    │   │   ├── foundation_cache.py
    │   │   ├── hlstm.py
    │   │   ├── large_margin_loss.py
    │   │   ├── loss.py
    │   │   ├── maxout_linear.py
    │   │   ├── packed_lstm.py
    │   │   ├── peft_config.py
    │   │   ├── pretrain.py
    │   │   ├── relative_attn.py
    │   │   ├── seq2seq_constant.py
    │   │   ├── seq2seq_model.py
    │   │   ├── seq2seq_modules.py
    │   │   ├── seq2seq_utils.py
    │   │   ├── short_name_to_treebank.py
    │   │   ├── stanza_object.py
    │   │   ├── trainer.py
    │   │   ├── utils.py
    │   │   └── vocab.py
    │   ├── constituency/
    │   │   ├── __init__.py
    │   │   ├── base_model.py
    │   │   ├── base_trainer.py
    │   │   ├── dynamic_oracle.py
    │   │   ├── ensemble.py
    │   │   ├── error_analysis_in_order.py
    │   │   ├── evaluate_treebanks.py
    │   │   ├── in_order_compound_oracle.py
    │   │   ├── in_order_oracle.py
    │   │   ├── label_attention.py
    │   │   ├── lstm_model.py
    │   │   ├── lstm_tree_stack.py
    │   │   ├── parse_transitions.py
    │   │   ├── parse_tree.py
    │   │   ├── parser_training.py
    │   │   ├── partitioned_transformer.py
    │   │   ├── positional_encoding.py
    │   │   ├── retagging.py
    │   │   ├── score_converted_dependencies.py
    │   │   ├── state.py
    │   │   ├── text_processing.py
    │   │   ├── top_down_oracle.py
    │   │   ├── trainer.py
    │   │   ├── transformer_tree_stack.py
    │   │   ├── transition_sequence.py
    │   │   ├── tree_embedding.py
    │   │   ├── tree_reader.py
    │   │   ├── tree_stack.py
    │   │   └── utils.py
    │   ├── constituency_parser.py
    │   ├── coref/
    │   │   ├── __init__.py
    │   │   ├── anaphoricity_scorer.py
    │   │   ├── bert.py
    │   │   ├── cluster_checker.py
    │   │   ├── config.py
    │   │   ├── conll.py
    │   │   ├── const.py
    │   │   ├── coref_chain.py
    │   │   ├── coref_config.toml
    │   │   ├── dataset.py
    │   │   ├── loss.py
    │   │   ├── model.py
    │   │   ├── pairwise_encoder.py
    │   │   ├── predict.py
    │   │   ├── rough_scorer.py
    │   │   ├── span_predictor.py
    │   │   ├── tokenizer_customization.py
    │   │   ├── utils.py
    │   │   └── word_encoder.py
    │   ├── depparse/
    │   │   ├── __init__.py
    │   │   ├── data.py
    │   │   ├── model.py
    │   │   ├── scorer.py
    │   │   └── trainer.py
    │   ├── identity_lemmatizer.py
    │   ├── lang_identifier.py
    │   ├── langid/
    │   │   ├── __init__.py
    │   │   ├── create_ud_data.py
    │   │   ├── data.py
    │   │   ├── model.py
    │   │   └── trainer.py
    │   ├── lemma/
    │   │   ├── __init__.py
    │   │   ├── attach_lemma_classifier.py
    │   │   ├── data.py
    │   │   ├── edit.py
    │   │   ├── scorer.py
    │   │   ├── trainer.py
    │   │   └── vocab.py
    │   ├── lemma_classifier/
    │   │   ├── __init__.py
    │   │   ├── base_model.py
    │   │   ├── base_trainer.py
    │   │   ├── baseline_model.py
    │   │   ├── constants.py
    │   │   ├── evaluate_many.py
    │   │   ├── evaluate_models.py
    │   │   ├── lstm_model.py
    │   │   ├── prepare_dataset.py
    │   │   ├── train_lstm_model.py
    │   │   ├── train_many.py
    │   │   ├── train_transformer_model.py
    │   │   ├── transformer_model.py
    │   │   └── utils.py
    │   ├── lemmatizer.py
    │   ├── mwt/
    │   │   ├── __init__.py
    │   │   ├── character_classifier.py
    │   │   ├── data.py
    │   │   ├── scorer.py
    │   │   ├── trainer.py
    │   │   ├── utils.py
    │   │   └── vocab.py
    │   ├── mwt_expander.py
    │   ├── ner/
    │   │   ├── __init__.py
    │   │   ├── data.py
    │   │   ├── model.py
    │   │   ├── scorer.py
    │   │   ├── trainer.py
    │   │   ├── utils.py
    │   │   └── vocab.py
    │   ├── ner_tagger.py
    │   ├── parser.py
    │   ├── pos/
    │   │   ├── __init__.py
    │   │   ├── build_xpos_vocab_factory.py
    │   │   ├── data.py
    │   │   ├── model.py
    │   │   ├── scorer.py
    │   │   ├── trainer.py
    │   │   ├── vocab.py
    │   │   ├── xpos_vocab_factory.py
    │   │   └── xpos_vocab_utils.py
    │   ├── tagger.py
    │   ├── tokenization/
    │   │   ├── __init__.py
    │   │   ├── data.py
    │   │   ├── model.py
    │   │   ├── tokenize_files.py
    │   │   ├── trainer.py
    │   │   ├── utils.py
    │   │   └── vocab.py
    │   ├── tokenizer.py
    │   └── wl_coref.py
    ├── pipeline/
    │   ├── __init__.py
    │   ├── _constants.py
    │   ├── constituency_processor.py
    │   ├── core.py
    │   ├── coref_processor.py
    │   ├── demo/
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── demo_server.py
    │   │   ├── stanza-brat.css
    │   │   ├── stanza-brat.html
    │   │   ├── stanza-brat.js
    │   │   └── stanza-parseviewer.js
    │   ├── depparse_processor.py
    │   ├── external/
    │   │   ├── __init__.py
    │   │   ├── corenlp_converter_depparse.py
    │   │   ├── jieba.py
    │   │   ├── pythainlp.py
    │   │   ├── spacy.py
    │   │   └── sudachipy.py
    │   ├── langid_processor.py
    │   ├── lemma_processor.py
    │   ├── morphseg_processor.py
    │   ├── multilingual.py
    │   ├── mwt_processor.py
    │   ├── ner_processor.py
    │   ├── pos_processor.py
    │   ├── processor.py
    │   ├── registry.py
    │   ├── sentiment_processor.py
    │   └── tokenize_processor.py
    ├── protobuf/
    │   ├── CoreNLP_pb2.py
    │   └── __init__.py
    ├── resources/
    │   ├── __init__.py
    │   ├── common.py
    │   ├── default_packages.py
    │   ├── installation.py
    │   ├── prepare_resources.py
    │   └── print_charlm_depparse.py
    ├── server/
    │   ├── __init__.py
    │   ├── annotator.py
    │   ├── client.py
    │   ├── dependency_converter.py
    │   ├── java_protobuf_requests.py
    │   ├── main.py
    │   ├── morphology.py
    │   ├── parser_eval.py
    │   ├── semgrex.py
    │   ├── ssurgeon.py
    │   ├── tokensregex.py
    │   ├── tsurgeon.py
    │   └── ud_enhancer.py
    ├── tests/
    │   ├── __init__.py
    │   ├── classifiers/
    │   │   ├── __init__.py
    │   │   ├── test_classifier.py
    │   │   ├── test_constituency_classifier.py
    │   │   ├── test_data.py
    │   │   └── test_process_utils.py
    │   ├── common/
    │   │   ├── __init__.py
    │   │   ├── test_bert_embedding.py
    │   │   ├── test_char_model.py
    │   │   ├── test_chuliu_edmonds.py
    │   │   ├── test_common_data.py
    │   │   ├── test_confusion.py
    │   │   ├── test_constant.py
    │   │   ├── test_data_conversion.py
    │   │   ├── test_data_objects.py
    │   │   ├── test_doc.py
    │   │   ├── test_dropout.py
    │   │   ├── test_foundation_cache.py
    │   │   ├── test_pretrain.py
    │   │   ├── test_relative_attn.py
    │   │   ├── test_short_name_to_treebank.py
    │   │   └── test_utils.py
    │   ├── constituency/
    │   │   ├── __init__.py
    │   │   ├── test_convert_arboretum.py
    │   │   ├── test_convert_it_vit.py
    │   │   ├── test_convert_starlang.py
    │   │   ├── test_ensemble.py
    │   │   ├── test_in_order_compound_oracle.py
    │   │   ├── test_in_order_oracle.py
    │   │   ├── test_lstm_model.py
    │   │   ├── test_parse_transitions.py
    │   │   ├── test_parse_tree.py
    │   │   ├── test_positional_encoding.py
    │   │   ├── test_selftrain_vi_quad.py
    │   │   ├── test_text_processing.py
    │   │   ├── test_top_down_oracle.py
    │   │   ├── test_trainer.py
    │   │   ├── test_transformer_tree_stack.py
    │   │   ├── test_transition_sequence.py
    │   │   ├── test_tree_reader.py
    │   │   ├── test_tree_stack.py
    │   │   ├── test_utils.py
    │   │   └── test_vietnamese.py
    │   ├── datasets/
    │   │   ├── __init__.py
    │   │   ├── coref/
    │   │   │   ├── __init__.py
    │   │   │   └── test_hebrew_iahlt.py
    │   │   ├── ner/
    │   │   │   ├── __init__.py
    │   │   │   ├── test_prepare_ner_file.py
    │   │   │   └── test_utils.py
    │   │   ├── test_common.py
    │   │   └── test_vietnamese_renormalization.py
    │   ├── depparse/
    │   │   ├── __init__.py
    │   │   ├── test_depparse_data.py
    │   │   └── test_parser.py
    │   ├── langid/
    │   │   ├── __init__.py
    │   │   ├── test_langid.py
    │   │   └── test_multilingual.py
    │   ├── lemma/
    │   │   ├── __init__.py
    │   │   ├── test_data.py
    │   │   ├── test_lemma_trainer.py
    │   │   └── test_lowercase.py
    │   ├── lemma_classifier/
    │   │   ├── __init__.py
    │   │   ├── test_data_preparation.py
    │   │   └── test_training.py
    │   ├── morphseg/
    │   │   ├── __init__.py
    │   │   ├── conftest.py
    │   │   ├── test_integration.py
    │   │   ├── test_morpheme_segmenter.py
    │   │   └── test_stanza_integration.py
    │   ├── mwt/
    │   │   ├── __init__.py
    │   │   ├── test_character_classifier.py
    │   │   ├── test_english_corner_cases.py
    │   │   ├── test_prepare_mwt.py
    │   │   └── test_utils.py
    │   ├── ner/
    │   │   ├── __init__.py
    │   │   ├── test_bsf_2_beios.py
    │   │   ├── test_bsf_2_iob.py
    │   │   ├── test_combine_ner_datasets.py
    │   │   ├── test_convert_amt.py
    │   │   ├── test_convert_nkjp.py
    │   │   ├── test_convert_starlang_ner.py
    │   │   ├── test_data.py
    │   │   ├── test_from_conllu.py
    │   │   ├── test_models_ner_scorer.py
    │   │   ├── test_ner_tagger.py
    │   │   ├── test_ner_trainer.py
    │   │   ├── test_ner_training.py
    │   │   ├── test_ner_utils.py
    │   │   ├── test_pay_amt_annotators.py
    │   │   ├── test_split_wikiner.py
    │   │   └── test_suc3.py
    │   ├── pipeline/
    │   │   ├── __init__.py
    │   │   ├── pipeline_device_tests.py
    │   │   ├── test_arabic_pipeline.py
    │   │   ├── test_core.py
    │   │   ├── test_decorators.py
    │   │   ├── test_depparse.py
    │   │   ├── test_english_pipeline.py
    │   │   ├── test_french_pipeline.py
    │   │   ├── test_lemmatizer.py
    │   │   ├── test_pipeline_constituency_processor.py
    │   │   ├── test_pipeline_depparse_processor.py
    │   │   ├── test_pipeline_mwt_expander.py
    │   │   ├── test_pipeline_ner_processor.py
    │   │   ├── test_pipeline_pos_processor.py
    │   │   ├── test_pipeline_sentiment_processor.py
    │   │   ├── test_requirements.py
    │   │   └── test_tokenizer.py
    │   ├── pos/
    │   │   ├── __init__.py
    │   │   ├── test_data.py
    │   │   ├── test_tagger.py
    │   │   └── test_xpos_vocab_factory.py
    │   ├── pytest.ini
    │   ├── resources/
    │   │   ├── __init__.py
    │   │   ├── test_charlm_depparse.py
    │   │   ├── test_common.py
    │   │   ├── test_default_packages.py
    │   │   ├── test_installation.py
    │   │   └── test_prepare_resources.py
    │   ├── server/
    │   │   ├── __init__.py
    │   │   ├── test_client.py
    │   │   ├── test_java_protobuf_requests.py
    │   │   ├── test_morphology.py
    │   │   ├── test_parser_eval.py
    │   │   ├── test_protobuf.py
    │   │   ├── test_semgrex.py
    │   │   ├── test_server_misc.py
    │   │   ├── test_server_pretokenized.py
    │   │   ├── test_server_request.py
    │   │   ├── test_server_start.py
    │   │   ├── test_ssurgeon.py
    │   │   ├── test_tokensregex.py
    │   │   ├── test_tsurgeon.py
    │   │   └── test_ud_enhancer.py
    │   ├── setup.py
    │   └── tokenization/
    │       ├── __init__.py
    │       ├── test_prepare_tokenizer_treebank.py
    │       ├── test_replace_long_tokens.py
    │       ├── test_spaces.py
    │       ├── test_tokenization_lst20.py
    │       ├── test_tokenization_orchid.py
    │       ├── test_tokenize_data.py
    │       ├── test_tokenize_files.py
    │       ├── test_tokenize_utils.py
    │       └── test_vocab.py
    └── utils/
        ├── __init__.py
        ├── avg_sent_len.py
        ├── charlm/
        │   ├── __init__.py
        │   ├── conll17_to_text.py
        │   ├── dump_oscar.py
        │   ├── make_lm_data.py
        │   └── oscar_to_text.py
        ├── confusion.py
        ├── conll.py
        ├── constituency/
        │   ├── __init__.py
        │   ├── check_transitions.py
        │   ├── grep_dev_logs.py
        │   ├── grep_test_logs.py
        │   └── list_tensors.py
        ├── datasets/
        │   ├── __init__.py
        │   ├── common.py
        │   ├── conllu_to_text.py
        │   ├── constituency/
        │   │   ├── __init__.py
        │   │   ├── build_silver_dataset.py
        │   │   ├── common_trees.py
        │   │   ├── convert_alt.py
        │   │   ├── convert_arboretum.py
        │   │   ├── convert_cintil.py
        │   │   ├── convert_ctb.py
        │   │   ├── convert_icepahc.py
        │   │   ├── convert_it_turin.py
        │   │   ├── convert_it_vit.py
        │   │   ├── convert_spmrl.py
        │   │   ├── convert_starlang.py
        │   │   ├── count_common_words.py
        │   │   ├── extract_all_silver_dataset.py
        │   │   ├── extract_silver_dataset.py
        │   │   ├── prepare_con_dataset.py
        │   │   ├── reduce_dataset.py
        │   │   ├── relabel_tags.py
        │   │   ├── selftrain.py
        │   │   ├── selftrain_it.py
        │   │   ├── selftrain_single_file.py
        │   │   ├── selftrain_vi_quad.py
        │   │   ├── selftrain_wiki.py
        │   │   ├── silver_variance.py
        │   │   ├── split_holdout.py
        │   │   ├── split_weighted_ensemble.py
        │   │   ├── tokenize_wiki.py
        │   │   ├── treebank_to_labeled_brackets.py
        │   │   ├── utils.py
        │   │   ├── vtb_convert.py
        │   │   └── vtb_split.py
        │   ├── contract_mwt.py
        │   ├── coref/
        │   │   ├── __init__.py
        │   │   ├── balance_languages.py
        │   │   ├── convert_hebrew_iahlt.py
        │   │   ├── convert_hebrew_mixed.py
        │   │   ├── convert_hindi.py
        │   │   ├── convert_ontonotes.py
        │   │   ├── convert_tamil.py
        │   │   ├── convert_udcoref.py
        │   │   ├── convert_udcoref_1.2.py
        │   │   └── utils.py
        │   ├── corenlp_segmenter_dataset.py
        │   ├── depparse/
        │   │   └── check_results.py
        │   ├── ner/
        │   │   ├── __init__.py
        │   │   ├── build_en_combined.py
        │   │   ├── check_for_duplicates.py
        │   │   ├── combine_ner_datasets.py
        │   │   ├── compare_entities.py
        │   │   ├── conll_to_iob.py
        │   │   ├── convert_amt.py
        │   │   ├── convert_ar_aqmar.py
        │   │   ├── convert_bn_daffodil.py
        │   │   ├── convert_bsf_to_beios.py
        │   │   ├── convert_bsnlp.py
        │   │   ├── convert_en_conll03.py
        │   │   ├── convert_fire_2013.py
        │   │   ├── convert_he_iahlt.py
        │   │   ├── convert_hy_armtdp.py
        │   │   ├── convert_ijc.py
        │   │   ├── convert_kk_kazNERD.py
        │   │   ├── convert_lst20.py
        │   │   ├── convert_mr_l3cube.py
        │   │   ├── convert_my_ucsy.py
        │   │   ├── convert_nkjp.py
        │   │   ├── convert_nner22.py
        │   │   ├── convert_nytk.py
        │   │   ├── convert_ontonotes.py
        │   │   ├── convert_rgai.py
        │   │   ├── convert_sindhi_siner.py
        │   │   ├── convert_starlang_ner.py
        │   │   ├── count_entities.py
        │   │   ├── json_to_bio.py
        │   │   ├── misc_to_date.py
        │   │   ├── ontonotes_multitag.py
        │   │   ├── prepare_ner_dataset.py
        │   │   ├── prepare_ner_file.py
        │   │   ├── preprocess_wikiner.py
        │   │   ├── simplify_en_worldwide.py
        │   │   ├── simplify_ontonotes_to_worldwide.py
        │   │   ├── split_wikiner.py
        │   │   ├── suc_conll_to_iob.py
        │   │   ├── suc_to_iob.py
        │   │   └── utils.py
        │   ├── pos/
        │   │   ├── __init__.py
        │   │   ├── convert_trees_to_pos.py
        │   │   └── remove_columns.py
        │   ├── prepare_depparse_treebank.py
        │   ├── prepare_lemma_classifier.py
        │   ├── prepare_lemma_treebank.py
        │   ├── prepare_mwt_treebank.py
        │   ├── prepare_pos_treebank.py
        │   ├── prepare_tokenizer_data.py
        │   ├── prepare_tokenizer_treebank.py
        │   ├── pretrain/
        │   │   ├── __init__.py
        │   │   └── word_in_pretrain.py
        │   ├── random_split_conllu.py
        │   ├── sentiment/
        │   │   ├── __init__.py
        │   │   ├── add_constituency.py
        │   │   ├── convert_italian_poetry_classification.py
        │   │   ├── convert_italian_sentence_classification.py
        │   │   ├── prepare_sentiment_dataset.py
        │   │   ├── process_MELD.py
        │   │   ├── process_airline.py
        │   │   ├── process_arguana_xml.py
        │   │   ├── process_corona.py
        │   │   ├── process_es_tass2020.py
        │   │   ├── process_it_sentipolc16.py
        │   │   ├── process_ren_chinese.py
        │   │   ├── process_sb10k.py
        │   │   ├── process_scare.py
        │   │   ├── process_slsd.py
        │   │   ├── process_sst.py
        │   │   ├── process_usage_german.py
        │   │   ├── process_utils.py
        │   │   └── process_vsfc_vietnamese.py
        │   ├── thai_syllable_dict_generator.py
        │   ├── tokenization/
        │   │   ├── __init__.py
        │   │   ├── convert_ml_cochin.py
        │   │   ├── convert_my_alt.py
        │   │   ├── convert_text_files.py
        │   │   ├── convert_th_best.py
        │   │   ├── convert_th_lst20.py
        │   │   ├── convert_th_orchid.py
        │   │   ├── convert_vi_vlsp.py
        │   │   └── process_thai_tokenization.py
        │   └── vietnamese/
        │       ├── __init__.py
        │       └── renormalize.py
        ├── default_paths.py
        ├── get_tqdm.py
        ├── helper_func.py
        ├── languages/
        │   ├── __init__.py
        │   └── kazakh_transliteration.py
        ├── lemma/
        │   ├── __init__.py
        │   └── count_ambiguous_lemmas.py
        ├── max_mwt_length.py
        ├── ner/
        │   ├── __init__.py
        │   ├── flair_ner_tag_dataset.py
        │   ├── paying_annotators.py
        │   └── spacy_ner_tag_dataset.py
        ├── pretrain/
        │   ├── __init__.py
        │   └── compare_pretrains.py
        ├── select_backoff.py
        ├── training/
        │   ├── __init__.py
        │   ├── common.py
        │   ├── compose_ete_results.py
        │   ├── remove_constituency_optimizer.py
        │   ├── run_charlm.py
        │   ├── run_constituency.py
        │   ├── run_depparse.py
        │   ├── run_ete.py
        │   ├── run_lemma.py
        │   ├── run_lemma_classifier.py
        │   ├── run_mwt.py
        │   ├── run_ner.py
        │   ├── run_pos.py
        │   ├── run_sentiment.py
        │   ├── run_tokenizer.py
        │   └── separate_ner_pretrain.py
        └── visualization/
            ├── README
            ├── __init__.py
            ├── conll_deprel_visualization.py
            ├── constants.py
            ├── dependency_visualization.py
            ├── ner_visualization.py
            ├── semgrex_app.py
            ├── semgrex_visualizer.py
            ├── ssurgeon_visualizer.py
            └── utils.py