Showing preview only (1,916K chars total). Download the full file or copy to clipboard to get everything.
Repository: deeppavlov/DeepPavlov
Branch: master
Commit: 5f9fbed0c719
Files: 411
Total size: 1.8 MB
Directory structure:
gitextract__x5jpadh/
├── .github/
│ └── ISSUE_TEMPLATE/
│ ├── bug_report.md
│ ├── config.yml
│ └── feature-request.md
├── .gitignore
├── .readthedocs.yml
├── CNAME
├── Jenkinsfile
├── LICENSE
├── MANIFEST.in
├── README.md
├── _config.yml
├── _layouts/
│ └── default.html
├── deeppavlov/
│ ├── __init__.py
│ ├── __main__.py
│ ├── _meta.py
│ ├── configs/
│ │ ├── __init__.py
│ │ ├── classifiers/
│ │ │ ├── boolqa_rubert.json
│ │ │ ├── few_shot_roberta.json
│ │ │ ├── glue/
│ │ │ │ ├── glue_cola_roberta.json
│ │ │ │ ├── glue_mnli_cased_bert_torch.json
│ │ │ │ ├── glue_mnli_mm_cased_bert_torch.json
│ │ │ │ ├── glue_mnli_roberta.json
│ │ │ │ ├── glue_mrpc_roberta.json
│ │ │ │ ├── glue_qnli_roberta.json
│ │ │ │ ├── glue_qqp_roberta.json
│ │ │ │ ├── glue_rte_cased_bert_torch.json
│ │ │ │ ├── glue_rte_roberta_mnli.json
│ │ │ │ ├── glue_sst2_roberta.json
│ │ │ │ ├── glue_stsb_roberta.json
│ │ │ │ └── glue_wnli_roberta.json
│ │ │ ├── insults_kaggle_bert.json
│ │ │ ├── paraphraser_convers_distilrubert_2L.json
│ │ │ ├── paraphraser_convers_distilrubert_6L.json
│ │ │ ├── paraphraser_rubert.json
│ │ │ ├── query_pr.json
│ │ │ ├── rusentiment_bert.json
│ │ │ ├── rusentiment_convers_bert.json
│ │ │ ├── rusentiment_convers_distilrubert_2L.json
│ │ │ ├── rusentiment_convers_distilrubert_6L.json
│ │ │ ├── sentiment_sst_conv_bert.json
│ │ │ ├── sentiment_twitter.json
│ │ │ ├── superglue/
│ │ │ │ ├── superglue_boolq_roberta_mnli.json
│ │ │ │ ├── superglue_copa_roberta.json
│ │ │ │ ├── superglue_record_roberta.json
│ │ │ │ └── superglue_wic_bert.json
│ │ │ └── topics_distilbert_base_uncased.json
│ │ ├── doc_retrieval/
│ │ │ ├── en_ranker_pop_wiki.json
│ │ │ ├── en_ranker_tfidf_wiki.json
│ │ │ └── ru_ranker_tfidf_wiki.json
│ │ ├── embedder/
│ │ │ ├── bert_embedder.json
│ │ │ └── bert_sentence_embedder.json
│ │ ├── entity_extraction/
│ │ │ ├── entity_detection_en.json
│ │ │ ├── entity_detection_ru.json
│ │ │ ├── entity_extraction_en.json
│ │ │ ├── entity_extraction_ru.json
│ │ │ ├── entity_linking_en.json
│ │ │ └── entity_linking_ru.json
│ │ ├── faq/
│ │ │ └── fasttext_logreg.json
│ │ ├── kbqa/
│ │ │ ├── kbqa_cq_en.json
│ │ │ ├── kbqa_cq_ru.json
│ │ │ └── wiki_parser.json
│ │ ├── morpho_syntax_parser/
│ │ │ ├── morpho_ru_syntagrus_bert.json
│ │ │ ├── ru_syntagrus_joint_parsing.json
│ │ │ └── syntax_ru_syntagrus_bert.json
│ │ ├── multitask/
│ │ │ ├── mt_glue.json
│ │ │ └── multitask_example.json
│ │ ├── ner/
│ │ │ ├── ner_bert_base.json
│ │ │ ├── ner_case_agnostic_mdistilbert.json
│ │ │ ├── ner_collection3_bert.json
│ │ │ ├── ner_conll2003_bert.json
│ │ │ ├── ner_conll2003_deberta_crf.json
│ │ │ ├── ner_ontonotes_bert.json
│ │ │ ├── ner_ontonotes_bert_mult.json
│ │ │ ├── ner_ontonotes_deberta_crf.json
│ │ │ ├── ner_rus_bert.json
│ │ │ ├── ner_rus_bert_probas.json
│ │ │ ├── ner_rus_convers_distilrubert_2L.json
│ │ │ └── ner_rus_convers_distilrubert_6L.json
│ │ ├── odqa/
│ │ │ ├── en_odqa_infer_wiki.json
│ │ │ ├── en_odqa_pop_infer_wiki.json
│ │ │ └── ru_odqa_infer_wiki.json
│ │ ├── ranking/
│ │ │ ├── path_ranking_nll_roberta_en.json
│ │ │ ├── ranking_ubuntu_v2_torch_bert_uncased.json
│ │ │ ├── rel_ranking_nll_bert_ru.json
│ │ │ └── rel_ranking_roberta_en.json
│ │ ├── regressors/
│ │ │ └── translation_ranker.json
│ │ ├── relation_extraction/
│ │ │ ├── re_docred.json
│ │ │ └── re_rured.json
│ │ ├── russian_super_glue/
│ │ │ ├── russian_superglue_danetqa_rubert.json
│ │ │ ├── russian_superglue_lidirus_rubert.json
│ │ │ ├── russian_superglue_muserc_rubert.json
│ │ │ ├── russian_superglue_parus_rubert.json
│ │ │ ├── russian_superglue_rcb_rubert.json
│ │ │ ├── russian_superglue_rucos_rubert.json
│ │ │ ├── russian_superglue_russe_rubert.json
│ │ │ ├── russian_superglue_rwsd_rubert.json
│ │ │ └── russian_superglue_terra_rubert.json
│ │ ├── sentence_segmentation/
│ │ │ └── sentseg_dailydialog_bert.json
│ │ ├── spelling_correction/
│ │ │ ├── brillmoore_wikitypos_en.json
│ │ │ └── levenshtein_corrector_ru.json
│ │ └── squad/
│ │ ├── qa_multisberquad_bert.json
│ │ ├── qa_nq_psgcls_bert.json
│ │ ├── qa_squad2_bert.json
│ │ ├── squad_bert.json
│ │ ├── squad_ru_bert.json
│ │ ├── squad_ru_convers_distilrubert_2L.json
│ │ └── squad_ru_convers_distilrubert_6L.json
│ ├── core/
│ │ ├── __init__.py
│ │ ├── commands/
│ │ │ ├── __init__.py
│ │ │ ├── infer.py
│ │ │ ├── train.py
│ │ │ └── utils.py
│ │ ├── common/
│ │ │ ├── __init__.py
│ │ │ ├── aliases.py
│ │ │ ├── base.py
│ │ │ ├── chainer.py
│ │ │ ├── cross_validation.py
│ │ │ ├── errors.py
│ │ │ ├── file.py
│ │ │ ├── log.py
│ │ │ ├── log_events.py
│ │ │ ├── metrics_registry.json
│ │ │ ├── metrics_registry.py
│ │ │ ├── params.py
│ │ │ ├── params_search.py
│ │ │ ├── paths.py
│ │ │ ├── prints.py
│ │ │ ├── registry.json
│ │ │ ├── registry.py
│ │ │ └── requirements_registry.json
│ │ ├── data/
│ │ │ ├── __init__.py
│ │ │ ├── data_fitting_iterator.py
│ │ │ ├── data_learning_iterator.py
│ │ │ ├── dataset_reader.py
│ │ │ ├── simple_vocab.py
│ │ │ └── utils.py
│ │ ├── models/
│ │ │ ├── __init__.py
│ │ │ ├── component.py
│ │ │ ├── estimator.py
│ │ │ ├── nn_model.py
│ │ │ ├── serializable.py
│ │ │ └── torch_model.py
│ │ └── trainers/
│ │ ├── __init__.py
│ │ ├── fit_trainer.py
│ │ ├── nn_trainer.py
│ │ ├── torch_trainer.py
│ │ └── utils.py
│ ├── dataset_iterators/
│ │ ├── __init__.py
│ │ ├── basic_classification_iterator.py
│ │ ├── huggingface_dataset_iterator.py
│ │ ├── morphotagger_iterator.py
│ │ ├── multitask_iterator.py
│ │ ├── siamese_iterator.py
│ │ ├── sqlite_iterator.py
│ │ ├── squad_iterator.py
│ │ └── typos_iterator.py
│ ├── dataset_readers/
│ │ ├── __init__.py
│ │ ├── basic_classification_reader.py
│ │ ├── boolqa_reader.py
│ │ ├── conll2003_reader.py
│ │ ├── docred_reader.py
│ │ ├── faq_reader.py
│ │ ├── huggingface_dataset_reader.py
│ │ ├── imdb_reader.py
│ │ ├── line_reader.py
│ │ ├── morphotagging_dataset_reader.py
│ │ ├── multitask_reader.py
│ │ ├── odqa_reader.py
│ │ ├── paraphraser_reader.py
│ │ ├── rel_ranking_reader.py
│ │ ├── rured_reader.py
│ │ ├── sq_reader.py
│ │ ├── squad_dataset_reader.py
│ │ ├── typos_reader.py
│ │ └── ubuntu_v2_reader.py
│ ├── deep.py
│ ├── download.py
│ ├── metrics/
│ │ ├── __init__.py
│ │ ├── accuracy.py
│ │ ├── bleu.py
│ │ ├── correlation.py
│ │ ├── elmo_metrics.py
│ │ ├── fmeasure.py
│ │ ├── google_bleu.py
│ │ ├── log_loss.py
│ │ ├── mse.py
│ │ ├── recall_at_k.py
│ │ ├── record_metrics.py
│ │ ├── roc_auc_score.py
│ │ └── squad_metrics.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── api_requester/
│ │ │ ├── __init__.py
│ │ │ ├── api_requester.py
│ │ │ └── api_router.py
│ │ ├── classifiers/
│ │ │ ├── __init__.py
│ │ │ ├── cos_sim_classifier.py
│ │ │ ├── dnnc_proba2labels.py
│ │ │ ├── proba2labels.py
│ │ │ ├── re_bert.py
│ │ │ ├── torch_classification_model.py
│ │ │ ├── torch_nets.py
│ │ │ └── utils.py
│ │ ├── doc_retrieval/
│ │ │ ├── __init__.py
│ │ │ ├── bpr.py
│ │ │ ├── logit_ranker.py
│ │ │ ├── pop_ranker.py
│ │ │ ├── tfidf_ranker.py
│ │ │ └── utils.py
│ │ ├── embedders/
│ │ │ ├── __init__.py
│ │ │ ├── abstract_embedder.py
│ │ │ ├── fasttext_embedder.py
│ │ │ ├── tfidf_weighted_embedder.py
│ │ │ └── transformers_embedder.py
│ │ ├── entity_extraction/
│ │ │ ├── __init__.py
│ │ │ ├── entity_detection_parser.py
│ │ │ ├── entity_linking.py
│ │ │ ├── find_word.py
│ │ │ └── ner_chunker.py
│ │ ├── kbqa/
│ │ │ ├── __init__.py
│ │ │ ├── query_generator.py
│ │ │ ├── query_generator_base.py
│ │ │ ├── rel_ranking_infer.py
│ │ │ ├── ru_adj_to_noun.py
│ │ │ ├── sentence_answer.py
│ │ │ ├── template_matcher.py
│ │ │ ├── tree_to_sparql.py
│ │ │ ├── type_define.py
│ │ │ ├── utils.py
│ │ │ └── wiki_parser.py
│ │ ├── morpho_syntax_parser/
│ │ │ ├── __init__.py
│ │ │ ├── dependency_decoding.py
│ │ │ ├── joint.py
│ │ │ ├── spacy_lemmatizer.py
│ │ │ └── syntax_parsing.py
│ │ ├── preprocessors/
│ │ │ ├── __init__.py
│ │ │ ├── dirty_comments_preprocessor.py
│ │ │ ├── dnnc_preprocessor.py
│ │ │ ├── mask.py
│ │ │ ├── multitask_preprocessor.py
│ │ │ ├── ner_preprocessor.py
│ │ │ ├── odqa_preprocessors.py
│ │ │ ├── one_hotter.py
│ │ │ ├── re_preprocessor.py
│ │ │ ├── response_base_loader.py
│ │ │ ├── sanitizer.py
│ │ │ ├── sentseg_preprocessor.py
│ │ │ ├── squad_preprocessor.py
│ │ │ ├── str_lower.py
│ │ │ ├── str_token_reverser.py
│ │ │ ├── str_utf8_encoder.py
│ │ │ ├── torch_transformers_preprocessor.py
│ │ │ └── transformers_preprocessor.py
│ │ ├── ranking/
│ │ │ ├── __init__.py
│ │ │ └── metrics.py
│ │ ├── relation_extraction/
│ │ │ ├── __init__.py
│ │ │ ├── losses.py
│ │ │ └── relation_extraction_bert.py
│ │ ├── sklearn/
│ │ │ ├── __init__.py
│ │ │ └── sklearn_component.py
│ │ ├── spelling_correction/
│ │ │ ├── __init__.py
│ │ │ ├── brillmoore/
│ │ │ │ ├── __init__.py
│ │ │ │ └── error_model.py
│ │ │ ├── electors/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── kenlm_elector.py
│ │ │ │ └── top1_elector.py
│ │ │ └── levenshtein/
│ │ │ ├── __init__.py
│ │ │ ├── levenshtein_searcher.py
│ │ │ ├── searcher_component.py
│ │ │ └── tabled_trie.py
│ │ ├── tokenizers/
│ │ │ ├── __init__.py
│ │ │ ├── lazy_tokenizer.py
│ │ │ ├── nltk_moses_tokenizer.py
│ │ │ ├── nltk_tokenizer.py
│ │ │ ├── spacy_tokenizer.py
│ │ │ ├── split_tokenizer.py
│ │ │ └── utils.py
│ │ ├── torch_bert/
│ │ │ ├── __init__.py
│ │ │ ├── crf.py
│ │ │ ├── multitask_transformer.py
│ │ │ ├── torch_bert_ranker.py
│ │ │ ├── torch_transformers_classifier.py
│ │ │ ├── torch_transformers_el_ranker.py
│ │ │ ├── torch_transformers_multiplechoice.py
│ │ │ ├── torch_transformers_nll_ranking.py
│ │ │ ├── torch_transformers_sequence_tagger.py
│ │ │ ├── torch_transformers_squad.py
│ │ │ └── torch_transformers_syntax_parser.py
│ │ └── vectorizers/
│ │ ├── __init__.py
│ │ └── hashing_tfidf_vectorizer.py
│ ├── paramsearch.py
│ ├── requirements/
│ │ ├── datasets.txt
│ │ ├── dependency_decoding.txt
│ │ ├── en_core_web_sm.txt
│ │ ├── faiss.txt
│ │ ├── fasttext.txt
│ │ ├── hdt.txt
│ │ ├── kenlm.txt
│ │ ├── lxml.txt
│ │ ├── opt_einsum.txt
│ │ ├── protobuf.txt
│ │ ├── pytorch.txt
│ │ ├── rapidfuzz.txt
│ │ ├── razdel.txt
│ │ ├── ru_core_news_sm.txt
│ │ ├── sacremoses.txt
│ │ ├── sentencepiece.txt
│ │ ├── slovnet.txt
│ │ ├── sortedcontainers.txt
│ │ ├── torchcrf.txt
│ │ ├── transformers.txt
│ │ ├── udapi.txt
│ │ └── whapi.txt
│ ├── settings.py
│ ├── utils/
│ │ ├── __init__.py
│ │ ├── benchmarks/
│ │ │ ├── __init__.py
│ │ │ └── benchmarks.py
│ │ ├── connector/
│ │ │ ├── __init__.py
│ │ │ └── dialog_logger.py
│ │ ├── pip_wrapper/
│ │ │ ├── __init__.py
│ │ │ └── pip_wrapper.py
│ │ ├── server/
│ │ │ ├── __init__.py
│ │ │ ├── metrics.py
│ │ │ └── server.py
│ │ ├── settings/
│ │ │ ├── __init__.py
│ │ │ ├── dialog_logger_config.json
│ │ │ ├── log_config.json
│ │ │ └── server_config.json
│ │ └── socket/
│ │ ├── __init__.py
│ │ └── socket.py
│ └── vocabs/
│ ├── __init__.py
│ ├── typos.py
│ └── wiki_sqlite.py
├── docs/
│ ├── Makefile
│ ├── _static/
│ │ ├── deeppavlov.css
│ │ └── my_blocks.css
│ ├── _templates/
│ │ └── footer.html
│ ├── apiref/
│ │ ├── core/
│ │ │ ├── commands.rst
│ │ │ ├── common.rst
│ │ │ ├── data.rst
│ │ │ ├── models.rst
│ │ │ └── trainers.rst
│ │ ├── core.rst
│ │ ├── dataset_iterators.rst
│ │ ├── dataset_readers.rst
│ │ ├── metrics.rst
│ │ ├── models/
│ │ │ ├── api_requester.rst
│ │ │ ├── classifiers.rst
│ │ │ ├── doc_retrieval.rst
│ │ │ ├── embedders.rst
│ │ │ ├── entity_extraction.rst
│ │ │ ├── kbqa.rst
│ │ │ ├── preprocessors.rst
│ │ │ ├── relation_extraction.rst
│ │ │ ├── sklearn.rst
│ │ │ ├── spelling_correction.rst
│ │ │ ├── tokenizers.rst
│ │ │ ├── torch_bert.rst
│ │ │ └── vectorizers.rst
│ │ ├── models.rst
│ │ └── vocabs.rst
│ ├── conf.py
│ ├── devguides/
│ │ ├── contribution_guide.rst
│ │ └── registry.rst
│ ├── features/
│ │ ├── hypersearch.rst
│ │ ├── models/
│ │ │ ├── KBQA.ipynb
│ │ │ ├── NER.ipynb
│ │ │ ├── ODQA.ipynb
│ │ │ ├── SQuAD.ipynb
│ │ │ ├── bert.rst
│ │ │ ├── classification.ipynb
│ │ │ ├── entity_extraction.ipynb
│ │ │ ├── few_shot_classification.ipynb
│ │ │ ├── morpho_tagger.ipynb
│ │ │ ├── multitask_bert.rst
│ │ │ ├── neural_ranking.ipynb
│ │ │ ├── popularity_ranking.rst
│ │ │ ├── relation_extraction.ipynb
│ │ │ ├── spelling_correction.ipynb
│ │ │ ├── superglue.rst
│ │ │ ├── syntax_parser.ipynb
│ │ │ └── tfidf_ranking.ipynb
│ │ ├── overview.rst
│ │ └── pretrained_vectors.rst
│ ├── index.rst
│ ├── integrations/
│ │ ├── aws_ec2.rst
│ │ ├── rest_api.rst
│ │ ├── settings.rst
│ │ └── socket_api.rst
│ ├── internships/
│ │ └── internships.rst
│ └── intro/
│ ├── configuration.rst
│ ├── installation.rst
│ ├── overview.rst
│ ├── python.ipynb
│ └── quick_start.rst
├── requirements.txt
├── setup.py
├── tests/
│ ├── __init__.py
│ ├── test_configs/
│ │ └── doc_retrieval/
│ │ ├── en_ranker_pop_wiki_test.json
│ │ ├── en_ranker_tfidf_wiki_test.json
│ │ └── ru_ranker_tfidf_wiki_test.json
│ └── test_quick_start.py
└── utils/
├── Docker/
│ ├── Dockerfile
│ ├── README.md
│ ├── cmd.sh
│ └── docker-compose.yml
├── __init__.py
└── prepare/
├── __init__.py
├── hashes.py
├── optimize_ipynb.py
├── registry.py
└── upload.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Report on a bug you encountered
title: ''
labels: bug
assignees: ''
---
Want to contribute to DeepPavlov? Please read the [contributing guideline](http://docs.deeppavlov.ai/en/master/devguides/contribution_guide.html) first.
Please enter all the information below, otherwise your issue may be closed without a warning.
**DeepPavlov version** (you can look it up by running `pip show deeppavlov`):
**Python version**:
**Operating system** (ubuntu linux, windows, ...):
**Issue**:
**Content or a name of a configuration file**:
```
```
**Command that led to error**:
```
```
**Error (including full traceback)**:
```
```
================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: false
contact_links:
- name: Ask a question
url: https://forum.deeppavlov.ai/
about: If you have a different question, please ask it in the forum https://forum.deeppavlov.ai
================================================
FILE: .github/ISSUE_TEMPLATE/feature-request.md
================================================
---
name: Feature request
about: Suggest a feature to improve the DeepPavlov library
title: ''
labels: enhancement
assignees: ''
---
Want to contribute to DeepPavlov? Please read the [contributing guideline](http://docs.deeppavlov.ai/en/master/devguides/contribution_guide.html) first.
**What problem are we trying to solve?**:
```
```
**How can we solve it?**:
```
```
**Are there other issues that block this solution?**:
```
```
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv
venv/
ENV/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
#IDEA
.idea/
#Atom IDE
.ftpconfig
#vscode IDE
.vscode
# Vim
*.vim
*.vimrc
#GIT
.git/
#Default usr dir
download/
#project test
/test/
.pytest_cache
# project data
/data/
# local dockerfiles
/Dockerfile
/entrypoint.sh
/.dockerignore
================================================
FILE: .readthedocs.yml
================================================
# .readthedocs.yml
version: 2
build:
os: "ubuntu-20.04"
tools:
python: "3.10"
formats: []
python:
install:
- method: pip
path: .
extra_requirements:
- docs
================================================
FILE: CNAME
================================================
deeppavlov.ai
================================================
FILE: Jenkinsfile
================================================
// Scripted Jenkins pipeline: builds the docker-compose test images and runs the
// per-Python-version test services defined in utils/Docker/docker-compose.yml.
// Everything runs on an agent selected by the 'cuda-module' label.
node('cuda-module') {
timestamps {
try {
// Wipe the workspace, including dotfiles: `.[^.]` matches two-character
// dot entries other than `..`, `.??*` matches dot entries of three or more characters.
stage('Clean') {
sh "rm -rf .[^.] .??* *"
}
stage('Checkout') {
checkout scm
}
// Build the images under a compose project named after $BUILD_TAG.
// `\$(date +%s)` is escaped so the shell, not Groovy, evaluates it.
// NOTE(review): EPOCH is presumably consumed by docker-compose.yml / Dockerfile
// (e.g. as a cache-busting build arg) - confirm there.
stage('Setup') {
env.TFHUB_CACHE_DIR="tfhub_cache"
sh """
EPOCH=\$(date +%s) docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG build
"""
}
// Run the test services two at a time. After each pair, `ps | grep Exit | grep -v 'Exit 0'`
// matches containers that exited with a non-zero status; a match triggers `exit 1`.
// The trailing `|| exit 0` on the last line makes the script finish with status 0
// when no failed container is found (grep's own non-zero status would otherwise
// become the script's exit status).
stage('Tests') {
sh """
docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG up py36 py37
docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG ps | grep Exit | grep -v 'Exit 0' && exit 1
docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG up py38 py39
docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG ps | grep Exit | grep -v 'Exit 0' && exit 1
docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG up py310 py311
docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG ps | grep Exit | grep -v 'Exit 0' && exit 1 || exit 0
"""
currentBuild.result = 'SUCCESS'
}
}
// Any failure above marks the build as FAILURE and is re-thrown so Jenkins aborts the run.
catch(e) {
currentBuild.result = 'FAILURE'
throw e
}
// Always clean up: remove the compose containers and the project's default network
// (named from the lower-cased $BUILD_TAG plus `_default`), then mail the build log.
finally {
sh """
docker-compose -f utils/Docker/docker-compose.yml -p $BUILD_TAG rm -f
docker network rm \$(echo $BUILD_TAG | awk '{print tolower(\$0)}')_default
"""
// `\${DEFAULT_RECIPIENTS}` and the single-quoted body are not interpolated by Groovy.
// NOTE(review): presumably these tokens are expanded by the email-ext plugin - confirm.
emailext to: "\${DEFAULT_RECIPIENTS}",
subject: "${env.JOB_NAME} - Build # ${currentBuild.number} - ${currentBuild.result}!",
body: '${BRANCH_NAME} - ${BUILD_URL}',
attachLog: true
}
}
}
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2018 Neural Systems and Deep Learning Laboratory
Moscow Institute of Physics and Technology
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: MANIFEST.in
================================================
include README.md
include LICENSE
include requirements.txt
include deeppavlov/requirements/*.txt
recursive-include deeppavlov *.json
recursive-include deeppavlov *.md
================================================
FILE: README.md
================================================
# DeepPavlov 1.0
[License: Apache 2.0](LICENSE)

[Downloads](https://pepy.tech/project/deeppavlov)
[Forum](https://forum.deeppavlov.ai/)
[Demo](https://demo.deeppavlov.ai/)
DeepPavlov 1.0 is an open-source NLP framework built on [PyTorch](https://pytorch.org/) and [transformers](https://github.com/huggingface/transformers). DeepPavlov 1.0 is created for modular and configuration-driven development of state-of-the-art NLP models and supports a wide range of NLP model applications. DeepPavlov 1.0 is designed for practitioners with limited knowledge of NLP/ML.
## Quick Links
|Name|Description|
|--|--|
| ⭐️ [*Demo*](https://demo.deeppavlov.ai/)|Check out our NLP models in the online demo|
| 📚 [*Documentation*](http://docs.deeppavlov.ai/)|How to use DeepPavlov 1.0 and its features|
| 🚀 [*Model List*](http://docs.deeppavlov.ai/en/master/features/overview.html)|Find the NLP model you need in the list of available models|
| 🪐 [*Contribution Guide*](http://docs.deeppavlov.ai/en/master/devguides/contribution_guide.html)|Please read the contribution guidelines before making a contribution|
| 🎛 [*Issues*](https://github.com/deeppavlov/DeepPavlov/issues)|If you have an issue with DeepPavlov, please let us know|
| ⏩ [*Forum*](https://forum.deeppavlov.ai/)|Please let us know if you have a problem with DeepPavlov|
| 📦 [*Blogs*](https://medium.com/deeppavlov)|Read about our current development|
| 🦙 [Extended colab tutorials](https://github.com/deeppavlov/dp_tutorials)|Check out the code tutorials for our models|
| 🌌 [*Docker Hub*](https://hub.docker.com/u/deeppavlov/)|Check out the Docker images for rapid deployment|
| 👩🏫 [*Feedback*](https://forms.gle/i64fowQmiVhMMC7f9)|Please leave us your feedback to make DeepPavlov better|
## Installation
0. DeepPavlov supports `Linux`, `Windows 10+` (through WSL/WSL2), `MacOS` (Big Sur+) platforms, `Python 3.6`, `3.7`, `3.8`, `3.9` and `3.10`.
Depending on the model used, you may need from 4 to 16 GB RAM.
1. Create and activate a virtual environment:
* `Linux`
```
python -m venv env
source ./env/bin/activate
```
2. Install the package inside the environment:
```
pip install deeppavlov
```
## QuickStart
DeepPavlov provides a large collection of great pre-trained NLP models. Each model is
defined by its config file.
The list of models is available on
[the doc page](http://docs.deeppavlov.ai/en/master/features/overview.html) and in
the `deeppavlov.configs` module (Python):
```python
from deeppavlov import configs
```
Once you have decided on a model (and its config file), there are two ways to train,
evaluate and infer it:
* via [Command line interface (CLI)](#command-line-interface-cli) and
* via [Python](#python).
#### GPU requirements
By default, DeepPavlov installs model requirements from PyPI. The PyTorch build from PyPI may not support your device's CUDA
capability. To run supported DeepPavlov models on a GPU you should have a [CUDA](https://developer.nvidia.com/cuda-toolkit) version
compatible with your GPU and the [PyTorch version](deeppavlov/requirements/pytorch.txt) required by DeepPavlov models.
See [docs](https://docs.deeppavlov.ai/en/master/intro/quick_start.html#using-gpu) for details.
GPU with Pascal or newer architecture and 4+ GB VRAM is recommended.
### Command line interface (CLI)
To get predictions from a model interactively through CLI, run
```bash
python -m deeppavlov interact <config_path> [-d] [-i]
```
* `-d` downloads required data - pretrained model files and embeddings (optional).
* `-i` installs model requirements (optional).
You can train it in the same simple way:
```bash
python -m deeppavlov train <config_path> [-d] [-i]
```
The dataset will be downloaded regardless of whether the `-d` flag is set.
To train on your own data you need to modify the dataset reader path as described in the
[train config doc](http://docs.deeppavlov.ai/en/master/intro/config_description.html#train-config).
The data format is specified in the corresponding model doc page.
There are even more actions you can perform with configs:
```bash
python -m deeppavlov <action> <config_path> [-d] [-i]
```
* `<action>` can be
* `install` to install model requirements (same as `-i`),
* `download` to download model's data (same as `-d`),
* `train` to train the model on the data specified in the config file,
* `evaluate` to calculate metrics on the same dataset,
* `interact` to interact via CLI,
* `riseapi` to run a REST API server (see
[doc](http://docs.deeppavlov.ai/en/master/integrations/rest_api.html)),
* `predict` to get prediction for samples from *stdin* or from
*<file_path>* if `-f <file_path>` is specified.
* `<config_path>` specifies path (or name) of model's config file
* `-d` downloads required data
* `-i` installs model requirements
### Python
To get predictions from a model interactively through Python, run
```python
from deeppavlov import build_model
model = build_model(<config_path>, install=True, download=True)
# get predictions for 'input_text1', 'input_text2'
model(['input_text1', 'input_text2'])
```
where
* `install=True` installs model requirements (optional),
* `download=True` downloads required data from web - pretrained model files and embeddings (optional),
* `<config_path>` is model name (e.g. `'ner_ontonotes_bert_mult'`), path to the chosen model's config file (e.g.
`"deeppavlov/configs/ner/ner_ontonotes_bert_mult.json"`), or `deeppavlov.configs` attribute (e.g.
`deeppavlov.configs.ner.ner_ontonotes_bert_mult` without quotation marks).
You can train it in the same simple way:
```python
from deeppavlov import train_model
model = train_model(<config_path>, install=True, download=True)
```
To train on your own data you need to modify the dataset reader path as described in the
[train config doc](http://docs.deeppavlov.ai/en/master/intro/config_description.html#train-config).
The data format is specified in the corresponding model doc page.
You can also calculate metrics on the dataset specified in your config file:
```python
from deeppavlov import evaluate_model
model = evaluate_model(<config_path>, install=True, download=True)
```
DeepPavlov also [allows](https://docs.deeppavlov.ai/en/master/intro/python.html) you to build a model from components for
inference using Python.
## License
DeepPavlov is licensed under the Apache 2.0 license.
## Citation
```
@inproceedings{savkin-etal-2024-deeppavlov,
title = "DeepPavlov 1.0: Your Gateway to Advanced NLP Models Backed by Transformers and Transfer Learning",
author = "Savkin Maksim and Voznyuk Anastasia and Ignatov Fedor and Korzanova Anna and Karpov Dmitry and Popov Alexander and Konovalov Vasily",
editor = "Hernandez Farias and Delia Irazu and Hope Tom and Li Manling",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: System Demonstrations",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-demo.47",
pages = "465--474",
abstract = "We present DeepPavlov 1.0, an open-source framework for using Natural Language Processing (NLP) models by leveraging transfer learning techniques. DeepPavlov 1.0 is created for modular and configuration-driven development of state-of-the-art NLP models and supports a wide range of NLP model applications. DeepPavlov 1.0 is designed for practitioners with limited knowledge of NLP/ML. DeepPavlov is based on PyTorch and supports HuggingFace transformers. DeepPavlov is publicly released under the Apache 2.0 license and provides access to an online demo.",
}
```
================================================
FILE: _config.yml
================================================
theme: jekyll-theme-leap-day
google_analytics: UA-139843736-5
include:
- _static
================================================
FILE: _layouts/default.html
================================================
<!doctype html>
<html lang="{{ site.lang | default: "en-US" }}">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
{% seo %}
<link rel="stylesheet" href="{{ '/assets/css/style.css?v=' | append: site.github.build_revision | relative_url }}">
<script src="https://code.jquery.com/jquery-3.3.0.min.js" integrity="sha256-RTQy8VOmNlT6b2PIRur37p6JEBZUE7o8wPgMvu18MC4=" crossorigin="anonymous"></script>
<script src="{{ '/assets/js/main.js' | relative_url }}"></script>
<!--[if lt IE 9]>
<script src="https://cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv.min.js" integrity="sha256-3Jy/GbSLrg0o9y5Z5n1uw0qxZECH7C6OQpVBgNFYa0g=" crossorigin="anonymous"></script>
<![endif]-->
<meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no">
<link rel="stylesheet" type="text/css" href="//cdnjs.cloudflare.com/ajax/libs/cookieconsent2/3.1.0/cookieconsent.min.css" />
<script src="//cdnjs.cloudflare.com/ajax/libs/cookieconsent2/3.1.0/cookieconsent.min.js"></script>
<script>
window.addEventListener("load", function(){
window.cookieconsent.initialise({
"palette": {
"popup": {
"background": "#237afc"
},
"button": {
"background": "#fff",
"text": "#237afc"
}
},
"showLink": false,
"position": "bottom-right",
"theme": "classic",
"content": {
"message": "This website uses cookies. By continuing to use this site, you accept our use of cookies.",
"dismiss": "ACCEPT & CLOSE"
}
})});
</script>
</head>
<body>
<header>
<h1>{{ site.title | default: site.github.repository_name }}</h1>
<p>{{ site.description | default: site.github.project_tagline }}</p>
</header>
<div id="banner">
<span id="logo"></span>
<a href="{{ site.github.repository_url }}" class="button fork"><strong>View On GitHub</strong></a>
{% if site.show_downloads %}
<div class="downloads">
<span>Downloads:</span>
<ul>
<li><a href="{{ site.github.zip_url }}" class="button">ZIP</a></li>
<li><a href="{{ site.github.tar_url }}" class="button">TAR</a></li>
</ul>
</div>
{% endif %}
</div><!-- end banner -->
<div class="wrapper">
<nav>
<ul></ul>
</nav>
<section>
{{ content }}
</section>
<footer>
{% if site.github.is_project_page %}
<p>Project maintained by <a href="{{ site.github.owner_url }}">{{ site.github.owner_name }}</a></p>
{% endif %}
<p><small>Hosted on GitHub Pages — Theme by <a href="https://twitter.com/michigangraham">mattgraham</a></small></p>
</footer>
</div>
{% if site.google_analytics %}
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
ga('create', '{{ site.google_analytics }}', 'auto');
ga('send', 'pageview');
</script>
{% endif %}
</body>
</html>
================================================
FILE: deeppavlov/__init__.py
================================================
# Copyright 2017 Neural Networks and Deep Learning lab, MIPT
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from pathlib import Path
from typing import Union

from ._meta import __author__, __description__, __email__, __keywords__, __license__, __version__
from .configs import configs
from .core.commands.infer import build_model
from .core.commands.train import train_evaluate_model_from_config
from .core.common.base import Element, Model
from .core.common.chainer import Chainer
from .core.common.log import init_logger
from .download import deep_download
# TODO: make better
def train_model(config: Union[str, Path, dict], install: bool = False,
                download: bool = False, recursive: bool = False) -> Chainer:
    """Train the model described by ``config`` and return it ready for inference.

    Training is delegated to ``train_evaluate_model_from_config``; afterwards the
    pipeline is rebuilt with ``build_model(config, load_trained=True)`` and returned.

    Args:
        config: model config given as a name/path (``str`` or ``Path``) or as an
            already loaded ``dict``.
        install: forwarded to ``train_evaluate_model_from_config``; per the README
            it installs the model requirements.
        download: forwarded to ``train_evaluate_model_from_config``; per the README
            it downloads the required data.
        recursive: forwarded unchanged to ``train_evaluate_model_from_config``
            (NOTE(review): exact semantics are defined there, not in this module).

    Returns:
        The ``Chainer`` produced by ``build_model`` for the trained model.
    """
    train_evaluate_model_from_config(config, install=install, download=download, recursive=recursive)
    # Requirements/data were already handled by the training call above, so the
    # rebuild only needs to load the freshly trained weights.
    return build_model(config, load_trained=True)
def evaluate_model(config: Union[str, Path, dict], install: bool = False,
                   download: bool = False, recursive: bool = False) -> dict:
    """Evaluate the model described by ``config`` without training it.

    Thin wrapper around ``train_evaluate_model_from_config`` called with
    ``to_train=False``.

    Args:
        config: model config given as a name/path (``str`` or ``Path``) or as an
            already loaded ``dict``.
        install: forwarded to ``train_evaluate_model_from_config``; per the README
            it installs the model requirements.
        download: forwarded to ``train_evaluate_model_from_config``; per the README
            it downloads the required data.
        recursive: forwarded unchanged to ``train_evaluate_model_from_config``
            (NOTE(review): exact semantics are defined there, not in this module).

    Returns:
        Whatever ``train_evaluate_model_from_config`` returns (annotated as ``dict``;
        presumably the computed metrics — confirm against that function).
    """
    return train_evaluate_model_from_config(config, to_train=False, install=install,
                                            download=download, recursive=recursive)
# check version: sys.hexversion encodes the interpreter version as a single integer,
# and 0x3060000 corresponds to Python 3.6.0, so anything older fails at import time.
# NOTE(review): `assert` statements are stripped under `python -O`, so the check is skipped there.
assert sys.hexversion >= 0x3060000, 'Does not work in python3.5 or lower'
# resolve conflicts with previous DeepPavlov installations versioned up to 0.0.9:
# if ~/.deeppavlov exists as a regular *file*, delete it
# (presumably so the path can be used as a directory afterwards — TODO confirm).
dot_dp_path = Path('~/.deeppavlov').expanduser().resolve()
if dot_dp_path.is_file():
    dot_dp_path.unlink()
# initiate logging (runs as a side effect of importing the package)
init_logger()
================================================
FILE: deeppavlov/__main__.py
================================================
# Entry point used when the package is executed as a script (``python -m deeppavlov``):
# the import is done lazily inside the guard and control is handed to ``main`` from the
# sibling ``.deep`` module.
if __name__ == '__main__':
    from .deep import main
    main()
================================================
FILE: deeppavlov/_meta.py
================================================
# Package metadata; these names are re-exported from ``deeppavlov/__init__.py``.
__version__ = '1.7.0'
__author__ = 'Neural Networks and Deep Learning lab, MIPT'
__description__ = 'An open source library for building end-to-end dialog systems and training chatbots.'
__keywords__ = ['NLP', 'NER', 'SQUAD', 'Intents', 'Chatbot']
__license__ = 'Apache License, Version 2.0'
__email__ = 'info@deeppavlov.ai'
================================================
FILE: deeppavlov/configs/__init__.py
================================================
from pathlib import Path
from typing import Iterator, Dict, Union, Iterable
class Struct:
    """Attribute-style view over a nested ``dict`` tree.

    Every key of the source tree becomes an attribute of the instance (dots in
    key names are replaced with underscores). Nested dictionaries are wrapped
    into nested ``Struct`` objects; any other value is stored unchanged.
    """

    def __init__(self, tree: Dict[str, Union[dict, Path]]) -> None:
        names = set()
        for raw_key, value in tree.items():
            # Attribute names cannot contain dots, so normalise them.
            attr_name = raw_key.replace('.', '_')
            names.add(attr_name)
            if isinstance(value, dict):
                value = Struct(value)
            setattr(self, attr_name, value)
        self._keys = frozenset(names)
        # dict-like ``keys()`` accessor, stored on the instance.
        self.keys = lambda: self._keys

    def __iter__(self) -> Iterator[str]:
        return iter(self._keys)

    def __len__(self) -> int:
        return len(self._keys)

    def _asdict(self, *, to_string: bool=False) -> dict:
        """Convert the structure back into plain nested dictionaries.

        When ``to_string`` is true, every leaf value is passed through ``str``.
        """
        plain = {}
        for name in self._keys:
            item = getattr(self, name)
            if isinstance(item, Struct):
                plain[name] = item._asdict(to_string=to_string)
            else:
                plain[name] = str(item) if to_string else item
        return plain

    def __getitem__(self, key: str) -> Union[dict, Path]:
        if key in self._keys:
            found = getattr(self, key)
            # Nested structures are handed out as plain dictionaries.
            return found._asdict() if isinstance(found, Struct) else found
        raise KeyError(key)

    def __dir__(self) -> Iterable:
        return self._keys

    def _ipython_key_completions_(self) -> Iterable:
        return self._keys

    def __str__(self) -> str:
        as_strings = self._asdict(to_string=True)
        return str(as_strings)

    def __repr__(self) -> str:
        return f'Struct({self._asdict()!r})'

    def _repr_pretty_(self, p, cycle):
        """Pretty-printing hook used by IPython for ``Struct`` instances.

        Args:
            p (IPython.lib.pretty.RepresentationPrinter): pretty printer object
            cycle (bool): is ``True`` if pretty detected a cycle
        """
        if cycle:
            p.text('Struct(...)')
            return
        # 7 == len('Struct('), used as the indentation of the grouped content.
        with p.group(7, 'Struct(', ')'):
            p.pretty(self._asdict())
def _build_configs_tree() -> Struct:
    """Collect every ``*.json`` file below this package directory into a ``Struct``.

    Sub-directory names become nested levels of the tree and each config file is
    stored under its file stem, with the resolved ``Path`` as the value.
    """
    configs_dir = Path(__file__).resolve().parent
    nested = {}
    for json_path in configs_dir.glob('**/*.json'):
        node = nested
        # Descend through (creating when missing) one level per parent directory.
        for dir_name in json_path.relative_to(configs_dir).parent.parts:
            node = node.setdefault(dir_name, {})
        node[json_path.stem] = json_path
    return Struct(nested)
# Tree of all JSON configs found under this package directory, built once at import time.
configs = _build_configs_tree()
================================================
FILE: deeppavlov/configs/classifiers/boolqa_rubert.json
================================================
{
"dataset_reader": {
"class_name": "boolqa_reader",
"data_path": "{DOWNLOADS_PATH}/boolqa_data",
"language": "ru"
},
"dataset_iterator": {
"class_name": "basic_classification_iterator",
"seed": 243
},
"chainer": {
"in": ["text_a", "text_b"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{TRANSFORMER}",
"do_lower_case": false,
"max_seq_length": 128,
"in": ["text_a", "text_b"],
"out": ["bert_features"]
},
{
"class_name": "torch_transformers_classifier",
"n_classes": 2,
"pretrained_bert": "{TRANSFORMER}",
"save_path": "{MODELS_PATH}/boolqa_rubert/model_rubert",
"load_path": "{MODELS_PATH}/boolqa_rubert/model_rubert",
"optimizer": "AdamW",
"optimizer_parameters": {"lr": 2e-05},
"learning_rate_drop_patience": 3,
"learning_rate_drop_div": 2.0,
"in": ["bert_features"],
"in_y": ["y"],
"out": ["predictions"]
}
],
"out": ["predictions"]
},
"train": {
"epochs": 50,
"batch_size": 32,
"train_metrics": ["f1", "acc"],
"metrics": ["f1", "acc"],
"validation_patience": 5,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"evaluation_targets": ["valid", "train"],
"show_examples": false,
"class_name": "torch_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"TRANSFORMER": "DeepPavlov/rubert-base-cased"
}
}
}
================================================
FILE: deeppavlov/configs/classifiers/few_shot_roberta.json
================================================
{
"chainer": {
"in": ["texts", "dataset"],
"in_y": ["y_true"],
"pipe": [
{
"class_name": "dnnc_pair_generator",
"in": ["texts", "dataset"],
"out": ["x", "x_support", "x_populated", "y_support"],
"bidirectional": true
},
{
"class_name": "torch_transformers_preprocessor",
"in": ["x_populated", "x_support"],
"out": ["bert_features"],
"vocab_file": "{BASE_MODEL}",
"do_lower_case": true,
"max_seq_length": 128
},
{
"class_name": "torch_transformers_classifier",
"main": true,
"in": ["bert_features"],
"out": ["simmilarity_scores"],
"n_classes": 2,
"return_probas": true,
"pretrained_bert": "{BASE_MODEL}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"is_binary": "{BINARY_CLASSIFICATION}"
},
{
"class_name": "dnnc_proba2labels",
"is_binary": "{BINARY_CLASSIFICATION}",
"in": ["simmilarity_scores", "x", "x_populated", "x_support", "y_support"],
"out": ["y_pred"],
"confidence_threshold": 0.0
}
],
"out": ["y_pred"]
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"MODEL_PATH": "{ROOT_PATH}/models/fewshot/roberta_nli_mrpc_1_10",
"BINARY_CLASSIFICATION": true,
"BASE_MODEL": "roberta-base"
},
"download": [
{
"url": "http://files.deeppavlov.ai/v1/classifiers/fewshot/roberta_nli_mrpc_1_10.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/glue/glue_cola_roberta.json
================================================
{
"dataset_reader": {
"class_name": "huggingface_dataset_reader",
"path": "{COMPETITION}",
"name": "{TASK}",
"train": "train",
"valid": "validation",
"test": "test"
},
"dataset_iterator": {
"class_name": "huggingface_dataset_iterator",
"features": "sentence",
"label": "label",
"seed": 42
},
"chainer": {
"in": ["x"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{BASE_MODEL}",
"do_lower_case": false,
"max_seq_length": 64,
"in": ["x"],
"out": ["bert_features"]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": ["y"],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": ["y"],
"out": ["y_ids"]
},
{
"in": ["y_ids"],
"out": ["y_onehot"],
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{BASE_MODEL}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 2e-05
},
"learning_rate_drop_patience": 3,
"learning_rate_drop_div": 2.0,
"in": ["bert_features"],
"in_y": ["y_ids"],
"out": ["y_pred_probas"]
},
{
"in": ["y_pred_probas"],
"out": ["y_pred_ids"],
"class_name": "proba2labels",
"max_proba": true
},
{
"in": ["y_pred_ids"],
"out": ["y_pred_labels"],
"ref": "classes_vocab"
}
],
"out": ["y_pred_labels"]
},
"train": {
"batch_size": 32,
"metrics": ["matthews_correlation"],
"validation_patience": 10,
"val_every_n_batches": 250,
"log_every_n_batches": 250,
"show_examples": false,
"evaluation_targets": ["train", "valid"],
"class_name": "torch_trainer",
"tensorboard_log_dir": "{MODEL_PATH}/",
"pytest_max_batches": 2
},
"metadata": {
"variables": {
"BASE_MODEL": "roberta-large",
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"COMPETITION": "glue",
"TASK": "cola",
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
},
"download": [
{
"url": "http://files.deeppavlov.ai/v1/glue/glue_cola_roberta.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/glue/glue_mnli_cased_bert_torch.json
================================================
{
"dataset_reader": {
"class_name": "huggingface_dataset_reader",
"path": "glue",
"name": "mnli",
"train": "train",
"valid": "validation_matched",
"test": "test_matched"
},
"dataset_iterator": {
"class_name": "huggingface_dataset_iterator",
"features": ["hypothesis", "premise"],
"label": "label",
"seed": 42
},
"chainer": {
"in": ["hypothesis", "premise"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{BASE_MODEL}",
"do_lower_case": false,
"max_seq_length": 128,
"in": ["hypothesis", "premise"],
"out": ["bert_features"]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": ["y"],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": ["y"],
"out": ["y_ids"]
},
{
"in": ["y_ids"],
"out": ["y_onehot"],
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{BASE_MODEL}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 2e-05
},
"learning_rate_drop_patience": 3,
"learning_rate_drop_div": 2.0,
"in": ["bert_features"],
"in_y": ["y_ids"],
"out": ["y_pred_probas"]
},
{
"in": ["y_pred_probas"],
"out": ["y_pred_ids"],
"class_name": "proba2labels",
"max_proba": true
},
{
"in": ["y_pred_ids"],
"out": ["y_pred_labels"],
"ref": "classes_vocab"
}
],
"out": ["y_pred_labels"]
},
"train": {
"batch_size": 64,
"metrics": ["accuracy"],
"validation_patience": 10,
"val_every_n_batches": 250,
"log_every_n_batches": 250,
"show_examples": false,
"evaluation_targets": ["train", "valid"],
"class_name": "torch_trainer",
"tensorboard_log_dir": "{MODEL_PATH}/",
"pytest_max_batches": 2
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/glue_mnli_torch_cased_bert",
"BASE_MODEL": "bert-base-cased"
}
}
}
================================================
FILE: deeppavlov/configs/classifiers/glue/glue_mnli_mm_cased_bert_torch.json
================================================
{
"dataset_reader": {
"class_name": "huggingface_dataset_reader",
"path": "glue",
"name": "mnli",
"train": "train",
"valid": "validation_mismatched",
"test": "test_mismatched"
},
"dataset_iterator": {
"class_name": "huggingface_dataset_iterator",
"features": ["hypothesis", "premise"],
"label": "label",
"seed": 42
},
"chainer": {
"in": ["hypothesis", "premise"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{BASE_MODEL}",
"do_lower_case": false,
"max_seq_length": 128,
"in": ["hypothesis", "premise"],
"out": ["bert_features"]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": ["y"],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": ["y"],
"out": ["y_ids"]
},
{
"in": ["y_ids"],
"out": ["y_onehot"],
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{BASE_MODEL}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 2e-05
},
"learning_rate_drop_patience": 3,
"learning_rate_drop_div": 2.0,
"in": ["bert_features"],
"in_y": ["y_ids"],
"out": ["y_pred_probas"]
},
{
"in": ["y_pred_probas"],
"out": ["y_pred_ids"],
"class_name": "proba2labels",
"max_proba": true
},
{
"in": ["y_pred_ids"],
"out": ["y_pred_labels"],
"ref": "classes_vocab"
}
],
"out": ["y_pred_labels"]
},
"train": {
"batch_size": 64,
"metrics": ["accuracy"],
"validation_patience": 10,
"val_every_n_batches": 250,
"log_every_n_batches": 250,
"show_examples": false,
"evaluation_targets": ["train", "valid"],
"class_name": "torch_trainer",
"tensorboard_log_dir": "{MODEL_PATH}/",
"pytest_max_batches": 2
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/glue_mnli_mm_torch_cased_bert",
"BASE_MODEL": "bert-base-cased"
}
}
}
================================================
FILE: deeppavlov/configs/classifiers/glue/glue_mnli_roberta.json
================================================
{
"dataset_reader": {
"class_name": "huggingface_dataset_reader",
"path": "{COMPETITION}",
"name": "{TASK}",
"train": "train",
"valid": "validation_matched",
"test": "test_matched"
},
"dataset_iterator": {
"class_name": "huggingface_dataset_iterator",
"features": ["hypothesis", "premise"],
"label": "label",
"seed": 42
},
"chainer": {
"in": ["hypothesis", "premise"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{BASE_MODEL}",
"do_lower_case": false,
"max_seq_length": 128,
"in": ["hypothesis", "premise"],
"out": ["bert_features"]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": ["y"],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": ["y"],
"out": ["y_ids"]
},
{
"in": ["y_ids"],
"out": ["y_onehot"],
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{BASE_MODEL}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 1e-05
},
"learning_rate_drop_patience": 3,
"learning_rate_drop_div": 2.0,
"in": ["bert_features"],
"in_y": ["y_ids"],
"out": ["y_pred_probas"]
},
{
"in": ["y_pred_probas"],
"out": ["y_pred_ids"],
"class_name": "proba2labels",
"max_proba": true
},
{
"in": ["y_pred_ids"],
"out": ["y_pred_labels"],
"ref": "classes_vocab"
}
],
"out": ["y_pred_labels"]
},
"train": {
"batch_size": 4,
"metrics": ["accuracy"],
"validation_patience": 10,
"val_every_n_batches": 250,
"log_every_n_batches": 250,
"show_examples": false,
"evaluation_targets": ["valid"],
"class_name": "torch_trainer",
"tensorboard_log_dir": "{MODEL_PATH}/",
"pytest_max_batches": 2
},
"metadata": {
"variables": {
"BASE_MODEL": "roberta-large",
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"COMPETITION": "glue",
"TASK": "mnli",
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
},
"download": [
{
"url": "http://files.deeppavlov.ai/0.16/classifiers/glue_mnli.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/glue/glue_mrpc_roberta.json
================================================
{
"dataset_reader": {
"class_name": "huggingface_dataset_reader",
"path": "{COMPETITION}",
"name": "{TASK}",
"train": "train",
"valid": "validation",
"test": "test"
},
"dataset_iterator": {
"class_name": "huggingface_dataset_iterator",
"features": ["sentence1", "sentence2"],
"label": "label",
"seed": 42
},
"chainer": {
"in": ["sentence1", "sentence2"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{BASE_MODEL}",
"do_lower_case": false,
"max_seq_length": 256,
"in": ["sentence1", "sentence2"],
"out": ["bert_features"]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": ["y"],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": ["y"],
"out": ["y_ids"]
},
{
"in": ["y_ids"],
"out": ["y_onehot"],
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{BASE_MODEL}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 1e-06
},
"learning_rate_drop_patience": 3,
"learning_rate_drop_div": 2.0,
"in": ["bert_features"],
"in_y": ["y_ids"],
"out": ["y_pred_probas"]
},
{
"in": ["y_pred_probas"],
"out": ["y_pred_ids"],
"class_name": "proba2labels",
"max_proba": true
},
{
"in": ["y_pred_ids"],
"out": ["y_pred_labels"],
"ref": "classes_vocab"
}
],
"out": ["y_pred_labels"]
},
"train": {
"batch_size": 4,
"metrics": ["accuracy"],
"epochs": 2,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"show_examples": false,
"evaluation_targets": ["valid"],
"class_name": "torch_trainer",
"tensorboard_log_dir": "{MODEL_PATH}/",
"pytest_max_batches": 2
},
"metadata": {
"variables": {
"BASE_MODEL": "roberta-large",
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"COMPETITION": "glue",
"TASK": "mrpc",
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
},
"download": [
{
"url": "http://files.deeppavlov.ai/v1/glue/glue_mrpc_roberta.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/glue/glue_qnli_roberta.json
================================================
{
"dataset_reader": {
"class_name": "huggingface_dataset_reader",
"path": "{COMPETITION}",
"name": "{TASK}",
"train": "train",
"valid": "validation",
"test": "test"
},
"dataset_iterator": {
"class_name": "huggingface_dataset_iterator",
"features": ["question", "sentence"],
"label": "label",
"seed": 42
},
"chainer": {
"in": ["question", "sentence"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{BASE_MODEL}",
"do_lower_case": false,
"max_seq_length": 128,
"in": ["question", "sentence"],
"out": ["bert_features"]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": ["y"],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": ["y"],
"out": ["y_ids"]
},
{
"in": ["y_ids"],
"out": ["y_onehot"],
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{BASE_MODEL}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 2e-05
},
"learning_rate_drop_patience": 3,
"learning_rate_drop_div": 2.0,
"in": ["bert_features"],
"in_y": ["y_ids"],
"out": ["y_pred_probas"]
},
{
"in": ["y_pred_probas"],
"out": ["y_pred_ids"],
"class_name": "proba2labels",
"max_proba": true
},
{
"in": ["y_pred_ids"],
"out": ["y_pred_labels"],
"ref": "classes_vocab"
}
],
"out": ["y_pred_labels"]
},
"train": {
"batch_size": 16,
"metrics": ["accuracy"],
"validation_patience": 10,
"val_every_n_batches": 250,
"log_every_n_batches": 250,
"show_examples": false,
"evaluation_targets": ["train", "valid"],
"class_name": "torch_trainer",
"tensorboard_log_dir": "{MODEL_PATH}/",
"pytest_max_batches": 2
},
"metadata": {
"variables": {
"BASE_MODEL": "roberta-large",
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"COMPETITION": "glue",
"TASK": "qnli",
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
},
"download": [
{
"url": "http://files.deeppavlov.ai/v1/glue/glue_qnli_roberta.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/glue/glue_qqp_roberta.json
================================================
{
"dataset_reader": {
"class_name": "huggingface_dataset_reader",
"path": "{COMPETITION}",
"name": "{TASK}",
"train": "train",
"valid": "validation",
"test": "test"
},
"dataset_iterator": {
"class_name": "huggingface_dataset_iterator",
"features": ["question1", "question2"],
"label": "label",
"use_label_name": false,
"seed": 42
},
"chainer": {
"in": ["question1", "question2"],
"in_y": ["y_ids"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{BASE_MODEL}",
"do_lower_case": false,
"max_seq_length": 128,
"in": ["question1", "question2"],
"out": ["bert_features"]
},
{
"in": ["y_ids"],
"out": ["y_onehot"],
"class_name": "one_hotter",
"depth": 2,
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": 2,
"return_probas": true,
"pretrained_bert": "{BASE_MODEL}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 2e-05
},
"learning_rate_drop_patience": 3,
"learning_rate_drop_div": 2.0,
"in": ["bert_features"],
"in_y": ["y_ids"],
"out": ["y_pred_probas"]
},
{
"in": ["y_pred_probas"],
"out": ["y_pred_ids"],
"class_name": "proba2labels",
"max_proba": true
}
],
"out": ["y_pred_ids"]
},
"train": {
"batch_size": 16,
"metrics": [
"f1",
"accuracy"
],
"validation_patience": 10,
"val_every_n_batches": 250,
"log_every_n_batches": 250,
"show_examples": false,
"evaluation_targets": ["train", "valid"],
"class_name": "torch_trainer",
"tensorboard_log_dir": "{MODEL_PATH}/",
"pytest_max_batches": 2
},
"metadata": {
"variables": {
"BASE_MODEL": "roberta-large",
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"COMPETITION": "glue",
"TASK": "qqp",
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
},
"download": [
{
"url": "http://files.deeppavlov.ai/v1/glue/glue_qqp_roberta.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/glue/glue_rte_cased_bert_torch.json
================================================
{
"dataset_reader": {
"class_name": "huggingface_dataset_reader",
"path": "glue",
"name": "rte",
"train": "train",
"valid": "validation",
"test": "test"
},
"dataset_iterator": {
"class_name": "huggingface_dataset_iterator",
"features": ["sentence1", "sentence2"],
"label": "label",
"seed": 42
},
"chainer": {
"in": ["sentence1", "sentence2"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{BASE_MODEL}",
"do_lower_case": false,
"max_seq_length": 256,
"in": ["sentence1", "sentence2"],
"out": ["bert_features"]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": ["y"],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": ["y"],
"out": ["y_ids"]
},
{
"in": ["y_ids"],
"out": ["y_onehot"],
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{BASE_MODEL}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 2e-05
},
"learning_rate_drop_patience": 3,
"learning_rate_drop_div": 2.0,
"in": ["bert_features"],
"in_y": ["y_ids"],
"out": ["y_pred_probas"]
},
{
"in": ["y_pred_probas"],
"out": ["y_pred_ids"],
"class_name": "proba2labels",
"max_proba": true
},
{
"in": ["y_pred_ids"],
"out": ["y_pred_labels"],
"ref": "classes_vocab"
}
],
"out": ["y_pred_labels"]
},
"train": {
"batch_size": 32,
"metrics": ["accuracy"],
"validation_patience": 10,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"show_examples": false,
"evaluation_targets": ["train", "valid"],
"class_name": "torch_trainer",
"tensorboard_log_dir": "{MODEL_PATH}/",
"pytest_max_batches": 2
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/glue_rte_torch_cased_bert",
"BASE_MODEL": "bert-base-cased"
}
}
}
================================================
FILE: deeppavlov/configs/classifiers/glue/glue_rte_roberta_mnli.json
================================================
{
"dataset_reader": {
"class_name": "huggingface_dataset_reader",
"path": "{COMPETITION}",
"name": "{TASK}",
"train": "train",
"valid": "validation",
"test": "test"
},
"dataset_iterator": {
"class_name": "huggingface_dataset_iterator",
"features": ["sentence1", "sentence2"],
"label": "label",
"seed": 42
},
"chainer": {
"in": ["sentence1", "sentence2"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{BASE_MODEL}",
"do_lower_case": false,
"max_seq_length": 256,
"in": ["sentence1", "sentence2"],
"out": ["bert_features"]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": ["y"],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": ["y"],
"out": ["y_ids"]
},
{
"in": ["y_ids"],
"out": ["y_onehot"],
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{BASE_MODEL}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 1e-06
},
"learning_rate_drop_patience": 3,
"learning_rate_drop_div": 2.0,
"in": ["bert_features"],
"in_y": ["y_ids"],
"out": ["y_pred_probas"]
},
{
"in": ["y_pred_probas"],
"out": ["y_pred_ids"],
"class_name": "proba2labels",
"max_proba": true
},
{
"in": ["y_pred_ids"],
"out": ["y_pred_labels"],
"ref": "classes_vocab"
}
],
"out": ["y_pred_labels"]
},
"train": {
"batch_size": 4,
"metrics": ["accuracy"],
"epochs": 2,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"show_examples": false,
"evaluation_targets": ["valid"],
"class_name": "torch_trainer",
"tensorboard_log_dir": "{MODEL_PATH}/",
"pytest_max_batches": 2
},
"metadata": {
"variables": {
"BASE_MODEL": "roberta-large-mnli",
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"COMPETITION": "glue",
"TASK": "rte",
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
},
"download": [
{
"url": "http://files.deeppavlov.ai/0.16/classifiers/glue_rte.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/glue/glue_sst2_roberta.json
================================================
{
"dataset_reader": {
"class_name": "huggingface_dataset_reader",
"path": "{COMPETITION}",
"name": "{TASK}",
"train": "train",
"valid": "validation",
"test": "test"
},
"dataset_iterator": {
"class_name": "huggingface_dataset_iterator",
"features": "sentence",
"label": "label",
"seed": 42
},
"chainer": {
"in": ["x"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "bert-base-cased",
"do_lower_case": false,
"max_seq_length": 64,
"in": ["x"],
"out": ["bert_features"]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": ["y"],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": ["y"],
"out": ["y_ids"]
},
{
"in": ["y_ids"],
"out": ["y_onehot"],
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "bert-base-cased",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 2e-05
},
"learning_rate_drop_patience": 3,
"learning_rate_drop_div": 2.0,
"in": ["bert_features"],
"in_y": ["y_ids"],
"out": ["y_pred_probas"]
},
{
"in": ["y_pred_probas"],
"out": ["y_pred_ids"],
"class_name": "proba2labels",
"max_proba": true
},
{
"in": ["y_pred_ids"],
"out": ["y_pred_labels"],
"ref": "classes_vocab"
}
],
"out": ["y_pred_labels"]
},
"train": {
"batch_size": 128,
"metrics": ["accuracy"],
"validation_patience": 10,
"val_every_n_batches": 250,
"log_every_n_batches": 250,
"show_examples": false,
"evaluation_targets": ["train", "valid"],
"class_name": "torch_trainer",
"tensorboard_log_dir": "{MODEL_PATH}/",
"pytest_max_batches": 2
},
"metadata": {
"variables": {
"BASE_MODEL": "roberta-large",
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"COMPETITION": "glue",
"TASK": "sst2",
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
},
"download": [
{
"url": "http://files.deeppavlov.ai/v1/glue/glue_sst2_roberta.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/glue/glue_stsb_roberta.json
================================================
{
"dataset_reader": {
"class_name": "huggingface_dataset_reader",
"path": "{COMPETITION}",
"name": "{TASK}",
"train": "train",
"valid": "validation",
"test": "test"
},
"dataset_iterator": {
"class_name": "huggingface_dataset_iterator",
"features": ["sentence1", "sentence2"],
"label": "label",
"use_label_name": false,
"seed": 42
},
"chainer": {
"in": ["sentence1", "sentence2"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{BASE_MODEL}",
"do_lower_case": false,
"max_seq_length": 64,
"in": ["sentence1", "sentence2"],
"out": ["bert_features"]
},
{
"class_name": "torch_transformers_classifier",
"n_classes": 1,
"return_probas": false,
"pretrained_bert": "{BASE_MODEL}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 2e-05
},
"learning_rate_drop_patience": 3,
"learning_rate_drop_div": 2.0,
"in": ["bert_features"],
"in_y": ["y"],
"out": ["y_pred"]
}
],
"out": ["y_pred"]
},
"train": {
"batch_size": 32,
"metrics": [
"pearson_correlation",
"spearman_correlation"
],
"validation_patience": 10,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"show_examples": false,
"evaluation_targets": ["train", "valid"],
"class_name": "torch_trainer",
"tensorboard_log_dir": "{MODEL_PATH}/",
"pytest_max_batches": 2
},
"metadata": {
"variables": {
"BASE_MODEL": "roberta-large",
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"COMPETITION": "glue",
"TASK": "stsb",
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
},
"download": [
{
"url": "http://files.deeppavlov.ai/v1/glue/glue_stsb_roberta.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/glue/glue_wnli_roberta.json
================================================
{
"dataset_reader": {
"class_name": "huggingface_dataset_reader",
"path": "{COMPETITION}",
"name": "{TASK}",
"train": "train",
"valid": "validation",
"test": "test"
},
"dataset_iterator": {
"class_name": "huggingface_dataset_iterator",
"features": ["sentence1", "sentence2"],
"label": "label",
"seed": 42
},
"chainer": {
"in": ["sentence1", "sentence2"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{BASE_MODEL}",
"do_lower_case": false,
"max_seq_length": 192,
"truncation": "longest_first",
"padding": "longest",
"in": ["sentence1", "sentence2"],
"out": ["bert_features"]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": ["y"],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": ["y"],
"out": ["y_ids"]
},
{
"in": ["y_ids"],
"out": ["y_onehot"],
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{BASE_MODEL}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 1e-05
},
"learning_rate_drop_patience": 3,
"learning_rate_drop_div": 2.0,
"in": ["bert_features"],
"in_y": ["y_ids"],
"out": ["y_pred_probas"]
},
{
"in": ["y_pred_probas"],
"out": ["y_pred_ids"],
"class_name": "proba2labels",
"max_proba": true
},
{
"in": ["y_pred_ids"],
"out": ["y_pred_labels"],
"ref": "classes_vocab"
}
],
"out": ["y_pred_labels"]
},
"train": {
"batch_size": 8,
"metrics": ["accuracy"],
"epochs": 1,
"val_every_n_batches": 250,
"log_every_n_batches": 250,
"show_examples": false,
"evaluation_targets": ["train", "valid"],
"class_name": "torch_trainer",
"tensorboard_log_dir": "{MODEL_PATH}/",
"pytest_max_batches": 2
},
"metadata": {
"variables": {
"BASE_MODEL": "roberta-large",
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"COMPETITION": "glue",
"TASK": "wnli",
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
},
"download": [
{
"url": "http://files.deeppavlov.ai/0.16/classifiers/glue_wnli_roberta.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/insults_kaggle_bert.json
================================================
{
"dataset_reader": {
"class_name": "basic_classification_reader",
"x": "Comment",
"y": "Class",
"data_path": "{DOWNLOADS_PATH}/insults_data"
},
"dataset_iterator": {
"class_name": "basic_classification_iterator",
"seed": 42
},
"chainer": {
"in": [
"x"
],
"in_y": [
"y"
],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{TRANSFORMER}",
"do_lower_case": true,
"max_seq_length": 64,
"in": [
"x"
],
"out": [
"bert_features"
]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": [
"y"
],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": [
"y"
],
"out": [
"y_ids"
]
},
{
"in": [
"y_ids"
],
"out": [
"y_onehot"
],
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{TRANSFORMER}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 1e-05
},
"learning_rate_drop_patience": 5,
"learning_rate_drop_div": 2.0,
"in": [
"bert_features"
],
"in_y": [
"y_ids"
],
"out": [
"y_pred_probas"
]
},
{
"in": [
"y_pred_probas"
],
"out": [
"y_pred_ids"
],
"class_name": "proba2labels",
"max_proba": true
},
{
"in": [
"y_pred_ids"
],
"out": [
"y_pred_labels"
],
"ref": "classes_vocab"
}
],
"out": [
"y_pred_labels"
]
},
"train": {
"epochs": 100,
"batch_size": 64,
"metrics": [
{
"name": "roc_auc",
"inputs": [
"y_onehot",
"y_pred_probas"
]
},
"accuracy",
"f1_macro"
],
"validation_patience": 5,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"show_examples": false,
"evaluation_targets": [
"train",
"valid",
"test"
],
"class_name": "torch_trainer"
},
"metadata": {
"variables": {
"TRANSFORMER": "bert-base-uncased",
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/insults_kaggle_torch_bert"
},
"download": [
{
"url": "http://files.deeppavlov.ai/datasets/insults_data.tar.gz",
"subdir": "{DOWNLOADS_PATH}"
},
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/insults_kaggle_torch_bert_v5.tar.gz",
"subdir": "{MODELS_PATH}/classifiers"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L.json
================================================
{
"dataset_reader": {
"class_name": "paraphraser_reader",
"data_path": "{DOWNLOADS_PATH}/paraphraser_data",
"do_lower_case": false
},
"dataset_iterator": {
"class_name": "siamese_iterator",
"seed": 243,
"len_valid": 500
},
"chainer": {
"in": ["text_a", "text_b"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{TRANSFORMER}",
"do_lower_case": false,
"max_seq_length": 64,
"in": ["text_a", "text_b"],
"out": ["bert_features"]
},
{
"class_name": "torch_transformers_classifier",
"n_classes": 2,
"return_probas": false,
"pretrained_bert": "{TRANSFORMER}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"attention_probs_keep_prob": 0.11,
"hidden_keep_prob": 1.0,
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 1.89e-05
},
"learning_rate_drop_patience": 3,
"learning_rate_drop_div": 1.5,
"in": [
"bert_features"
],
"in_y": [
"y"
],
"out": [
"predictions"
]
}
],
"out": ["predictions"]
},
"train": {
"epochs": 100,
"batch_size": 64,
"metrics": [
"f1",
"accuracy"
],
"validation_patience": 7,
"val_every_n_batches": 50,
"log_every_n_batches": 50,
"evaluation_targets": [
"train",
"valid",
"test"
],
"tensorboard_log_dir": "{MODEL_PATH}/",
"class_name": "torch_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"TRANSFORMER": "DeepPavlov/distilrubert-tiny-cased-conversational",
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/paraphraser_convers_distilrubert_2L"
},
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/paraphraser_convers_distilrubert_2L.tar.gz",
"subdir": "{MODELS_PATH}"
},
{
"url": "http://files.deeppavlov.ai/datasets/paraphraser.zip",
"subdir": "{DOWNLOADS_PATH}/paraphraser_data"
},
{
"url": "http://files.deeppavlov.ai/datasets/paraphraser_gold.zip",
"subdir": "{DOWNLOADS_PATH}/paraphraser_data"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L.json
================================================
{
"dataset_reader": {
"class_name": "paraphraser_reader",
"data_path": "{DOWNLOADS_PATH}/paraphraser_data",
"do_lower_case": false
},
"dataset_iterator": {
"class_name": "siamese_iterator",
"seed": 243,
"len_valid": 500
},
"chainer": {
"in": ["text_a", "text_b"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{TRANSFORMER}",
"do_lower_case": false,
"max_seq_length": 64,
"in": ["text_a", "text_b"],
"out": ["bert_features"]
},
{
"class_name": "torch_transformers_classifier",
"n_classes": 2,
"return_probas": false,
"pretrained_bert": "{TRANSFORMER}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"attention_probs_keep_prob": 0.0,
"hidden_keep_prob": 0.67,
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 7.22e-05
},
"learning_rate_drop_patience": 3,
"learning_rate_drop_div": 1.5,
"in": [
"bert_features"
],
"in_y": [
"y"
],
"out": [
"predictions"
]
}
],
"out": ["predictions"]
},
"train": {
"epochs": 100,
"batch_size": 64,
"metrics": [
"f1",
"accuracy"
],
"validation_patience": 7,
"val_every_n_batches": 50,
"log_every_n_batches": 50,
"evaluation_targets": [
"train",
"valid",
"test"
],
"tensorboard_log_dir": "{MODEL_PATH}/",
"class_name": "torch_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"TRANSFORMER": "DeepPavlov/distilrubert-base-cased-conversational",
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/paraphraser_convers_distilrubert_6L"
},
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/paraphraser_convers_distilrubert_6L.tar.gz",
"subdir": "{MODELS_PATH}"
},
{
"url": "http://files.deeppavlov.ai/datasets/paraphraser.zip",
"subdir": "{DOWNLOADS_PATH}/paraphraser_data"
},
{
"url": "http://files.deeppavlov.ai/datasets/paraphraser_gold.zip",
"subdir": "{DOWNLOADS_PATH}/paraphraser_data"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/paraphraser_rubert.json
================================================
{
"dataset_reader": {
"class_name": "paraphraser_reader",
"data_path": "{DOWNLOADS_PATH}/paraphraser_data",
"do_lower_case": false
},
"dataset_iterator": {
"class_name": "siamese_iterator",
"seed": 243,
"len_valid": 500
},
"chainer": {
"in": ["text_a", "text_b"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{TRANSFORMER}",
"do_lower_case": false,
"max_seq_length": 64,
"in": ["text_a", "text_b"],
"out": ["bert_features"]
},
{
"class_name": "torch_transformers_classifier",
"n_classes": 2,
"pretrained_bert": "{TRANSFORMER}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {"lr": 2e-05},
"learning_rate_drop_patience": 3,
"learning_rate_drop_div": 2.0,
"in": ["bert_features"],
"in_y": ["y"],
"out": ["predictions"]
}
],
"out": ["predictions"]
},
"train": {
"batch_size": 64,
"pytest_max_batches": 2,
"train_metrics": ["f1", "acc"],
"metrics": ["f1", "acc"],
"validation_patience": 7,
"val_every_n_batches": 50,
"log_every_n_batches": 50,
"evaluation_targets": ["valid", "test"],
"class_name": "torch_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/paraphraser_rubert_torch",
"TRANSFORMER": "DeepPavlov/rubert-base-cased"
},
"download": [
{
"url": "http://files.deeppavlov.ai/datasets/paraphraser.zip",
"subdir": "{DOWNLOADS_PATH}/paraphraser_data"
},
{
"url": "http://files.deeppavlov.ai/datasets/paraphraser_gold.zip",
"subdir": "{DOWNLOADS_PATH}/paraphraser_data"
},
{
"url": "http://files.deeppavlov.ai/v1/classifiers/paraphraser_rubert/paraphraser_rubert_v1.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/query_pr.json
================================================
{
"dataset_reader": {
"class_name": "sq_reader",
"data_path": "{DOWNLOADS_PATH}/query_prediction/query_prediction_eng.pickle"
},
"dataset_iterator": {
"class_name": "basic_classification_iterator",
"seed": 42
},
"chainer": {
"in": ["x"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{TRANSFORMER}",
"do_lower_case": false,
"max_seq_length": 64,
"in": ["x"],
"out": ["bert_features"]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": ["y"],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": ["y"],
"out": ["y_ids"]
},
{
"in": ["y_ids"],
"out": ["y_onehot"],
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{TRANSFORMER}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {"lr": 1e-05},
"learning_rate_drop_patience": 5,
"learning_rate_drop_div": 2.0,
"in": ["bert_features"],
"in_y": ["y_ids"],
"out": ["y_pred_probas"]
},
{
"in": ["y_pred_probas"],
"out": ["y_pred_ids"],
"class_name": "proba2labels",
"max_proba": true
},
{
"in": ["y_pred_ids"],
"out": ["y_pred_labels"],
"ref": "classes_vocab"
}
],
"out": ["y_pred_labels"]
},
"train": {
"epochs": 100,
"batch_size": 64,
"metrics": [
"f1_macro",
"accuracy",
{
"name": "roc_auc",
"inputs": ["y_onehot", "y_pred_probas"]
}
],
"validation_patience": 10,
"val_every_n_batches": 100,
"log_every_n_batches": 100,
"show_examples": false,
"evaluation_targets": ["train", "valid", "test"],
"class_name": "torch_trainer"
},
"metadata": {
"variables": {
"TRANSFORMER": "haisongzhang/roberta-tiny-cased",
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/query_prediction_eng"
},
"download": [
{
"url": "http://files.deeppavlov.ai/kbqa/wikidata/query_prediction_eng.tar.gz",
"subdir": "{MODELS_PATH}/classifiers/query_prediction_eng"
},
{
"url": "http://files.deeppavlov.ai/kbqa/wikidata/query_prediction_eng.pickle",
"subdir": "{DOWNLOADS_PATH}/query_prediction"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/rusentiment_bert.json
================================================
{
"dataset_reader": {
"class_name": "basic_classification_reader",
"x": "text",
"y": "label",
"data_path": "{DOWNLOADS_PATH}/rusentiment/",
"train": "rusentiment_random_posts.csv",
"test": "rusentiment_test.csv"
},
"dataset_iterator": {
"class_name": "basic_classification_iterator",
"seed": 42,
"split_seed": 23,
"field_to_split": "train",
"split_fields": [
"train",
"valid"
],
"split_proportions": [
0.9,
0.1
]
},
"chainer": {
"in": [
"x"
],
"in_y": [
"y"
],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{TRANSFORMER}",
"do_lower_case": false,
"max_seq_length": 64,
"in": [
"x"
],
"out": [
"bert_features"
]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": [
"y"
],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": "y",
"out": "y_ids"
},
{
"in": "y_ids",
"out": "y_onehot",
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{TRANSFORMER}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer_parameters": {"lr": 1e-05},
"learning_rate_drop_patience": 5,
"learning_rate_drop_div": 2.0,
"in": [
"bert_features"
],
"in_y": [
"y_onehot"
],
"out": [
"y_pred_probas"
]
},
{
"in": "y_pred_probas",
"out": "y_pred_ids",
"class_name": "proba2labels",
"max_proba": true
},
{
"in": "y_pred_ids",
"out": "y_pred_labels",
"ref": "classes_vocab"
}
],
"out": [
"y_pred_labels"
]
},
"train": {
"batch_size": 64,
"epochs": 100,
"metrics": [
"f1_weighted",
"f1_macro",
"accuracy",
{
"name": "roc_auc",
"inputs": [
"y_onehot",
"y_pred_probas"
]
}
],
"show_examples": false,
"pytest_max_batches": 2,
"validation_patience": 5,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"evaluation_targets": [
"train",
"valid",
"test"
],
"class_name": "torch_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/rusentiment_bert_torch",
"TRANSFORMER": "bert-base-multilingual-cased"
},
"download": [
{
"url": "http://files.deeppavlov.ai/v1/classifiers/rusentiment_bert/rusentiment_bert_torch.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/rusentiment_convers_bert.json
================================================
{
"dataset_reader": {
"class_name": "basic_classification_reader",
"x": "text",
"y": "label",
"data_path": "{DOWNLOADS_PATH}/rusentiment/",
"train": "rusentiment_random_posts.csv",
"test": "rusentiment_test.csv"
},
"dataset_iterator": {
"class_name": "basic_classification_iterator",
"seed": 42,
"split_seed": 23,
"field_to_split": "train",
"split_fields": [
"train",
"valid"
],
"split_proportions": [
0.9,
0.1
]
},
"chainer": {
"in": [
"x"
],
"in_y": [
"y"
],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{TRANSFORMER}",
"do_lower_case": false,
"max_seq_length": 64,
"in": [
"x"
],
"out": [
"bert_features"
]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": [
"y"
],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": "y",
"out": "y_ids"
},
{
"in": "y_ids",
"out": "y_onehot",
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{TRANSFORMER}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer_parameters": {"lr": 1e-05},
"learning_rate_drop_patience": 5,
"learning_rate_drop_div": 2.0,
"in": [
"bert_features"
],
"in_y": [
"y_onehot"
],
"out": [
"y_pred_probas"
]
},
{
"in": "y_pred_probas",
"out": "y_pred_ids",
"class_name": "proba2labels",
"max_proba": true
},
{
"in": "y_pred_ids",
"out": "y_pred_labels",
"ref": "classes_vocab"
}
],
"out": [
"y_pred_labels"
]
},
"train": {
"batch_size": 64,
"epochs": 100,
"metrics": [
"f1_weighted",
"f1_macro",
"accuracy",
{
"name": "roc_auc",
"inputs": [
"y_onehot",
"y_pred_probas"
]
}
],
"show_examples": false,
"pytest_max_batches": 2,
"validation_patience": 5,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"evaluation_targets": [
"train",
"valid",
"test"
],
"class_name": "torch_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/rusentiment_convers_bert_torch",
"TRANSFORMER": "DeepPavlov/rubert-base-cased-conversational"
},
"download": [
{
"url": "http://files.deeppavlov.ai/v1/classifiers/rusentiment_convers_bert/rusentiment_convers_bert_torch.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L.json
================================================
{
"dataset_reader": {
"class_name": "basic_classification_reader",
"x": "text",
"y": "label",
"data_path": "{DOWNLOADS_PATH}/rusentiment/",
"train": "rusentiment_random_posts.csv",
"test": "rusentiment_test.csv"
},
"dataset_iterator": {
"class_name": "basic_classification_iterator",
"seed": 42,
"split_seed": 23,
"field_to_split": "train",
"split_fields": [
"train",
"valid"
],
"split_proportions": [
0.9,
0.1
]
},
"chainer": {
"in": [
"x"
],
"in_y": [
"y"
],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{TRANSFORMER}",
"do_lower_case": true,
"max_seq_length": 64,
"in": [
"x"
],
"out": [
"bert_features"
]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": [
"y"
],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": "y",
"out": "y_ids"
},
{
"in": "y_ids",
"out": "y_onehot",
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{TRANSFORMER}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"attention_probs_keep_prob": 0.78,
"hidden_keep_prob": 0.89,
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 7.22e-05
},
"learning_rate_drop_patience": 5,
"learning_rate_drop_div": 1.5,
"in": [
"bert_features"
],
"in_y": [
"y_ids"
],
"out": [
"y_pred_probas"
]
},
{
"in": "y_pred_probas",
"out": "y_pred_ids",
"class_name": "proba2labels",
"max_proba": true
},
{
"in": "y_pred_ids",
"out": "y_pred_labels",
"ref": "classes_vocab"
}
],
"out": [
"y_pred_labels"
]
},
"train": {
"epochs": 100,
"batch_size": 64,
"metrics": [
"f1_weighted",
"f1_macro",
"accuracy",
{
"name": "roc_auc",
"inputs": [
"y_onehot",
"y_pred_probas"
]
}
],
"validation_patience": 5,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"show_examples": false,
"evaluation_targets": [
"train",
"valid",
"test"
],
"tensorboard_log_dir": "{MODEL_PATH}/",
"class_name": "torch_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"TRANSFORMER": "DeepPavlov/distilrubert-tiny-cased-conversational",
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/rusentiment_convers_distilrubert_2L"
},
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/rusentiment_convers_distilrubert_2L.tar.gz",
"subdir": "{MODELS_PATH}/classifiers/"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L.json
================================================
{
"dataset_reader": {
"class_name": "basic_classification_reader",
"x": "text",
"y": "label",
"data_path": "{DOWNLOADS_PATH}/rusentiment/",
"train": "rusentiment_random_posts.csv",
"test": "rusentiment_test.csv"
},
"dataset_iterator": {
"class_name": "basic_classification_iterator",
"seed": 42,
"split_seed": 23,
"field_to_split": "train",
"split_fields": [
"train",
"valid"
],
"split_proportions": [
0.9,
0.1
]
},
"chainer": {
"in": [
"x"
],
"in_y": [
"y"
],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{TRANSFORMER}",
"do_lower_case": true,
"max_seq_length": 64,
"in": [
"x"
],
"out": [
"bert_features"
]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": [
"y"
],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": "y",
"out": "y_ids"
},
{
"in": "y_ids",
"out": "y_onehot",
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{TRANSFORMER}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"attention_probs_keep_prob": 0.78,
"hidden_keep_prob": 0,
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 4.56e-05
},
"learning_rate_drop_patience": 5,
"learning_rate_drop_div": 1.5,
"in": [
"bert_features"
],
"in_y": [
"y_ids"
],
"out": [
"y_pred_probas"
]
},
{
"in": "y_pred_probas",
"out": "y_pred_ids",
"class_name": "proba2labels",
"max_proba": true
},
{
"in": "y_pred_ids",
"out": "y_pred_labels",
"ref": "classes_vocab"
}
],
"out": [
"y_pred_labels"
]
},
"train": {
"epochs": 100,
"batch_size": 64,
"metrics": [
"f1_weighted",
"f1_macro",
"accuracy",
{
"name": "roc_auc",
"inputs": [
"y_onehot",
"y_pred_probas"
]
}
],
"validation_patience": 5,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"show_examples": false,
"evaluation_targets": [
"train",
"valid",
"test"
],
"tensorboard_log_dir": "{MODEL_PATH}/",
"class_name": "torch_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"TRANSFORMER": "DeepPavlov/distilrubert-base-cased-conversational",
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/rusentiment_convers_distilrubert_6L"
},
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/rusentiment_convers_distilrubert_6L.tar.gz",
"subdir": "{MODELS_PATH}/classifiers/"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/sentiment_sst_conv_bert.json
================================================
{
"dataset_reader": {
"class_name": "basic_classification_reader",
"x": "text",
"y": "fine_grained_label",
"data_path": "{DOWNLOADS_PATH}/stanfordSentimentTreebank",
"train": "train_fine_grained.csv",
"valid": "valid_fine_grained.csv",
"test": "test_fine_grained.csv"
},
"dataset_iterator": {
"class_name": "basic_classification_iterator",
"seed": 42
},
"chainer": {
"in": [
"x"
],
"in_y": [
"y"
],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{TRANSFORMER}",
"do_lower_case": false,
"max_seq_length": 64,
"in": [
"x"
],
"out": [
"bert_features"
]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": [
"y"
],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": "y",
"out": "y_ids"
},
{
"in": "y_ids",
"out": "y_onehot",
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{TRANSFORMER}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer_parameters": {"lr": 1e-05},
"learning_rate_drop_patience": 5,
"learning_rate_drop_div": 2.0,
"in": [
"bert_features"
],
"in_y": [
"y_onehot"
],
"out": [
"y_pred_probas"
]
},
{
"in": "y_pred_probas",
"out": "y_pred_ids",
"class_name": "proba2labels",
"max_proba": true
},
{
"in": "y_pred_ids",
"out": "y_pred_labels",
"ref": "classes_vocab"
}
],
"out": [
"y_pred_labels"
]
},
"train": {
"epochs": 100,
"batch_size": 64,
"metrics": [
"accuracy",
{
"name": "roc_auc",
"inputs": [
"y_onehot",
"y_pred_probas"
]
},
"f1_macro"
],
"validation_patience": 5,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"show_examples": false,
"evaluation_targets": [
"train",
"valid",
"test"
],
"class_name": "torch_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/sentiment_sst_bert_torch",
"TRANSFORMER": "DeepPavlov/bert-base-cased-conversational"
},
"download": [
{
"url": "http://files.deeppavlov.ai/datasets/stanfordSentimentTreebank.zip",
"subdir": "{DOWNLOADS_PATH}"
},
{
"url": "http://files.deeppavlov.ai/v1/classifiers/sentiment_sst_bert/sentiment_sst_bert_torch.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/sentiment_twitter.json
================================================
{
"dataset_reader": {
"class_name": "basic_classification_reader",
"x": "Twit",
"y": "Class",
"data_path": "{DOWNLOADS_PATH}/sentiment_twitter_data"
},
"dataset_iterator": {
"class_name": "basic_classification_iterator",
"seed": 42
},
"chainer": {
"in": [
"x"
],
"in_y": [
"y"
],
"pipe": [
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": [
"y"
],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": "y",
"out": "y_ids"
},
{
"in": "x",
"out": "x_tok",
"id": "my_tokenizer",
"class_name": "nltk_tokenizer",
"tokenizer": "wordpunct_tokenize"
},
{
"in": "x_tok",
"out": "x_emb",
"id": "my_embedder",
"class_name": "fasttext",
"load_path": "{DOWNLOADS_PATH}/embeddings/ft_native_300_ru_wiki_lenta_nltk_wordpunct_tokenize.bin",
"pad_zero": true
},
{
"in": "y_ids",
"out": "y_onehot",
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"in": [
"x_emb"
],
"in_y": [
"y_ids"
],
"out": [
"y_pred_probas"
],
"main": true,
"class_name": "torch_text_classification_model",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"embedding_size": "#my_embedder.dim",
"n_classes": "#classes_vocab.len",
"kernel_sizes_cnn": [
3,
5,
7
],
"filters_cnn": 256,
"dropout_rate": 0.5,
"dense_size": 64,
"optimizer": "SGD",
"optimizer_parameters": {
"lr": 0.0001,
"momentum": 0.9,
"weight_decay": 0.0001
}
},
{
"in": "y_pred_probas",
"out": "y_pred_ids",
"class_name": "proba2labels",
"max_proba": true
},
{
"in": "y_pred_ids",
"out": "y_pred_labels",
"ref": "classes_vocab"
}
],
"out": [
"y_pred_labels"
]
},
"train": {
"epochs": 100,
"batch_size": 128,
"metrics": [
"accuracy",
"f1_macro",
{
"name": "roc_auc",
"inputs": ["y_onehot", "y_pred_probas"]
}
],
"validation_patience": 5,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"show_examples": false,
"evaluation_targets": [
"train",
"valid",
"test"
],
"class_name": "torch_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/sentiment_twitter_torch"
},
"download": [
{
"url": "http://files.deeppavlov.ai/datasets/sentiment_twitter_data.tar.gz",
"subdir": "{DOWNLOADS_PATH}"
},
{
"url": "http://files.deeppavlov.ai/embeddings/ft_native_300_ru_wiki_lenta_nltk_wordpunct_tokenize/ft_native_300_ru_wiki_lenta_nltk_wordpunct_tokenize.bin",
"subdir": "{DOWNLOADS_PATH}/embeddings"
},
{
"url": "http://files.deeppavlov.ai/v1/classifiers/sentiment_twitter/sentiment_twitter_torch.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/superglue/superglue_boolq_roberta_mnli.json
================================================
{
"dataset_reader": {
"class_name": "huggingface_dataset_reader",
"path": "{COMPETITION}",
"name": "{TASK}",
"train": "train",
"valid": "validation",
"test": "test",
"dev_percentage": 50
},
"dataset_iterator": {
"class_name": "huggingface_dataset_iterator",
"features": ["question", "passage"],
"label": "label",
"seed": 42
},
"chainer": {
"in": ["question", "passage"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{BASE_MODEL}",
"do_lower_case": false,
"max_seq_length": 256,
"in": ["question", "passage"],
"out": ["bert_features"]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": ["y"],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": ["y"],
"out": ["y_ids"]
},
{
"in": ["y_ids"],
"out": ["y_onehot"],
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{BASE_MODEL}",
"is_binary": "{BINARY_CLASSIFICATION}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 2e-05,
"weight_decay": 0.1
},
"learning_rate_drop_patience": 3,
"learning_rate_drop_div": 2.0,
"in": ["bert_features"],
"in_y": ["y_ids"],
"out": ["y_pred_probas"]
},
{
"in": ["y_pred_probas"],
"out": ["y_pred_ids"],
"class_name": "proba2labels",
"is_binary": "{BINARY_CLASSIFICATION}",
"confidence_threshold": 0.5
},
{
"in": ["y_pred_ids"],
"out": ["y_pred_labels"],
"ref": "classes_vocab"
}
],
"out": ["y_pred_labels"]
},
"train": {
"batch_size": 24,
"metrics": ["accuracy"],
"validation_patience": 10,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"show_examples": false,
"evaluation_targets": ["train", "valid"],
"class_name": "torch_trainer",
"tensorboard_log_dir": "{MODEL_PATH}/",
"pytest_max_batches": 2,
"pytest_batch_size": 2
},
"metadata": {
"variables": {
"BASE_MODEL": "roberta-large-mnli",
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"COMPETITION": "super_glue",
"TASK": "boolq",
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}",
"BINARY_CLASSIFICATION": true
},
"download": [
{
"url": "http://files.deeppavlov.ai/v1/superglue/superglue_boolq_roberta_mnli.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/superglue/superglue_copa_roberta.json
================================================
{
"dataset_reader": {
"class_name": "huggingface_dataset_reader",
"path": "{COMPETITION}",
"name": "{TASK}",
"train": "train",
"valid": "validation",
"test": "test"
},
"dataset_iterator": {
"class_name": "huggingface_dataset_iterator",
"features": ["contexts", "choices"],
"label": "label",
"seed": 42
},
"chainer": {
"in": ["contexts_list", "choices_list"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_multiplechoice_preprocessor",
"vocab_file": "{BASE_MODEL}",
"do_lower_case": false,
"max_seq_length": 64,
"in": ["contexts_list", "choices_list"],
"out": ["bert_features"]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": ["y"],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": ["y"],
"out": ["y_ids"]
},
{
"in": ["y_ids"],
"out": ["y_onehot"],
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_multiplechoice",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{BASE_MODEL}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 2e-05
},
"learning_rate_drop_patience": 3,
"learning_rate_drop_div": 2.0,
"in": ["bert_features"],
"in_y": ["y_ids"],
"out": ["y_pred_probas"]
},
{
"in": ["y_pred_probas"],
"out": ["y_pred_ids"],
"class_name": "proba2labels",
"max_proba": true
},
{
"in": ["y_pred_ids"],
"out": ["y_pred_labels"],
"ref": "classes_vocab"
}
],
"out": ["y_pred_labels"]
},
"train": {
"batch_size": 16,
"metrics": ["accuracy"],
"validation_patience": 10,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"show_examples": false,
"evaluation_targets": ["train", "valid"],
"class_name": "torch_trainer",
"tensorboard_log_dir": "{MODEL_PATH}/",
"pytest_max_batches": 2,
"pytest_batch_size": 2
},
"metadata": {
"variables": {
"BASE_MODEL": "roberta-large",
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"COMPETITION": "super_glue",
"TASK": "copa",
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
},
"download": [
{
"url": "http://files.deeppavlov.ai/0.17/classifiers/superglue/superglue_copa_roberta.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/superglue/superglue_record_roberta.json
================================================
{
"dataset_reader": {
"class_name": "huggingface_dataset_reader",
"path": "{COMPETITION}",
"name": "{TASK}",
"train": "train",
"valid": "validation",
"test": "test",
"downsample_ratio": [1.8, 1.8, 1],
"do_index_correction": false
},
"dataset_iterator": {
"class_name": "huggingface_dataset_iterator",
"features": ["idx", "query", "passage", "entities", "num_examples"],
"label": "label",
"seed": 42,
"use_label_name": false
},
"chainer": {
"in": ["idx", "query", "passage", "entities", "num_examples"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{BASE_MODEL}",
"do_lower_case": false,
"max_seq_length": 320,
"in": ["query", "passage"],
"out": ["bert_features"]
},
{
"class_name": "torch_transformers_classifier",
"n_classes": 2,
"return_probas": true,
"is_binary": "{BINARY_CLASSIFICATION}",
"pretrained_bert": "{BASE_MODEL}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 2e-05,
"weight_decay": 0.1
},
"learning_rate_drop_patience": 3,
"learning_rate_drop_div": 2.0,
"in": ["bert_features"],
"in_y": ["y"],
"out": ["y_pred_probas"]
},
{
"class_name": "proba2labels",
"in": ["y_pred_probas"],
"out": ["y_pred_ids"],
"is_binary": "{BINARY_CLASSIFICATION}",
"max_proba": true
},
{
"class_name": "torch_record_postprocessor",
"is_binary": "{BINARY_CLASSIFICATION}",
"in": ["idx", "y", "y_pred_probas", "entities", "num_examples"],
"out": ["record_examples"]
}
],
"out": ["y_pred_probas"]
},
"train": {
"batch_size": 24,
"train_metrics": [
{
"name": "accuracy",
"inputs": ["y", "y_pred_ids"]
}
],
"metrics": [
{
"name": "record_em_score",
"inputs": ["record_examples"]
},
{
"name": "record_f1_score",
"inputs": ["record_examples"]
},
{
"name": "accuracy",
"inputs": ["y", "y_pred_ids"]
}
],
"validation_patience": 10,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"show_examples": false,
"class_name": "torch_trainer",
"evaluation_targets": ["valid"],
"tensorboard_log_dir": "{MODEL_PATH}/",
"pytest_max_batches": 2,
"pytest_batch_size": 2
},
"metadata": {
"variables": {
"BASE_MODEL": "roberta-large",
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"COMPETITION": "super_glue",
"TASK": "record",
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}",
"BINARY_CLASSIFICATION": false
},
"download": [
{
"url": "http://files.deeppavlov.ai/0.17/classifiers/superglue/superglue_record_roberta.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/superglue/superglue_wic_bert.json
================================================
{
"dataset_reader": {
"class_name": "huggingface_dataset_reader",
"path": "{COMPETITION}",
"name": "{TASK}",
"train": "train",
"valid": "validation",
"test": "test"
},
"dataset_iterator": {
"class_name": "huggingface_dataset_iterator",
"features": ["sentence1", "sentence2"],
"label": "label",
"seed": 42
},
"chainer": {
"in": ["sentence1", "sentence2"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{BASE_MODEL}",
"do_lower_case": false,
"max_seq_length": 256,
"in": ["sentence1", "sentence2"],
"out": ["bert_features"]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": ["y"],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": ["y"],
"out": ["y_ids"]
},
{
"in": ["y_ids"],
"out": ["y_onehot"],
"class_name": "one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{BASE_MODEL}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"optimizer": "AdamW",
"optimizer_parameters": {"lr": 2e-05},
"in": ["bert_features"],
"in_y": ["y_ids"],
"out": ["y_pred_probas"]
},
{
"in": ["y_pred_probas"],
"out": ["y_pred_ids"],
"class_name": "proba2labels",
"max_proba": true
},
{
"in": ["y_pred_ids"],
"out": ["y_pred_labels"],
"ref": "classes_vocab"
}
],
"out": ["y_pred_labels"]
},
"train": {
"batch_size": 16,
"metrics": ["accuracy"],
"epochs": 10,
"validation_patience": 10,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"val_every_n_batches": 1000,
"show_examples": false,
"evaluation_targets": ["valid"],
"class_name": "torch_trainer",
"tensorboard_log_dir": "{MODEL_PATH}/",
"pytest_max_batches": 2
},
"metadata": {
"variables": {
"BASE_MODEL": "bert-base-cased",
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"COMPETITION": "super_glue",
"TASK": "wic",
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
},
"download": [
{
"url": "http://files.deeppavlov.ai/v1/superglue/superglue_wic_bert.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/classifiers/topics_distilbert_base_uncased.json
================================================
{
"dataset_reader": {
"class_name": "basic_classification_reader",
"class_sep": ";",
"x": "text",
"y": "topic",
"data_path": "{DOWNLOADS_PATH}/dp_topics_downsampled_data/",
"train": "train.csv",
"valid": "valid.csv"
},
"dataset_iterator": {
"class_name": "basic_classification_iterator",
"seed": 42
},
"chainer": {
"in": [
"x"
],
"in_y": [
"y"
],
"pipe": [
{
"class_name": "torch_transformers_preprocessor",
"vocab_file": "{TRANSFORMER}",
"do_lower_case": true,
"max_seq_length": 128,
"in": [
"x"
],
"out": [
"bert_features"
]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": [
"y"
],
"save_path": "{MODEL_PATH}/classes.dict",
"load_path": "{MODEL_PATH}/classes.dict",
"in": [
"y"
],
"out": [
"y_ids"
]
},
{
"in": [
"y_ids"
],
"out": [
"y_onehot"
],
"class_name": "one_hotter",
"id": "my_one_hotter",
"depth": "#classes_vocab.len",
"single_vector": true
},
{
"class_name": "torch_transformers_classifier",
"n_classes": "#classes_vocab.len",
"return_probas": true,
"pretrained_bert": "{TRANSFORMER}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"multilabel": true,
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 1e-05
},
"learning_rate_drop_patience": 5,
"learning_rate_drop_div": 2.0,
"in": [
"bert_features"
],
"in_y": [
"y_onehot"
],
"out": [
"y_pred_probas"
]
},
{
"in": "y_pred_probas",
"out": "y_pred_ids",
"class_name": "proba2labels",
"max_proba": false,
"confidence_threshold": 0.5
},
{
"in": "y_pred_ids",
"out": "y_pred_labels",
"ref": "classes_vocab"
},
{
"ref": "my_one_hotter",
"in": "y_pred_ids",
"out": "y_pred_onehot"
}
],
"out": [
"y_pred_labels"
]
},
"train": {
"epochs": 100,
"batch_size": 64,
"metrics": [
{
"name": "f1_macro",
"inputs": [
"y_onehot",
"y_pred_onehot"
]
},
{
"name": "f1_weighted",
"inputs": [
"y_onehot",
"y_pred_onehot"
]
},
{
"name": "accuracy",
"inputs": [
"y",
"y_pred_labels"
]
},
{
"name": "roc_auc",
"inputs": [
"y_onehot",
"y_pred_probas"
]
}
],
"validation_patience": 10,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"log_every_n_batches": 100,
"show_examples": false,
"evaluation_targets": [
"train",
"valid",
"test"
],
"tensorboard_log_dir": "{MODEL_PATH}/logs",
"class_name": "torch_trainer"
},
"metadata": {
"variables": {
"TRANSFORMER": "distilbert-base-uncased",
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/topic_distilbert_base_v0"
},
"download": [
{
"url": "http://files.deeppavlov.ai/datasets/dp_topics_downsampled_dataset_v0.tar.gz",
"subdir": "{DOWNLOADS_PATH}"
},
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/topic_distilbert_base_v0.tar.gz",
"subdir": "{MODELS_PATH}/classifiers"
}
]
}
}
================================================
FILE: deeppavlov/configs/doc_retrieval/en_ranker_pop_wiki.json
================================================
{
"dataset_reader": {
"class_name": "odqa_reader",
"data_path": "{DOWNLOADS_PATH}/odqa/enwiki",
"save_path": "{DOWNLOADS_PATH}/odqa/enwiki.db",
"dataset_format": "wiki"
},
"dataset_iterator": {
"class_name": "sqlite_iterator",
"shuffle": false,
"load_path": "{DOWNLOADS_PATH}/odqa/enwiki_l100.db"
},
"chainer": {
"in": ["docs"],
"in_y": ["doc_ids", "doc_nums"],
"out": ["pop_doc_ids"],
"pipe": [
{
"class_name": "hashing_tfidf_vectorizer",
"id": "vectorizer",
"fit_on": ["docs", "doc_ids", "doc_nums"],
"save_path": "{MODELS_PATH}/odqa/enwiki_tfidf_matrix_par_lite.npz",
"load_path": "{MODELS_PATH}/odqa/enwiki_tfidf_matrix_par_lite.npz",
"tokenizer": {
"class_name": "stream_spacy_tokenizer",
"lemmas": true,
"lowercase": true,
"filter_stopwords": true,
"ngram_range": [1, 3]
}
},
{
"class_name": "tfidf_ranker",
"top_n": 100,
"in": ["docs"],
"out": ["tfidf_doc_ids", "tfidf_doc_scores"],
"vectorizer": "#vectorizer"
},
{
"class_name": "pop_ranker",
"pop_dict_path": "{DOWNLOADS_PATH}/odqa/enwiki_popularities.json",
"load_path": "{MODELS_PATH}/odqa/logreg_3features_v2.joblib",
"top_n": 100,
"in": ["tfidf_doc_ids", "tfidf_doc_scores"],
"out": ["pop_doc_ids", "pop_doc_scores"]
}
]
},
"train": {
"batch_size": 10000,
"evaluation_targets": [],
"class_name": "fit_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models"
},
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/odqa/enwiki_l100.tar.gz",
"subdir": "{DOWNLOADS_PATH}/odqa"
},
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/odqa/enwiki_tfidf_matrix_par_lite.tar.gz",
"subdir": "{MODELS_PATH}/odqa"
},
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/odqa/enwiki_popularities.tar.gz",
"subdir": "{DOWNLOADS_PATH}/odqa"
},
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/ranking/logreg_3features_v2.joblib",
"subdir": "{MODELS_PATH}/odqa"
}
]
}
}
================================================
FILE: deeppavlov/configs/doc_retrieval/en_ranker_tfidf_wiki.json
================================================
{
"dataset_reader": {
"class_name": "odqa_reader",
"data_path": "{DOWNLOADS_PATH}/odqa/enwiki",
"save_path": "{DOWNLOADS_PATH}/odqa/enwiki.db",
"dataset_format": "wiki"
},
"dataset_iterator": {
"class_name": "sqlite_iterator",
"shuffle": false,
"load_path": "{DOWNLOADS_PATH}/odqa/enwiki_l100.db"
},
"chainer": {
"in": ["docs"],
"in_y": ["doc_ids", "doc_nums"],
"out": ["tfidf_doc_ids"],
"pipe": [
{
"class_name": "hashing_tfidf_vectorizer",
"id": "vectorizer",
"fit_on": ["docs", "doc_ids", "doc_nums"],
"save_path": "{MODELS_PATH}/odqa/enwiki_tfidf_matrix_par_lite.npz",
"load_path": "{MODELS_PATH}/odqa/enwiki_tfidf_matrix_par_lite.npz",
"tokenizer": {
"class_name": "stream_spacy_tokenizer",
"lemmas": true,
"lowercase": true,
"filter_stopwords": true,
"ngram_range": [1, 3]
}
},
{
"class_name": "tfidf_ranker",
"top_n": 100,
"in": ["docs"],
"out": ["tfidf_doc_ids", "tfidf_doc_scores"],
"vectorizer": "#vectorizer"
}
]
},
"train": {
"batch_size": 10000,
"evaluation_targets": [],
"class_name": "fit_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models"
},
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/odqa/enwiki_l100.tar.gz",
"subdir": "{DOWNLOADS_PATH}/odqa"
},
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/odqa/enwiki_tfidf_matrix_par_lite.tar.gz",
"subdir": "{MODELS_PATH}/odqa"
}
]
}
}
================================================
FILE: deeppavlov/configs/doc_retrieval/ru_ranker_tfidf_wiki.json
================================================
{
"dataset_reader": {
"class_name": "odqa_reader",
"data_path": "{DOWNLOADS_PATH}/odqa/ruwiki",
"save_path": "{DOWNLOADS_PATH}/odqa/ruwiki_par_page_compr.db",
"dataset_format": "wiki"
},
"dataset_iterator": {
"class_name": "sqlite_iterator",
"shuffle": false,
"load_path": "{DOWNLOADS_PATH}/odqa/ruwiki_par_page_compr.db"
},
"chainer": {
"in": ["docs"],
"in_y": ["doc_ids", "doc_nums"],
"out": ["tfidf_doc_ids"],
"pipe": [
{
"class_name": "hashing_tfidf_vectorizer",
"id": "vectorizer",
"fit_on": ["docs", "doc_ids", "doc_nums"],
"save_path": "{MODELS_PATH}/odqa/ruwiki_tfidf_matrix_compr.npz",
"load_path": "{MODELS_PATH}/odqa/ruwiki_tfidf_matrix_compr.npz",
"tokenizer": {
"class_name": "stream_spacy_tokenizer",
"spacy_model": "ru_core_news_sm",
"lemmas": true,
"lowercase": true,
"filter_stopwords": true,
"ngram_range": [1, 3]
}
},
{
"class_name": "tfidf_ranker",
"top_n": 100,
"in": ["docs"],
"out": ["tfidf_doc_ids", "tfidf_doc_scores"],
"vectorizer": "#vectorizer"
}
]
},
"train": {
"batch_size": 10000,
"evaluation_targets": [],
"class_name": "fit_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models"
},
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/odqa/ruwiki_par_page_compr.tar.gz",
"subdir": "{DOWNLOADS_PATH}/odqa"
},
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/odqa/ruwiki_tfidf_matrix_compr.tar.gz",
"subdir": "{MODELS_PATH}/odqa"
}
]
}
}
================================================
FILE: deeppavlov/configs/embedder/bert_embedder.json
================================================
{
"chainer": {
"in": ["texts"],
"pipe": [
{
"class_name": "transformers_bert_preprocessor",
"vocab_file": "{BERT_PATH}/vocab.txt",
"do_lower_case": false,
"max_seq_length": 512,
"in": ["texts"],
"out": ["tokens", "subword_tokens", "subword_tok_ids", "startofword_markers", "attention_mask"]
},
{
"class_name": "transformers_bert_embedder",
"bert_config_path": "{BERT_PATH}/bert_config.json",
"load_path": "{BERT_PATH}",
"truncate": true,
"in": ["subword_tok_ids", "startofword_markers", "attention_mask"],
"out": ["word_emb", "subword_emb", "max_emb", "mean_emb", "pooler_output"]
}
],
"out": ["tokens", "word_emb", "subword_tokens", "subword_emb", "max_emb", "mean_emb", "pooler_output"]
},
"train": {},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"BERT_PATH": "{DOWNLOADS_PATH}/bert_models/multi_cased_L-12_H-768_A-12_pt"
},
"labels": {},
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/bert/multi_cased_L-12_H-768_A-12_pt.tar.gz",
"subdir": "{DOWNLOADS_PATH}/bert_models"
}
]
}
}
================================================
FILE: deeppavlov/configs/embedder/bert_sentence_embedder.json
================================================
{
"chainer": {
"in": ["texts"],
"pipe": [
{
"class_name": "transformers_bert_preprocessor",
"vocab_file": "{BERT_PATH}/vocab.txt",
"do_lower_case": false,
"max_seq_length": 512,
"in": ["texts"],
"out": ["tokens", "subword_tokens", "subword_tok_ids", "startofword_markers", "attention_mask"]
},
{
"class_name": "transformers_bert_embedder",
"bert_config_path": "{BERT_PATH}/config.json",
"load_path": "{BERT_PATH}",
"truncate": false,
"in": ["subword_tok_ids", "startofword_markers", "attention_mask"],
"out": ["word_emb", "subword_emb", "max_emb", "mean_emb", "pooler_output"]
}
],
"out": ["max_emb", "mean_emb", "pooler_output"]
},
"train": {},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"BERT_PATH": "{DOWNLOADS_PATH}/bert_models/sentence_multi_cased_L-12_H-768_A-12_pt_v1"
},
"labels": {},
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/bert/sentence_multi_cased_L-12_H-768_A-12_pt_v1.tar.gz",
"subdir": "{DOWNLOADS_PATH}/bert_models"
}
]
}
}
================================================
FILE: deeppavlov/configs/entity_extraction/entity_detection_en.json
================================================
{
"chainer": {
"in": ["x"],
"pipe": [
{
"class_name": "ner_chunker",
"batch_size": 16,
"max_seq_len": 300,
"vocab_file": "{TRANSFORMER}",
"in": ["x"],
"out": ["x_chunk", "chunk_nums", "chunk_sentences_offsets", "chunk_sentences"]
},
{
"thres_proba": 0.6,
"o_tag": "O",
"tags_file": "{NER_PATH}/tag.dict",
"class_name": "entity_detection_parser",
"id": "edp"
},
{
"class_name": "ner_chunk_model",
"ner": {
"config_path": "{CONFIGS_PATH}/ner/ner_ontonotes_bert.json",
"overwrite": {
"chainer.out": ["x_tokens", "tokens_offsets", "y_pred", "probas"]
}
},
"ner_parser": "#edp",
"in": ["x_chunk", "chunk_nums", "chunk_sentences_offsets", "chunk_sentences"],
"out": ["entity_substr", "entity_offsets", "entity_positions", "tags", "sentences_offsets", "sentences", "probas"]
}
],
"out": ["entity_substr", "entity_offsets", "entity_positions", "tags", "sentences_offsets", "sentences", "probas"]
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"MODELS_PATH": "{ROOT_PATH}/models",
"CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs",
"TRANSFORMER": "bert-base-cased",
"NER_PATH": "{MODELS_PATH}/ner_ontonotes_bert_torch_crf"
}
}
}
================================================
FILE: deeppavlov/configs/entity_extraction/entity_detection_ru.json
================================================
{
"chainer": {
"in": ["x"],
"pipe": [
{
"class_name": "ner_chunker",
"batch_size": 16,
"max_seq_len": 300,
"vocab_file": "{TRANSFORMER}",
"in": ["x"],
"out": ["x_chunk", "chunk_nums", "chunk_sentences_offsets", "chunk_sentences"]
},
{
"thres_proba": 0.05,
"o_tag": "O",
"tags_file": "{NER_PATH}/tag.dict",
"class_name": "entity_detection_parser",
"id": "edp"
},
{
"class_name": "ner_chunk_model",
"ner": {"config_path": "{CONFIGS_PATH}/ner/ner_rus_bert_probas.json"},
"ner_parser": "#edp",
"in": ["x_chunk", "chunk_nums", "chunk_sentences_offsets", "chunk_sentences"],
"out": ["entity_substr", "entity_offsets", "entity_positions", "tags", "sentences_offsets", "sentences", "probas"]
}
],
"out": ["entity_substr", "entity_offsets", "entity_positions", "tags", "sentences_offsets", "sentences", "probas"]
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"MODELS_PATH": "{ROOT_PATH}/models",
"CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs",
"TRANSFORMER": "DeepPavlov/rubert-base-cased",
"NER_PATH": "{MODELS_PATH}/wiki_ner_rus_bert"
}
}
}
================================================
FILE: deeppavlov/configs/entity_extraction/entity_extraction_en.json
================================================
{
"chainer": {
"in": ["x"],
"pipe": [
{
"config_path": "{CONFIGS_PATH}/entity_extraction/entity_detection_en.json",
"in": ["x"],
"out": ["entity_substr", "entity_offsets", "entity_positions", "tags", "sentences_offsets", "sentences", "probas"]
},
{
"config_path": "{CONFIGS_PATH}/entity_extraction/entity_linking_en.json",
"in": ["entity_substr", "tags", "probas", "sentences", "entity_offsets", "sentences_offsets"],
"out": ["entity_ids", "entity_conf", "entity_pages", "entity_labels"]
}
],
"out": ["entity_substr", "tags", "entity_offsets", "entity_ids", "entity_conf", "entity_pages", "entity_labels"]
},
"metadata": {
"variables": {
"CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs"
}
}
}
================================================
FILE: deeppavlov/configs/entity_extraction/entity_extraction_ru.json
================================================
{
"chainer": {
"in": ["x"],
"pipe": [
{
"config_path": "{CONFIGS_PATH}/entity_extraction/entity_detection_ru.json",
"in": ["x"],
"out": ["entity_substr", "entity_offsets", "entity_positions", "tags", "sentences_offsets", "sentences", "probas"]
},
{
"config_path": "{CONFIGS_PATH}/entity_extraction/entity_linking_ru.json",
"in": ["entity_substr", "tags", "probas", "sentences", "entity_offsets", "sentences_offsets"],
"out": ["entity_ids", "entity_conf", "entity_pages", "entity_labels"]
}
],
"out": ["entity_substr", "tags", "entity_offsets", "entity_ids", "entity_conf", "entity_pages", "entity_labels"]
},
"metadata": {
"variables": {
"CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs"
}
}
}
================================================
FILE: deeppavlov/configs/entity_extraction/entity_linking_en.json
================================================
{
"chainer": {
"in": ["entity_substr", "tags", "probas", "sentences", "entity_offsets", "sentences_offsets"],
"pipe": [
{
"class_name": "torch_transformers_entity_ranker_infer",
"id": "entity_descr_ranking",
"pretrained_bert": "{TRANSFORMER}",
"encoder_weights_path": "{MODELS_PATH}/entity_linking_eng/encoder.pth.tar",
"bilinear_weights_path": "{MODELS_PATH}/entity_linking_eng/bilinear.pth.tar",
"special_token_id": 30522,
"emb_size": 512,
"block_size": 8
},
{
"class_name": "entity_linker",
"in": ["entity_substr", "tags", "probas", "sentences", "entity_offsets", "sentences_offsets"],
"out": ["entity_ids", "entity_conf", "entity_pages", "entity_labels"],
"load_path": "{DOWNLOADS_PATH}/entity_linking_eng",
"entities_database_filename": "el_eng_v2.db",
"entity_ranker": "#entity_descr_ranking",
"rank_in_runtime": true,
"num_entities_for_bert_ranking": 20,
"include_mention": false,
"num_entities_to_return": 3,
"lemmatize": true,
"use_descriptions": true,
"use_connections": true,
"use_tags": true,
"full_paragraph": true,
"return_confidences": true,
"lang": "en"
}
],
"out": ["entity_ids", "entity_conf", "entity_pages", "entity_labels"]
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"TRANSFORMER": "prajjwal1/bert-small"
},
"download": [
{
"url": "http://files.deeppavlov.ai/kbqa/downloads/el_db_eng_v2.tar.gz",
"subdir": "{DOWNLOADS_PATH}/entity_linking_eng"
},
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/entity_linking/el_ranker_eng.tar.gz",
"subdir": "{MODELS_PATH}/entity_linking_eng"
}
]
}
}
================================================
FILE: deeppavlov/configs/entity_extraction/entity_linking_ru.json
================================================
{
"chainer": {
"in": ["entity_substr", "tags", "probas", "sentences", "entity_offsets", "sentences_offsets"],
"pipe": [
{
"class_name": "torch_transformers_entity_ranker_infer",
"id": "entity_descr_ranking",
"pretrained_bert": "{TRANSFORMER}",
"encoder_weights_path": "{MODELS_PATH}/entity_linking_rus/encoder.pth.tar",
"bilinear_weights_path": "{MODELS_PATH}/entity_linking_rus/bilinear.pth.tar",
"special_token_id": 30522,
"emb_size": 264,
"block_size": 6
},
{
"class_name": "entity_linker",
"in": ["entity_substr", "tags", "probas", "sentences", "entity_offsets", "sentences_offsets"],
"out": ["entity_ids", "entity_conf", "entity_pages", "entity_labels"],
"load_path": "{DOWNLOADS_PATH}/entity_linking_rus",
"entities_database_filename": "el_rus_v2.db",
"words_dict_filename": "{DOWNLOADS_PATH}/entity_linking_rus/words_dict.pickle",
"ngrams_matrix_filename": "{DOWNLOADS_PATH}/entity_linking_rus/ngrams_matrix.npz",
"entity_ranker": "#entity_descr_ranking",
"rank_in_runtime": true,
"num_entities_for_bert_ranking": 30,
"use_gpu": false,
"include_mention": false,
"num_entities_to_return": 3,
"lemmatize": true,
"use_descriptions": true,
"use_connections": true,
"use_tags": true,
"kb_filename": "{DOWNLOADS_PATH}/wikidata/wikidata_lite.hdt",
"prefixes": {"entity": ["http://we"],
"rels": {"direct": "http://wpd",
"no_type": "http://wp",
"statement": "http://wps",
"qualifier": "http://wpq"
}
},
"full_paragraph": true,
"return_confidences": true,
"lang": "ru"
}
],
"out": ["entity_ids", "entity_conf", "entity_pages", "entity_labels"]
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"TRANSFORMER": "DeepPavlov/distilrubert-tiny-cased-conversational-v1"
},
"download": [
{
"url": "http://files.deeppavlov.ai/kbqa/downloads/el_files_rus_v2.tar.gz",
"subdir": "{DOWNLOADS_PATH}/entity_linking_rus"
},
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/entity_linking/el_ranker_rus.tar.gz",
"subdir": "{MODELS_PATH}/entity_linking_rus"
},
{
"url": "http://files.deeppavlov.ai/kbqa/wikidata/wikidata_lite.tar.gz",
"subdir": "{DOWNLOADS_PATH}/wikidata"
}
]
}
}
================================================
FILE: deeppavlov/configs/faq/fasttext_logreg.json
================================================
{
"dataset_reader": {
"class_name": "basic_classification_reader",
"format": "json",
"orient": "split",
"x": "text",
"y": "category",
"data_path": "{DOWNLOADS_PATH}/massive/{LANGUAGE}",
"train": "train.json",
"valid": "dev.json",
"test": "test.json"
},
"dataset_iterator": {
"class_name": "basic_classification_iterator",
"seed": 42,
"shuffle": true,
"shot": 5
},
"chainer": {
"in": ["text"],
"in_y": ["category"],
"pipe": [
{
"class_name": "stream_spacy_tokenizer",
"in": ["text"],
"id": "my_tokenizer",
"lemmas": false,
"out": "token_lemmas",
"spacy_model": "{SPACY_MODEL}"
},
{
"ref": "my_tokenizer",
"in": ["token_lemmas"],
"out": ["text_lem"]
},
{
"class_name": "fasttext",
"in": ["token_lemmas"],
"load_path": "{DOWNLOADS_PATH}/embeddings/fasttext/{LANGUAGE}.bin",
"mean": true,
"out": ["text_vector"]
},
{
"id": "answers_vocab",
"class_name": "simple_vocab",
"fit_on": "category",
"save_path": "{MODEL_PATH}/cat_answers.dict",
"load_path": "{MODEL_PATH}/cat_answers.dict",
"in": ["category"],
"out": ["y_ids"]
},
{
"in": ["text_vector"],
"fit_on": ["text_vector", "y_ids"],
"out": ["y_pred_proba"],
"class_name": "sklearn_component",
"main": true,
"save_path": "{MODEL_PATH}/model.pkl",
"load_path": "{MODEL_PATH}/model.pkl",
"model_class": "sklearn.linear_model:LogisticRegression",
"infer_method": "predict_proba",
"C": 10,
"penalty": "l2"
},
{
"in": ["y_pred_proba"],
"out": ["y_pred_ids"],
"class_name": "proba2labels",
"max_proba": true
},
{
"in": ["y_pred_ids"],
"out": ["y_pred_category"],
"ref": "answers_vocab"
}
],
"out": ["y_pred_category"]
},
"train": {
"evaluation_targets": ["train", "valid", "test"],
"class_name": "fit_trainer",
"metrics": [
{
"name": "accuracy",
"inputs": ["category", "y_pred_category"]
}
]
},
"metadata": {
"variables": {
"LANGUAGE": "en",
"ROOT_PATH": "~/.deeppavlov",
"SPACY_MODEL": "en_core_web_sm",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODEL_PATH": "{ROOT_PATH}/models/faq/{LANGUAGE}/fasttext_logreg"
},
"download": [
{
"url": "http://files.deeppavlov.ai/embeddings/fasttext/{LANGUAGE}.bin",
"subdir": "{DOWNLOADS_PATH}/embeddings/fasttext"
},
{
"url": "http://files.deeppavlov.ai/datasets/massive-{LANGUAGE}.tar.gz",
"subdir": "{DOWNLOADS_PATH}/massive/{LANGUAGE}"
},
{
"url": "http://files.deeppavlov.ai/faq/fasttext_logreg_{LANGUAGE}.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/kbqa/kbqa_cq_en.json
================================================
{
"dataset_reader": {
"class_name": "lcquad_reader",
"question_types": ["statement_property", "right-subgraph", "simple question left",
"simple question right", "left-subgraph", "rank"],
"num_samples": 100,
"data_path": "{DOWNLOADS_PATH}/lcquad/lcquad2.json"
},
"dataset_iterator": {
"class_name": "data_learning_iterator"
},
"chainer": {
"in": ["x"],
"in_y": ["gold_answer_ids", "gold_answer_labels", "gold_query"],
"pipe": [
{
"class_name": "question_sign_checker",
"in": ["x"],
"out": ["x_punct"]
},
{
"config_path": "{CONFIGS_PATH}/classifiers/query_pr.json",
"in": ["x_punct"],
"out": ["template_type"]
},
{
"class_name": "query_formatter",
"query_info": {"unk_var": "?answer", "mid_var": "?ent"},
"in": ["gold_query"],
"out": ["f_gold_query"]
},
{
"config_path": "{CONFIGS_PATH}/entity_extraction/entity_detection_en.json",
"overwrite": {
"chainer.pipe.1.make_tags_from_probas": true,
"chainer.pipe.2.ner": {
"config_path": "{CONFIGS_PATH}/ner/ner_ontonotes_bert.json",
"overwrite": {
"chainer.out": ["x_tokens", "tokens_offsets", "y_pred", "probas"],
"chainer.pipe.2.use_crf": false,
"metadata.variables.TRANSFORMER": "distilbert-base-cased",
"metadata.variables.MODEL_PATH": "{MODELS_PATH}/entity_type_detection_distilbert_lcquad2.0"
}
},
"metadata.variables.NER_PATH": "{MODELS_PATH}/entity_type_detection_distilbert_lcquad2.0"
},
"in": ["x_punct", "template_type"],
"out": ["entity_type_substr", "entity_offsets", "entity_positions", "tags", "sentences_offsets", "sentences", "probas"]
},
{
"class_name": "entity_type_split",
"in": ["entity_type_substr", "tags"],
"out": ["entity_substr", "entity_tags", "type_substr"]
},
{
"class_name": "answer_types_extractor",
"lang": "@en",
"types_filename": "{DOWNLOADS_PATH}/wikidata_eng/types_labels_dict_en.pickle",
"types_sets_filename": "{DOWNLOADS_PATH}/wikidata_eng/answer_types.pickle",
"in": ["x_punct", "entity_substr", "tags"],
"out": ["answer_types", "f_entity_substr", "f_tags"]
},
{
"class_name": "entity_linker",
"load_path": "{DOWNLOADS_PATH}/entity_linking_eng",
"entities_database_filename": "el_db_lcquad2.db",
"num_entities_to_return": 7,
"lemmatize": true,
"use_descriptions": false,
"use_connections": false,
"use_tags": true,
"alias_coef": 1.0,
"prefixes": {"entity": ["http://we"],
"rels": {"direct": "http://wpd",
"no_type": "http://wp",
"statement": "http://wps",
"qualifier": "http://wpq"
}
},
"return_confidences": true,
"lang": "en",
"id": "entity_linker"
},
{
"class_name": "wiki_parser",
"id": "wiki_p",
"wiki_filename": "{DOWNLOADS_PATH}/wikidata/wikidata_full.hdt",
"lang": "@en"
},
{
"class_name": "template_matcher",
"id": "template_m",
"num_processors": 16,
"load_path": "{DOWNLOADS_PATH}/wikidata_eng",
"templates_filename": "templates_eng.json"
},
{
"class_name": "rel_ranking_infer",
"id": "rel_r_inf",
"ranker": {"config_path": "{CONFIGS_PATH}/ranking/rel_ranking_roberta_en.json",
"overwrite": {"chainer.out": ["y_pred_probas"]}
},
"wiki_parser": "#wiki_p",
"batch_size": 32,
"rank_answers": true,
"load_path": "{DOWNLOADS_PATH}/wikidata_eng",
"rel_q2name_filename": "wiki_dict_properties_eng.pickle"
},
{
"class_name": "query_generator",
"id": "query_g",
"entity_linker": "#entity_linker",
"template_matcher": "#template_m",
"rel_ranker": "#rel_r_inf",
"wiki_parser": "#wiki_p",
"load_path": "{DOWNLOADS_PATH}/wikidata",
"rels_in_ranking_queries_fname": "rels_in_ranking_queries.json",
"sparql_queries_filename": "{DOWNLOADS_PATH}/wikidata/sparql_queries_eng.json",
"entities_to_leave": 5,
"rels_to_leave": 10,
"return_answers": false,
"map_query_str_to_kb": [["P0", "http://wd"], ["P00", "http://wl"], ["wd:", "http://we/"], ["wdt:", "http://wpd/"],
[" p:", " http://wp/"], ["ps:", "http://wps/"], ["pq:", "http://wpq/"]],
"kb_prefixes": {"entity": "wd:E", "rel": "wdt:R", "type": "wd:T", "type_rel": "wdt:P", "type_rels": ["P31", "P279"]},
"gold_query_info": {"unk_var": "?answer", "mid_var": "?ent"},
"in": ["x_punct", "x_punct", "template_type", "entity_substr", "type_substr", "entity_tags", "probas", "answer_types"],
"out": ["cand_answers", "template_answers"]
},
{
"class_name": "rel_ranking_infer",
"ranker": {"config_path": "{CONFIGS_PATH}/ranking/path_ranking_nll_roberta_en.json"},
"wiki_parser": "#wiki_p",
"batch_size": 32,
"nll_path_ranking": true,
"return_elements": ["answer_ids", "queries"],
"rank_answers": true,
"load_path": "{DOWNLOADS_PATH}/wikidata_eng",
"rel_q2name_filename": "wiki_dict_properties_eng.pickle",
"in": ["x_punct", "template_type", "cand_answers", "entity_substr", "template_answers"],
"out": ["answers", "answer_ids", "query"]
}
],
"out": ["answers", "answer_ids", "query"]
},
"train": {
"evaluation_targets": ["test"],
"batch_size": 1,
"metrics": [
{
"name": "kbqa_accuracy",
"inputs": ["x", "answers", "answer_ids", "query", "gold_answer_labels", "gold_answer_ids", "f_gold_query"]
}
],
"class_name": "nn_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs"
},
"download": [
{
"url": "http://files.deeppavlov.ai/kbqa/datasets/lcquad2.tar.gz",
"subdir": "{DOWNLOADS_PATH}/lcquad"
},
{
"url": "http://files.deeppavlov.ai/kbqa/models/entity_type_detection_distilbert_lcquad2.0.tar.gz",
"subdir": "{MODELS_PATH}/entity_type_detection_distilbert_lcquad2.0"
},
{
"url": "http://files.deeppavlov.ai/kbqa/wikidata/queries_and_rels_lcquad2_v2.tar.gz",
"subdir": "{DOWNLOADS_PATH}/wikidata"
},
{
"url": "http://files.deeppavlov.ai/kbqa/downloads/el_db_lcquad2.tar.gz",
"subdir": "{DOWNLOADS_PATH}/entity_linking_eng"
},
{
"url": "http://files.deeppavlov.ai/kbqa/wikidata/wikidata_full.tar.gz",
"subdir": "{DOWNLOADS_PATH}/wikidata"
},
{
"url": "http://files.deeppavlov.ai/kbqa/wikidata/kbqa_files_en.tar.gz",
"subdir": "{DOWNLOADS_PATH}/wikidata_eng"
}
]
}
}
================================================
FILE: deeppavlov/configs/kbqa/kbqa_cq_ru.json
================================================
{
"dataset_reader": {
"class_name": "rubq_reader",
"version": "2.0",
"question_types": ["all"],
"num_samples": 100,
"data_path": "{DOWNLOADS_PATH}/rubq/rubq2.0.json"
},
"dataset_iterator": {
"class_name": "data_learning_iterator"
},
"chainer": {
"in": ["x"],
"in_y": ["gold_answer_ids", "gold_answer_labels", "gold_query"],
"pipe": [
{
"class_name": "question_sign_checker",
"delete_brackets": true,
"in": ["x"],
"out": ["x_punct"]
},
{
"class_name": "query_formatter",
"query_info": {"unk_var": "?answer", "mid_var": "?ent"},
"in": ["gold_query"],
"out": ["f_gold_query"]
},
{
"class_name": "ner_chunker",
"batch_size": 16,
"max_seq_len" : 300,
"vocab_file": "distilbert-base-multilingual-cased",
"in": ["x_punct"],
"out": ["x_chunk", "chunk_nums", "chunk_sentences_offsets", "chunk_sentences"]
},
{
"thres_proba": 0.05,
"o_tag": "O",
"tags_file": "{NER_PATH}/tag.dict",
"class_name": "entity_detection_parser",
"ignored_tags": ["DATE", "CARDINAL", "ORDINAL", "QUANTITY", "PERCENT", "NORP"],
"lang": "ru",
"id": "edp"
},
{
"thres_proba": 0.05,
"o_tag": "O",
"tags_file": "{NER_PATH2}/tag.dict",
"class_name": "entity_detection_parser",
"ignored_tags": ["T"],
"lang": "ru",
"id": "edp2"
},
{
"class_name": "ner_chunk_model",
"ner": {
"config_path": "{CONFIGS_PATH}/ner/ner_ontonotes_bert_mult.json",
"overwrite": {
"chainer.pipe.2.use_crf": false,
"metadata.variables.TRANSFORMER": "distilbert-base-multilingual-cased",
"chainer.out": ["x_tokens", "tokens_offsets", "y_pred", "probas"],
"metadata.variables.MODEL_PATH": "{MODELS_PATH}/ner_ontonotes_torch_distilbert_mult"
}
},
"ner_parser": "#edp",
"ner2": {
"config_path": "{CONFIGS_PATH}/ner/ner_ontonotes_bert_mult.json",
"overwrite": {
"chainer.pipe.2.use_crf": false,
"metadata.variables.TRANSFORMER": "DeepPavlov/distilrubert-small-cased-conversational",
"chainer.out": ["x_tokens", "tokens_offsets", "y_pred", "probas"],
"metadata.variables.MODEL_PATH": "{MODELS_PATH}/entity_detection_rubq"
}
},
"ner_parser2": "#edp2",
"in": ["x_chunk", "chunk_nums", "chunk_sentences_offsets", "chunk_sentences"],
"out": ["entity_substr", "entity_offsets", "entity_positions", "tags", "sentences_offsets", "sentences", "probas"]
},
{
"class_name": "answer_types_extractor",
"lang": "@ru",
"types_filename": "{DOWNLOADS_PATH}/wikidata_rus/types_labels_dict_ru.pickle",
"types_sets_filename": "{DOWNLOADS_PATH}/wikidata_rus/answer_types.pickle",
"in": ["x_punct", "entity_substr", "tags"],
"out": ["answer_types", "f_entity_substr", "f_tags"]
},
{
"class_name": "entity_linker",
"load_path": "{DOWNLOADS_PATH}/entity_linking_rus",
"entities_database_filename": "el_db_rus.db",
"words_dict_filename": "{DOWNLOADS_PATH}/entity_linking_rus/words_dict.pickle",
"ngrams_matrix_filename": "{DOWNLOADS_PATH}/entity_linking_rus/ngrams_matrix.npz",
"include_mention": false,
"num_entities_to_return": 7,
"lemmatize": true,
"use_descriptions": false,
"use_connections": true,
"use_tags": true,
"kb_filename": "{DOWNLOADS_PATH}/wikidata/wikidata_full.hdt",
"prefixes": {"entity": ["http://we"],
"rels": {"direct": "http://wpd",
"no_type": "http://wp",
"statement": "http://wps",
"qualifier": "http://wpq"
}
},
"return_confidences": true,
"lang": "ru",
"id": "entity_linker"
},
{
"class_name": "wiki_parser",
"id": "wiki_p",
"wiki_filename": "{DOWNLOADS_PATH}/wikidata/wikidata_full.hdt",
"max_comb_num": 40000,
"lang": "@ru"
},
{
"class_name": "slovnet_syntax_parser",
"load_path": "{MODELS_PATH}/slovnet_syntax_parser",
"navec_filename": "{MODELS_PATH}/slovnet_syntax_parser/navec_news_v1_1B_250K_300d_100q.tar",
"syntax_parser_filename": "{MODELS_PATH}/slovnet_syntax_parser/slovnet_syntax_news_v1.tar",
"tree_patterns_filename": "{MODELS_PATH}/slovnet_syntax_parser/tree_patterns.json",
"id": "slovnet_parser"
},
{
"class_name": "ru_adj_to_noun",
"freq_dict_filename": "{DOWNLOADS_PATH}/wikidata_rus/freqrnc2011.csv",
"id": "adj2noun"
},
{
"class_name": "tree_to_sparql",
"sparql_queries_filename": "{DOWNLOADS_PATH}/wikidata/sparql_queries_rus.json",
"adj_to_noun": "#adj2noun",
"syntax_parser": "#slovnet_parser",
"kb_prefixes": {"entity": "wd:E", "rel": "wdt:R", "type": "wd:T", "type_rel": "wdt:P", "type_rels": ["P31", "P279"]},
"in": ["x_punct", "entity_substr", "tags", "entity_offsets", "entity_positions", "probas"],
"out": ["x_sanitized", "query_nums", "s_entity_substr", "s_tags", "s_probas", "entities_to_link", "s_types_substr"]
},
{
"class_name": "template_matcher",
"id": "template_m",
"num_processors": 8,
"load_path": "{DOWNLOADS_PATH}/wikidata_rus",
"templates_filename": "templates_rus.json"
},
{
"class_name": "rel_ranking_infer",
"id": "rel_r_inf",
"ranker": {"config_path": "{CONFIGS_PATH}/ranking/rel_ranking_nll_bert_ru.json"},
"wiki_parser": "#wiki_p",
"batch_size": 32,
"nll_rel_ranking": true,
"return_elements": ["answer_ids", "queries"],
"load_path": "{DOWNLOADS_PATH}/wikidata_rus",
"rank": false,
"rel_thres": -4.0,
"type_rels": ["P31", "P279"],
"rel_q2name_filename": "wiki_dict_properties_full_rus.pickle"
},
{
"class_name": "query_generator",
"id": "query_g",
"entity_linker": "#entity_linker",
"template_matcher": "#template_m",
"rel_ranker": "#rel_r_inf",
"wiki_parser": "#wiki_p",
"load_path": "{DOWNLOADS_PATH}/wikidata",
"rels_in_ranking_queries_fname": "rels_in_ranking_queries.json",
"sparql_queries_filename": "{DOWNLOADS_PATH}/wikidata/sparql_queries_rus.json",
"entities_to_leave": 9,
"rels_to_leave": 10,
"max_comb_num": 1000,
"map_query_str_to_kb": [["P0", "http://wd"], ["P00", "http://wl"], ["wd:", "http://we/"], ["wdt:", "http://wpd/"],
[" p:", " http://wp/"], ["ps:", "http://wps/"], ["pq:", "http://wpq/"]],
"kb_prefixes": {"entity": "wd:E", "rel": "wdt:R", "type": "wd:T", "type_rel": "wdt:P", "type_rels": ["P31", "P279"]},
"gold_query_info": {"unk_var": "?answer", "mid_var": "?ent"},
"syntax_structure_known": true,
"in": ["x_punct", "x_sanitized", "query_nums", "s_entity_substr", "s_types_substr", "s_tags", "s_probas", "answer_types", "entities_to_link"],
"out": ["answers", "answer_ids", "query"]
}
],
"out": ["answers", "answer_ids", "query"]
},
"train": {
"evaluation_targets": ["test"],
"batch_size": 1,
"metrics": [
{
"name": "kbqa_accuracy",
"inputs": ["x", "answers", "answer_ids", "query", "gold_answer_labels", "gold_answer_ids", "f_gold_query"]
}
],
"class_name": "nn_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs",
"NER_PATH": "{MODELS_PATH}/ner_ontonotes_torch_distilbert_mult",
"NER_PATH2": "{MODELS_PATH}/entity_detection_rubq"
},
"download": [
{
"url": "http://files.deeppavlov.ai/datasets/rubq2.0.tar.gz",
"subdir": "{DOWNLOADS_PATH}/rubq"
},
{
"url": "http://files.deeppavlov.ai/kbqa/downloads/el_files_rus.tar.gz",
"subdir": "{DOWNLOADS_PATH}/entity_linking_rus"
},
{
"url": "http://files.deeppavlov.ai/kbqa/models/ner_ontonotes_torch_distilbert_mult.tar.gz",
"subdir": "{MODELS_PATH}/ner_ontonotes_torch_distilbert_mult"
},
{
"url": "http://files.deeppavlov.ai/kbqa/models/entity_detection_rubq.tar.gz",
"subdir": "{MODELS_PATH}/entity_detection_rubq"
},
{
"url": "http://files.deeppavlov.ai/kbqa/wikidata/queries_and_rels_rus_v2.tar.gz",
"subdir": "{DOWNLOADS_PATH}/wikidata"
},
{
"url": "http://files.deeppavlov.ai/kbqa/wikidata/kbqa_files_ru.tar.gz",
"subdir": "{DOWNLOADS_PATH}/wikidata_rus"
},
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/syntax_parser/slovnet_syntax_parser_v2.tar.gz",
"subdir": "{MODELS_PATH}/slovnet_syntax_parser"
},
{
"url": "http://files.deeppavlov.ai/kbqa/wikidata/wikidata_full.tar.gz",
"subdir": "{DOWNLOADS_PATH}/wikidata"
}
]
}
}
================================================
FILE: deeppavlov/configs/kbqa/wiki_parser.json
================================================
{
"chainer": {
"in": ["parser_info", "query"],
"pipe": [
{
"class_name": "wiki_parser",
"in": ["parser_info", "query"],
"out": ["wiki_parser_output"],
"wiki_filename": "{DOWNLOADS_PATH}/wikidata/wikidata_compr.pickle",
"file_format": "pickle",
"lang": "@en"
}
],
"out": ["wiki_parser_output"]
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs"
},
"download": [
{
"url": "http://files.deeppavlov.ai/kbqa/wikidata/wikidata_compr.pickle",
"subdir": "{DOWNLOADS_PATH}/wikidata"
}
]
}
}
================================================
FILE: deeppavlov/configs/morpho_syntax_parser/morpho_ru_syntagrus_bert.json
================================================
{
"dataset_reader": {
"class_name": "morphotagger_dataset_reader",
"data_path": "{DOWNLOADS_PATH}/UD2.3_source",
"language": "ru_syntagrus",
"data_types": ["train", "dev", "test"]
},
"dataset_iterator": {
"class_name": "morphotagger_dataset_iterator"
},
"chainer": {
"in": ["x"],
"in_y": ["y"],
"pipe": [
{
"in": ["x"],
"class_name": "lazy_tokenizer",
"out": ["x_words"]
},
{
"class_name": "torch_transformers_ner_preprocessor",
"vocab_file": "{TRANSFORMER}",
"do_lower_case": false,
"max_seq_length": 512,
"max_subword_length": 15,
"token_masking_prob": 0.0,
"in": ["x_words"],
"out": ["x_tokens", "x_subword_tokens", "x_subword_tok_ids", "startofword_markers", "attention_mask", "tokens_offsets"]
},
{
"id": "tag_vocab",
"class_name": "simple_vocab",
"min_freq": 3,
"fit_on": ["y"],
"in": ["y"],
"out": ["y_ind"],
"special_tokens": ["PAD", "BEGIN", "END"],
"pad_with_zeros": true,
"save_path": "{MODEL_PATH}/tag.dict",
"load_path": "{MODEL_PATH}/tag.dict"
},
{
"class_name": "torch_transformers_sequence_tagger",
"n_tags": "#tag_vocab.len",
"pretrained_bert": "{TRANSFORMER}",
"attention_probs_keep_prob": 0.5,
"use_crf": false,
"encoder_layer_ids": [-6, -5, -4, -3, -2, -1],
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 2e-05,
"weight_decay": 1e-06,
"betas": [0.9, 0.999],
"eps": 1e-06
},
"clip_norm": 1.0,
"min_learning_rate": 1e-07,
"learning_rate_drop_patience": 10,
"learning_rate_drop_div": 1.5,
"load_before_drop": true,
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"in": ["x_subword_tok_ids", "attention_mask", "startofword_markers"],
"in_y": ["y_ind"],
"out": ["y_pred_ind", "probas"]
},
{
"ref": "tag_vocab",
"in": ["y_pred_ind"],
"out": ["y_pred"]
},
{
"in": ["x_words"],
"out": ["y_lemmas"],
"model": "ru_core_news_sm",
"class_name": "spacy_lemmatizer"
},
{
"in": ["x_words", "y_pred", "y_lemmas"],
"out": ["y_prettified"],
"id": "prettifier",
"class_name": "lemmatized_output_prettifier"
}
],
"out": ["y_prettified"]
},
"train": {
"epochs": 10,
"batch_size": 32,
"metrics": [
{
"name": "per_token_accuracy",
"inputs": ["y", "y_pred"]
},
{
"name": "accuracy",
"inputs": ["y", "y_pred"]
}
],
"validation_patience": 15,
"val_every_n_epochs": 1,
"val_every_n_batches": 300,
"show_examples": false,
"pytest_max_batches": 2,
"pytest_batch_size": 8,
"evaluation_targets": ["valid", "test"],
"class_name": "nn_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"TRANSFORMER": "DeepPavlov/rubert-base-cased",
"MODEL_PATH": "{MODELS_PATH}/morpho_ru_syntagrus_torch_bert"
},
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.3/ru_syntagrus.tar.gz",
"subdir": "{DOWNLOADS_PATH}/UD2.3_source/ru_syntagrus"
},
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/syntax_parsing/morpho_ru_syntagrus_torch_bert.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/morpho_syntax_parser/ru_syntagrus_joint_parsing.json
================================================
{
"chainer": {
"in": ["x_words"],
"pipe": [
{
"id": "main",
"class_name": "joint_tagger_parser",
"tagger": {
"config_path": "{CONFIGS_PATH}/morpho_syntax_parser/morpho_ru_syntagrus_bert.json",
"overwrite": {"chainer.pipe.6.return_string": false}
},
"parser": {
"config_path": "{CONFIGS_PATH}/morpho_syntax_parser/syntax_ru_syntagrus_bert.json",
"overwrite": {"chainer.pipe.6.return_string": false}
},
"in": ["x_words"],
"out": ["y_parsed"]
}
],
"out": ["y_parsed"]
},
"metadata": {
"variables": {
"CONFIGS_PATH": "{DEEPPAVLOV_PATH}/configs"
}
}
}
================================================
FILE: deeppavlov/configs/morpho_syntax_parser/syntax_ru_syntagrus_bert.json
================================================
{
"dataset_reader": {
"class_name": "morphotagger_dataset_reader",
"data_path": "{DOWNLOADS_PATH}/UD2.3_source",
"language": "ru_syntagrus",
"data_types": ["train", "dev", "test"],
"read_syntax": true
},
"dataset_iterator": {
"class_name": "morphotagger_dataset_iterator"
},
"chainer": {
"in": ["x"],
"in_y": ["y_tags", "y_heads", "y_deps"],
"pipe": [
{
"in": ["x"],
"class_name": "lazy_tokenizer",
"out": ["x_words"]
},
{
"class_name": "torch_transformers_ner_preprocessor",
"vocab_file": "{TRANSFORMER}",
"do_lower_case": false,
"max_seq_length": 512,
"max_subword_length": 15,
"token_masking_prob": 0.0,
"in": ["x_words"],
"out": ["x_tokens", "x_subword_tokens", "x_subword_tok_ids", "startofword_markers", "attention_mask", "tokens_offsets"]
},
{
"id": "dep_vocab",
"class_name": "simple_vocab",
"min_freq": 3,
"fit_on": ["y_deps"],
"in": ["y_deps"],
"out": ["y_deps_indexes"],
"special_tokens": ["PAD"],
"pad_with_zeros": true,
"save_path": "{MODEL_PATH}/deps.dict",
"load_path": "{MODEL_PATH}/deps.dict"
},
{
"class_name": "torch_transformers_syntax_parser",
"n_deps": "#dep_vocab.len",
"state_size": 384,
"pretrained_bert": "{TRANSFORMER}",
"attention_probs_keep_prob": 0.5,
"return_probas": true,
"encoder_layer_ids": [6, 7, 8, 9, 10, 11],
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 2e-05,
"weight_decay": 1e-06,
"betas": [0.9, 0.999],
"eps": 1e-06
},
"clip_norm": 1.0,
"min_learning_rate": 1e-07,
"use_birnn": true,
"learning_rate_drop_patience": 10,
"learning_rate_drop_div": 1.5,
"load_before_drop": true,
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"in": ["x_subword_tok_ids", "attention_mask", "startofword_markers"],
"in_y": ["y_heads", "y_deps_indexes"],
"out": ["y_predicted_heads_probs", "y_predicted_deps_indexes"]
},
{
"class_name": "chu_liu_edmonds_transformer",
"in": ["y_predicted_heads_probs"],
"out": ["y_predicted_heads"]
},
{
"ref": "dep_vocab",
"in": ["y_predicted_deps_indexes"],
"out": ["y_predicted_deps"]
},
{
"in": ["x_words", "y_predicted_heads", "y_predicted_deps"],
"out": ["y_prettified"],
"id": "dependency_output_prettifier",
"class_name": "dependency_output_prettifier"
}
],
"out": ["y_prettified"]
},
"train": {
"epochs": 10,
"batch_size": 32,
"metrics": [
{
"name": "multitask_token_accuracy",
"alias": "LAS",
"inputs": ["y_deps", "y_heads", "y_predicted_deps", "y_predicted_heads"]
},
{
"name": "per_token_accuracy",
"alias": "UAS",
"inputs": ["y_heads", "y_predicted_heads"]
}
],
"validation_patience": 15,
"val_every_n_batches": 300,
"show_examples": false,
"pytest_max_batches": 2,
"pytest_batch_size": 8,
"evaluation_targets": ["valid", "test"],
"class_name": "nn_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"TRANSFORMER": "DeepPavlov/rubert-base-cased",
"MODEL_PATH": "{MODELS_PATH}/syntax_parsing/rus_6layers"
},
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/morpho_tagger/UD2.3/ru_syntagrus.tar.gz",
"subdir": "{DOWNLOADS_PATH}/UD2.3_source/ru_syntagrus"
},
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/syntax_parsing/rus_6layers.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/multitask/mt_glue.json
================================================
{
"dataset_reader": {
"class_name": "multitask_reader",
"task_defaults": {
"class_name": "huggingface_dataset_reader",
"path": "glue",
"train": "train",
"valid": "validation"
},
"tasks": {
"cola": {"name": "cola"},
"sst2": {"name": "sst2"},
"qqp": {"name": "qqp"},
"mrpc": {"name": "mrpc"},
"rte": {"name": "rte"},
"mnli": {
"name": "mnli",
"valid": "validation_matched"
},
"qnli": {"name": "qnli"},
"stsb": {"name": "stsb"}
}
},
"dataset_iterator": {
"class_name": "multitask_iterator",
"num_train_epochs": "{NUM_TRAIN_EPOCHS}",
"gradient_accumulation_steps": "{GRADIENT_ACC_STEPS}",
"seed": 42,
"task_defaults": {
"class_name": "huggingface_dataset_iterator",
"label": "label",
"use_label_name": false,
"seed": 42
},
"tasks": {
"cola": {
"features": ["sentence"]
},
"sst2": {
"features": ["sentence"]
},
"qqp": {
"features": ["question1", "question2"]
},
"mrpc": {
"features": ["sentence1", "sentence2"]
},
"rte": {
"features": ["sentence1", "sentence2"]
},
"mnli": {
"features": ["premise", "hypothesis"]
},
"qnli": {
"features": ["question", "sentence"]
},
"stsb": {
"features": ["sentence1", "sentence2"]
}
}
},
"chainer": {
"in": ["x_cola", "x_sst2", "x_qqp", "x_mrpc", "x_rte", "x_mnli", "x_qnli", "x_stsb"],
"in_y": ["y_cola", "y_sst2", "y_qqp", "y_mrpc", "y_rte", "y_mnli", "y_qnli", "y_stsb"
],
"pipe": [
{
"class_name": "multitask_pipeline_preprocessor",
"possible_keys_to_extract": [0, 1],
"preprocessor": "TorchTransformersPreprocessor",
"vocab_file": "{BACKBONE}",
"max_seq_length": 128,
"do_lower_case": true,
"n_task": 8,
"in": ["x_cola", "x_sst2", "x_qqp", "x_mrpc", "x_rte", "x_mnli", "x_qnli", "x_stsb"],
"out": [
"bert_features_cola",
"bert_features_sst2",
"bert_features_qqp",
"bert_features_mrpc",
"bert_features_rte",
"bert_features_mnli",
"bert_features_qnli",
"bert_features_stsb"
]
},
{
"id": "multitask_transformer",
"class_name": "multitask_transformer",
"optimizer_parameters": {"lr": 2e-5},
"gradient_accumulation_steps": "{GRADIENT_ACC_STEPS}",
"learning_rate_drop_patience": 2,
"learning_rate_drop_div": 2.0,
"return_probas": true,
"backbone_model": "{BACKBONE}",
"save_path": "{MODEL_PATH}",
"load_path": "{MODEL_PATH}",
"tasks": {
"cola": {
"type": "classification",
"options": 2
},
"sst2": {
"type": "classification",
"options": 2
},
"qqp": {
"type": "classification",
"options": 2
},
"mrpc": {
"type": "classification",
"options": 2
},
"rte": {
"type": "classification",
"options": 2
},
"mnli": {
"type": "classification",
"options": 3
},
"qnli": {
"type": "classification",
"options": 2
},
"stsb": {
"type": "regression",
"options": 1
}
},
"in": [
"bert_features_cola",
"bert_features_sst2",
"bert_features_qqp",
"bert_features_mrpc",
"bert_features_rte",
"bert_features_mnli",
"bert_features_qnli",
"bert_features_stsb"
],
"in_y": ["y_cola", "y_sst2", "y_qqp", "y_mrpc", "y_rte", "y_mnli", "y_qnli", "y_stsb"],
"out": [
"y_cola_pred_probas",
"y_sst2_pred_probas",
"y_qqp_pred_probas",
"y_mrpc_pred_probas",
"y_rte_pred_probas",
"y_mnli_pred_probas",
"y_qnli_pred_probas",
"y_stsb_pred"
]
},
{
"in": [
"y_cola_pred_probas",
"y_sst2_pred_probas",
"y_qqp_pred_probas",
"y_mrpc_pred_probas",
"y_rte_pred_probas",
"y_mnli_pred_probas",
"y_qnli_pred_probas"
],
"out": [
"y_cola_pred_ids",
"y_sst2_pred_ids",
"y_qqp_pred_ids",
"y_mrpc_pred_ids",
"y_rte_pred_ids",
"y_mnli_pred_ids",
"y_qnli_pred_ids"
],
"class_name": "proba2labels",
"max_proba": true
}
],
"out": [
"y_cola_pred_probas",
"y_sst2_pred_probas",
"y_qqp_pred_probas",
"y_mrpc_pred_probas",
"y_rte_pred_probas",
"y_mnli_pred_probas",
"y_qnli_pred_probas",
"y_stsb_pred",
"y_cola_pred_ids",
"y_sst2_pred_ids",
"y_qqp_pred_ids",
"y_mrpc_pred_ids",
"y_rte_pred_ids",
"y_mnli_pred_ids",
"y_qnli_pred_ids",
"y_stsb_pred"
]
},
"train": {
"epochs": "{NUM_TRAIN_EPOCHS}",
"batch_size": 32,
"metrics": [
{
"name": "multitask_accuracy",
"inputs": [
"y_rte",
"y_mnli",
"y_qnli",
"y_mrpc",
"y_cola",
"y_sst2",
"y_qqp",
"y_rte_pred_ids",
"y_mnli_pred_ids",
"y_qnli_pred_ids",
"y_mrpc_pred_ids",
"y_cola_pred_ids",
"y_sst2_pred_ids",
"y_qqp_pred_ids"
]
},
{
"name": "accuracy",
"alias": "accuracy_mrpc",
"inputs": ["y_mrpc", "y_mrpc_pred_ids"]
},
{
"name": "accuracy",
"alias": "accuracy_rte",
"inputs": ["y_rte", "y_rte_pred_ids"]
},
{
"name": "accuracy",
"alias": "accuracy_mnli",
"inputs": ["y_mnli", "y_mnli_pred_ids"]
},
{
"name": "accuracy",
"alias": "accuracy_qnli",
"inputs": ["y_qnli", "y_qnli_pred_ids"]
},
{
"name": "accuracy",
"alias": "accuracy_sst",
"inputs": ["y_sst2", "y_sst2_pred_ids"]
},
{
"name": "accuracy",
"alias": "accuracy_cola",
"inputs": ["y_cola", "y_cola_pred_ids"]
},
{
"name": "accuracy",
"alias": "accuracy_qqp",
"inputs": ["y_qqp", "y_qqp_pred_ids"]
},
{
"name": "pearson_correlation",
"alias": "pearson_correlation_stsb",
"inputs": ["y_stsb", "y_stsb_pred"]
},
{
"name": "spearman_correlation",
"alias": "spearman_correlation_stsb",
"inputs": ["y_stsb", "y_stsb_pred"]
}
],
"validation_patience": 3,
"log_every_n_epochs": 1,
"show_examples": false,
"evaluation_targets": ["valid"],
"class_name": "torch_trainer",
"pytest_max_batches": 2
},
"metadata": {
"variables": {
"BACKBONE": "bert-base-uncased",
"MODELS_PATH": "~/.deeppavlov/models/glue",
"MODEL_PATH": "{MODELS_PATH}/8task",
"NUM_TRAIN_EPOCHS": 5,
"GRADIENT_ACC_STEPS": 1
},
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/multitask/glue.tar.gz",
"subdir": "{MODELS_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/multitask/multitask_example.json
================================================
{
"dataset_reader": {
"class_name": "multitask_reader",
"task_defaults": {
"class_name": "huggingface_dataset_reader",
"path": "glue",
"train": "train",
"valid": "validation",
"test": "test"
},
"tasks": {
"cola": {"name": "cola"},
"rte": {"name": "rte"},
"stsb": {"name": "stsb"},
"copa": {
"path": "super_glue",
"name": "copa"
},
"conll": {
"class_name": "conll2003_reader",
"use_task_defaults": false,
"data_path": "{DOWNLOADS_PATH}/conll2003/",
"dataset_name": "conll2003",
"provide_pos": false
}
}
},
"dataset_iterator": {
"class_name": "multitask_iterator",
"num_train_epochs": "{NUM_TRAIN_EPOCHS}",
"gradient_accumulation_steps": "{GRADIENT_ACC_STEPS}",
"seed": 42,
"task_defaults": {
"class_name": "huggingface_dataset_iterator",
"label": "label",
"use_label_name": false,
"seed": 42
},
"tasks": {
"cola": {
"features": ["sentence"]
},
"rte": {
"features": ["sentence1", "sentence2"]
},
"stsb": {
"features": ["sentence1", "sentence2"]
},
"copa": {
"features": ["contexts", "choices"]
},
"conll": {
"class_name": "basic_classification_iterator",
"seed": 42,
"use_task_defaults": false
}
}
},
"chainer": {
"in": ["x_cola", "x_rte", "x_stsb", "x_copa", "x_conll"],
"in_y": ["y_cola", "y_rte", "y_stsb", "y_copa", "y_conll"],
"pipe": [
{
"class_name": "multitask_pipeline_preprocessor",
"possible_keys_to_extract": [0, 1],
"preprocessors": [
"TorchTransformersPreprocessor",
"TorchTransformersPreprocessor",
"TorchTransformersPreprocessor",
"TorchTransformersMultiplechoicePreprocessor",
"TorchTransformersNerPreprocessor"
],
"do_lower_case": true,
"n_task": 5,
"vocab_file": "{BACKBONE}",
"max_seq_length": 200,
"max_subword_length": 15,
"token_masking_prob": 0.0,
"return_features": true,
"in": ["x_cola", "x_rte", "x_stsb", "x_copa", "x_conll"],
"out": [
"bert_features_cola",
"bert_features_rte",
"bert_features_stsb",
"bert_features_copa",
"bert_features_conll"
]
},
{
"id": "vocab_conll",
"class_name": "simple_vocab",
"unk_token": ["O"],
"pad_with_zeros": true,
"save_path": "{MODELS_PATH}/tag.dict",
"load_path": "{MODELS_PATH}/tag.dict",
"fit_on": ["y_conll"],
"in": ["y_conll"],
"out": ["y_ids_conll"]
},
{
"id": "multitask_transformer",
"class_name": "multitask_transformer",
"optimizer_parameters": {"lr": 2e-5},
"gradient_accumulation_steps": "{GRADIENT_ACC_STEPS}",
"learning_rate_drop_patience": 2,
"learning_rate_drop_div": 2.0,
"return_probas": true,
"backbone_model": "{BACKBONE}",
"save_path": "{MODEL_PATH}",
"load_path": "{MODEL_PATH}",
"tasks": {
"cola": {
"type": "classification",
"options": 2
},
"rte": {
"type": "classification",
"options": 2
},
"stsb": {
"type": "regression",
"options": 1
},
"copa": {
"type": "multiple_choice",
"options": 2
},
"conll": {
"type": "sequence_labeling",
"options": "#vocab_conll.len"
}
},
"in": [
"bert_features_cola",
"bert_features_rte",
"bert_features_stsb",
"bert_features_copa",
"bert_features_conll"
],
"in_y": ["y_cola", "y_rte", "y_stsb", "y_copa", "y_ids_conll"],
"out": [
"y_cola_pred_probas",
"y_rte_pred_probas",
"y_stsb_pred",
"y_copa_pred_probas",
"y_conll_pred_ids"
]
},
{
"in": ["y_cola_pred_probas"],
"out": ["y_cola_pred_ids"],
"class_name": "proba2labels",
"max_proba": true
},
{
"in": ["y_rte_pred_probas"],
"out": ["y_rte_pred_ids"],
"class_name": "proba2labels",
"max_proba": true
},
{
"in": ["y_copa_pred_probas"],
"out": ["y_copa_pred_ids"],
"class_name": "proba2labels",
"max_proba": true
},
{
"in": ["y_conll_pred_ids"],
"out": ["y_conll_pred_labels"],
"ref": "vocab_conll"
}
],
"out": ["y_cola_pred_ids", "y_rte_pred_ids", "y_stsb_pred", "y_copa_pred_ids", "y_conll_pred_labels"]
},
"train": {
"epochs": "{NUM_TRAIN_EPOCHS}",
"batch_size": 32,
"metrics": [
{
"name": "multitask_accuracy",
"inputs": ["y_rte", "y_cola", "y_copa", "y_rte_pred_ids", "y_cola_pred_ids", "y_copa_pred_ids"]
},
{
"name": "ner_f1",
"inputs": ["y_conll", "y_conll_pred_labels"]
},
{
"name": "ner_token_f1",
"inputs": ["y_conll", "y_conll_pred_labels"]
},
{
"name": "accuracy",
"alias": "accuracy_cola",
"inputs": ["y_cola", "y_cola_pred_ids"]
},
{
"name": "accuracy",
"alias": "accuracy_rte",
"inputs": ["y_rte", "y_rte_pred_ids"]
},
{
"name": "accuracy",
"alias": "accuracy_copa",
"inputs": ["y_copa", "y_copa_pred_ids"]
},
{
"name": "pearson_correlation",
"alias": "pearson_stsb",
"inputs": ["y_stsb", "y_stsb_pred"]
},
{
"name": "spearman_correlation",
"alias": "spearman_stsb",
"inputs": ["y_stsb", "y_stsb_pred"]
}
],
"validation_patience": 3,
"log_every_n_epochs": 1,
"show_examples": false,
"evaluation_targets": ["valid"],
"class_name": "torch_trainer",
"pytest_max_batches": 2
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"MODELS_PATH": "{ROOT_PATH}/models/multitask_example",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"BACKBONE": "distilbert-base-uncased",
"MODEL_PATH": "{MODELS_PATH}/{BACKBONE}",
"NUM_TRAIN_EPOCHS": 5,
"GRADIENT_ACC_STEPS": 1
},
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/multitask/multitask_example.tar.gz",
"subdir": "{MODELS_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/ner/ner_bert_base.json
================================================
{
"chainer": {
"in": ["x"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_ner_preprocessor",
"vocab_file": "{BASE_MODEL}",
"in": ["x"],
"out": ["x_tokens", "x_subword_tokens", "x_subword_tok_ids", "startofword_markers", "attention_mask", "tokens_offsets"]
},
{
"id": "tag_vocab",
"class_name": "simple_vocab",
"unk_token": ["O"],
"save_path": "{MODEL_PATH}/tag.dict",
"load_path": "{MODEL_PATH}/tag.dict",
"fit_on": ["y"],
"in": ["y"],
"out": ["y_ind"]
},
{
"class_name": "torch_transformers_sequence_tagger",
"n_tags": "#tag_vocab.len",
"pretrained_bert": "{BASE_MODEL}",
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"in": ["x_subword_tok_ids", "attention_mask", "startofword_markers"],
"in_y": ["y_ind"],
"out": ["y_pred_ind", "probas"]
},
{
"ref": "tag_vocab",
"in": ["y_pred_ind"],
"out": ["y_pred"]
}
],
"out": ["x_tokens", "y_pred"]
},
"metadata": {
"variables": {
"BASE_MODEL": "bert-base-multilingual-cased",
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/ner/{BASE_MODEL}"
},
"download": [
{
"url": "http://files.deeppavlov.ai/v1/ner/ner_bert_base.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/ner/ner_case_agnostic_mdistilbert.json
================================================
{
"dataset_reader": {
"class_name": "conll2003_reader",
"data_path": "{DOWNLOADS_PATH}/conll2003/",
"dataset_name": "conll2003",
"provide_pos": false
},
"dataset_iterator": {
"class_name": "data_learning_iterator"
},
"chainer": {
"in": ["x"],
"in_y": ["y"],
"pipe": [
{
"class_name": "torch_transformers_ner_preprocessor",
"vocab_file": "{TRANSFORMER}",
"do_lower_case": false,
"max_seq_length": 512,
"max_subword_length": 15,
"token_masking_prob": 0.0,
"in": ["x"],
"out": ["x_tokens", "x_subword_tokens", "x_subword_tok_ids", "startofword_markers", "attention_mask", "tokens_offsets"]
},
{
"id": "tag_vocab",
"class_name": "simple_vocab",
"unk_token": ["O"],
"pad_with_zeros": true,
"save_path": "{MODEL_PATH}/tag.dict",
"load_path": "{MODEL_PATH}/tag.dict",
"fit_on": ["y"],
"in": ["y"],
"out": ["y_ind"]
},
{
"class_name": "torch_transformers_sequence_tagger",
"n_tags": "#tag_vocab.len",
"pretrained_bert": "{TRANSFORMER}",
"attention_probs_keep_prob": 0.5,
"use_crf": true,
"encoder_layer_ids": [-1],
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 2e-05,
"weight_decay": 1e-06,
"betas": [0.9, 0.999],
"eps": 1e-06
},
"clip_norm": 1.0,
"min_learning_rate": 1e-07,
"learning_rate_drop_patience": 20,
"learning_rate_drop_div": 1.5,
"load_before_drop": true,
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"in": ["x_subword_tok_ids", "attention_mask", "startofword_markers"],
"in_y": ["y_ind"],
"out": ["y_pred_ind", "probas"]
},
{
"ref": "tag_vocab",
"in": ["y_pred_ind"],
"out": ["y_pred"]
}
],
"out": ["x_tokens", "y_pred"]
},
"train": {
"epochs": 50,
"batch_size": 8,
"metrics": [
{
"name": "ner_f1",
"inputs": ["y", "y_pred"]
},
{
"name": "ner_token_f1",
"inputs": ["y", "y_pred"]
}
],
"validation_patience": 100,
"val_every_n_batches": 50,
"log_every_n_batches": 50,
"show_examples": false,
"pytest_max_batches": 2,
"pytest_batch_size": 8,
"evaluation_targets": ["test", "valid"],
"class_name": "torch_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "~/.deeppavlov/downloads",
"MODELS_PATH": "~/.deeppavlov/models",
"TRANSFORMER": "distilbert-base-multilingual-cased",
"MODEL_PATH": "{MODELS_PATH}/ner/ner_case_agnostic_mdistilbert"
},
"download": [
{
"url": "http://files.deeppavlov.ai/v1/ner/ner_case_agnostic_mdistilbert.tar.gz",
"subdir": "{MODELS_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/ner/ner_collection3_bert.json
================================================
{
"dataset_reader": {
"class_name": "conll2003_reader",
"data_path": "{DOWNLOADS_PATH}/collection3/",
"dataset_name": "collection3",
"provide_pos": false,
"provide_chunk": false,
"iobes": true
},
"dataset_iterator": {
"class_name": "data_learning_iterator"
},
"chainer": {
"in": [
"x"
],
"in_y": [
"y"
],
"pipe": [
{
"class_name": "torch_transformers_ner_preprocessor",
"vocab_file": "{TRANSFORMER}",
"do_lower_case": false,
"max_seq_length": 512,
"max_subword_length": 15,
"token_masking_prob": 0.0,
"in": [
"x"
],
"out": [
"x_tokens",
"x_subword_tokens",
"x_subword_tok_ids",
"startofword_markers",
"attention_mask",
"tokens_offsets"
]
},
{
"id": "tag_vocab",
"class_name": "simple_vocab",
"unk_token": [
"O"
],
"pad_with_zeros": true,
"save_path": "{MODEL_PATH}/tag.dict",
"load_path": "{MODEL_PATH}/tag.dict",
"fit_on": [
"y"
],
"in": [
"y"
],
"out": [
"y_ind"
]
},
{
"class_name": "torch_transformers_sequence_tagger",
"n_tags": "#tag_vocab.len",
"pretrained_bert": "{TRANSFORMER}",
"attention_probs_keep_prob": 0.5,
"encoder_layer_ids": [
-1
],
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 2e-05,
"weight_decay": 1e-06,
"betas": [
0.9,
0.999
],
"eps": 1e-06
},
"clip_norm": 1.0,
"min_learning_rate": 1e-07,
"learning_rate_drop_patience": 30,
"learning_rate_drop_div": 1.5,
"load_before_drop": true,
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"in": [
"x_subword_tok_ids",
"attention_mask",
"startofword_markers"
],
"in_y": [
"y_ind"
],
"out": [
"y_pred_ind",
"probas"
]
},
{
"ref": "tag_vocab",
"in": [
"y_pred_ind"
],
"out": [
"y_pred"
]
}
],
"out": [
"x_tokens",
"y_pred"
]
},
"train": {
"epochs": 30,
"batch_size": 10,
"metrics": [
{
"name": "ner_f1",
"inputs": [
"y",
"y_pred"
]
},
{
"name": "ner_token_f1",
"inputs": [
"y",
"y_pred"
]
}
],
"validation_patience": 100,
"val_every_n_batches": 20,
"log_every_n_batches": 20,
"show_examples": false,
"pytest_max_batches": 2,
"pytest_batch_size": 8,
"evaluation_targets": [
"valid",
"test"
],
"class_name": "torch_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"TRANSFORMER": "DeepPavlov/rubert-base-cased",
"MODEL_PATH": "{MODELS_PATH}/ner_rus_bert_coll3_torch"
},
"download": [
{
"url": "http://files.deeppavlov.ai/v1/ner/ner_rus_bert_coll3_torch.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/ner/ner_conll2003_bert.json
================================================
{
"dataset_reader": {
"class_name": "conll2003_reader",
"data_path": "{DOWNLOADS_PATH}/conll2003/",
"dataset_name": "conll2003",
"provide_pos": false
},
"dataset_iterator": {
"class_name": "data_learning_iterator"
},
"chainer": {
"in": [
"x"
],
"in_y": [
"y"
],
"pipe": [
{
"class_name": "torch_transformers_ner_preprocessor",
"vocab_file": "{TRANSFORMER}",
"do_lower_case": false,
"max_seq_length": 512,
"max_subword_length": 15,
"token_masking_prob": 0.0,
"in": [
"x"
],
"out": [
"x_tokens",
"x_subword_tokens",
"x_subword_tok_ids",
"startofword_markers",
"attention_mask",
"tokens_offsets"
]
},
{
"id": "tag_vocab",
"class_name": "simple_vocab",
"unk_token": [
"O"
],
"pad_with_zeros": true,
"save_path": "{MODEL_PATH}/tag.dict",
"load_path": "{MODEL_PATH}/tag.dict",
"fit_on": [
"y"
],
"in": [
"y"
],
"out": [
"y_ind"
]
},
{
"class_name": "torch_transformers_sequence_tagger",
"n_tags": "#tag_vocab.len",
"pretrained_bert": "{TRANSFORMER}",
"attention_probs_keep_prob": 0.5,
"use_crf": true,
"encoder_layer_ids": [
-1
],
"optimizer": "AdamW",
"optimizer_parameters": {
"lr": 2e-05,
"weight_decay": 1e-06,
"betas": [
0.9,
0.999
],
"eps": 1e-06
},
"clip_norm": 1.0,
"min_learning_rate": 1e-07,
"learning_rate_drop_patience": 30,
"learning_rate_drop_div": 1.5,
"load_before_drop": true,
"save_path": "{MODEL_PATH}/model",
"load_path": "{MODEL_PATH}/model",
"in": [
"x_subword_tok_ids",
"attention_mask",
"startofword_markers"
],
"in_y": [
"y_ind"
],
"out": [
"y_pred_ind",
"probas"
]
},
{
"ref": "tag_vocab",
"in": [
"y_pred_ind"
],
"out": [
"y_pred"
]
}
],
"out": [
"x_tokens",
"y_pred"
]
},
"train": {
"epochs": 30,
"batch_size": 16,
"metrics": [
{
"name": "ner_f1",
"inputs": [
"y",
"y_pred"
]
},
{
"name": "ner_token_f1",
"inputs": [
"y",
"y_pred"
]
}
],
"validation_patience": 100,
"val_every_n_batches": 20,
"log_every_n_batches": 20,
"show_examples": false,
"pytest_max_batches": 2,
"pytest_batch_size": 8,
"evaluation_targets": [
"valid",
"test"
],
"class_name": "torch_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"TRANSFORMER": "bert-base-cased",
"MODEL_PATH": "{MODELS_PATH}/ner_conll2003_torch_bert_crf"
},
"download": [
{
"url": "http://files.deeppavlov.ai/v1/ner/ner_conll2003_bert_torch_crf.tar.gz",
"subdir": "{MODEL_PATH}"
}
]
}
}
================================================
FILE: deeppavlov/configs/ner/ner_conll2003_deberta_crf.json
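================================================
[preview truncated here: the contents of this file are not included; the repository directory structure follows]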
gitextract__x5jpadh/
├── .github/
│ └── ISSUE_TEMPLATE/
│ ├── bug_report.md
│ ├── config.yml
│ └── feature-request.md
├── .gitignore
├── .readthedocs.yml
├── CNAME
├── Jenkinsfile
├── LICENSE
├── MANIFEST.in
├── README.md
├── _config.yml
├── _layouts/
│ └── default.html
├── deeppavlov/
│ ├── __init__.py
│ ├── __main__.py
│ ├── _meta.py
│ ├── configs/
│ │ ├── __init__.py
│ │ ├── classifiers/
│ │ │ ├── boolqa_rubert.json
│ │ │ ├── few_shot_roberta.json
│ │ │ ├── glue/
│ │ │ │ ├── glue_cola_roberta.json
│ │ │ │ ├── glue_mnli_cased_bert_torch.json
│ │ │ │ ├── glue_mnli_mm_cased_bert_torch.json
│ │ │ │ ├── glue_mnli_roberta.json
│ │ │ │ ├── glue_mrpc_roberta.json
│ │ │ │ ├── glue_qnli_roberta.json
│ │ │ │ ├── glue_qqp_roberta.json
│ │ │ │ ├── glue_rte_cased_bert_torch.json
│ │ │ │ ├── glue_rte_roberta_mnli.json
│ │ │ │ ├── glue_sst2_roberta.json
│ │ │ │ ├── glue_stsb_roberta.json
│ │ │ │ └── glue_wnli_roberta.json
│ │ │ ├── insults_kaggle_bert.json
│ │ │ ├── paraphraser_convers_distilrubert_2L.json
│ │ │ ├── paraphraser_convers_distilrubert_6L.json
│ │ │ ├── paraphraser_rubert.json
│ │ │ ├── query_pr.json
│ │ │ ├── rusentiment_bert.json
│ │ │ ├── rusentiment_convers_bert.json
│ │ │ ├── rusentiment_convers_distilrubert_2L.json
│ │ │ ├── rusentiment_convers_distilrubert_6L.json
│ │ │ ├── sentiment_sst_conv_bert.json
│ │ │ ├── sentiment_twitter.json
│ │ │ ├── superglue/
│ │ │ │ ├── superglue_boolq_roberta_mnli.json
│ │ │ │ ├── superglue_copa_roberta.json
│ │ │ │ ├── superglue_record_roberta.json
│ │ │ │ └── superglue_wic_bert.json
│ │ │ └── topics_distilbert_base_uncased.json
│ │ ├── doc_retrieval/
│ │ │ ├── en_ranker_pop_wiki.json
│ │ │ ├── en_ranker_tfidf_wiki.json
│ │ │ └── ru_ranker_tfidf_wiki.json
│ │ ├── embedder/
│ │ │ ├── bert_embedder.json
│ │ │ └── bert_sentence_embedder.json
│ │ ├── entity_extraction/
│ │ │ ├── entity_detection_en.json
│ │ │ ├── entity_detection_ru.json
│ │ │ ├── entity_extraction_en.json
│ │ │ ├── entity_extraction_ru.json
│ │ │ ├── entity_linking_en.json
│ │ │ └── entity_linking_ru.json
│ │ ├── faq/
│ │ │ └── fasttext_logreg.json
│ │ ├── kbqa/
│ │ │ ├── kbqa_cq_en.json
│ │ │ ├── kbqa_cq_ru.json
│ │ │ └── wiki_parser.json
│ │ ├── morpho_syntax_parser/
│ │ │ ├── morpho_ru_syntagrus_bert.json
│ │ │ ├── ru_syntagrus_joint_parsing.json
│ │ │ └── syntax_ru_syntagrus_bert.json
│ │ ├── multitask/
│ │ │ ├── mt_glue.json
│ │ │ └── multitask_example.json
│ │ ├── ner/
│ │ │ ├── ner_bert_base.json
│ │ │ ├── ner_case_agnostic_mdistilbert.json
│ │ │ ├── ner_collection3_bert.json
│ │ │ ├── ner_conll2003_bert.json
│ │ │ ├── ner_conll2003_deberta_crf.json
│ │ │ ├── ner_ontonotes_bert.json
│ │ │ ├── ner_ontonotes_bert_mult.json
│ │ │ ├── ner_ontonotes_deberta_crf.json
│ │ │ ├── ner_rus_bert.json
│ │ │ ├── ner_rus_bert_probas.json
│ │ │ ├── ner_rus_convers_distilrubert_2L.json
│ │ │ └── ner_rus_convers_distilrubert_6L.json
│ │ ├── odqa/
│ │ │ ├── en_odqa_infer_wiki.json
│ │ │ ├── en_odqa_pop_infer_wiki.json
│ │ │ └── ru_odqa_infer_wiki.json
│ │ ├── ranking/
│ │ │ ├── path_ranking_nll_roberta_en.json
│ │ │ ├── ranking_ubuntu_v2_torch_bert_uncased.json
│ │ │ ├── rel_ranking_nll_bert_ru.json
│ │ │ └── rel_ranking_roberta_en.json
│ │ ├── regressors/
│ │ │ └── translation_ranker.json
│ │ ├── relation_extraction/
│ │ │ ├── re_docred.json
│ │ │ └── re_rured.json
│ │ ├── russian_super_glue/
│ │ │ ├── russian_superglue_danetqa_rubert.json
│ │ │ ├── russian_superglue_lidirus_rubert.json
│ │ │ ├── russian_superglue_muserc_rubert.json
│ │ │ ├── russian_superglue_parus_rubert.json
│ │ │ ├── russian_superglue_rcb_rubert.json
│ │ │ ├── russian_superglue_rucos_rubert.json
│ │ │ ├── russian_superglue_russe_rubert.json
│ │ │ ├── russian_superglue_rwsd_rubert.json
│ │ │ └── russian_superglue_terra_rubert.json
│ │ ├── sentence_segmentation/
│ │ │ └── sentseg_dailydialog_bert.json
│ │ ├── spelling_correction/
│ │ │ ├── brillmoore_wikitypos_en.json
│ │ │ └── levenshtein_corrector_ru.json
│ │ └── squad/
│ │ ├── qa_multisberquad_bert.json
│ │ ├── qa_nq_psgcls_bert.json
│ │ ├── qa_squad2_bert.json
│ │ ├── squad_bert.json
│ │ ├── squad_ru_bert.json
│ │ ├── squad_ru_convers_distilrubert_2L.json
│ │ └── squad_ru_convers_distilrubert_6L.json
│ ├── core/
│ │ ├── __init__.py
│ │ ├── commands/
│ │ │ ├── __init__.py
│ │ │ ├── infer.py
│ │ │ ├── train.py
│ │ │ └── utils.py
│ │ ├── common/
│ │ │ ├── __init__.py
│ │ │ ├── aliases.py
│ │ │ ├── base.py
│ │ │ ├── chainer.py
│ │ │ ├── cross_validation.py
│ │ │ ├── errors.py
│ │ │ ├── file.py
│ │ │ ├── log.py
│ │ │ ├── log_events.py
│ │ │ ├── metrics_registry.json
│ │ │ ├── metrics_registry.py
│ │ │ ├── params.py
│ │ │ ├── params_search.py
│ │ │ ├── paths.py
│ │ │ ├── prints.py
│ │ │ ├── registry.json
│ │ │ ├── registry.py
│ │ │ └── requirements_registry.json
│ │ ├── data/
│ │ │ ├── __init__.py
│ │ │ ├── data_fitting_iterator.py
│ │ │ ├── data_learning_iterator.py
│ │ │ ├── dataset_reader.py
│ │ │ ├── simple_vocab.py
│ │ │ └── utils.py
│ │ ├── models/
│ │ │ ├── __init__.py
│ │ │ ├── component.py
│ │ │ ├── estimator.py
│ │ │ ├── nn_model.py
│ │ │ ├── serializable.py
│ │ │ └── torch_model.py
│ │ └── trainers/
│ │ ├── __init__.py
│ │ ├── fit_trainer.py
│ │ ├── nn_trainer.py
│ │ ├── torch_trainer.py
│ │ └── utils.py
│ ├── dataset_iterators/
│ │ ├── __init__.py
│ │ ├── basic_classification_iterator.py
│ │ ├── huggingface_dataset_iterator.py
│ │ ├── morphotagger_iterator.py
│ │ ├── multitask_iterator.py
│ │ ├── siamese_iterator.py
│ │ ├── sqlite_iterator.py
│ │ ├── squad_iterator.py
│ │ └── typos_iterator.py
│ ├── dataset_readers/
│ │ ├── __init__.py
│ │ ├── basic_classification_reader.py
│ │ ├── boolqa_reader.py
│ │ ├── conll2003_reader.py
│ │ ├── docred_reader.py
│ │ ├── faq_reader.py
│ │ ├── huggingface_dataset_reader.py
│ │ ├── imdb_reader.py
│ │ ├── line_reader.py
│ │ ├── morphotagging_dataset_reader.py
│ │ ├── multitask_reader.py
│ │ ├── odqa_reader.py
│ │ ├── paraphraser_reader.py
│ │ ├── rel_ranking_reader.py
│ │ ├── rured_reader.py
│ │ ├── sq_reader.py
│ │ ├── squad_dataset_reader.py
│ │ ├── typos_reader.py
│ │ └── ubuntu_v2_reader.py
│ ├── deep.py
│ ├── download.py
│ ├── metrics/
│ │ ├── __init__.py
│ │ ├── accuracy.py
│ │ ├── bleu.py
│ │ ├── correlation.py
│ │ ├── elmo_metrics.py
│ │ ├── fmeasure.py
│ │ ├── google_bleu.py
│ │ ├── log_loss.py
│ │ ├── mse.py
│ │ ├── recall_at_k.py
│ │ ├── record_metrics.py
│ │ ├── roc_auc_score.py
│ │ └── squad_metrics.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── api_requester/
│ │ │ ├── __init__.py
│ │ │ ├── api_requester.py
│ │ │ └── api_router.py
│ │ ├── classifiers/
│ │ │ ├── __init__.py
│ │ │ ├── cos_sim_classifier.py
│ │ │ ├── dnnc_proba2labels.py
│ │ │ ├── proba2labels.py
│ │ │ ├── re_bert.py
│ │ │ ├── torch_classification_model.py
│ │ │ ├── torch_nets.py
│ │ │ └── utils.py
│ │ ├── doc_retrieval/
│ │ │ ├── __init__.py
│ │ │ ├── bpr.py
│ │ │ ├── logit_ranker.py
│ │ │ ├── pop_ranker.py
│ │ │ ├── tfidf_ranker.py
│ │ │ └── utils.py
│ │ ├── embedders/
│ │ │ ├── __init__.py
│ │ │ ├── abstract_embedder.py
│ │ │ ├── fasttext_embedder.py
│ │ │ ├── tfidf_weighted_embedder.py
│ │ │ └── transformers_embedder.py
│ │ ├── entity_extraction/
│ │ │ ├── __init__.py
│ │ │ ├── entity_detection_parser.py
│ │ │ ├── entity_linking.py
│ │ │ ├── find_word.py
│ │ │ └── ner_chunker.py
│ │ ├── kbqa/
│ │ │ ├── __init__.py
│ │ │ ├── query_generator.py
│ │ │ ├── query_generator_base.py
│ │ │ ├── rel_ranking_infer.py
│ │ │ ├── ru_adj_to_noun.py
│ │ │ ├── sentence_answer.py
│ │ │ ├── template_matcher.py
│ │ │ ├── tree_to_sparql.py
│ │ │ ├── type_define.py
│ │ │ ├── utils.py
│ │ │ └── wiki_parser.py
│ │ ├── morpho_syntax_parser/
│ │ │ ├── __init__.py
│ │ │ ├── dependency_decoding.py
│ │ │ ├── joint.py
│ │ │ ├── spacy_lemmatizer.py
│ │ │ └── syntax_parsing.py
│ │ ├── preprocessors/
│ │ │ ├── __init__.py
│ │ │ ├── dirty_comments_preprocessor.py
│ │ │ ├── dnnc_preprocessor.py
│ │ │ ├── mask.py
│ │ │ ├── multitask_preprocessor.py
│ │ │ ├── ner_preprocessor.py
│ │ │ ├── odqa_preprocessors.py
│ │ │ ├── one_hotter.py
│ │ │ ├── re_preprocessor.py
│ │ │ ├── response_base_loader.py
│ │ │ ├── sanitizer.py
│ │ │ ├── sentseg_preprocessor.py
│ │ │ ├── squad_preprocessor.py
│ │ │ ├── str_lower.py
│ │ │ ├── str_token_reverser.py
│ │ │ ├── str_utf8_encoder.py
│ │ │ ├── torch_transformers_preprocessor.py
│ │ │ └── transformers_preprocessor.py
│ │ ├── ranking/
│ │ │ ├── __init__.py
│ │ │ └── metrics.py
│ │ ├── relation_extraction/
│ │ │ ├── __init__.py
│ │ │ ├── losses.py
│ │ │ └── relation_extraction_bert.py
│ │ ├── sklearn/
│ │ │ ├── __init__.py
│ │ │ └── sklearn_component.py
│ │ ├── spelling_correction/
│ │ │ ├── __init__.py
│ │ │ ├── brillmoore/
│ │ │ │ ├── __init__.py
│ │ │ │ └── error_model.py
│ │ │ ├── electors/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── kenlm_elector.py
│ │ │ │ └── top1_elector.py
│ │ │ └── levenshtein/
│ │ │ ├── __init__.py
│ │ │ ├── levenshtein_searcher.py
│ │ │ ├── searcher_component.py
│ │ │ └── tabled_trie.py
│ │ ├── tokenizers/
│ │ │ ├── __init__.py
│ │ │ ├── lazy_tokenizer.py
│ │ │ ├── nltk_moses_tokenizer.py
│ │ │ ├── nltk_tokenizer.py
│ │ │ ├── spacy_tokenizer.py
│ │ │ ├── split_tokenizer.py
│ │ │ └── utils.py
│ │ ├── torch_bert/
│ │ │ ├── __init__.py
│ │ │ ├── crf.py
│ │ │ ├── multitask_transformer.py
│ │ │ ├── torch_bert_ranker.py
│ │ │ ├── torch_transformers_classifier.py
│ │ │ ├── torch_transformers_el_ranker.py
│ │ │ ├── torch_transformers_multiplechoice.py
│ │ │ ├── torch_transformers_nll_ranking.py
│ │ │ ├── torch_transformers_sequence_tagger.py
│ │ │ ├── torch_transformers_squad.py
│ │ │ └── torch_transformers_syntax_parser.py
│ │ └── vectorizers/
│ │ ├── __init__.py
│ │ └── hashing_tfidf_vectorizer.py
│ ├── paramsearch.py
│ ├── requirements/
│ │ ├── datasets.txt
│ │ ├── dependency_decoding.txt
│ │ ├── en_core_web_sm.txt
│ │ ├── faiss.txt
│ │ ├── fasttext.txt
│ │ ├── hdt.txt
│ │ ├── kenlm.txt
│ │ ├── lxml.txt
│ │ ├── opt_einsum.txt
│ │ ├── protobuf.txt
│ │ ├── pytorch.txt
│ │ ├── rapidfuzz.txt
│ │ ├── razdel.txt
│ │ ├── ru_core_news_sm.txt
│ │ ├── sacremoses.txt
│ │ ├── sentencepiece.txt
│ │ ├── slovnet.txt
│ │ ├── sortedcontainers.txt
│ │ ├── torchcrf.txt
│ │ ├── transformers.txt
│ │ ├── udapi.txt
│ │ └── whapi.txt
│ ├── settings.py
│ ├── utils/
│ │ ├── __init__.py
│ │ ├── benchmarks/
│ │ │ ├── __init__.py
│ │ │ └── benchmarks.py
│ │ ├── connector/
│ │ │ ├── __init__.py
│ │ │ └── dialog_logger.py
│ │ ├── pip_wrapper/
│ │ │ ├── __init__.py
│ │ │ └── pip_wrapper.py
│ │ ├── server/
│ │ │ ├── __init__.py
│ │ │ ├── metrics.py
│ │ │ └── server.py
│ │ ├── settings/
│ │ │ ├── __init__.py
│ │ │ ├── dialog_logger_config.json
│ │ │ ├── log_config.json
│ │ │ └── server_config.json
│ │ └── socket/
│ │ ├── __init__.py
│ │ └── socket.py
│ └── vocabs/
│ ├── __init__.py
│ ├── typos.py
│ └── wiki_sqlite.py
├── docs/
│ ├── Makefile
│ ├── _static/
│ │ ├── deeppavlov.css
│ │ └── my_blocks.css
│ ├── _templates/
│ │ └── footer.html
│ ├── apiref/
│ │ ├── core/
│ │ │ ├── commands.rst
│ │ │ ├── common.rst
│ │ │ ├── data.rst
│ │ │ ├── models.rst
│ │ │ └── trainers.rst
│ │ ├── core.rst
│ │ ├── dataset_iterators.rst
│ │ ├── dataset_readers.rst
│ │ ├── metrics.rst
│ │ ├── models/
│ │ │ ├── api_requester.rst
│ │ │ ├── classifiers.rst
│ │ │ ├── doc_retrieval.rst
│ │ │ ├── embedders.rst
│ │ │ ├── entity_extraction.rst
│ │ │ ├── kbqa.rst
│ │ │ ├── preprocessors.rst
│ │ │ ├── relation_extraction.rst
│ │ │ ├── sklearn.rst
│ │ │ ├── spelling_correction.rst
│ │ │ ├── tokenizers.rst
│ │ │ ├── torch_bert.rst
│ │ │ └── vectorizers.rst
│ │ ├── models.rst
│ │ └── vocabs.rst
│ ├── conf.py
│ ├── devguides/
│ │ ├── contribution_guide.rst
│ │ └── registry.rst
│ ├── features/
│ │ ├── hypersearch.rst
│ │ ├── models/
│ │ │ ├── KBQA.ipynb
│ │ │ ├── NER.ipynb
│ │ │ ├── ODQA.ipynb
│ │ │ ├── SQuAD.ipynb
│ │ │ ├── bert.rst
│ │ │ ├── classification.ipynb
│ │ │ ├── entity_extraction.ipynb
│ │ │ ├── few_shot_classification.ipynb
│ │ │ ├── morpho_tagger.ipynb
│ │ │ ├── multitask_bert.rst
│ │ │ ├── neural_ranking.ipynb
│ │ │ ├── popularity_ranking.rst
│ │ │ ├── relation_extraction.ipynb
│ │ │ ├── spelling_correction.ipynb
│ │ │ ├── superglue.rst
│ │ │ ├── syntax_parser.ipynb
│ │ │ └── tfidf_ranking.ipynb
│ │ ├── overview.rst
│ │ └── pretrained_vectors.rst
│ ├── index.rst
│ ├── integrations/
│ │ ├── aws_ec2.rst
│ │ ├── rest_api.rst
│ │ ├── settings.rst
│ │ └── socket_api.rst
│ ├── internships/
│ │ └── internships.rst
│ └── intro/
│ ├── configuration.rst
│ ├── installation.rst
│ ├── overview.rst
│ ├── python.ipynb
│ └── quick_start.rst
├── requirements.txt
├── setup.py
├── tests/
│ ├── __init__.py
│ ├── test_configs/
│ │ └── doc_retrieval/
│ │ ├── en_ranker_pop_wiki_test.json
│ │ ├── en_ranker_tfidf_wiki_test.json
│ │ └── ru_ranker_tfidf_wiki_test.json
│ └── test_quick_start.py
└── utils/
├── Docker/
│ ├── Dockerfile
│ ├── README.md
│ ├── cmd.sh
│ └── docker-compose.yml
├── __init__.py
└── prepare/
├── __init__.py
├── hashes.py
├── optimize_ipynb.py
├── registry.py
└── upload.py
SYMBOL INDEX (1080 symbols across 167 files)
FILE: deeppavlov/__init__.py
function train_model (line 29) | def train_model(config: [str, Path, dict], install: bool = False,
function evaluate_model (line 35) | def evaluate_model(config: [str, Path, dict], install: bool = False,
FILE: deeppavlov/configs/__init__.py
class Struct (line 5) | class Struct:
method __iter__ (line 6) | def __iter__(self) -> Iterator[str]:
method __len__ (line 9) | def __len__(self) -> int:
method __init__ (line 12) | def __init__(self, tree: Dict[str, Union[dict, Path]]) -> None:
method _asdict (line 23) | def _asdict(self, *, to_string: bool=False) -> dict:
method __getitem__ (line 35) | def __getitem__(self, key: str) -> Union[dict, Path]:
method __dir__ (line 44) | def __dir__(self) -> Iterable:
method _ipython_key_completions_ (line 47) | def _ipython_key_completions_(self) -> Iterable:
method __str__ (line 50) | def __str__(self) -> str:
method __repr__ (line 53) | def __repr__(self) -> str:
method _repr_pretty_ (line 56) | def _repr_pretty_(self, p, cycle):
function _build_configs_tree (line 70) | def _build_configs_tree() -> Struct:
FILE: deeppavlov/core/commands/infer.py
function build_model (line 31) | def build_model(config: Union[str, Path, dict], mode: str = 'infer',
function interact_model (line 70) | def interact_model(config: Union[str, Path, dict]) -> None:
function predict_on_stream (line 89) | def predict_on_stream(config: Union[str, Path, dict],
FILE: deeppavlov/core/commands/train.py
function read_data_by_config (line 32) | def read_data_by_config(config: dict):
function get_iterator_from_config (line 61) | def get_iterator_from_config(config: dict, data: dict):
function train_evaluate_model_from_config (line 69) | def train_evaluate_model_from_config(config: Union[str, Path, dict],
FILE: deeppavlov/core/commands/utils.py
function _parse_config_property (line 27) | def _parse_config_property(item: _T, variables: Dict[str, Union[str, Pat...
function _get_variables_from_config (line 42) | def _get_variables_from_config(config: Union[str, Path, dict]):
function _update_requirements (line 65) | def _update_requirements(config: dict) -> dict:
function _overwrite (line 93) | def _overwrite(data: Any, value: Any, nested_keys: list) -> None:
function parse_config (line 110) | def parse_config(config: Union[str, Path, dict], overwrite: Optional[dic...
function expand_path (line 135) | def expand_path(path: Union[str, Path]) -> Path:
function import_packages (line 140) | def import_packages(packages: list) -> None:
function parse_value_with_config (line 146) | def parse_value_with_config(value: Union[str, Path], config: Union[str, ...
FILE: deeppavlov/core/common/base.py
class Element (line 22) | class Element:
method __init__ (line 24) | def __init__(self, component: Union[Component, FunctionType],
class Model (line 46) | class Model(Chainer):
method __init__ (line 48) | def __init__(self, x: Optional[Union[str, list]] = None,
FILE: deeppavlov/core/common/chainer.py
class Chainer (line 29) | class Chainer(Component):
method __init__ (line 50) | def __init__(self, in_x: Union[str, list] = None, out_params: Union[st...
method __getitem__ (line 71) | def __getitem__(self, item):
method _ipython_key_completions_ (line 77) | def _ipython_key_completions_(self):
method __repr__ (line 80) | def __repr__(self):
method _repr_pretty_ (line 98) | def _repr_pretty_(self, p, cycle):
method append (line 127) | def append(self, component: Union[Component, FunctionType], in_x: [str...
method compute (line 185) | def compute(self, x, y=None, targets=None):
method __call__ (line 206) | def __call__(self, *args):
method _compute (line 210) | def _compute(*args, param_names, pipe, targets):
method batched_call (line 241) | def batched_call(self, *args: Reversible, batch_size: int = 16) -> Uni...
method get_main_component (line 271) | def get_main_component(self) -> Optional[Serializable]:
method save (line 278) | def save(self) -> None:
method load (line 283) | def load(self) -> None:
method reset (line 288) | def reset(self) -> None:
method destroy (line 293) | def destroy(self):
FILE: deeppavlov/core/common/cross_validation.py
function change_savepath_for_model (line 33) | def change_savepath_for_model(config):
function delete_dir_for_saved_models (line 49) | def delete_dir_for_saved_models(dirs_for_saved_models):
function create_dirs_to_save_models (line 54) | def create_dirs_to_save_models(dirs_for_saved_models):
function generate_train_valid (line 59) | def generate_train_valid(data, n_folds=5, is_loo=False):
function calc_cv_score (line 85) | def calc_cv_score(config, data=None, n_folds=5, is_loo=False):
FILE: deeppavlov/core/common/errors.py
class ConfigError (line 20) | class ConfigError(Exception):
method __init__ (line 23) | def __init__(self, message):
method __str__ (line 27) | def __str__(self):
FILE: deeppavlov/core/common/file.py
function find_config (line 36) | def find_config(pipeline_config_path: Union[str, Path]) -> Path:
function read_json (line 52) | def read_json(fpath: Union[str, Path]) -> dict:
function save_json (line 57) | def save_json(data: dict, fpath: Union[str, Path]) -> None:
function save_pickle (line 62) | def save_pickle(data: dict, fpath: Union[str, Path]) -> None:
function load_pickle (line 67) | def load_pickle(fpath: Union[str, Path]) -> Any:
function save_jsonl (line 72) | def save_jsonl(data: Iterable[dict], fpath: Union[str, Path]) -> None:
FILE: deeppavlov/core/common/log.py
class ProbeFilter (line 33) | class ProbeFilter(logging.Filter):
method filter (line 36) | def filter(self, record: logging.LogRecord) -> bool:
function init_logger (line 41) | def init_logger():
FILE: deeppavlov/core/common/log_events.py
class TBWriter (line 22) | class TBWriter:
method __init__ (line 23) | def __init__(self, tensorboard_log_dir: str):
method write_train (line 31) | def write_train(self, tag, scalar_value, global_step):
method write_valid (line 34) | def write_valid(self, tag, scalar_value, global_step):
method flush (line 37) | def flush(self):
function get_tb_writer (line 42) | def get_tb_writer(tensorboard_log_dir: Optional[str]) -> Optional[TBWrit...
FILE: deeppavlov/core/common/metrics_registry.py
function fn_from_str (line 33) | def fn_from_str(name: str) -> Callable[..., Any]:
function register_metric (line 46) | def register_metric(metric_name: str) -> Callable[..., Any]:
function get_metric_by_name (line 60) | def get_metric_by_name(name: str) -> Callable[..., Any]:
FILE: deeppavlov/core/common/params.py
function resolve (line 30) | def resolve(val):
function _init_param (line 45) | def _init_param(param, mode):
function from_params (line 58) | def from_params(params: Dict, mode: str = 'infer', **kwargs) -> Union[Co...
FILE: deeppavlov/core/common/params_search.py
class ParamsSearch (line 28) | class ParamsSearch:
method __init__ (line 47) | def __init__(self,
method find_model_path (line 73) | def find_model_path(self, config: dict, key_model: str, path: list = [...
method insert_value_or_dict_into_config (line 99) | def insert_value_or_dict_into_config(config: dict, path: list,
method get_value_from_config (line 123) | def get_value_from_config(config: dict, path: list) -> Any:
method remove_key_from_config (line 146) | def remove_key_from_config(config: dict, path: list) -> Tuple[dict, Any]:
method initialize_params_in_config (line 169) | def initialize_params_in_config(self, basic_config: dict, paths: List[...
method sample_params (line 195) | def sample_params(self, **params) -> dict:
method _sample_from_ranges (line 229) | def _sample_from_ranges(self, opts: dict) -> [int, float]:
method _sample_log (line 252) | def _sample_log(from_: float = 0., to_: float = 1.) -> float:
FILE: deeppavlov/core/common/paths.py
function get_settings_path (line 30) | def get_settings_path() -> Path:
function populate_settings_dir (line 36) | def populate_settings_dir(force: bool = False) -> bool:
FILE: deeppavlov/core/common/prints.py
class RedirectedPrints (line 19) | class RedirectedPrints(redirect_stdout):
method __init__ (line 22) | def __init__(self, new_target=sys.stderr):
FILE: deeppavlov/core/common/registry.py
function cls_from_str (line 34) | def cls_from_str(name: str) -> type:
function register (line 45) | def register(name: str = None) -> type:
function short_name (line 63) | def short_name(cls: type) -> str:
function get_model (line 68) | def get_model(name: str) -> type:
function list_models (line 77) | def list_models() -> list:
FILE: deeppavlov/core/data/data_fitting_iterator.py
class DataFittingIterator (line 25) | class DataFittingIterator:
method __init__ (line 44) | def __init__(self, data: List[str], doc_ids: List[Any] = None,
method get_doc_ids (line 53) | def get_doc_ids(self):
method get_doc_content (line 61) | def get_doc_content(self, doc_id: Any) -> Optional[str]:
method gen_batches (line 73) | def gen_batches(self, batch_size: int, shuffle: bool = None) \
method get_instances (line 109) | def get_instances(self):
FILE: deeppavlov/core/data/data_learning_iterator.py
class DataLearningIterator (line 22) | class DataLearningIterator:
method split (line 35) | def split(self, *args, **kwargs):
method preprocess (line 39) | def preprocess(self, data: List[Tuple[Any, Any]], *args, **kwargs) -> ...
method __init__ (line 43) | def __init__(self, data: Dict[str, List[Tuple[Any, Any]]], seed: int =...
method gen_batches (line 60) | def gen_batches(self, batch_size: int, data_type: str = 'train',
method get_instances (line 91) | def get_instances(self, data_type: str = 'train') -> Tuple[tuple, tuple]:
FILE: deeppavlov/core/data/dataset_reader.py
class DatasetReader (line 18) | class DatasetReader:
method read (line 21) | def read(self, data_path: str, *args, **kwargs) -> Dict[str, List[Tupl...
FILE: deeppavlov/core/data/simple_vocab.py
class SimpleVocabulary (line 31) | class SimpleVocabulary(Estimator):
method __init__ (line 44) | def __init__(self,
method fit (line 64) | def fit(self, *args):
method _add_tokens_with_freqs (line 81) | def _add_tokens_with_freqs(self, tokens, freqs):
method __call__ (line 90) | def __call__(self, batch, is_top=True, **kwargs):
method save (line 103) | def save(self):
method load (line 111) | def load(self):
method load_line (line 128) | def load_line(self, ln):
method len (line 137) | def len(self):
method keys (line 140) | def keys(self):
method values (line 143) | def values(self):
method items (line 146) | def items(self):
method __getitem__ (line 149) | def __getitem__(self, key):
method __contains__ (line 157) | def __contains__(self, item):
method __len__ (line 160) | def __len__(self):
method reset (line 163) | def reset(self):
method idxs2toks (line 172) | def idxs2toks(self, idxs):
FILE: deeppavlov/core/data/utils.py
function get_download_token (line 40) | def get_download_token() -> str:
function s3_download (line 60) | def s3_download(url: str, destination: str) -> None:
function simple_download (line 81) | def simple_download(url: str, destination: Union[Path, str], headers: Op...
function download (line 152) | def download(dest_file_path: [List[Union[str, Path]]], source_url: str, ...
function untar (line 199) | def untar(file_path: Union[Path, str], extract_folder: Optional[Union[Pa...
function ungzip (line 216) | def ungzip(file_path: Union[Path, str], extract_path: Optional[Union[Pat...
function download_decompress (line 238) | def download_decompress(url: str,
function _copytree (line 310) | def _copytree(src: Path, dest: Path) -> None:
function file_md5 (line 329) | def file_md5(fpath: Union[str, Path], chunk_size: int = 2 ** 16) -> Opti...
function mark_done (line 350) | def mark_done(path: Union[Path, str]) -> None:
function is_done (line 367) | def is_done(path: Union[Path, str]) -> bool:
function _get_all_dimensions (line 381) | def _get_all_dimensions(batch: Sequence, level: int = 0, res: Optional[L...
function get_dimensions (line 410) | def get_dimensions(batch: Sequence) -> List[int]:
function zero_pad (line 415) | def zero_pad(batch: Sequence,
function is_str_batch (line 449) | def is_str_batch(batch: Iterable) -> bool:
function flatten_str_batch (line 466) | def flatten_str_batch(batch: Union[str, Iterable]) -> Union[list, chain]:
function zero_pad_truncate (line 486) | def zero_pad_truncate(batch: Sequence[Sequence[Union[int, float, np.inte...
function get_all_elems_from_json (line 520) | def get_all_elems_from_json(search_json: dict, search_key: str) -> list:
function check_nested_dict_keys (line 549) | def check_nested_dict_keys(check_dict: dict, keys: list) -> bool:
function jsonify_data (line 581) | def jsonify_data(data: Any) -> Any:
function chunk_generator (line 613) | def chunk_generator(items_list: list, chunk_size: int) -> Generator[list...
function update_dict_recursive (line 628) | def update_dict_recursive(editable_dict: dict, editing_dict: Mapping) ->...
function path_set_md5 (line 645) | def path_set_md5(url: str) -> str:
function set_query_parameter (line 661) | def set_query_parameter(url: str, param_name: str, param_value: str) -> ...
FILE: deeppavlov/core/models/component.py
class Component (line 22) | class Component(metaclass=ABCMeta):
method __call__ (line 26) | def __call__(self, *args, **kwargs):
method reset (line 29) | def reset(self):
method destroy (line 32) | def destroy(self):
FILE: deeppavlov/core/models/estimator.py
class Estimator (line 21) | class Estimator(Component, Serializable):
method fit (line 25) | def fit(self, *args, **kwargs):
FILE: deeppavlov/core/models/nn_model.py
class NNModel (line 21) | class NNModel(Component, Serializable):
method train_on_batch (line 25) | def train_on_batch(self, x: list, y: list):
method process_event (line 28) | def process_event(self, event_name, data):
FILE: deeppavlov/core/models/serializable.py
class Serializable (line 25) | class Serializable(metaclass=ABCMeta):
method __init__ (line 28) | def __init__(self, save_path: Optional[Union[str, Path]], load_path: O...
method save (line 52) | def save(self, *args, **kwargs):
method load (line 56) | def load(self, *args, **kwargs):
FILE: deeppavlov/core/models/torch_model.py
class TorchModel (line 28) | class TorchModel(NNModel):
method __init__ (line 58) | def __init__(self, model: torch.nn.Module,
method _init_device (line 90) | def _init_device(self, device: Union[torch.device, str]) -> torch.device:
method is_data_parallel (line 102) | def is_data_parallel(self) -> bool:
method load (line 105) | def load(self, fname: Optional[str] = None, *args, **kwargs) -> None:
method save (line 156) | def save(self, fname: Optional[str] = None, *args, **kwargs) -> None:
method process_event (line 189) | def process_event(self, event_name: str, data: dict) -> None:
method train_on_batch (line 214) | def train_on_batch(self, x: list, y: list):
method _make_step (line 217) | def _make_step(self, loss: torch.Tensor) -> None:
FILE: deeppavlov/core/trainers/fit_trainer.py
class FitTrainer (line 38) | class FitTrainer:
method __init__ (line 59) | def __init__(self, chainer_config: dict, *, batch_size: int = -1,
method fit_chainer (line 78) | def fit_chainer(self, iterator: Union[DataFittingIterator, DataLearnin...
method _load (line 115) | def _load(self) -> None:
method get_chainer (line 121) | def get_chainer(self) -> Chainer:
method train (line 126) | def train(self, iterator: Union[DataFittingIterator, DataLearningItera...
method test (line 131) | def test(self, data: Iterable[Tuple[Collection[Any], Collection[Any]]],
method evaluate (line 210) | def evaluate(self, iterator: DataLearningIterator,
FILE: deeppavlov/core/trainers/nn_trainer.py
class NNTrainer (line 37) | class NNTrainer(FitTrainer):
method __init__ (line 95) | def __init__(self, chainer_config: dict, *,
method save (line 153) | def save(self) -> None:
method _is_initial_validation (line 159) | def _is_initial_validation(self):
method _is_first_validation (line 162) | def _is_first_validation(self):
method _validate (line 165) | def _validate(self, iterator: DataLearningIterator,
method _log (line 222) | def _log(self, iterator: DataLearningIterator,
method _send_event (line 259) | def _send_event(self, event_name: str, data: Optional[dict] = None) ->...
method train_on_batches (line 270) | def train_on_batches(self, iterator: DataLearningIterator) -> None:
method train (line 329) | def train(self, iterator: DataLearningIterator) -> None:
FILE: deeppavlov/core/trainers/torch_trainer.py
class TorchTrainer (line 27) | class TorchTrainer(NNTrainer):
method test (line 29) | def test(self, data: Iterable[Tuple[Collection[Any], Collection[Any]]],
method train_on_batches (line 39) | def train_on_batches(self, iterator: DataLearningIterator) -> None:
FILE: deeppavlov/core/trainers/utils.py
function parse_metrics (line 28) | def parse_metrics(metrics: Iterable[Union[str, dict]], in_y: List[str], ...
function prettify_metrics (line 48) | def prettify_metrics(metrics: List[Tuple[str, float]], precision: int = ...
class NumpyArrayEncoder (line 60) | class NumpyArrayEncoder(JSONEncoder):
method default (line 61) | def default(self, obj):
FILE: deeppavlov/dataset_iterators/basic_classification_iterator.py
class BasicClassificationDatasetIterator (line 29) | class BasicClassificationDatasetIterator(DataLearningIterator):
method __init__ (line 52) | def __init__(self, data: dict,
method _split_data (line 102) | def _split_data(self, field_to_split: str = None, split_fields: List[s...
method _merge_data (line 133) | def _merge_data(self, fields_to_merge: List[str] = None, merged_field:...
FILE: deeppavlov/dataset_iterators/huggingface_dataset_iterator.py
class HuggingFaceDatasetIterator (line 24) | class HuggingFaceDatasetIterator(DataLearningIterator):
method preprocess (line 27) | def preprocess(self,
FILE: deeppavlov/dataset_iterators/morphotagger_iterator.py
class MorphoTaggerDatasetIterator (line 24) | class MorphoTaggerDatasetIterator(DataLearningIterator):
method __init__ (line 36) | def __init__(self, data: Dict[str, List[Tuple[Any, Any]]], seed: int =...
method split (line 41) | def split(self, *args, **kwargs) -> None:
method gen_batches (line 53) | def gen_batches(self, batch_size: int, data_type: str = 'train',
FILE: deeppavlov/dataset_iterators/multitask_iterator.py
class MultiTaskIterator (line 32) | class MultiTaskIterator:
method __init__ (line 58) | def __init__(
method _get_data_size (line 134) | def _get_data_size(self, data_type):
method _get_probs (line 138) | def _get_probs(self, data_type):
method _extract_data_type (line 158) | def _extract_data_type(self, data_type):
method _transform_before_yielding (line 165) | def _transform_before_yielding(self, x, y, batch_size):
method gen_batches (line 182) | def gen_batches(self, batch_size: int, data_type: str = "train",
method get_instances (line 240) | def get_instances(self, data_type: str = "train"):
class SingleTaskBatchGenerator (line 275) | class SingleTaskBatchGenerator:
method __init__ (line 287) | def __init__(
method __iter__ (line 312) | def __iter__(self):
method __next__ (line 315) | def __next__(self):
FILE: deeppavlov/dataset_iterators/siamese_iterator.py
class SiameseIterator (line 25) | class SiameseIterator(DataLearningIterator):
method split (line 28) | def split(self, *args, len_valid=1000, len_test=1000, **kwargs) -> None:
FILE: deeppavlov/dataset_iterators/sqlite_iterator.py
class SQLiteDataIterator (line 29) | class SQLiteDataIterator(DataFittingIterator):
method __init__ (line 51) | def __init__(self, load_path: Union[str, Path], batch_size: Optional[i...
method get_doc_ids (line 74) | def get_doc_ids(self) -> List[Any]:
method get_db_name (line 86) | def get_db_name(self) -> str:
method map_doc2idx (line 100) | def map_doc2idx(self) -> Dict[int, Any]:
method get_doc_content (line 112) | def get_doc_content(self, doc_id: Any) -> Optional[str]:
method gen_batches (line 131) | def gen_batches(self, batch_size: int, shuffle: bool = None) \
method get_instances (line 162) | def get_instances(self):
FILE: deeppavlov/dataset_iterators/squad_iterator.py
class SquadIterator (line 26) | class SquadIterator(DataLearningIterator):
method preprocess (line 41) | def preprocess(self, data: Dict[str, Any], *args, **kwargs) -> \
class MultiSquadIterator (line 74) | class MultiSquadIterator(DataLearningIterator):
method __init__ (line 96) | def __init__(self, data, seed: Optional[int] = None, shuffle: bool = T...
method gen_batches (line 103) | def gen_batches(self, batch_size: int, data_type: str = 'train', shuff...
method get_instances (line 144) | def get_instances(self, data_type: str = 'train') -> Tuple[Tuple[Tuple...
class MultiSquadRetrIterator (line 156) | class MultiSquadRetrIterator(DataLearningIterator):
method __init__ (line 181) | def __init__(self, data, seed: Optional[int] = None, shuffle: bool = F...
method gen_batches (line 203) | def gen_batches(self, batch_size: int, data_type: str = 'train', shuff...
method get_instances (line 265) | def get_instances(self, data_type: str = 'train') -> Tuple[Tuple[Tuple...
FILE: deeppavlov/dataset_iterators/typos_iterator.py
class TyposDatasetIterator (line 20) | class TyposDatasetIterator(DataLearningIterator):
method split (line 26) | def split(self, test_ratio: float = 0., *args, **kwargs):
FILE: deeppavlov/dataset_readers/basic_classification_reader.py
class BasicClassificationDatasetReader (line 29) | class BasicClassificationDatasetReader(DatasetReader):
method read (line 34) | def read(self, data_path: str, url: str = None,
FILE: deeppavlov/dataset_readers/boolqa_reader.py
class BoolqaReader (line 26) | class BoolqaReader(DatasetReader):
method read (line 47) | def read(self,
method _build_data (line 81) | def _build_data(ln: str, data_path: Path) -> List[Tuple[Tuple[str, str...
FILE: deeppavlov/dataset_readers/conll2003_reader.py
class Conll2003DatasetReader (line 12) | class Conll2003DatasetReader(DatasetReader):
method read (line 15) | def read(self,
method parse_ner_file (line 60) | def parse_ner_file(self, file_name: Path):
method _iob2_to_iob (line 133) | def _iob2_to_iob(tags):
method _iob2_to_iobes (line 144) | def _iob2_to_iobes(tags):
FILE: deeppavlov/dataset_readers/docred_reader.py
class DocREDDatasetReader (line 34) | class DocREDDatasetReader(DatasetReader):
method read (line 37) | def read(
method split_by_absolute (line 144) | def split_by_absolute(self, all_labeled_data: List) -> Tuple[List, Lis...
method split_by_relative (line 166) | def split_by_relative(self, all_labeled_data: List) -> Tuple[List, Lis...
method process_docred_file (line 179) | def process_docred_file(self, data: List[Dict], neg_samples: str = Non...
method construct_pos_neg_samples (line 240) | def construct_pos_neg_samples(
method construct_neg_samples (line 318) | def construct_neg_samples(
method generate_data_sample (line 343) | def generate_data_sample(
method generate_additional_neg_samples (line 356) | def generate_additional_neg_samples(self, doc: List, forbidden_entitie...
method label_to_one_hot (line 402) | def label_to_one_hot(self, labels: List[int]) -> List:
method print_statistics (line 409) | def print_statistics(self, train_stat: Dict, valid_stat: Dict, test_st...
FILE: deeppavlov/dataset_readers/faq_reader.py
class FaqDatasetReader (line 24) | class FaqDatasetReader(DatasetReader):
method read (line 27) | def read(self, data_path: str = None, data_url: str = None, x_col_name...
FILE: deeppavlov/dataset_readers/huggingface_dataset_reader.py
class HuggingFaceDatasetReader (line 28) | class HuggingFaceDatasetReader(DatasetReader):
method read (line 32) | def read(self,
function interleave_splits (line 158) | def interleave_splits(splits: List[str], percentage: int = 50) -> List[s...
function preprocess_copa (line 176) | def preprocess_copa(examples: Dataset, *, lang: str = "en") -> Dict[str,...
function preprocess_boolq (line 213) | def preprocess_boolq(examples: Dataset) -> Dict[str, List[str]]:
function preprocess_record (line 237) | def preprocess_record(examples: Dataset, *, clean_entities: bool = True)...
function add_label_names (line 332) | def add_label_names(dataset: Dataset, label_column: str, label_names: Li...
function binary_downsample (line 347) | def binary_downsample(dataset: Dataset,
function add_num_examples (line 425) | def add_num_examples(dataset: Dataset) -> Dict[str, List[int]]:
function preprocess_multirc (line 438) | def preprocess_multirc(examples: Dataset, *, clean_paragraphs: bool = Tr...
function preprocess_wsc (line 460) | def preprocess_wsc(dataset: Dataset) -> Dict[str, List[str]]:
FILE: deeppavlov/dataset_readers/imdb_reader.py
class ImdbReader (line 26) | class ImdbReader(DatasetReader):
method read (line 36) | def read(self, data_path: str, url: Optional[str] = None,
FILE: deeppavlov/dataset_readers/line_reader.py
class LineReader (line 22) | class LineReader(DatasetReader):
method read (line 25) | def read(self, data_path: str = None, *args, **kwargs) -> Dict:
FILE: deeppavlov/dataset_readers/morphotagging_dataset_reader.py
function get_language (line 30) | def get_language(filepath: str) -> str:
function read_infile (line 36) | def read_infile(infile: Union[Path, str], *, from_words=False,
class MorphotaggerDatasetReader (line 110) | class MorphotaggerDatasetReader(DatasetReader):
method read (line 115) | def read(self, data_path: Union[List, str],
FILE: deeppavlov/dataset_readers/multitask_reader.py
class MultiTaskReader (line 26) | class MultiTaskReader(DatasetReader):
method read (line 29) | def read(self, tasks: Dict[str, Dict[str, dict]], task_defaults: dict ...
FILE: deeppavlov/dataset_readers/odqa_reader.py
class ODQADataReader (line 35) | class ODQADataReader(DatasetReader):
method read (line 41) | def read(self, data_path: Union[Path, str], db_url: Optional[str] = No...
method iter_files (line 83) | def iter_files(self, path: Union[Path, str]) -> Generator[Path, Any, A...
method _build_db (line 108) | def _build_db(self, save_path: Union[Path, str], dataset_format: str,
method _get_file_contents (line 170) | def _get_file_contents(fpath: Union[Path, str]) -> List[Tuple[str, str]]:
method _get_json_contents (line 186) | def _get_json_contents(fpath: Union[Path, str]) -> List[Tuple[str, str]]:
method _get_wiki_contents (line 210) | def _get_wiki_contents(fpath: Union[Path, str]) -> List[Tuple[str, str]]:
FILE: deeppavlov/dataset_readers/paraphraser_reader.py
class ParaphraserReader (line 25) | class ParaphraserReader(DatasetReader):
method read (line 31) | def read(self,
method _build_data (line 51) | def _build_data(data_path: Path, do_lower_case: bool) -> List[Tuple[Tu...
FILE: deeppavlov/dataset_readers/rel_ranking_reader.py
class ParaphraserReader (line 25) | class ParaphraserReader(DatasetReader):
method read (line 31) | def read(self,
method _build_data (line 51) | def _build_data(data_path: Path, do_lower_case: bool) -> List[Tuple[Tu...
FILE: deeppavlov/dataset_readers/rured_reader.py
class RuREDDatasetReader (line 15) | class RuREDDatasetReader(DatasetReader):
method read (line 18) | def read(self, data_path: str, rel2id: Dict = None) -> Dict[str, List[...
method process_rured_file (line 70) | def process_rured_file(self, data: List[Dict], num_neg_samples: str) -...
method label_to_one_hot (line 150) | def label_to_one_hot(self, label: int) -> List[int]:
method add_default_rel_dict (line 157) | def add_default_rel_dict():
FILE: deeppavlov/dataset_readers/sq_reader.py
class SQReader (line 26) | class SQReader(DatasetReader):
method read (line 29) | def read(self, data_path: str, valid_size: int = None):
class RuBQReader (line 43) | class RuBQReader(SQReader):
method read (line 46) | def read(self, data_path: str, version: str = "2.0", question_types: L...
method preprocess (line 62) | def preprocess(self, sample):
class LCQuADReader (line 76) | class LCQuADReader(SQReader):
method read (line 79) | def read(self, data_path: str, question_types: List[str] = "all",
method preprocess (line 95) | def preprocess(self, sample):
FILE: deeppavlov/dataset_readers/squad_dataset_reader.py
class SquadDatasetReader (line 26) | class SquadDatasetReader(DatasetReader):
method read (line 55) | def read(self, data_path: str, dataset: Optional[str] = 'SQuAD', url: ...
class MultiSquadDatasetReader (line 106) | class MultiSquadDatasetReader(DatasetReader):
method read (line 121) | def read(self, data_path: str, dataset: Optional[str] = 'MultiSQuADRet...
FILE: deeppavlov/dataset_readers/typos_reader.py
class TyposCustom (line 31) | class TyposCustom(DatasetReader):
method __init__ (line 36) | def __init__(self):
method build (line 40) | def build(data_path: str) -> Path:
method read (line 52) | def read(cls, data_path: str, *args, **kwargs) -> Dict[str, List[Tuple...
class TyposWikipedia (line 70) | class TyposWikipedia(TyposCustom):
method build (line 77) | def build(data_path: str) -> Path:
class TyposKartaslov (line 115) | class TyposKartaslov(DatasetReader):
method __init__ (line 121) | def __init__(self):
method build (line 125) | def build(data_path: str) -> Path:
method read (line 149) | def read(data_path: str, *args, **kwargs) -> Dict[str, List[Tuple[str,...
FILE: deeppavlov/dataset_readers/ubuntu_v2_reader.py
class UbuntuV2Reader (line 25) | class UbuntuV2Reader(DatasetReader):
method read (line 31) | def read(self, data_path: str,
method preprocess_data_train (line 56) | def preprocess_data_train(self, train_fname: Union[Path, str]) -> List...
method preprocess_data_validation (line 74) | def preprocess_data_validation(self, fname: Union[Path, str]) -> List[...
FILE: deeppavlov/deep.py
function main (line 57) | def main():
FILE: deeppavlov/download.py
function get_config_downloads (line 43) | def get_config_downloads(config: Union[str, Path, dict]) -> Set[Tuple[st...
function get_configs_downloads (line 66) | def get_configs_downloads(config: Optional[Union[str, Path, dict]] = Non...
function check_md5 (line 80) | def check_md5(url: str, dest_paths: List[Path], headers: Optional[dict] ...
function download_resource (line 129) | def download_resource(url: str, dest_paths: Iterable[Union[Path, str]], ...
function download_resources (line 146) | def download_resources(args: Namespace) -> None:
function deep_download (line 160) | def deep_download(config: Union[str, Path, dict]) -> None:
function main (line 177) | def main(args: Optional[List[str]] = None) -> None:
FILE: deeppavlov/metrics/accuracy.py
function accuracy (line 29) | def accuracy(y_true: [list, np.ndarray], y_predicted: [list, np.ndarray]...
function kbqa_accuracy (line 55) | def kbqa_accuracy(questions_batch, pred_answer_labels_batch, pred_answer...
function multitask_accuracy (line 80) | def multitask_accuracy(*args) -> float:
function multitask_sequence_accuracy (line 101) | def multitask_sequence_accuracy(*args) -> float:
function multitask_token_accuracy (line 125) | def multitask_token_accuracy(*args) -> float:
function sets_accuracy (line 147) | def sets_accuracy(y_true: [list, np.ndarray], y_predicted: [list, np.nda...
function slots_accuracy (line 167) | def slots_accuracy(y_true, y_predicted):
function per_token_accuracy (line 174) | def per_token_accuracy(y_true, y_predicted):
function per_item_dialog_accuracy (line 185) | def per_item_dialog_accuracy(y_true, y_predicted: List[List[str]]):
function round_accuracy (line 195) | def round_accuracy(y_true, y_predicted):
FILE: deeppavlov/metrics/bleu.py
function bleu_advanced (line 27) | def bleu_advanced(y_true: List[Any], y_predicted: List[Any],
function bleu (line 59) | def bleu(y_true, y_predicted):
function google_bleu (line 65) | def google_bleu(y_true, y_predicted):
function per_item_bleu (line 71) | def per_item_bleu(y_true, y_predicted):
function per_item_dialog_bleu (line 78) | def per_item_dialog_bleu(y_true, y_predicted):
FILE: deeppavlov/metrics/correlation.py
function pearson_correlation (line 22) | def pearson_correlation(y_true, y_predicted) -> float:
function spearman_correlation (line 27) | def spearman_correlation(y_true, y_predicted) -> float:
function matthews_correlation (line 32) | def matthews_correlation(y_true, y_predicted) -> float:
FILE: deeppavlov/metrics/elmo_metrics.py
function elmo_loss2ppl (line 23) | def elmo_loss2ppl(losses: List[np.ndarray]) -> float:
FILE: deeppavlov/metrics/fmeasure.py
function ner_f1 (line 29) | def ner_f1(y_true, y_predicted):
function ner_token_f1 (line 53) | def ner_token_f1(y_true, y_predicted, print_results=False):
function _print_conll_report (line 116) | def _print_conll_report(results, accuracy, total_true_entities, total_pr...
function _global_stats_f1 (line 166) | def _global_stats_f1(results):
function round_f1 (line 210) | def round_f1(y_true, y_predicted):
function round_f1_macro (line 237) | def round_f1_macro(y_true, y_predicted):
function round_f1_weighted (line 260) | def round_f1_weighted(y_true, y_predicted):
function chunk_finder (line 282) | def chunk_finder(current_token, previous_token, tag):
function precision_recall_f1 (line 310) | def precision_recall_f1(y_true, y_pred, print_results=True, short_report...
function ner_f1__f1_macro__f1 (line 442) | def ner_f1__f1_macro__f1(ner_true, ner_pred, macro_true, macro_pred, f1_...
function roc_auc__roc_auc__ner_f1 (line 450) | def roc_auc__roc_auc__ner_f1(true_onehot1, pred_probas1, true_onehot2, p...
FILE: deeppavlov/metrics/google_bleu.py
function _get_ngrams (line 28) | def _get_ngrams(segment, max_order):
function compute_bleu (line 48) | def compute_bleu(reference_corpus, translation_corpus, max_order=4,
FILE: deeppavlov/metrics/log_loss.py
function sk_log_loss (line 25) | def sk_log_loss(y_true: Union[List[List[float]], List[List[int]], np.nda...
FILE: deeppavlov/metrics/mse.py
function mse (line 23) | def mse(y_true: Union[np.array, list],
FILE: deeppavlov/metrics/recall_at_k.py
function recall_at_k (line 23) | def recall_at_k(y_true: List[int], y_pred: List[List[np.ndarray]], k: int):
function r_at_1 (line 47) | def r_at_1(y_true, y_pred):
function r_at_2 (line 52) | def r_at_2(y_true, y_pred):
function r_at_5 (line 57) | def r_at_5(labels, predictions):
function r_at_10 (line 62) | def r_at_10(labels, predictions):
FILE: deeppavlov/metrics/record_metrics.py
function record_f1_score (line 13) | def record_f1_score(record_examples: List[RecordNestedExample]):
function record_em_score (line 35) | def record_em_score(record_examples: List[RecordNestedExample]):
function normalize_answer (line 56) | def normalize_answer(s):
function string_f1_score (line 77) | def string_f1_score(prediction, ground_truth):
function exact_match_score (line 93) | def exact_match_score(prediction, ground_truth):
FILE: deeppavlov/metrics/roc_auc_score.py
function roc_auc_score (line 25) | def roc_auc_score(y_true: Union[List[List[float]], List[List[int]], np.n...
FILE: deeppavlov/metrics/squad_metrics.py
function squad_v2_exact_match (line 24) | def squad_v2_exact_match(y_true: List[List[str]], y_predicted: List[str]...
function squad_v1_exact_match (line 44) | def squad_v1_exact_match(y_true: List[List[str]], y_predicted: List[str]...
function squad_v2_f1 (line 68) | def squad_v2_f1(y_true: List[List[str]], y_predicted: List[str]) -> float:
function squad_v1_f1 (line 104) | def squad_v1_f1(y_true: List[List[str]], y_predicted: List[str]) -> float:
function normalize_answer (line 139) | def normalize_answer(s: str) -> str:
FILE: deeppavlov/models/api_requester/api_requester.py
class ApiRequester (line 25) | class ApiRequester(Component):
method __init__ (line 41) | def __init__(self, url: str, out: [int, list], param_names: [list, tup...
method __call__ (line 50) | def __call__(self, *args: List[Any], **kwargs: Dict[str, Any]):
method get_async_response (line 82) | async def get_async_response(self, data: dict, batch_size: int) -> Asy...
FILE: deeppavlov/models/api_requester/api_router.py
class ApiRouter (line 28) | class ApiRouter(Component):
method __init__ (line 40) | def __init__(self, api_requesters: List[ApiRequester], n_workers: int ...
method __call__ (line 44) | def __call__(self, *args):
FILE: deeppavlov/models/classifiers/cos_sim_classifier.py
class CosineSimilarityClassifier (line 33) | class CosineSimilarityClassifier(Estimator, Serializable):
method __init__ (line 42) | def __init__(self, top_n: int = 1, save_path: str = None, load_path: s...
method __call__ (line 51) | def __call__(self, q_vects: Union[csr_matrix, List]) -> Tuple[List[str...
method fit (line 101) | def fit(self, x_train_vects: Tuple[Union[csr_matrix, List]], y_train: ...
method save (line 126) | def save(self) -> None:
method load (line 131) | def load(self) -> None:
FILE: deeppavlov/models/classifiers/dnnc_proba2labels.py
class Proba2Labels (line 27) | class Proba2Labels(Component):
method __init__ (line 38) | def __init__(self,
method __call__ (line 48) | def __call__(self,
FILE: deeppavlov/models/classifiers/proba2labels.py
class Proba2Labels (line 27) | class Proba2Labels(Component):
method __init__ (line 44) | def __init__(self,
method __call__ (line 57) | def __call__(self,
FILE: deeppavlov/models/classifiers/re_bert.py
class BertWithAdaThresholdLocContextPooling (line 18) | class BertWithAdaThresholdLocContextPooling(nn.Module):
method __init__ (line 20) | def __init__(
method forward (line 64) | def forward(
method get_hrt (line 104) | def get_hrt(self, sequence_output: Tensor, attention: Tensor, entity_p...
method load (line 157) | def load(self) -> None:
FILE: deeppavlov/models/classifiers/torch_classification_model.py
class TorchTextClassificationModel (line 30) | class TorchTextClassificationModel(TorchModel):
method __init__ (line 55) | def __init__(self, n_classes: int,
method __call__ (line 86) | def __call__(self, texts: List[np.ndarray], *args) -> Union[List[List[...
method train_on_batch (line 115) | def train_on_batch(self, texts: List[List[np.ndarray]], labels: list) ...
FILE: deeppavlov/models/classifiers/torch_nets.py
class ShallowAndWideCnn (line 21) | class ShallowAndWideCnn(nn.Module):
method __init__ (line 22) | def __init__(self, n_classes: int, embedding_size: int, kernel_sizes_c...
method forward (line 46) | def forward(self, x: torch.Tensor) -> torch.Tensor:
FILE: deeppavlov/models/classifiers/utils.py
function labels2onehot (line 24) | def labels2onehot(labels: [List[str], List[List[str]], np.ndarray], clas...
function proba2labels (line 52) | def proba2labels(proba: [list, np.ndarray], confidence_threshold: float,...
function proba2onehot (line 77) | def proba2onehot(proba: [list, np.ndarray], confidence_threshold: float,...
FILE: deeppavlov/models/doc_retrieval/bpr.py
class FaissBinaryIndex (line 28) | class FaissBinaryIndex:
method __init__ (line 29) | def __init__(self, index: faiss.Index):
method search (line 32) | def search(self, query_embs: np.ndarray, k: int, binary_k=1000, rerank...
class BPR (line 56) | class BPR(Component, Serializable):
method __init__ (line 57) | def __init__(self, pretrained_model: str,
method load (line 77) | def load(self):
method save (line 82) | def save(self) -> None:
method encode_queries (line 85) | def encode_queries(self, queries, batch_size: int = 256) -> np.ndarray:
method __call__ (line 102) | def __call__(self, queries):
FILE: deeppavlov/models/doc_retrieval/logit_ranker.py
class LogitRanker (line 28) | class LogitRanker(Component):
method __init__ (line 45) | def __init__(self, squad_model: Union[Chainer, Component], batch_size:...
method __call__ (line 53) | def __call__(self, contexts_batch: List[List[str]], questions_batch: L...
FILE: deeppavlov/models/doc_retrieval/pop_ranker.py
class PopRanker (line 31) | class PopRanker(Component):
method __init__ (line 57) | def __init__(self, pop_dict_path: str, load_path: str, top_n: int = 3,...
method __call__ (line 69) | def __call__(self, input_doc_ids: List[List[Any]], input_doc_scores: L...
FILE: deeppavlov/models/doc_retrieval/tfidf_ranker.py
class TfidfRanker (line 28) | class TfidfRanker(Component):
method __init__ (line 46) | def __init__(self, vectorizer: HashingTfIdfVectorizer, top_n=5, active...
method __call__ (line 52) | def __call__(self, questions: List[str]) -> Tuple[List[Any], List[floa...
FILE: deeppavlov/models/doc_retrieval/utils.py
function concat_lists (line 23) | def concat_lists(list_a: List[List[Any]], list_b: List[List[Any]]):
function find_answer_sentence (line 30) | def find_answer_sentence(answer_pos: int, context: str) -> str:
FILE: deeppavlov/models/embedders/abstract_embedder.py
class Embedder (line 29) | class Embedder(Component, Serializable, metaclass=ABCMeta):
method __init__ (line 46) | def __init__(self, load_path: Union[str, Path], pad_zero: bool = False...
method save (line 58) | def save(self) -> None:
method __call__ (line 64) | def __call__(self, batch: List[List[str]], mean: bool = None) -> List[...
method __iter__ (line 81) | def __iter__(self) -> Iterator[str]:
method _get_word_vector (line 90) | def _get_word_vector(self, w: str) -> np.ndarray:
method _encode (line 101) | def _encode(self, tokens: List[str], mean: bool) -> Union[List[np.ndar...
FILE: deeppavlov/models/embedders/fasttext_embedder.py
class FasttextEmbedder (line 29) | class FasttextEmbedder(Embedder):
method _get_word_vector (line 45) | def _get_word_vector(self, w: str) -> np.ndarray:
method load (line 48) | def load(self) -> None:
method __iter__ (line 56) | def __iter__(self) -> Iterator[str]:
FILE: deeppavlov/models/embedders/tfidf_weighted_embedder.py
class TfidfWeightedEmbedder (line 30) | class TfidfWeightedEmbedder(Component):
method __init__ (line 78) | def __init__(self,
method load_tags_vocab (line 118) | def load_tags_vocab(load_path: str) -> dict:
method load_counter_vocab (line 141) | def load_counter_vocab(load_path: str) -> Tuple[dict, int]:
method space_detokenizer (line 167) | def space_detokenizer(batch: List[List[str]]) -> List[str]:
method __call__ (line 179) | def __call__(self, batch: List[List[str]], tags_batch: Optional[List[L...
method _encode (line 210) | def _encode(self, tokens: List[str], mean: bool) -> Union[List[np.ndar...
method get_weight (line 247) | def get_weight(self, count: int) -> float:
method _tags_encode (line 262) | def _tags_encode(self, tokens: List[str], tags: List[str], mean: bool)...
FILE: deeppavlov/models/embedders/transformers_embedder.py
class TransformersBertEmbedder (line 26) | class TransformersBertEmbedder(Serializable):
method __init__ (line 38) | def __init__(self, load_path: Union[str, Path], bert_config_path: Unio...
method save (line 48) | def save(self, *args, **kwargs):
method load (line 51) | def load(self):
method __call__ (line 55) | def __call__(self,
FILE: deeppavlov/models/entity_extraction/entity_detection_parser.py
class QuestionSignChecker (line 33) | class QuestionSignChecker:
method __init__ (line 34) | def __init__(self, delete_brackets: bool = False, **kwargs):
method __call__ (line 38) | def __call__(self, questions: List[str]) -> List[str]:
function entity_type_split (line 54) | def entity_type_split(entities_batch: List[List[str]], tags_batch: List[...
class EntityDetectionParser (line 72) | class EntityDetectionParser(Component):
method __init__ (line 75) | def __init__(self, o_tag: str, tags_file: str, entity_tags: List[str] ...
method __call__ (line 118) | def __call__(self, question_tokens_batch: List[List[str]], tokens_info...
method tags_from_probas (line 146) | def tags_from_probas(self, tokens: List[str], probas: np.array) -> Tup...
method correct_tags (line 168) | def correct_tags(self, tokens: List[str], tags: List[str]) -> List[str]:
method correct_quotes (line 194) | def correct_quotes(self, tokens: List[str], tags: List[str], probas: n...
method add_entity (line 226) | def add_entity(self, entity: str, c_tag: str) -> None:
method entities_from_tags (line 246) | def entities_from_tags(self, tokens: List[str], tags: List[str],
FILE: deeppavlov/models/entity_extraction/entity_linking.py
class EntityLinker (line 38) | class EntityLinker(Component, Serializable):
method __init__ (line 43) | def __init__(
method load (line 130) | def load(self) -> None:
method save (line 137) | def save(self) -> None:
method __call__ (line 140) | def __call__(
method link_entities (line 198) | def link_entities(
method define_all_low_conf (line 337) | def define_all_low_conf(self, cand_ent_init, thres):
method correct_tags (line 349) | def correct_tags(self, tags):
method unite_dicts (line 361) | def unite_dicts(self, cand_ent_init, new_cand_ent_init):
method process_cand_ent (line 370) | def process_cand_ent(self, cand_ent_init, entities_and_ids, substr_spl...
method sanitize_substr (line 377) | def sanitize_substr(self, entity_substr, tag):
method find_exact_match (line 384) | def find_exact_match(self, entity_substr, tags, use_tags=True):
method find_fuzzy_match (line 402) | def find_fuzzy_match(self, entity_substr_split, tags, use_tags=True):
method match_tokens (line 422) | def match_tokens(self, entity_substr_split, label_tokens):
method correct_substr_score (line 452) | def correct_substr_score(self, entity_substr_split, label_tokens, subs...
method calc_substr_score (line 477) | def calc_substr_score(self, entity_title, entity_substr_split, tag, en...
method rank_by_description (line 503) | def rank_by_description(
method sort_out_low_conf (line 671) | def sort_out_low_conf(self, entity_substr, top_entities, top_conf):
method rank_by_connections (line 683) | def rank_by_connections(self, ids_list):
FILE: deeppavlov/models/entity_extraction/find_word.py
class WordSearcher (line 27) | class WordSearcher:
method __init__ (line 28) | def __init__(self, words_dict_filename: str, ngrams_matrix_filename: s...
method load (line 41) | def load(self):
method make_ngrams_dicts (line 50) | def make_ngrams_dicts(self):
method __call__ (line 61) | def __call__(self, query, tags):
FILE: deeppavlov/models/entity_extraction/ner_chunker.py
class NerChunker (line 32) | class NerChunker(Component):
method __init__ (line 38) | def __init__(self, vocab_file: str, max_seq_len: int = 400, lowercase:...
method __call__ (line 55) | def __call__(self, docs_batch: List[str]) -> Tuple[List[List[str]], Li...
method sanitize (line 165) | def sanitize(self, text):
class NerChunkModel (line 183) | class NerChunkModel(Component):
method __init__ (line 188) | def __init__(self, ner: Chainer,
method __call__ (line 207) | def __call__(self, text_batch_list: List[List[str]],
method merge_annotations (line 336) | def merge_annotations(self, substr_batch, pos_batch, probas_batch, sub...
FILE: deeppavlov/models/kbqa/query_generator.py
class QueryGenerator (line 37) | class QueryGenerator(QueryGeneratorBase):
method __init__ (line 42) | def __init__(self, wiki_parser: WikiParser,
method __call__ (line 79) | def __call__(self, question_batch: List[str],
method parse_queries_info (line 121) | def parse_queries_info(self, question, queries_info, entity_ids, type_...
method check_valid_query (line 229) | def check_valid_query(self, entities_rel_conn, query_hdt_seq):
method query_parser (line 249) | def query_parser(self, question: str,
method parse_outputs (line 316) | def parse_outputs(self, outputs_list, combs_list, query_info_list, ent...
class QueryFormatter (line 370) | class QueryFormatter(Component):
method __init__ (line 371) | def __init__(self, query_info: Dict[str, str], replace_prefixes: Dict[...
method __call__ (line 375) | def __call__(self, queries_batch):
FILE: deeppavlov/models/kbqa/query_generator_base.py
class QueryGeneratorBase (line 35) | class QueryGeneratorBase(Component, Serializable):
method __init__ (line 41) | def __init__(self, template_matcher: TemplateMatcher,
method load (line 96) | def load(self) -> None:
method save (line 103) | def save(self) -> None:
method find_candidate_answers (line 106) | def find_candidate_answers(self, question: str,
method get_entity_ids (line 162) | def get_entity_ids(self, entities: List[str], tags: List[str], probas:...
method sparql_template_parser (line 183) | def sparql_template_parser(self, question: str,
method find_top_rels (line 241) | def find_top_rels(self, question: str, entity_ids: List[List[str]], tr...
method find_answer_wikihow (line 292) | def find_answer_wikihow(self, howto_sentence: str) -> str:
method query_parser (line 306) | def query_parser(self, question, query_templates, entity_ids, type_ids...
FILE: deeppavlov/models/kbqa/rel_ranking_infer.py
class RelRankerInfer (line 32) | class RelRankerInfer(Component, Serializable):
method __init__ (line 35) | def __init__(self, load_path: str,
method load (line 90) | def load(self) -> None:
method save (line 96) | def save(self) -> None:
method __call__ (line 99) | def __call__(self, questions_batch: List[str],
method preprocess_ranking_input (line 222) | def preprocess_ranking_input(self, question, answers):
method rank_rels (line 256) | def rank_rels(self, question: str, candidate_rels: List[str]) -> List[...
FILE: deeppavlov/models/kbqa/ru_adj_to_noun.py
class RuAdjToNoun (line 31) | class RuAdjToNoun:
method __init__ (line 37) | def __init__(self, freq_dict_filename: str, candidate_nouns: int = 10,...
method search (line 66) | def search(self, word: str):
method make_sparse_matrix (line 82) | def make_sparse_matrix(self, words: List[str]):
FILE: deeppavlov/models/kbqa/sentence_answer.py
function find_tokens (line 38) | def find_tokens(tokens, node, not_inc_node):
function find_inflect_dict (line 46) | def find_inflect_dict(sent_nodes):
function find_wh_node (line 60) | def find_wh_node(sent_nodes):
function find_tokens_to_replace (line 77) | def find_tokens_to_replace(wh_node_head, main_head, question_tokens, que...
function sentence_answer (line 124) | def sentence_answer(question, entity_title, entities=None, template_answ...
FILE: deeppavlov/models/kbqa/template_matcher.py
class RegexpMatcher (line 28) | class RegexpMatcher:
method __init__ (line 29) | def __init__(self, question):
method __call__ (line 32) | def __call__(self, template):
class TemplateMatcher (line 41) | class TemplateMatcher(Serializable):
method __init__ (line 48) | def __init__(self, load_path: str, templates_filename: str,
method load (line 63) | def load(self) -> None:
method save (line 68) | def save(self) -> None:
method __call__ (line 71) | def __call__(self, question: str, entities_from_ner: List[str]) -> \
method sanitize (line 124) | def sanitize(self, question: str) -> str:
method match_template_and_ner (line 132) | def match_template_and_ner(self, entities_cand: List[str], entities_fr...
FILE: deeppavlov/models/kbqa/tree_to_sparql.py
class SlovnetSyntaxParser (line 41) | class SlovnetSyntaxParser(Component, Serializable):
method __init__ (line 44) | def __init__(self, load_path: str, navec_filename: str, syntax_parser_...
method load (line 58) | def load(self) -> None:
method save (line 63) | def save(self) -> None:
method preprocess_sentences (line 66) | def preprocess_sentences(self, sentences, entity_offsets_batch):
method get_markup (line 126) | def get_markup(self, proc_syntax_batch, replace_dict_batch):
method find_cycle (line 193) | def find_cycle(self, ids, head_ids):
method correct_markup (line 200) | def correct_markup(self, words, head_ids, rels, root_n):
method find_root (line 221) | def find_root(self, rels):
method get_elements (line 229) | def get_elements(self, markup_elem):
method correct_cycle (line 238) | def correct_cycle(self, ids, head_ids, rels, markup_elem):
method process_markup (line 249) | def process_markup(self, markup_batch):
method __call__ (line 287) | def __call__(self, sentences, entity_offsets_batch):
class TreeToSparql (line 296) | class TreeToSparql(Component):
method __init__ (line 301) | def __init__(self, sparql_queries_filename: str, syntax_parser: Compon...
method __call__ (line 330) | def __call__(self, questions_batch: List[str], substr_batch: List[List...
method sort_substr (line 463) | def sort_substr(self, substr_batch: List[List[str]], tags_batch: List[...
method syntax_parse (line 480) | def syntax_parse(self, question: str, entity_offsets_list: List[List[i...
method sanitize_question (line 496) | def sanitize_question(self, tree: Node, root: Node, appos_token_nums: ...
method find_root (line 510) | def find_root(self, tree: Node) -> Node:
method find_branch_with_unknown (line 515) | def find_branch_with_unknown(self, root: Node) -> Tuple[str, str]:
method find_modifiers_of_unknown (line 545) | def find_modifiers_of_unknown(self, node: Node) -> Tuple[List[Union[st...
method find_clause_node (line 561) | def find_clause_node(self, root: Node, unknown_branch: Node) -> Tuple[...
method find_entities (line 569) | def find_entities(self, node: Node, positions: List[List[int]]) -> Lis...
method find_year_or_number (line 590) | def find_year_or_number(self, node: Node) -> bool:
method find_year_constraint (line 597) | def find_year_constraint(self, node: Node) -> list:
method find_appos_tokens (line 609) | def find_appos_tokens(self, node: Node, tok_and_ord: List[Tuple[Node, ...
method find_clause_tokens (line 623) | def find_clause_tokens(self, node: Node, tok_and_ord: Dict[int, Node],...
method find_first_last (line 634) | def find_first_last(self, node: Node) -> str:
method find_ranking_tokens (line 651) | def find_ranking_tokens(self, node: Node, appos_token_nums: List[int],...
method choose_grounded_entity (line 662) | def choose_grounded_entity(grounded_entities: List[str], entities_dict...
method build_query (line 676) | def build_query(self, root: Node, unknown_branch: Node, root_desc: Dic...
FILE: deeppavlov/models/kbqa/type_define.py
class AnswerTypesExtractor (line 26) | class AnswerTypesExtractor:
method __init__ (line 29) | def __init__(self, lang: str, types_filename: str, types_sets_filename...
method __call__ (line 58) | def __call__(self, questions_batch: List[str], entity_substr_batch: Li...
FILE: deeppavlov/models/kbqa/utils.py
function find_query_features (line 21) | def find_query_features(query, qualifier_rels=None, question=None, order...
function extract_year (line 38) | def extract_year(question_tokens: List[str], question: str) -> str:
function extract_number (line 61) | def extract_number(question_tokens: List[str], question: str) -> str:
function order_of_answers_sorting (line 78) | def order_of_answers_sorting(question: str) -> str:
function make_combs (line 88) | def make_combs(entity_ids: List[List[str]], permut: bool) -> List[List[s...
function fill_slots (line 105) | def fill_slots(query: str, entity_comb: List[str], type_comb: List[str],...
function correct_variables (line 119) | def correct_variables(query_triplets: List[str], answer_ent: List[str], ...
function query_from_triplets (line 136) | def query_from_triplets(query_triplets: List[str], answer_ent: List[str]...
function fill_query (line 147) | def fill_query(query: List[str], entity_comb: List[str], type_comb: List...
function make_sparql_query (line 172) | def make_sparql_query(query_info: Tuple[List[str], List[str], List[str],...
function merge_sparql_query (line 187) | def merge_sparql_query(query_info: Tuple[List[str], List[str], Dict[str,...
function preprocess_template_queries (line 194) | def preprocess_template_queries(template_queries: Dict[str, Any], kb_pre...
FILE: deeppavlov/models/kbqa/wiki_parser.py
class WikiParser (line 31) | class WikiParser:
method __init__ (line 34) | def __init__(self, wiki_filename: str,
method __call__ (line 88) | def __call__(self, parser_info_list: List[str], queries_list: List[Any...
method execute_queries_list (line 92) | def execute_queries_list(self, parser_info_list: List[str], queries_li...
method execute (line 201) | def execute(self, what_return: List[str],
method define_is_boolean (line 342) | def define_is_boolean(query_hdt_seq):
method merge_combs (line 346) | def merge_combs(comb1, comb2):
method search (line 356) | def search(self, query: List[str], unknown_elem_positions: List[Tuple[...
method find_label (line 396) | def find_label(self, entity: str, question: str = "") -> str:
method format_date (line 458) | def format_date(self, entity, question):
method find_alias (line 484) | def find_alias(self, entity: str) -> List[str]:
method find_rels (line 491) | def find_rels(self, entity: str, direction: str, rel_type: str = "no_t...
method find_rels_2hop (line 513) | def find_rels_2hop(self, entity_ids, rels_1hop):
method find_object (line 537) | def find_object(self, entity: str, rel: str, direction: str) -> List[s...
method check_triplet (line 561) | def check_triplet(self, subj: str, rel: str, obj: str) -> bool:
method find_types (line 584) | def find_types(self, entity: str):
method find_subclasses (line 605) | def find_subclasses(self, entity: str):
method uncompress (line 622) | def uncompress(self, triplets: Union[str, List[List[str]]]) -> List[Li...
method parse_triplets (line 628) | def parse_triplets(self, entity):
method find_triplets (line 639) | def find_triplets(self, subj: str, direction: str) -> Tuple[str, List[...
method fill_triplets (line 648) | def fill_triplets(self, init_triplets, what_to_return, comb):
FILE: deeppavlov/models/morpho_syntax_parser/dependency_decoding.py
class ChuLiuEdmonds (line 25) | class ChuLiuEdmonds(Component):
method __init__ (line 30) | def __init__(self, min_edge_prob=1e-6, **kwargs):
method __call__ (line 33) | def __call__(self, probs: List[np.ndarray]) -> List[List[int]]:
FILE: deeppavlov/models/morpho_syntax_parser/joint.py
class JointTaggerParser (line 25) | class JointTaggerParser(Component):
method __init__ (line 39) | def __init__(self, tagger: Chainer, parser: Chainer,
method __call__ (line 49) | def __call__(self, data: Union[List[str], List[List[str]]]) \
FILE: deeppavlov/models/morpho_syntax_parser/spacy_lemmatizer.py
class SpacyLemmatizer (line 24) | class SpacyLemmatizer(Component):
method __init__ (line 25) | def __init__(self, model: str, **kwargs):
method __call__ (line 28) | def __call__(self, words_batch: List[List[str]]):
FILE: deeppavlov/models/morpho_syntax_parser/syntax_parsing.py
function make_pos_and_tag (line 21) | def make_pos_and_tag(tag: str, sep: str = ",",
class OutputPrettifier (line 46) | class OutputPrettifier(Component):
method __init__ (line 49) | def __init__(self, return_string: bool = True, begin: str = "", end: s...
method prettify (line 56) | def prettify(self, tokens: List[str], heads: List[int], deps: List[str...
method __call__ (line 59) | def __call__(self, X: List[List[str]], Y: List[List[int]], Z: List[Lis...
class DependencyOutputPrettifier (line 72) | class DependencyOutputPrettifier(OutputPrettifier):
method __init__ (line 81) | def __init__(self, return_string: bool = True, begin: str = "", end: s...
method prettify (line 86) | def prettify(self, tokens: List[str], heads: List[int], deps: List[str...
class LemmatizedOutputPrettifier (line 104) | class LemmatizedOutputPrettifier(OutputPrettifier):
method __init__ (line 120) | def __init__(self, return_string: bool = True, begin: str = "", end: s...
method prettify (line 125) | def prettify(self, tokens: List[str], tags: List[str], lemmas: List[st...
FILE: deeppavlov/models/preprocessors/dirty_comments_preprocessor.py
class DirtyCommentsPreprocessor (line 24) | class DirtyCommentsPreprocessor(Component):
method __init__ (line 29) | def __init__(self, remove_punctuation: bool = True, *args, **kwargs):
method __call__ (line 32) | def __call__(self, batch: List[str], **kwargs) -> List[str]:
FILE: deeppavlov/models/preprocessors/dnnc_preprocessor.py
class PairGenerator (line 27) | class PairGenerator(Component):
method __init__ (line 35) | def __init__(self, bidirectional: bool = False, **kwargs) -> None:
method __call__ (line 38) | def __call__(self,
FILE: deeppavlov/models/preprocessors/mask.py
class Mask (line 22) | class Mask(Component):
method __init__ (line 24) | def __init__(self, *args, **kwargs):
method __call__ (line 28) | def __call__(tokens_batch, **kwargs):
FILE: deeppavlov/models/preprocessors/multitask_preprocessor.py
class MultiTaskPipelinePreprocessor (line 12) | class MultiTaskPipelinePreprocessor(Component):
method __init__ (line 30) | def __init__(self,
method split (line 57) | def split(self, features):
method __call__ (line 81) | def __call__(self, *args):
FILE: deeppavlov/models/preprocessors/ner_preprocessor.py
class NerVocab (line 15) | class NerVocab(Estimator):
method __init__ (line 25) | def __init__(self,
method load_from_file (line 44) | def load_from_file(self, filename):
method save_to_file (line 54) | def save_to_file(self, filename):
method fit (line 65) | def fit(self, sents: [List[List[str]]], *args):
method pad_batch (line 80) | def pad_batch(self, tokens: List[List[int]]):
method __call__ (line 109) | def __call__(self, sents, **kwargs):
method load (line 119) | def load(self, *args, **kwargs):
method save (line 124) | def save(self, *args, **kwargs):
method len (line 135) | def len(self):
method t2i (line 139) | def t2i(self):
method i2t (line 143) | def i2t(self):
FILE: deeppavlov/models/preprocessors/odqa_preprocessors.py
class DocumentChunker (line 28) | class DocumentChunker(Component):
method __init__ (line 46) | def __init__(self, sentencize_fn: Callable = sent_tokenize, keep_sente...
method __call__ (line 56) | def __call__(self, batch_docs: List[Union[str, List[str]]],
class StringMultiplier (line 138) | class StringMultiplier(Component):
method __init__ (line 144) | def __init__(self, **kwargs):
method __call__ (line 147) | def __call__(self, batch_s: List[str], ref: List[str]) -> List[List[st...
FILE: deeppavlov/models/preprocessors/one_hotter.py
class OneHotter (line 26) | class OneHotter(Component):
method __init__ (line 37) | def __init__(self, depth: int, pad_zeros: bool = False,
method __call__ (line 45) | def __call__(self, batch: List[List[int]], **kwargs) -> Union[List[Lis...
method _to_one_hot (line 78) | def _to_one_hot(x, n):
FILE: deeppavlov/models/preprocessors/re_preprocessor.py
class REPreprocessor (line 30) | class REPreprocessor(Component):
method __init__ (line 31) | def __init__(
method __call__ (line 66) | def __call__(
method encode_ner_tag (line 194) | def encode_ner_tag(self, ner_tags: List) -> List:
class REPostprocessor (line 205) | class REPostprocessor:
method __init__ (line 207) | def __init__(self, rel2id_path: str, rel2label_path: str, **kwargs):
method __call__ (line 214) | def __call__(self, model_output: List, nf_samples: List) -> Tuple[List...
FILE: deeppavlov/models/preprocessors/response_base_loader.py
class ResponseBaseLoader (line 27) | class ResponseBaseLoader(Serializable):
method __init__ (line 30) | def __init__(self, *args, **kwargs):
method load (line 38) | def load(self):
method save (line 63) | def save(self):
FILE: deeppavlov/models/preprocessors/sanitizer.py
class Sanitizer (line 24) | class Sanitizer(Component):
method __init__ (line 33) | def __init__(self,
method filter_diacritical (line 42) | def filter_diacritical(self, tokens_batch):
method replace_nums (line 53) | def replace_nums(self, tokens_batch):
method __call__ (line 59) | def __call__(self, tokens_batch, **kwargs):
FILE: deeppavlov/models/preprocessors/sentseg_preprocessor.py
function SentSegRestoreSent (line 7) | def SentSegRestoreSent(batch_words: List[List[str]], batch_tags: List[Li...
FILE: deeppavlov/models/preprocessors/squad_preprocessor.py
class SquadBertMappingPreprocessor (line 27) | class SquadBertMappingPreprocessor(Component):
method __init__ (line 33) | def __init__(self, do_lower_case: bool = True, *args, **kwargs):
method __call__ (line 36) | def __call__(self, contexts_batch, bert_features_batch, subtokens_batc...
class SquadBertAnsPreprocessor (line 71) | class SquadBertAnsPreprocessor(Component):
method __init__ (line 77) | def __init__(self, do_lower_case: bool = True, *args, **kwargs):
method __call__ (line 80) | def __call__(self, answers_raw, answers_start, char2subtoks, **kwargs):
class SquadBertAnsPostprocessor (line 104) | class SquadBertAnsPostprocessor(Component):
method __init__ (line 107) | def __init__(self, *args, **kwargs):
method __call__ (line 110) | def __call__(self, answers_start_batch, answers_end_batch, contexts_ba...
method get_char_position (line 138) | def get_char_position(sub2c, sub_pos):
FILE: deeppavlov/models/preprocessors/str_lower.py
function str_lower (line 21) | def str_lower(batch: Union[str, list, tuple]):
FILE: deeppavlov/models/preprocessors/str_token_reverser.py
class StrTokenReverser (line 24) | class StrTokenReverser(Component):
method __init__ (line 31) | def __init__(self, tokenized: bool = False, *args, **kwargs) -> None:
method _reverse_str (line 35) | def _reverse_str(raw_string):
method _reverse_tokens (line 42) | def _reverse_tokens(raw_tokens):
method __call__ (line 46) | def __call__(self, batch: Union[str, list, tuple]) -> StrTokenReverser...
FILE: deeppavlov/models/preprocessors/str_utf8_encoder.py
class StrUTF8Encoder (line 34) | class StrUTF8Encoder(Estimator):
method __init__ (line 47) | def __init__(self,
method __call__ (line 115) | def __call__(self, batch: Union[List[str], Tuple[str]]) -> StrUTF8Enco...
method load (line 132) | def load(self) -> None:
method save (line 145) | def save(self) -> None:
method fit (line 151) | def fit(self, *args) -> None:
method _convert_word_to_char_ids (line 159) | def _convert_word_to_char_ids(self, word):
method _word_to_char_ids (line 185) | def _word_to_char_ids(self, word):
method _encode_chars (line 191) | def _encode_chars(self, sentence):
method _wrap_in_s_char (line 199) | def _wrap_in_s_char(self, chars_ids):
method __len__ (line 207) | def __len__(self):
method len (line 211) | def len(self):
FILE: deeppavlov/models/preprocessors/torch_transformers_preprocessor.py
class TorchTransformersMultiplechoicePreprocessor (line 40) | class TorchTransformersMultiplechoicePreprocessor(Component):
method __init__ (line 54) | def __init__(self,
method tokenize_mc_examples (line 66) | def tokenize_mc_examples(self,
method __call__ (line 96) | def __call__(self, texts_a: List[List[str]], texts_b: List[List[str]] ...
class TorchTransformersPreprocessor (line 116) | class TorchTransformersPreprocessor(Component):
method __init__ (line 131) | def __init__(self,
method __call__ (line 139) | def __call__(self, texts_a: List, texts_b: Optional[List[str]] = None)...
class TorchTransformersEntityRankerPreprocessor (line 174) | class TorchTransformersEntityRankerPreprocessor(Component):
method __init__ (line 186) | def __init__(self,
method __call__ (line 208) | def __call__(self, texts_a: List[str]) -> Tuple[Any, List[int]]:
class TorchSquadTransformersPreprocessor (line 257) | class TorchSquadTransformersPreprocessor(Component):
method __init__ (line 271) | def __init__(self,
method __call__ (line 286) | def __call__(self, question_batch: List[str], context_batch: Optional[...
class RelRankingPreprocessor (line 360) | class RelRankingPreprocessor(Component):
method __init__ (line 369) | def __init__(self,
method __call__ (line 377) | def __call__(self, questions_batch: List[List[str]], rels_batch: List[...
class PathRankingPreprocessor (line 417) | class PathRankingPreprocessor(Component):
method __init__ (line 418) | def __init__(self,
method __call__ (line 430) | def __call__(self, questions_batch: List[str], rels_batch: List[List[L...
class TorchTransformersNerPreprocessor (line 478) | class TorchTransformersNerPreprocessor(Component):
method __init__ (line 503) | def __init__(self,
method __call__ (line 528) | def __call__(self,
method _ner_bert_tokenize (line 619) | def _ner_bert_tokenize(tokens: List[str],
class TorchBertRankerPreprocessor (line 656) | class TorchBertRankerPreprocessor(TorchTransformersPreprocessor):
method __call__ (line 662) | def __call__(self, batch: List[List[str]]) -> List[List[InputFeatures]]:
class RecordFlatExample (line 709) | class RecordFlatExample:
class RecordNestedExample (line 720) | class RecordNestedExample:
class TorchRecordPostprocessor (line 730) | class TorchRecordPostprocessor:
method __init__ (line 741) | def __init__(self, is_binary: bool = False, *args, **kwargs):
method __call__ (line 746) | def __call__(self,
method reset_accumulator (line 788) | def reset_accumulator(self):
class RecordExampleAccumulator (line 794) | class RecordExampleAccumulator:
method __init__ (line 807) | def __init__(self):
method add_flat_example (line 816) | def add_flat_example(self, index: str, label: int, probability: float,...
method ready_to_nest (line 831) | def ready_to_nest(self, index: str) -> bool:
method collect_nested_example (line 840) | def collect_nested_example(self, index: str):
method return_examples (line 866) | def return_examples(self) -> List[RecordNestedExample]:
method get_expected_len (line 882) | def get_expected_len(index: str) -> int:
FILE: deeppavlov/models/preprocessors/transformers_preprocessor.py
function _pad (line 27) | def _pad(data: List[List[Union[int, float]]], value: Union[int, float] =...
class TransformersBertPreprocessor (line 36) | class TransformersBertPreprocessor(Component):
method __init__ (line 37) | def __init__(self, vocab_file: str,
method __call__ (line 47) | def __call__(self, tokens_batch: Union[List[str], List[List[str]]]) ->\
FILE: deeppavlov/models/ranking/metrics.py
function rank_response (line 21) | def rank_response(y_true, y_pred):
function r_at_1_insQA (line 35) | def r_at_1_insQA(y_true, y_pred):
function recall_at_k_insQA (line 39) | def recall_at_k_insQA(y_true, y_pred, k):
FILE: deeppavlov/models/relation_extraction/losses.py
class ATLoss (line 11) | class ATLoss(nn.Module):
method __init__ (line 12) | def __init__(self):
method forward (line 15) | def forward(self, logits: Tensor, labels: Tensor) -> float:
method get_label (line 43) | def get_label(self, logits: Tensor, num_labels: int = -1, threshold: f...
FILE: deeppavlov/models/relation_extraction/relation_extraction_bert.py
class REBertModel (line 16) | class REBertModel(TorchModel):
method __init__ (line 18) | def __init__(
method train_on_batch (line 53) | def train_on_batch(
method __call__ (line 80) | def __call__(
FILE: deeppavlov/models/sklearn/sklearn_component.py
class SklearnComponent (line 34) | class SklearnComponent(Estimator):
method __init__ (line 64) | def __init__(self, model_class: str,
method fit (line 86) | def fit(self, *args) -> None:
method __call__ (line 121) | def __call__(self, *args):
method init_from_scratch (line 159) | def init_from_scratch(self) -> None:
method load (line 185) | def load(self, fname: str = None) -> None:
method save (line 224) | def save(self, fname: str = None) -> None:
method compose_input_data (line 247) | def compose_input_data(x: List[Union[Tuple[Union[np.ndarray, list, spm...
method get_function_params (line 287) | def get_function_params(f: Callable) -> List[str]:
method get_class_attributes (line 300) | def get_class_attributes(cls: type) -> List[str]:
FILE: deeppavlov/models/spelling_correction/brillmoore/error_model.py
class ErrorModel (line 34) | class ErrorModel(Estimator):
method __init__ (line 50) | def __init__(self, dictionary: StaticDictionary, window: int = 1, cand...
method _find_candidates_window_0 (line 70) | def _find_candidates_window_0(self, word, prop_threshold=1e-6):
method _find_candidates_window_n (line 99) | def _find_candidates_window_n(self, word, prop_threshold=1e-6):
method _infer_instance (line 134) | def _infer_instance(self, instance: List[str]) -> List[List[Tuple[floa...
method __call__ (line 147) | def __call__(self, data: Iterable[Iterable[str]], *args, **kwargs) -> ...
method _distance_edits (line 162) | def _distance_edits(seq1, seq2):
method fit (line 182) | def fit(self, x: List[str], y: List[str]):
method save (line 219) | def save(self):
method load (line 230) | def load(self):
FILE: deeppavlov/models/spelling_correction/electors/kenlm_elector.py
class KenlmElector (line 29) | class KenlmElector(Component):
method __init__ (line 41) | def __init__(self, load_path: Path, beam_size: int = 4, *args, **kwargs):
method __call__ (line 45) | def __call__(self, batch: List[List[List[Tuple[float, str]]]]) -> List...
method _infer_instance (line 56) | def _infer_instance(self, candidates: List[List[Tuple[float, str]]]):
FILE: deeppavlov/models/spelling_correction/electors/top1_elector.py
class TopOneElector (line 25) | class TopOneElector(Component):
method __init__ (line 30) | def __init__(self, *args, **kwargs):
method __call__ (line 33) | def __call__(self, batch: List[List[List[Tuple[float, str]]]]) -> List...
FILE: deeppavlov/models/spelling_correction/levenshtein/levenshtein_searcher.py
class LevenshteinSearcher (line 24) | class LevenshteinSearcher:
method __init__ (line 31) | def __init__(self, alphabet, dictionary, operation_costs=None,
method __contains__ (line 56) | def __contains__(self, word):
method search (line 59) | def search(self, word, d, allow_spaces=True, return_cost=True):
method _trie_search (line 70) | def _trie_search(self, word, d, transducer=None,
method _precompute_euristics (line 138) | def _precompute_euristics(self):
method _define_h_function (line 173) | def _define_h_function(self):
method _euristic_h_function (line 179) | def _euristic_h_function(self, suffix, index):
method _minimal_replacement_cost (line 216) | def _minimal_replacement_cost(self, first, second):
function _precompute_absense_costs (line 229) | def _precompute_absense_costs(dictionary, removal_costs, insertion_costs...
class SegmentTransducer (line 287) | class SegmentTransducer:
method __init__ (line 307) | def __init__(self, alphabet, operation_costs=None, allow_spaces=False):
method get_operation_cost (line 324) | def get_operation_cost(self, up, low):
method inverse (line 346) | def inverse(self):
method distance (line 360) | def distance(self, first, second, return_transduction=False):
method transduce (line 405) | def transduce(self, first, second, threshold):
method lower_transductions (line 425) | def lower_transductions(self, word, max_cost, return_cost=True):
method lower (line 457) | def lower(self, word, max_cost, return_cost=True):
method upper (line 471) | def upper(self, word, max_cost, return_cost=True):
method upper_transductions (line 475) | def upper_transductions(self, word, max_cost, return_cost=True):
method _fill_levenshtein_table (line 479) | def _fill_levenshtein_table(self, first, second, update_func, add_pred...
method _make_reversed_operation_costs (line 561) | def _make_reversed_operation_costs(self):
method _make_maximal_key_lengths (line 574) | def _make_maximal_key_lengths(self):
method _backtraces_to_transductions (line 595) | def _backtraces_to_transductions(self, first, second, backtraces, thre...
method _perform_insertions (line 641) | def _perform_insertions(self, initial, max_cost):
method _make_default_operation_costs (line 671) | def _make_default_operation_costs(self, allow_spaces=False):
FILE: deeppavlov/models/spelling_correction/levenshtein/searcher_component.py
class LevenshteinSearcherComponent (line 27) | class LevenshteinSearcherComponent(Component):
method __init__ (line 45) | def __init__(self, words: Iterable[str], max_distance: int = 1, error_...
method _infer_instance (line 54) | def _infer_instance(self, tokens: Iterable[str]) -> List[List[Tuple[fl...
method __call__ (line 66) | def __call__(self, batch: Iterable[Iterable[str]], *args, **kwargs) ->...
FILE: deeppavlov/models/spelling_correction/levenshtein/tabled_trie.py
class Trie (line 21) | class Trie:
method __init__ (line 44) | def __init__(self, alphabet, make_sorted=True, make_alphabet_codes=True,
method initialize (line 58) | def initialize(self):
method _make_default_node (line 66) | def _make_default_node(self):
method save (line 75) | def save(self, outfile):
method make_cashed (line 98) | def make_cashed(self):
method make_numpied (line 105) | def make_numpied(self):
method add (line 110) | def add(self, s):
method fit (line 131) | def fit(self, words):
method terminate (line 136) | def terminate(self):
method __contains__ (line 146) | def __contains__(self, s):
method words (line 153) | def words(self):
method is_final (line 176) | def is_final(self, index):
method find_partitions (line 188) | def find_partitions(self, s, max_count=1):
method __len__ (line 214) | def __len__(self):
method __repr__ (line 217) | def __repr__(self):
method _add_descendant (line 231) | def _add_descendant(self, parent, s, final=False):
method _add_empty_child (line 237) | def _add_empty_child(self, parent, code, final=False):
method _descend_simple (line 248) | def _descend_simple(self, curr, s):
method _descend_cashed (line 258) | def _descend_cashed(self, curr, s):
method _set_final (line 278) | def _set_final(self, curr):
method _get_letters (line 284) | def _get_letters(self, index, return_indexes=False):
method _get_children_and_letters (line 297) | def _get_children_and_letters(self, index, return_indexes=False):
method _get_children (line 308) | def _get_children(self, index):
class TrieMinimizer (line 318) | class TrieMinimizer:
method __init__ (line 319) | def __init__(self):
method minimize (line 322) | def minimize(self, trie, dict_storage=False, make_cashed=False, make_n...
method generate_postorder (line 392) | def generate_postorder(self, trie):
function load_trie (line 416) | def load_trie(infile):
function make_trie (line 464) | def make_trie(alphabet, words, compressed=True, is_numpied=False,
function precompute_future_symbols (line 478) | def precompute_future_symbols(trie, n, allow_spaces=False):
FILE: deeppavlov/models/tokenizers/lazy_tokenizer.py
function lazy_tokenizer (line 25) | def lazy_tokenizer(batch):
FILE: deeppavlov/models/tokenizers/nltk_moses_tokenizer.py
class NLTKMosesTokenizer (line 23) | class NLTKMosesTokenizer(Component):
method __init__ (line 35) | def __init__(self, escape: bool = False, *args, **kwargs):
method __call__ (line 40) | def __call__(self, batch: List[Union[str, List[str]]]) -> List[Union[L...
FILE: deeppavlov/models/tokenizers/nltk_tokenizer.py
class NLTKTokenizer (line 24) | class NLTKTokenizer(Component):
method __init__ (line 35) | def __init__(self, tokenizer: str = "wordpunct_tokenize", download: bo...
method __call__ (line 43) | def __call__(self, batch: List[str]) -> List[List[str]]:
FILE: deeppavlov/models/tokenizers/spacy_tokenizer.py
function _try_load_spacy_model (line 29) | def _try_load_spacy_model(model_name: str, disable: Iterable[str] = ()):
class StreamSpacyTokenizer (line 44) | class StreamSpacyTokenizer(Component):
method __init__ (line 78) | def __init__(self, disable: Optional[Iterable[str]] = None, filter_sto...
method __call__ (line 95) | def __call__(self, batch: Union[List[str], List[List[str]]]) -> Union[...
method _tokenize (line 119) | def _tokenize(self, data: List[str], ngram_range: Optional[Tuple[int, ...
method _lemmatize (line 155) | def _lemmatize(self, data: List[str], ngram_range: Optional[Tuple[int,...
method _filter (line 189) | def _filter(self, items: List[str], alphas_only: bool = True) -> List[...
method set_stopwords (line 212) | def set_stopwords(self, stopwords: List[str]) -> None:
FILE: deeppavlov/models/tokenizers/split_tokenizer.py
class SplitTokenizer (line 22) | class SplitTokenizer(Component):
method __init__ (line 29) | def __init__(self, **kwargs) -> None:
method __call__ (line 32) | def __call__(self, batch: List[str]) -> List[List[str]]:
FILE: deeppavlov/models/tokenizers/utils.py
function detokenize (line 19) | def detokenize(tokens):
function ngramize (line 38) | def ngramize(items: List[str], ngram_range=(1, 1), doc: str = None) -> G...
FILE: deeppavlov/models/torch_bert/crf.py
class CRF (line 7) | class CRF(CRFbase):
method __init__ (line 12) | def __init__(self, num_tags: int, batch_first: bool = False) -> None:
method forward (line 21) | def forward(self, tags_batch: torch.LongTensor, y_masks: np.ndarray):
FILE: deeppavlov/models/torch_bert/multitask_transformer.py
class FocalLoss (line 35) | class FocalLoss(nn.Module):
method __init__ (line 38) | def __init__(self, alpha=.5, gamma=2, categorical_loss=False, weight=N...
method forward (line 45) | def forward(self, inputs, targets):
function SoftCrossEntropyLoss (line 57) | def SoftCrossEntropyLoss(inputs, targets):
function we_transform_input (line 62) | def we_transform_input(name):
class BertForMultiTask (line 66) | class BertForMultiTask(nn.Module):
method __init__ (line 77) | def __init__(self, tasks_num_classes, multilabel, task_types,
method get_logits (line 120) | def get_logits(self, task_id, input_ids, attention_mask, token_type_ids):
method predict_on_top (line 152) | def predict_on_top(self, task_id, last_hidden_state, labels=None):
method forward (line 235) | def forward(self, task_id, input_ids, attention_mask, token_type_ids, ...
class MultiTaskTransformer (line 241) | class MultiTaskTransformer(TorchModel):
method __init__ (line 260) | def __init__(
method _reset_cache (line 319) | def _reset_cache(self):
method load (line 322) | def load(self, fname: Optional[str] = None, *args, **kwargs) -> None:
method _make_input (line 332) | def _make_input(self, task_features, task_id, labels=None):
method __call__ (line 389) | def __call__(self, *args):
method train_on_batch (line 474) | def train_on_batch(self, *args):
FILE: deeppavlov/models/torch_bert/torch_bert_ranker.py
class TorchBertRankerModel (line 33) | class TorchBertRankerModel(TorchModel):
method __init__ (line 46) | def __init__(self, pretrained_bert: str = None,
method train_on_batch (line 98) | def train_on_batch(self, features_li: List[List[InputFeatures]], y: Un...
method __call__ (line 125) | def __call__(self, features_li: List[List[InputFeatures]]) -> Union[Li...
FILE: deeppavlov/models/torch_bert/torch_transformers_classifier.py
class TorchTransformersClassifierModel (line 34) | class TorchTransformersClassifierModel(TorchModel):
method __init__ (line 51) | def __init__(self, n_classes,
method train_on_batch (line 123) | def train_on_batch(self, features: Dict[str, torch.tensor], y: Union[L...
method __call__ (line 156) | def __call__(self, features: Dict[str, torch.tensor]) -> Union[List[in...
method accepted_keys (line 196) | def accepted_keys(self) -> Tuple[str]:
class AutoModelForBinaryClassification (line 204) | class AutoModelForBinaryClassification(torch.nn.Module):
method __init__ (line 206) | def __init__(self, pretrained_bert, config):
method forward (line 216) | def forward(self,
class BinaryClassificationHead (line 257) | class BinaryClassificationHead(torch.nn.Module):
method __init__ (line 258) | def __init__(self, config):
method init_weights (line 267) | def init_weights(self):
method forward (line 272) | def forward(self, features, **kwargs):
FILE: deeppavlov/models/torch_bert/torch_transformers_el_ranker.py
class TorchTransformersElRanker (line 36) | class TorchTransformersElRanker(TorchModel):
method __init__ (line 48) | def __init__(
method train_on_batch (line 71) | def train_on_batch(self, q_features: List[Dict],
method __call__ (line 104) | def __call__(self, q_features: List[Dict],
method save (line 137) | def save(self, fname: Optional[str] = None, *args, **kwargs) -> None:
class TextEncoder (line 153) | class TextEncoder(nn.Module):
method __init__ (line 161) | def __init__(self, pretrained_bert: str = None,
method forward (line 174) | def forward(self,
method load (line 195) | def load(self) -> None:
class BilinearRanking (line 211) | class BilinearRanking(nn.Module):
method __init__ (line 219) | def __init__(self, n_classes: int = 2, emb_size: int = 768, block_size...
method forward (line 227) | def forward(self, text1: Tensor, text2: Tensor):
class SiameseBertElModel (line 237) | class SiameseBertElModel(nn.Module):
method __init__ (line 249) | def __init__(
method forward (line 270) | def forward(
method save (line 299) | def save(self) -> None:
class TorchTransformersEntityRankerInfer (line 311) | class TorchTransformersEntityRankerInfer:
method __init__ (line 325) | def __init__(self, pretrained_bert,
method __call__ (line 356) | def __call__(self, contexts_batch: List[str],
FILE: deeppavlov/models/torch_bert/torch_transformers_multiplechoice.py
class TorchTransformersMultiplechoiceModel (line 32) | class TorchTransformersMultiplechoiceModel(TorchModel):
method __init__ (line 47) | def __init__(self, n_classes,
method train_on_batch (line 85) | def train_on_batch(self, features: Dict[str, torch.tensor], y: Union[L...
method __call__ (line 110) | def __call__(self, features: Dict[str, torch.tensor]) -> Union[List[in...
FILE: deeppavlov/models/torch_bert/torch_transformers_nll_ranking.py
class TorchTransformersNLLRanker (line 35) | class TorchTransformersNLLRanker(TorchModel):
method __init__ (line 44) | def __init__(
method train_on_batch (line 63) | def train_on_batch(self, input_features: Dict[str, Any], positive_idx:...
method __call__ (line 83) | def __call__(self, input_features: Dict[str, Any]) -> Union[List[int],...
class NLLRanking (line 105) | class NLLRanking(nn.Module):
method __init__ (line 115) | def __init__(
method forward (line 140) | def forward(
method load (line 171) | def load(self) -> None:
FILE: deeppavlov/models/torch_bert/torch_transformers_sequence_tagger.py
function token_from_subtoken (line 32) | def token_from_subtoken(units: torch.Tensor, mask: torch.Tensor) -> torc...
function token_labels_to_subtoken_labels (line 117) | def token_labels_to_subtoken_labels(labels, y_mask, input_mask):
class TorchTransformersSequenceTagger (line 134) | class TorchTransformersSequenceTagger(TorchModel):
method __init__ (line 147) | def __init__(self,
method train_on_batch (line 175) | def train_on_batch(self,
method __call__ (line 214) | def __call__(self,
method load (line 252) | def load(self, fname=None):
method save (line 265) | def save(self, fname: Optional[str] = None, *args, **kwargs) -> None:
FILE: deeppavlov/models/torch_bert/torch_transformers_squad.py
function softmax_mask (line 33) | def softmax_mask(val, mask):
class PassageReaderClassifier (line 38) | class PassageReaderClassifier(torch.nn.Module):
method __init__ (line 46) | def __init__(self, config):
method forward (line 52) | def forward(self, input_ids, attention_mask, token_type_ids):
class TorchTransformersSquad (line 64) | class TorchTransformersSquad(TorchModel):
method __init__ (line 83) | def __init__(self,
method train_on_batch (line 116) | def train_on_batch(self, features: List[List[InputFeatures]],
method accepted_keys (line 162) | def accepted_keys(self) -> Tuple[str]:
method __call__ (line 169) | def __call__(self, features_batch: List[List[InputFeatures]]) -> Tuple[
FILE: deeppavlov/models/torch_bert/torch_transformers_syntax_parser.py
class Biaffine (line 35) | class Biaffine(nn.Module):
method __init__ (line 36) | def __init__(self, in1_features: int, in2_features: int, out_features:...
method forward (line 42) | def forward(self, input1: torch.Tensor, input2: torch.Tensor) -> torch...
class PairwiseBilinear (line 48) | class PairwiseBilinear(nn.Module):
method __init__ (line 53) | def __init__(self, in1_features: int, in2_features: int, out_features:...
method reset_parameters (line 65) | def reset_parameters(self):
method forward (line 71) | def forward(self, input1: torch.Tensor, input2: torch.Tensor) -> torch...
method extra_repr (line 83) | def extra_repr(self) -> str:
function mask_arc (line 90) | def mask_arc(lengths: torch.Tensor, mask_diag: bool = True) -> Optional[...
class SyntaxParserNetwork (line 105) | class SyntaxParserNetwork(torch.nn.Module):
method __init__ (line 112) | def __init__(self, n_deps: int, pretrained_bert: str, encoder_layer_id...
method forward (line 146) | def forward(self, input_ids, attention_mask, subtoken_mask, y_heads=No...
class TorchTransformersSyntaxParser (line 230) | class TorchTransformersSyntaxParser(TorchModel):
method __init__ (line 245) | def __init__(self, pretrained_bert: str,
method train_on_batch (line 259) | def train_on_batch(self, input_ids: Union[List[List[int]], np.ndarray],
method __call__ (line 284) | def __call__(self, input_ids: Union[List[List[int]], np.ndarray],
FILE: deeppavlov/models/vectorizers/hashing_tfidf_vectorizer.py
function hash_ (line 33) | def hash_(token: str, hash_size: int) -> int:
class HashingTfIdfVectorizer (line 47) | class HashingTfIdfVectorizer(Estimator):
method __init__ (line 68) | def __init__(self, tokenizer: Component, hash_size=2 ** 24, doc_index:...
method __call__ (line 90) | def __call__(self, questions: List[str]) -> Sparse:
method get_index2doc (line 129) | def get_index2doc(self) -> Dict[Any, int]:
method get_counts (line 138) | def get_counts(self, docs: List[str], doc_ids: List[Any]) \
method get_count_matrix (line 168) | def get_count_matrix(self, row: List[int], col: List[int], data: List[...
method get_tfidf_matrix (line 187) | def get_tfidf_matrix(count_matrix: Sparse) -> Tuple[Sparse, np.array]:
method save (line 207) | def save(self) -> None:
method reset (line 237) | def reset(self) -> None:
method load (line 248) | def load(self) -> Tuple[Sparse, Dict]:
method partial_fit (line 270) | def partial_fit(self, docs: List[str], doc_ids: List[Any], doc_nums: L...
method fit (line 290) | def fit(self, docs: List[str], doc_ids: List[Any], doc_nums: List[int]...
FILE: deeppavlov/paramsearch.py
function get_best_params (line 43) | def get_best_params(combinations, scores, param_names, target_metric):
function main (line 51) | def main():
FILE: deeppavlov/settings.py
function main (line 24) | def main():
FILE: deeppavlov/utils/benchmarks/benchmarks.py
function split_config (line 71) | def split_config(config_path, download):
function get_predictions (line 90) | def get_predictions(model, data_gen, replace_word=None, round_res=False):
function submit_glue (line 113) | def submit_glue(config_path, output_path, download):
function commonsense_reasoning_prediction (line 154) | def commonsense_reasoning_prediction(model, data_gen):
function multi_sentence_comprehension_prediction (line 183) | def multi_sentence_comprehension_prediction(model, data_gen):
function submit_superglue (line 232) | def submit_superglue(config_path, output_path, download):
function submit_rsg (line 274) | def submit_rsg(config_path, output_path, download):
function main (line 316) | def main():
FILE: deeppavlov/utils/connector/dialog_logger.py
class DialogLogger (line 31) | class DialogLogger:
method __init__ (line 45) | def __init__(self, enabled: bool = False, logger_name: Optional[str] =...
method _get_timestamp_utc_str (line 56) | def _get_timestamp_utc_str() -> str:
method _get_log_file (line 65) | def _get_log_file(self):
method _log (line 77) | def _log(self, utterance: Any, direction: str, dialog_id: Optional[Has...
method log_in (line 109) | def log_in(self, utterance: Any, dialog_id: Optional[Hashable] = None)...
method log_out (line 118) | def log_out(self, utterance: Any, dialog_id: Optional[Hashable] = None...
FILE: deeppavlov/utils/pip_wrapper/pip_wrapper.py
function install (line 30) | def install(*packages):
function get_config_requirements (line 42) | def get_config_requirements(config: [str, Path, dict]):
function install_from_config (line 55) | def install_from_config(config: [str, Path, dict]):
FILE: deeppavlov/utils/server/metrics.py
function metrics (line 30) | def metrics(request: Request) -> Response:
class PrometheusMiddleware (line 34) | class PrometheusMiddleware(BaseHTTPMiddleware):
method __init__ (line 35) | def __init__(self, app: ASGIApp, ignore_paths: Tuple = ()) -> None:
method dispatch (line 39) | async def dispatch(self, request: Request, call_next: RequestResponseE...
FILE: deeppavlov/utils/server/server.py
function get_server_params (line 73) | def get_server_params(model_config: Union[str, Path]) -> Dict:
function get_ssl_params (line 97) | def get_ssl_params(server_params: dict,
function redirect_root_to_docs (line 124) | def redirect_root_to_docs(fast_app: FastAPI, func_name: str, endpoint: s...
function interact (line 134) | def interact(model: Chainer, payload: Dict[str, Optional[List]]) -> List:
function test_interact (line 167) | def test_interact(model: Chainer, payload: Dict[str, Optional[List]]) ->...
function start_model_server (line 176) | def start_model_server(model_config: Path,
FILE: deeppavlov/utils/socket/socket.py
function encode (line 34) | def encode(data: Any) -> bytes:
class SocketServer (line 61) | class SocketServer:
method __init__ (line 86) | def __init__(self,
method start (line 127) | def start(self) -> None:
method _handle_client (line 139) | async def _handle_client(self, reader: asyncio.StreamReader, writer: a...
method _interact (line 170) | async def _interact(self, data: dict) -> bytes:
method _response (line 206) | def _response(status: str = 'OK', payload: Optional[List[Tuple]] = Non...
function start_socket_server (line 221) | def start_socket_server(model_config: Path, socket_type: str, port: Opti...
FILE: deeppavlov/vocabs/typos.py
class StaticDictionary (line 32) | class StaticDictionary:
method __init__ (line 48) | def __init__(self, data_dir: [Path, str] = '', *args, dictionary_name:...
method _get_source (line 90) | def _get_source(data_dir, raw_dictionary_path, *args, **kwargs):
method _normalize (line 97) | def _normalize(word):
class RussianWordsVocab (line 102) | class RussianWordsVocab(StaticDictionary):
method __init__ (line 116) | def __init__(self, data_dir: [Path, str] = '', *args, **kwargs):
method _get_source (line 121) | def _get_source(*args, **kwargs):
class Wiki100KDictionary (line 129) | class Wiki100KDictionary(StaticDictionary):
method __init__ (line 144) | def __init__(self, data_dir: [Path, str] = '', *args, **kwargs):
method _get_source (line 149) | def _get_source(*args, **kwargs):
FILE: deeppavlov/vocabs/wiki_sqlite.py
class WikiSQLiteVocab (line 26) | class WikiSQLiteVocab(SQLiteDataIterator, Component):
method __init__ (line 39) | def __init__(self, load_path: str, join_docs: bool = True, shuffle: bo...
method __call__ (line 43) | def __call__(self, doc_ids: Optional[List[List[Any]]] = None, *args, *...
FILE: setup.py
function read_requirements (line 24) | def read_requirements():
function readme (line 40) | def readme():
FILE: tests/test_quick_start.py
function _override_with_test_values (line 294) | def _override_with_test_values(item: Union[dict, list]) -> None:
function download_config (line 306) | def download_config(config_path):
function install_config (line 341) | def install_config(config_path):
function setup_module (line 351) | def setup_module():
function teardown_module (line 369) | def teardown_module():
function _infer (line 377) | def _infer(config, inputs, download=False):
class TestQuickStart (line 389) | class TestQuickStart(object):
method infer (line 391) | def infer(config_path, qr_list=None, check_outputs=True):
method infer_api (line 406) | def infer_api(config_path, qr_list):
method infer_socket (line 448) | def infer_socket(config_path, socket_type):
method test_inferring_pretrained_model (line 511) | def test_inferring_pretrained_model(self, model, conf_file, model_dir,...
method test_inferring_pretrained_model_api (line 521) | def test_inferring_pretrained_model_api(self, model, conf_file, model_...
method test_inferring_pretrained_model_socket (line 527) | def test_inferring_pretrained_model_socket(self, model, conf_file, mod...
method test_consecutive_training_and_inferring (line 538) | def test_consecutive_training_and_inferring(self, model, conf_file, mo...
function test_crossvalidation (line 563) | def test_crossvalidation():
function test_hashes_existence (line 587) | def test_hashes_existence():
function test_aliases (line 611) | def test_aliases():
FILE: utils/prepare/hashes.py
function tar_md5 (line 27) | def tar_md5(fpath: Union[str, Path], chunk_size: int = 2 ** 16) -> Dict[...
function gzip_md5 (line 44) | def gzip_md5(fpath: Union[str, Path], chunk_size: int = 2 ** 16) -> str:
function zip_md5 (line 52) | def zip_md5(fpath: Union[str, Path], chunk_size: int = 2 ** 16) -> Dict[...
function compute_hashes (line 66) | def compute_hashes(fpath: Union[str, Path]) -> Dict[str, str]:
function main (line 82) | def main(fname: str, outfile: Optional[str] = None) -> None:
FILE: utils/prepare/optimize_ipynb.py
function merge_markdown (line 27) | def merge_markdown(nb: nbf.notebooknode.NotebookNode) -> None:
function drop_metadata (line 45) | def drop_metadata(nb: nbf.notebooknode.NotebookNode) -> None:
function update_file (line 52) | def update_file(path: Path, update_ckpts: bool) -> None:
function main (line 74) | def main() -> None:
FILE: utils/prepare/upload.py
function upload (line 25) | def upload(config_in_file: str, tar_name: str, tar_output_dir: Path):
Condensed preview — 411 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (1,964K chars).
[
{
"path": ".github/ISSUE_TEMPLATE/bug_report.md",
"chars": 664,
"preview": "---\nname: Bug report\nabout: Report on a bug you encountered\ntitle: ''\nlabels: bug\nassignees: ''\n\n---\n\nWant to contribute"
},
{
"path": ".github/ISSUE_TEMPLATE/config.yml",
"chars": 206,
"preview": "blank_issues_enabled: false\ncontact_links:\n - name: Ask a question\n url: https://forum.deeppavlov.ai/\n about: If "
},
{
"path": ".github/ISSUE_TEMPLATE/feature-request.md",
"chars": 441,
"preview": "---\nname: Feature request\nabout: Suggest a feature to improve the DeepPavlov library\ntitle: ''\nlabels: enhancement\nassig"
},
{
"path": ".gitignore",
"chars": 1397,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
},
{
"path": ".readthedocs.yml",
"chars": 192,
"preview": "# .readthedocs.yml\nversion: 2\n\nbuild:\n os: \"ubuntu-20.04\"\n tools:\n python: \"3.10\"\nformats: []\n\npython:\n install:\n "
},
{
"path": "CNAME",
"chars": 13,
"preview": "deeppavlov.ai"
},
{
"path": "Jenkinsfile",
"chars": 1870,
"preview": "node('cuda-module') {\n timestamps {\n try {\n stage('Clean') {\n sh \"rm -rf .[^.] .??* "
},
{
"path": "LICENSE",
"chars": 11434,
"preview": " Apache License\n Version 2.0, January 2004\n "
},
{
"path": "MANIFEST.in",
"chars": 167,
"preview": "include README.MD\ninclude LICENSE\ninclude requirements.txt\ninclude deeppavlov/requirements/*.txt\nrecursive-include deepp"
},
{
"path": "README.md",
"chars": 8009,
"preview": "# DeepPavlov 1.0\n\n[](LICENSE)\n![Python "
},
{
"path": "_config.yml",
"chars": 83,
"preview": "theme: jekyll-theme-leap-day\ngoogle_analytics: UA-139843736-5\ninclude:\n - _static\n"
},
{
"path": "_layouts/default.html",
"chars": 3604,
"preview": "<!doctype html>\n<html lang=\"{{ site.lang | default: \"en-US\" }}\">\n <head>\n <meta charset=\"utf-8\">\n <meta http-equi"
},
{
"path": "deeppavlov/__init__.py",
"chars": 1998,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/__main__.py",
"chars": 66,
"preview": "if __name__ == '__main__':\n from .deep import main\n\n main()\n"
},
{
"path": "deeppavlov/_meta.py",
"chars": 324,
"preview": "__version__ = '1.7.0'\n__author__ = 'Neural Networks and Deep Learning lab, MIPT'\n__description__ = 'An open source libra"
},
{
"path": "deeppavlov/configs/__init__.py",
"chars": 2398,
"preview": "from pathlib import Path\nfrom typing import Iterator, Dict, Union, Iterable\n\n\nclass Struct:\n def __iter__(self) -> It"
},
{
"path": "deeppavlov/configs/classifiers/boolqa_rubert.json",
"chars": 1655,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"boolqa_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/boolqa_data\",\n \"languag"
},
{
"path": "deeppavlov/configs/classifiers/few_shot_roberta.json",
"chars": 1612,
"preview": "{\n \"chainer\": {\n \"in\": [\"texts\", \"dataset\"],\n \"in_y\": [\"y_true\"],\n \"pipe\": [\n {\n \"class_name\": \"dn"
},
{
"path": "deeppavlov/configs/classifiers/glue/glue_cola_roberta.json",
"chars": 2751,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/classifiers/glue/glue_mnli_cased_bert_torch.json",
"chars": 2604,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"glue\",\n \"name\": \"mnli\",\n \"tra"
},
{
"path": "deeppavlov/configs/classifiers/glue/glue_mnli_mm_cased_bert_torch.json",
"chars": 2613,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"glue\",\n \"name\": \"mnli\",\n \"tra"
},
{
"path": "deeppavlov/configs/classifiers/glue/glue_mnli_roberta.json",
"chars": 2802,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/classifiers/glue/glue_mrpc_roberta.json",
"chars": 2768,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/classifiers/glue/glue_qnli_roberta.json",
"chars": 2792,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/classifiers/glue/glue_qqp_roberta.json",
"chars": 2443,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/classifiers/glue/glue_rte_cased_bert_torch.json",
"chars": 2583,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"glue\",\n \"name\": \"rte\",\n \"trai"
},
{
"path": "deeppavlov/configs/classifiers/glue/glue_rte_roberta_mnli.json",
"chars": 2772,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/classifiers/glue/glue_sst2_roberta.json",
"chars": 2746,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/classifiers/glue/glue_stsb_roberta.json",
"chars": 2147,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/classifiers/glue/glue_wnli_roberta.json",
"chars": 2861,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/classifiers/insults_kaggle_bert.json",
"chars": 3229,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"basic_classification_reader\",\n \"x\": \"Comment\",\n \"y\": \"Class\",\n \"data"
},
{
"path": "deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_2L.json",
"chars": 2444,
"preview": " {\n \"dataset_reader\": {\n \"class_name\": \"paraphraser_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/paraphraser_data\",\n "
},
{
"path": "deeppavlov/configs/classifiers/paraphraser_convers_distilrubert_6L.json",
"chars": 2444,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"paraphraser_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/paraphraser_data\",\n "
},
{
"path": "deeppavlov/configs/classifiers/paraphraser_rubert.json",
"chars": 2164,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"paraphraser_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/paraphraser_data\",\n "
},
{
"path": "deeppavlov/configs/classifiers/query_pr.json",
"chars": 2861,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"sq_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/query_prediction/query_predict"
},
{
"path": "deeppavlov/configs/classifiers/rusentiment_bert.json",
"chars": 3166,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"basic_classification_reader\",\n \"x\": \"text\",\n \"y\": \"label\",\n \"data_pa"
},
{
"path": "deeppavlov/configs/classifiers/rusentiment_convers_bert.json",
"chars": 3205,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"basic_classification_reader\",\n \"x\": \"text\",\n \"y\": \"label\",\n \"data_pa"
},
{
"path": "deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_2L.json",
"chars": 3400,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"basic_classification_reader\",\n \"x\": \"text\",\n \"y\": \"label\",\n \"data_pa"
},
{
"path": "deeppavlov/configs/classifiers/rusentiment_convers_distilrubert_6L.json",
"chars": 3397,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"basic_classification_reader\",\n \"x\": \"text\",\n \"y\": \"label\",\n \"data_pa"
},
{
"path": "deeppavlov/configs/classifiers/sentiment_sst_conv_bert.json",
"chars": 3167,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"basic_classification_reader\",\n \"x\": \"text\",\n \"y\": \"fine_grained_label\","
},
{
"path": "deeppavlov/configs/classifiers/sentiment_twitter.json",
"chars": 3505,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"basic_classification_reader\",\n \"x\": \"Twit\",\n \"y\": \"Class\",\n \"data_pa"
},
{
"path": "deeppavlov/configs/classifiers/superglue/superglue_boolq_roberta_mnli.json",
"chars": 3021,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/classifiers/superglue/superglue_copa_roberta.json",
"chars": 2879,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/classifiers/superglue/superglue_record_roberta.json",
"chars": 3190,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/classifiers/superglue/superglue_wic_bert.json",
"chars": 2746,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/classifiers/topics_distilbert_base_uncased.json",
"chars": 4042,
"preview": "{\r\n \"dataset_reader\": {\r\n \"class_name\": \"basic_classification_reader\",\r\n \"class_sep\": \";\",\r\n \"x\": \"text\",\r\n "
},
{
"path": "deeppavlov/configs/doc_retrieval/en_ranker_pop_wiki.json",
"chars": 2386,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"odqa_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/odqa/enwiki\",\n \"save_path"
},
{
"path": "deeppavlov/configs/doc_retrieval/en_ranker_tfidf_wiki.json",
"chars": 1762,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"odqa_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/odqa/enwiki\",\n \"save_path"
},
{
"path": "deeppavlov/configs/doc_retrieval/ru_ranker_tfidf_wiki.json",
"chars": 1832,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"odqa_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/odqa/ruwiki\",\n \"save_path"
},
{
"path": "deeppavlov/configs/embedder/bert_embedder.json",
"chars": 1272,
"preview": "{\n \"chainer\": {\n \"in\": [\"texts\"],\n \"pipe\": [\n {\n \"class_name\": \"transformers_bert_preprocessor\",\n "
},
{
"path": "deeppavlov/configs/embedder/bert_sentence_embedder.json",
"chars": 1237,
"preview": "{\n \"chainer\": {\n \"in\": [\"texts\"],\n \"pipe\": [\n {\n \"class_name\": \"transformers_bert_preprocessor\",\n "
},
{
"path": "deeppavlov/configs/entity_extraction/entity_detection_en.json",
"chars": 1406,
"preview": "{\n \"chainer\": {\n \"in\": [\"x\"],\n \"pipe\": [\n {\n \"class_name\": \"ner_chunker\",\n \"batch_size\": 16,\n "
},
{
"path": "deeppavlov/configs/entity_extraction/entity_detection_ru.json",
"chars": 1274,
"preview": "{\n \"chainer\": {\n \"in\": [\"x\"],\n \"pipe\": [\n {\n \"class_name\": \"ner_chunker\",\n \"batch_size\": 16,\n "
},
{
"path": "deeppavlov/configs/entity_extraction/entity_extraction_en.json",
"chars": 798,
"preview": "{\n \"chainer\": {\n \"in\": [\"x\"],\n \"pipe\": [\n {\n \"config_path\": \"{CONFIGS_PATH}/entity_extraction/entity_"
},
{
"path": "deeppavlov/configs/entity_extraction/entity_extraction_ru.json",
"chars": 798,
"preview": "{\n \"chainer\": {\n \"in\": [\"x\"],\n \"pipe\": [\n {\n \"config_path\": \"{CONFIGS_PATH}/entity_extraction/entity_"
},
{
"path": "deeppavlov/configs/entity_extraction/entity_linking_en.json",
"chars": 1955,
"preview": "{\n \"chainer\": {\n \"in\": [\"entity_substr\", \"tags\", \"probas\", \"sentences\", \"entity_offsets\", \"sentences_offsets\"],\n "
},
{
"path": "deeppavlov/configs/entity_extraction/entity_linking_ru.json",
"chars": 2731,
"preview": "{\n \"chainer\": {\n \"in\": [\"entity_substr\", \"tags\", \"probas\", \"sentences\", \"entity_offsets\", \"sentences_offsets\"],\n "
},
{
"path": "deeppavlov/configs/faq/fasttext_logreg.json",
"chars": 2995,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"basic_classification_reader\",\n \"format\": \"json\",\n \"orient\": \"split\",\n "
},
{
"path": "deeppavlov/configs/kbqa/kbqa_cq_en.json",
"chars": 7337,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"lcquad_reader\",\n \"question_types\": [\"statement_property\", \"right-subgraph\""
},
{
"path": "deeppavlov/configs/kbqa/kbqa_cq_ru.json",
"chars": 9468,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"rubq_reader\",\n \"version\": \"2.0\",\n \"question_types\": [\"all\"],\n \"num_s"
},
{
"path": "deeppavlov/configs/kbqa/wiki_parser.json",
"chars": 769,
"preview": "{\n \"chainer\": {\n \"in\": [\"parser_info\", \"query\"],\n \"pipe\": [\n {\n \"class_name\": \"wiki_parser\",\n "
},
{
"path": "deeppavlov/configs/morpho_syntax_parser/morpho_ru_syntagrus_bert.json",
"chars": 3719,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"morphotagger_dataset_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/UD2.3_source"
},
{
"path": "deeppavlov/configs/morpho_syntax_parser/ru_syntagrus_joint_parsing.json",
"chars": 701,
"preview": "{\n \"chainer\": {\n \"in\": [\"x_words\"],\n \"pipe\": [\n {\n \"id\": \"main\",\n \"class_name\": \"joint_tagger_"
},
{
"path": "deeppavlov/configs/morpho_syntax_parser/syntax_ru_syntagrus_bert.json",
"chars": 4016,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"morphotagger_dataset_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/UD2.3_source"
},
{
"path": "deeppavlov/configs/multitask/mt_glue.json",
"chars": 7491,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"multitask_reader\",\n \"task_defaults\": {\n \"class_name\": \"huggingface_da"
},
{
"path": "deeppavlov/configs/multitask/multitask_example.json",
"chars": 6674,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"multitask_reader\",\n \"task_defaults\": {\n \"class_name\": \"huggingface_da"
},
{
"path": "deeppavlov/configs/ner/ner_bert_base.json",
"chars": 1565,
"preview": "{\n \"chainer\": {\n \"in\": [\"x\"],\n \"in_y\": [\"y\"],\n \"pipe\": [\n {\n \"class_name\": \"torch_transformers_ner"
},
{
"path": "deeppavlov/configs/ner/ner_case_agnostic_mdistilbert.json",
"chars": 2984,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"conll2003_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/conll2003/\",\n \"datas"
},
{
"path": "deeppavlov/configs/ner/ner_collection3_bert.json",
"chars": 3462,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"conll2003_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/collection3/\",\n \"dat"
},
{
"path": "deeppavlov/configs/ner/ner_conll2003_bert.json",
"chars": 3431,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"conll2003_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/conll2003/\",\n \"datas"
},
{
"path": "deeppavlov/configs/ner/ner_conll2003_deberta_crf.json",
"chars": 2815,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"conll2003_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/conll2003/\",\n \"datas"
},
{
"path": "deeppavlov/configs/ner/ner_ontonotes_bert.json",
"chars": 2955,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"conll2003_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/ontonotes/\",\n \"datas"
},
{
"path": "deeppavlov/configs/ner/ner_ontonotes_bert_mult.json",
"chars": 2978,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"conll2003_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/ontonotes/\",\n \"datas"
},
{
"path": "deeppavlov/configs/ner/ner_ontonotes_deberta_crf.json",
"chars": 2375,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"conll2003_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/ontonotes/\",\n \"datas"
},
{
"path": "deeppavlov/configs/ner/ner_rus_bert.json",
"chars": 3408,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"conll2003_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/total_rus/\",\n \"datas"
},
{
"path": "deeppavlov/configs/ner/ner_rus_bert_probas.json",
"chars": 3241,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"sq_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/wiki_ner_rus/wikipedia_dataset"
},
{
"path": "deeppavlov/configs/ner/ner_rus_convers_distilrubert_2L.json",
"chars": 3494,
"preview": " {\n \"dataset_reader\": {\n \"class_name\": \"conll2003_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/total_rus/\",\n \"data"
},
{
"path": "deeppavlov/configs/ner/ner_rus_convers_distilrubert_6L.json",
"chars": 3494,
"preview": " {\n \"dataset_reader\": {\n \"class_name\": \"conll2003_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/total_rus/\",\n \"data"
},
{
"path": "deeppavlov/configs/odqa/en_odqa_infer_wiki.json",
"chars": 1810,
"preview": "{\n \"chainer\": {\n \"in\": [\"question_raw\"],\n \"out\": [\"answer\", \"answer_score\", \"answer_place\"],\n \"pipe\": [\n "
},
{
"path": "deeppavlov/configs/odqa/en_odqa_pop_infer_wiki.json",
"chars": 1808,
"preview": "{\n \"chainer\": {\n \"in\": [\"question_raw\"],\n \"out\": [\"answer\", \"answer_score\", \"answer_place\"],\n \"pipe\": [\n "
},
{
"path": "deeppavlov/configs/odqa/ru_odqa_infer_wiki.json",
"chars": 1199,
"preview": "{\n \"chainer\": {\n \"in\": [\"question_raw\"],\n \"out\": [\"best_answer\"],\n \"pipe\": [\n {\n \"config_path\": \"{"
},
{
"path": "deeppavlov/configs/ranking/path_ranking_nll_roberta_en.json",
"chars": 1398,
"preview": "{\n \"chainer\": {\n \"in\": [\"question\", \"rels\"],\n \"pipe\": [\n {\n \"class_name\": \"path_ranking_preprocessor\""
},
{
"path": "deeppavlov/configs/ranking/ranking_ubuntu_v2_torch_bert_uncased.json",
"chars": 2112,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"ubuntu_v2_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/ubuntu_v2_data\"\n },\n "
},
{
"path": "deeppavlov/configs/ranking/rel_ranking_nll_bert_ru.json",
"chars": 1366,
"preview": "{\n \"chainer\": {\n \"in\": [\"question\", \"rels\"],\n \"pipe\": [\n {\n \"class_name\": \"path_ranking_preprocessor\""
},
{
"path": "deeppavlov/configs/ranking/rel_ranking_roberta_en.json",
"chars": 2753,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"sq_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/rel_ranking_eng/lcquad_one_rel"
},
{
"path": "deeppavlov/configs/regressors/translation_ranker.json",
"chars": 2342,
"preview": "{\n \"metadata\":\n {\n \"variables\": {\n \"BASE_MODEL\": \"cointegrated/LaBSE-en-ru\",\n \"ROOT_PATH\": \"~/.deeppavlov"
},
{
"path": "deeppavlov/configs/relation_extraction/re_docred.json",
"chars": 2658,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"docred_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/docred/\",\n \"rel2id_path"
},
{
"path": "deeppavlov/configs/relation_extraction/re_rured.json",
"chars": 2740,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"rured_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/rured/\"\n },\n \"dataset_ite"
},
{
"path": "deeppavlov/configs/russian_super_glue/russian_superglue_danetqa_rubert.json",
"chars": 3038,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/russian_super_glue/russian_superglue_lidirus_rubert.json",
"chars": 3092,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/russian_super_glue/russian_superglue_muserc_rubert.json",
"chars": 3039,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/russian_super_glue/russian_superglue_parus_rubert.json",
"chars": 2937,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/russian_super_glue/russian_superglue_rcb_rubert.json",
"chars": 2882,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/russian_super_glue/russian_superglue_rucos_rubert.json",
"chars": 3197,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/russian_super_glue/russian_superglue_russe_rubert.json",
"chars": 2903,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/russian_super_glue/russian_superglue_rwsd_rubert.json",
"chars": 2850,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/russian_super_glue/russian_superglue_terra_rubert.json",
"chars": 2990,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"huggingface_dataset_reader\",\n \"path\": \"{COMPETITION}\",\n \"name\": \"{TASK}"
},
{
"path": "deeppavlov/configs/sentence_segmentation/sentseg_dailydialog_bert.json",
"chars": 3055,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"conll2003_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/dailydialog/\",\n \"dat"
},
{
"path": "deeppavlov/configs/spelling_correction/brillmoore_wikitypos_en.json",
"chars": 2142,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"typos_wikipedia_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}\"\n },\n \"dataset_"
},
{
"path": "deeppavlov/configs/spelling_correction/levenshtein_corrector_ru.json",
"chars": 1589,
"preview": "{\n \"chainer\":{\n \"in\": [\"x\"],\n \"pipe\": [\n {\n \"class_name\": \"str_lower\",\n \"id\": \"lower\",\n "
},
{
"path": "deeppavlov/configs/squad/qa_multisberquad_bert.json",
"chars": 3376,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"multi_squad_dataset_reader\",\n \"dataset\": \"MultiSQuADRuRetrClean\",\n \"url"
},
{
"path": "deeppavlov/configs/squad/qa_nq_psgcls_bert.json",
"chars": 2052,
"preview": "{\n \"chainer\": {\n \"in\": [\"context_raw\", \"question_raw\"],\n \"pipe\": [\n {\n \"class_name\": \"torch_squad_tra"
},
{
"path": "deeppavlov/configs/squad/qa_squad2_bert.json",
"chars": 3938,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"squad_dataset_reader\",\n \"dataset\": \"SQuAD2.0\",\n \"data_path\": \"{DOWNLOAD"
},
{
"path": "deeppavlov/configs/squad/squad_bert.json",
"chars": 3198,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"squad_dataset_reader\",\n \"data_path\": \"{DOWNLOADS_PATH}/squad/\"\n },\n \"dat"
},
{
"path": "deeppavlov/configs/squad/squad_ru_bert.json",
"chars": 3912,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"squad_dataset_reader\",\n \"dataset\": \"SberSQuADClean\",\n \"url\": \"http://fi"
},
{
"path": "deeppavlov/configs/squad/squad_ru_convers_distilrubert_2L.json",
"chars": 3992,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"squad_dataset_reader\",\n \"dataset\": \"SberSQuADClean\",\n \"url\": \"http://fi"
},
{
"path": "deeppavlov/configs/squad/squad_ru_convers_distilrubert_6L.json",
"chars": 3994,
"preview": "{\n \"dataset_reader\": {\n \"class_name\": \"squad_dataset_reader\",\n \"dataset\": \"SberSQuADClean\",\n \"url\": \"http://fi"
},
{
"path": "deeppavlov/core/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "deeppavlov/core/commands/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "deeppavlov/core/commands/infer.py",
"chars": 4354,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/commands/train.py",
"chars": 5377,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/commands/utils.py",
"chars": 6310,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/common/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "deeppavlov/core/common/aliases.py",
"chars": 2063,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/common/base.py",
"chars": 2679,
"preview": "# Copyright 2021 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/common/chainer.py",
"chars": 12027,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/common/cross_validation.py",
"chars": 3703,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/common/errors.py",
"chars": 895,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/common/file.py",
"chars": 2997,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/common/log.py",
"chars": 2022,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/common/log_events.py",
"chars": 2209,
"preview": "# Copyright 2019 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/common/metrics_registry.json",
"chars": 2724,
"preview": "{\n \"acc\": \"deeppavlov.metrics.accuracy:round_accuracy\",\n \"accuracy\": \"deeppavlov.metrics.accuracy:accuracy\",\n \"averag"
},
{
"path": "deeppavlov/core/common/metrics_registry.py",
"chars": 2280,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/common/params.py",
"chars": 3956,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/common/params_search.py",
"chars": 9623,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/common/paths.py",
"chars": 2086,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/common/prints.py",
"chars": 877,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/common/registry.json",
"chars": 12333,
"preview": "{\n \"answer_types_extractor\": \"deeppavlov.models.kbqa.type_define:AnswerTypesExtractor\",\n \"api_requester\": \"deeppavlov."
},
{
"path": "deeppavlov/core/common/registry.py",
"chars": 2778,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/common/requirements_registry.json",
"chars": 7199,
"preview": "{\n \"answer_types_extractor\": [\n \"{DEEPPAVLOV_PATH}/requirements/en_core_web_sm.txt\",\n \"{DEEPPAVLOV_PATH}/requirem"
},
{
"path": "deeppavlov/core/data/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "deeppavlov/core/data/data_fitting_iterator.py",
"chars": 3594,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/data/data_learning_iterator.py",
"chars": 3673,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/data/dataset_reader.py",
"chars": 1074,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/data/simple_vocab.py",
"chars": 6308,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/data/utils.py",
"chars": 22941,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/models/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "deeppavlov/core/models/component.py",
"chars": 1195,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/models/estimator.py",
"chars": 911,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/models/nn_model.py",
"chars": 952,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/models/serializable.py",
"chars": 2235,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/models/torch_model.py",
"chars": 9851,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/trainers/__init__.py",
"chars": 718,
"preview": "# Copyright 2019 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/trainers/fit_trainer.py",
"chars": 11040,
"preview": "# Copyright 2019 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/trainers/nn_trainer.py",
"chars": 16166,
"preview": "# Copyright 2019 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/trainers/torch_trainer.py",
"chars": 1829,
"preview": "# Copyright 2019 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/core/trainers/utils.py",
"chars": 2463,
"preview": "# Copyright 2019 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_iterators/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "deeppavlov/dataset_iterators/basic_classification_iterator.py",
"chars": 6724,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_iterators/huggingface_dataset_iterator.py",
"chars": 2536,
"preview": "# Copyright 2020 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_iterators/morphotagger_iterator.py",
"chars": 3498,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_iterators/multitask_iterator.py",
"chars": 14745,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_iterators/siamese_iterator.py",
"chars": 1488,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_iterators/sqlite_iterator.py",
"chars": 5683,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_iterators/squad_iterator.py",
"chars": 12610,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_iterators/typos_iterator.py",
"chars": 1464,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_readers/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "deeppavlov/dataset_readers/basic_classification_reader.py",
"chars": 4848,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_readers/boolqa_reader.py",
"chars": 3350,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_readers/conll2003_reader.py",
"chars": 6262,
"preview": "from logging import getLogger\nfrom pathlib import Path\n\nfrom deeppavlov.core.common.registry import register\nfrom deeppa"
},
{
"path": "deeppavlov/dataset_readers/docred_reader.py",
"chars": 20037,
"preview": "# Copyright 2021 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_readers/faq_reader.py",
"chars": 2021,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_readers/huggingface_dataset_reader.py",
"chars": 21045,
"preview": "# Copyright 2020 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_readers/imdb_reader.py",
"chars": 2887,
"preview": "# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with "
},
{
"path": "deeppavlov/dataset_readers/line_reader.py",
"chars": 1415,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_readers/morphotagging_dataset_reader.py",
"chars": 8124,
"preview": "# Copyright 2018 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_readers/multitask_reader.py",
"chars": 2190,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_readers/odqa_reader.py",
"chars": 7941,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_readers/paraphraser_reader.py",
"chars": 2393,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_readers/rel_ranking_reader.py",
"chars": 2408,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_readers/rured_reader.py",
"chars": 7094,
"preview": "import json\nimport os\nimport random\nfrom typing import Dict, List, Tuple\nfrom pathlib import Path\nfrom logging import ge"
},
{
"path": "deeppavlov/dataset_readers/sq_reader.py",
"chars": 4228,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_readers/squad_dataset_reader.py",
"chars": 5796,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_readers/typos_reader.py",
"chars": 5636,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/dataset_readers/ubuntu_v2_reader.py",
"chars": 3321,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/deep.py",
"chars": 4162,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/download.py",
"chars": 6531,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/metrics/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "deeppavlov/metrics/accuracy.py",
"chars": 7951,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/metrics/bleu.py",
"chars": 2880,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/metrics/correlation.py",
"chars": 1201,
"preview": "# Copyright 2020 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/metrics/elmo_metrics.py",
"chars": 1028,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/metrics/fmeasure.py",
"chars": 17610,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/metrics/google_bleu.py",
"chars": 4313,
"preview": "# Copyright 2017 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# "
},
{
"path": "deeppavlov/metrics/log_loss.py",
"chars": 1223,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/metrics/mse.py",
"chars": 1275,
"preview": "# Copyright 2020 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/metrics/recall_at_k.py",
"chars": 1912,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/metrics/record_metrics.py",
"chars": 2888,
"preview": "import re\nimport string\nimport collections\nfrom typing import List\n\nimport numpy as np\n\nfrom deeppavlov.models.preproces"
},
{
"path": "deeppavlov/metrics/roc_auc_score.py",
"chars": 1485,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/metrics/squad_metrics.py",
"chars": 5646,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/models/__init__.py",
"chars": 967,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/models/api_requester/__init__.py",
"chars": 29,
"preview": "from .api_requester import *\n"
},
{
"path": "deeppavlov/models/api_requester/api_requester.py",
"chars": 3650,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/models/api_requester/api_router.py",
"chars": 2279,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/models/classifiers/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "deeppavlov/models/classifiers/cos_sim_classifier.py",
"chars": 5367,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/models/classifiers/dnnc_proba2labels.py",
"chars": 3401,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/models/classifiers/proba2labels.py",
"chars": 3761,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
},
{
"path": "deeppavlov/models/classifiers/re_bert.py",
"chars": 8162,
"preview": "import logging\nfrom pathlib import Path\nfrom typing import Tuple, Union, Any, List\n\nimport torch\nfrom torch import Tenso"
},
{
"path": "deeppavlov/models/classifiers/torch_classification_model.py",
"chars": 5490,
"preview": "# Copyright 2017 Neural Networks and Deep Learning lab, MIPT\n#\n# Licensed under the Apache License, Version 2.0 (the \"Li"
}
]
// ... and 211 more files (download for full content)
About this extraction
This page contains the full source code of the deeppavlov/DeepPavlov GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 411 files (1.8 MB), approximately 453.0k tokens, and a symbol index with 1080 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.