[
  {
    "path": ".github/workflows/generate-docs.yml",
    "content": "name: Publish Docs\n\non:\n  # Trigger the workflow on push or pull request,\n  # but only for the main branch\n  push:\n    branches:\n      - main\n\njobs:\n  deploy:\n    runs-on: ubuntu-latest\n\n    steps:\n      - uses: actions/checkout@v2\n      - name: Set up Python\n        uses: actions/setup-python@v2\n        with:\n          python-version: \"3.7\"\n      - name: Install dependencies\n        run: |\n          python -m pip install --upgrade pip\n          pip install -r docs/requirements.txt\n      - name: Publish Docs\n        run: |\n          mkdocs gh-deploy -t material --force\n"
  },
  {
    "path": ".github/workflows/main.yaml",
    "content": "name: Lint and run tests\n\non:\n  push:\n    branches:\n      - main\n  pull_request:\n    branches:\n      - main\n\njobs:\n  build:\n    runs-on: ubuntu-latest\n    strategy:\n      matrix:\n        python-version: [3.8, 3.9]\n\n    steps:\n      - uses: actions/checkout@v2\n\n      - name: Set up Python ${{ matrix.python-version }}\n        uses: actions/setup-python@v2\n        with:\n          python-version: ${{ matrix.python-version }}\n      \n      - uses: actions/cache@v2\n        # source: https://medium.com/ai2-blog/e9452698e98d\n        with:\n          path: ${{ env.pythonLocation }}\n          key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}\n\n      - name: Install requirements\n        run: |\n          python -m pip install --upgrade pip\n          pip install --upgrade --upgrade-strategy eager -e .[dev]\n\n      - name: Show setup install requires versions\n        run: |\n          pip show transformers torch tqdm protobuf tqdm\n\n      - name: Lint code with black\n        run: |\n          black . --check\n\n      - name: Run quick tests with pytest\n        run: |\n          pytest tests/quick\n\n      - name: Run long tests with pytest\n        if: ${{ matrix.python-version == '3.7' }}\n        run: |\n          pytest tests/long\n"
  },
  {
    "path": ".github/workflows/python-publish.yml",
    "content": "# This workflow will upload a Python Package using Twine when a release is created\n# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries\n\nname: Publish Python Package\n\non:\n  release:\n    types: [published]\n\njobs:\n  deploy:\n\n    runs-on: ubuntu-latest\n\n    steps:\n    - uses: actions/checkout@v2\n    - name: Set up Python\n      uses: actions/setup-python@v2\n      with:\n        python-version: '3.8'\n    - name: Install dependencies\n      run: |\n        python -m pip install --upgrade pip\n        pip install setuptools wheel twine\n    - name: Build and publish\n      env:\n        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}\n        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}\n      run: |\n        python setup.py sdist bdist_wheel\n        twine upload dist/*\n"
  },
  {
    "path": ".gitignore",
    "content": "# Custom\n.vscode\n\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\nshare/python-wheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.nox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n*.py,cover\n.hypothesis/\n.pytest_cache/\ncover/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\ndb.sqlite3-journal\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\n.pybuilder/\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# IPython\nprofile_default/\nipython_config.py\n\n# pyenv\n#   For a library or package, you might want to ignore these files since the code is\n#   intended to run in multiple environments; otherwise, check them in:\n# .python-version\n\n# pipenv\n#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.\n#   However, in case of collaboration, if having platform-specific dependencies or dependencies\n#   having no cross-platform support, pipenv may install dependencies that don't work, or not\n#   install all needed dependencies.\n#Pipfile.lock\n\n# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow\n__pypackages__/\n\n# Celery stuff\ncelerybeat-schedule\ncelerybeat.pid\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n.dmypy.json\ndmypy.json\n\n# Pyre type checker\n.pyre/\n\n# pytype static type analyzer\n.pytype/\n\n# Cython debug symbols\ncython_debug/\n"
  },
  {
    "path": ".readthedocs.yaml",
    "content": "# .readthedocs.yaml\n# Read the Docs configuration file\n# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details\n\n# Required\nversion: 2\n\nmkdocs:\n  configuration: mkdocs-rtd.yml\n\n# Optionally build your docs in additional formats such as PDF\nformats:\n  - pdf\n\n# Optionally set the version of Python and requirements required to build your docs\npython:\n  version: 3.7\n"
  },
  {
    "path": "CITATION.cff",
    "content": "cff-version: 1.2.0\nmessage: \"If you use this software, please cite it as below.\"\nauthors:\n- family-names: \"Lu\"\n  given-names: \"Xing Han\"\n  orcid: \"https://orcid.org/0000-0001-9027-8425\"\ntitle: \"DL Translate: A deep learning-based translation library built on Huggingface transformers\"\nversion: 0.3.0\ndoi: 10.5281/zenodo.5230676\ndate-released: 2021-08-21\nurl: \"https://github.com/xhlulu/dl-translate\"\n"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2021 Xing Han Lu\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE."
  },
  {
    "path": "MANIFEST.in",
    "content": "include CITATION.cff"
  },
  {
    "path": "README.md",
    "content": "# DL Translate\n\n[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5230676.svg)](https://doi.org/10.5281/zenodo.5230676)\n[![Downloads](https://static.pepy.tech/personalized-badge/dl-translate?period=total&units=abbreviation&left_color=grey&right_color=orange&left_text=Downloads)](https://pepy.tech/project/dl-translate)\n[![License](https://img.shields.io/badge/license-MIT-green)](https://github.com/xhluca/dl-translate/blob/main/LICENSE)\n\n\n*A translation library for 200 languages built on Huggingface `transformers`*\n\n💻 [GitHub Repository](https://github.com/xhluca/dl-translate)<br>\n📚 [Documentation](https://xhluca.github.io/dl-translate)<br>\n🐍 [PyPi project](https://pypi.org/project/dl-translate/)<br>\n🧪 [Colab Demo](https://colab.research.google.com/github/xhluca/dl-translate/blob/main/demos/colab_demo.ipynb) / [Kaggle Demo](https://www.kaggle.com/xhlulu/dl-translate-demo/)\n\n\n\n## Quickstart\n\nInstall the library with pip:\n```\npip install dl-translate\n```\n\nTo translate some text:\n\n```python\nimport dl_translate as dlt\n\nmt = dlt.TranslationModel()  # Slow when you load it for the first time\n\ntext_hi = \"संयुक्त राष्ट्र के प्रमुख का कहना है कि सीरिया में कोई सैन्य समाधान नहीं है\"\nmt.translate(text_hi, source=dlt.lang.HINDI, target=dlt.lang.ENGLISH)\n```\n\nAbove, you can see that `dlt.lang` contains variables representing each of the 200 available languages with auto-complete support. Alternatively, you can specify the language (e.g. \"Arabic\") or the language code (e.g. 
\"fr\" for French):\n```python\ntext_ar = \"الأمين العام للأمم المتحدة يقول إنه لا يوجد حل عسكري في سوريا.\"\nmt.translate(text_ar, source=\"Arabic\", target=\"fr\")\n```\n\nIf you want to verify whether a language is available, you can check it:\n```python\nprint(mt.available_languages())  # All languages that you can use\nprint(mt.available_codes())  # Code corresponding to each language accepted\nprint(mt.get_lang_code_map())  # Dictionary of lang -> code\n```\n\n## Usage\n\n### Selecting a device\n\nWhen you load the model, you can specify the device:\n```python\nmt = dlt.TranslationModel(device=\"auto\")\n```\n\nBy default, the value will be `device=\"auto\"`, which means it will use a GPU if possible. You can also explicitly set `device=\"cpu\"` or `device=\"gpu\"`, or some other strings accepted by [`torch.device()`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.torch.device). __In general, it is recommend to use a GPU if you want a reasonable processing time.__\n\n### Choosing a different model\n\nBy default, the `m2m100` model will be used. 
However, there are a few options:\n\n* [mBART-50 Large](https://huggingface.co/transformers/master/model_doc/mbart.html):  Allows translations across 50 languages.\n* [m2m100](https://huggingface.co/transformers/model_doc/m2m_100.html): Allows translations across 100 languages.\n* [nllb-200](https://huggingface.co/docs/transformers/model_doc/nllb) (New in v0.3): Allows translations across 200 languages, and is faster than m2m100 (On RTX A6000, we can see speed up of 3x).\n\nHere's an example:\n```python\n# The default approach\nmt = dlt.TranslationModel(\"m2m100\")  # Shorthand\nmt = dlt.TranslationModel(\"facebook/m2m100_418M\")  # Huggingface repo\n\n# If you want to use mBART-50 Large\nmt = dlt.TranslationModel(\"mbart50\")\nmt = dlt.TranslationModel(\"facebook/mbart-large-50-many-to-many-mmt\")\n\n# Or NLLB-200 (faster and has 200 languages)\nmt = dlt.TranslationModel(\"nllb200\")\nmt = dlt.TranslationModel(\"facebook/nllb-200-distilled-600M\")\n```\n\nNote that the language code will change depending on the model family. To find out the correct language codes, please read the doc page on available languages or run `mt.available_codes()`.\n\nBy default, `dlt.TranslationModel` will download the model from the huggingface repo for [mbart50](https://huggingface.co/facebook/mbart-large-50-one-to-many-mmt), [m2m100](https://huggingface.co/facebook/m2m100_418M), or [nllb200](https://huggingface.co/facebook/nllb-200-distilled-600M) and cache it. It's possible to load the model from a path or a model with a similar format, but you will need to specify the `model_family`:\n```python\nmt = dlt.TranslationModel(\"/path/to/model/directory/\", model_family=\"mbart50\")\nmt = dlt.TranslationModel(\"facebook/m2m100_1.2B\", model_family=\"m2m100\")\nmt = dlt.TranslationModel(\"facebook/nllb-200-distilled-600M\", model_family=\"nllb200\")\n```\n\nNotes:\n* Make sure your tokenizer is also stored in the same directory if you load from a file. 
\n* The available languages will change if you select a different model, so you will not be able to leverage `dlt.lang` or `dlt.utils`.\n\n### Splitting into sentences\n\nIt is not recommended to use extremely long texts as it takes more time to process. Instead, you can try to break them down into sentences with the help of `nltk`. First install the library with `pip install nltk`, then run:\n```python\nimport nltk\n\nnltk.download(\"punkt\")\n\ntext = \"Mr. Smith went to his favorite cafe. There, he met his friend Dr. Doe.\"\nsents = nltk.tokenize.sent_tokenize(text, \"english\")  # don't use dlt.lang.ENGLISH\n\" \".join(mt.translate(sents, source=dlt.lang.ENGLISH, target=dlt.lang.FRENCH))\n```\n\n### Batch size during translation\n\nIt's possible to set a batch size (i.e. the number of elements processed at once) for `mt.translate` and whether you want to see the progress bar or not:\n\n```python\n# ...\nmt = dlt.TranslationModel()\nmt.translate(text, source, target, batch_size=32, verbose=True)\n```\n\nIf you set `batch_size=None`, it will compute the entire `text` at once rather than splitting into \"chunks\". We recommend lowering `batch_size` if you do not have a lot of RAM or VRAM and run into CUDA memory error. Set a higher value if you are using a high-end GPU and the VRAM is not fully utilized.\n\n\n### `dlt.utils` module\n\nAn alternative to `mt.available_languages()` is the `dlt.utils` module. You can use it to find out which languages and codes are available:\n\n```python\nprint(dlt.utils.available_languages('mbart50'))  # All languages that you can use\nprint(dlt.utils.available_codes('m2m100'))  # Code corresponding to each language accepted\nprint(dlt.utils.get_lang_code_map('nllb200'))  # Dictionary of lang -> code\n```\n\n### Offline usage\n\nUnlike the Google translate or MSFT Translator APIs, this library can be fully used offline. 
However, you will need to first download the packages and models, and move them to your offline environment to be installed and loaded inside a venv.\n\nFirst, run in your terminal:\n```bash\nmkdir dlt\ncd dlt\nmkdir libraries\npip download -d libraries/ dl-translate\n```\n\nOnce all the required packages are downloaded, you will need to use huggingface hub to download the files. Install it with `pip install huggingface-hub`. Then, run inside Python:\n```python\nimport shutil\nimport huggingface_hub as hub\n\ndirname = hub.snapshot_download(\"facebook/m2m100_418M\")\nshutil.copytree(dirname, \"cached_model_m2m100\")  # Copy to a permanent folder\n```\n\nNow, move everything in the `dlt` directory to your offline environment. Create a virtual environment and run the following in terminal:\n```bash\npip install --no-index --find-links libraries/ dl-translate\n```\n\nNow, run inside Python:\n```python\nimport dl_translate as dlt\n\nmt = dlt.TranslationModel(\"cached_model_m2m100\", model_family=\"m2m100\")\n```\n\n\n## Advanced\n\nIf you have knowledge of PyTorch and Huggingface Transformers, you can access advanced aspects of the library for more customization:\n* **Saving and loading**: If you wish to accelerate the loading time the translation model, you can use `save_obj` and reload it later with `load_obj`. This method is only recommended if you are familiar with `huggingface` and `torch`; please read the docs for more information.\n* **Interacting with underlying model and tokenizer**: When initializing `model`, you can pass in arguments for the underlying BART model and tokenizer with `model_options` and `tokenizer_options` respectively. 
You can also access the underlying `transformers` with `mt.get_transformers_model()`.\n* **Keyword arguments for the `generate()` method**: When running `mt.translate`, you can also give `generation_options` that is passed to the `generate()` method of the underlying transformer model.\n\nFor more information, please visit the [advanced section of the user guide](https://xhluca.github.io/dl-translate/#advanced).\n\n## Acknowledgement\n\n`dl-translate` is built on top of Huggingface's implementation of two models created by Facebook AI Research.\n\n1. The multilingual BART finetuned on many-to-many translation of over 50 languages, which is [documented here](https://huggingface.co/transformers/master/model_doc/mbart.html) The original paper was written by Tang et. al from Facebook AI Research; you can [find it here](https://arxiv.org/pdf/2008.00401.pdf) and cite it using the following:\n    ```\n    @article{tang2020multilingual,\n        title={Multilingual translation with extensible multilingual pretraining and finetuning},\n        author={Tang, Yuqing and Tran, Chau and Li, Xian and Chen, Peng-Jen and Goyal, Naman and Chaudhary, Vishrav and Gu, Jiatao and Fan, Angela},\n        journal={arXiv preprint arXiv:2008.00401},\n        year={2020}\n    }\n    ```\n2. The transformer model published in [Beyond English-Centric Multilingual Machine Translation](https://arxiv.org/abs/2010.11125) by Fan et. al, which supports over 100 languages. 
You can cite it here:\n   ```\n   @misc{fan2020englishcentric,\n        title={Beyond English-Centric Multilingual Machine Translation}, \n        author={Angela Fan and Shruti Bhosale and Holger Schwenk and Zhiyi Ma and Ahmed El-Kishky and Siddharth Goyal and Mandeep Baines and Onur Celebi and Guillaume Wenzek and Vishrav Chaudhary and Naman Goyal and Tom Birch and Vitaliy Liptchinsky and Sergey Edunov and Edouard Grave and Michael Auli and Armand Joulin},\n        year={2020},\n        eprint={2010.11125},\n        archivePrefix={arXiv},\n        primaryClass={cs.CL}\n    }\n   ```\n\n3. The [no language left behind](https://arxiv.org/abs/2207.04672) model, which extends NMT to 200+ languages. You can cite it here:\n    ```\n    @misc{nllbteam2022language,\n        title={No Language Left Behind: Scaling Human-Centered Machine Translation}, \n        author={NLLB Team and Marta R. Costa-jussà and James Cross and Onur Çelebi and Maha Elbayad and Kenneth Heafield and Kevin Heffernan and Elahe Kalbassi and Janice Lam and Daniel Licht and Jean Maillard and Anna Sun and Skyler Wang and Guillaume Wenzek and Al Youngblood and Bapi Akula and Loic Barrault and Gabriel Mejia Gonzalez and Prangthip Hansanti and John Hoffman and Semarley Jarrett and Kaushik Ram Sadagopan and Dirk Rowe and Shannon Spruit and Chau Tran and Pierre Andrews and Necip Fazil Ayan and Shruti Bhosale and Sergey Edunov and Angela Fan and Cynthia Gao and Vedanuj Goswami and Francisco Guzmán and Philipp Koehn and Alexandre Mourachko and Christophe Ropers and Safiyyah Saleem and Holger Schwenk and Jeff Wang},\n        year={2022},\n        eprint={2207.04672},\n        archivePrefix={arXiv},\n        primaryClass={cs.CL}\n    }\n    ```\n\n\n`dlt` is a wrapper with useful `utils` to save you time. 
For huggingface's `transformers`, the following snippet is shown as an example:\n```python\nfrom transformers import MBartForConditionalGeneration, MBart50TokenizerFast\n\narticle_hi = \"संयुक्त राष्ट्र के प्रमुख का कहना है कि सीरिया में कोई सैन्य समाधान नहीं है\"\narticle_ar = \"الأمين العام للأمم المتحدة يقول إنه لا يوجد حل عسكري في سوريا.\"\n\nmodel = MBartForConditionalGeneration.from_pretrained(\"facebook/mbart-large-50-many-to-many-mmt\")\ntokenizer = MBart50TokenizerFast.from_pretrained(\"facebook/mbart-large-50-many-to-many-mmt\")\n\n# translate Hindi to French\ntokenizer.src_lang = \"hi_IN\"\nencoded_hi = tokenizer(article_hi, return_tensors=\"pt\")\ngenerated_tokens = model.generate(**encoded_hi, forced_bos_token_id=tokenizer.lang_code_to_id[\"fr_XX\"])\ntokenizer.batch_decode(generated_tokens, skip_special_tokens=True)\n# => \"Le chef de l 'ONU affirme qu 'il n 'y a pas de solution militaire en Syria.\"\n\n# translate Arabic to English\ntokenizer.src_lang = \"ar_AR\"\nencoded_ar = tokenizer(article_ar, return_tensors=\"pt\")\ngenerated_tokens = model.generate(**encoded_ar, forced_bos_token_id=tokenizer.lang_code_to_id[\"en_XX\"])\ntokenizer.batch_decode(generated_tokens, skip_special_tokens=True)\n# => \"The Secretary-General of the United Nations says there is no military solution in Syria.\"\n```\n\nWith `dlt`, you can run:\n```python\nimport dl_translate as dlt\n\narticle_hi = \"संयुक्त राष्ट्र के प्रमुख का कहना है कि सीरिया में कोई सैन्य समाधान नहीं है\"\narticle_ar = \"الأمين العام للأمم المتحدة يقول إنه لا يوجد حل عسكري في سوريا.\"\n\nmt = dlt.TranslationModel()\ntranslated_fr = mt.translate(article_hi, source=dlt.lang.HINDI, target=dlt.lang.FRENCH)\ntranslated_en = mt.translate(article_ar, source=dlt.lang.ARABIC, target=dlt.lang.ENGLISH)\n```\n\nNotice you don't have to think about tokenizers, condition generation, pretrained models, and regional codes; you can just tell the model what to translate!\n\nIf you are experienced with `huggingface`'s 
ecosystem, then you should be familiar enough with the example above that you wouldn't need this library. However, if you've never heard of huggingface or mBART, then I hope using this library will give you enough motivation to [learn more about them](https://github.com/huggingface/transformers) :)\n"
  },
  {
    "path": "demos/colab_demo.ipynb",
    "content": "{\n  \"nbformat\": 4,\n  \"nbformat_minor\": 0,\n  \"metadata\": {\n    \"kernelspec\": {\n      \"name\": \"python3\",\n      \"display_name\": \"Python 3\",\n      \"language\": \"python\"\n    },\n    \"language_info\": {\n      \"name\": \"python\",\n      \"version\": \"3.7.9\",\n      \"mimetype\": \"text/x-python\",\n      \"codemirror_mode\": {\n        \"name\": \"ipython\",\n        \"version\": 3\n      },\n      \"pygments_lexer\": \"ipython3\",\n      \"nbconvert_exporter\": \"python\",\n      \"file_extension\": \".py\"\n    },\n    \"colab\": {\n      \"name\": \"dl-translate demo.ipynb\",\n      \"provenance\": [],\n      \"collapsed_sections\": []\n    },\n    \"accelerator\": \"GPU\",\n    \"widgets\": {\n      \"application/vnd.jupyter.widget-state+json\": {\n        \"9695e0e8562c4104b8e28a25bf05991e\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_7e7d388cb3ea475098dcca168cba2635\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_7424952a9ad34bc9a0094ed6df2881ab\",\n              \"IPY_MODEL_bd39d9cea7f949babdb9048b41f4055f\"\n            ]\n          }\n        },\n        \"7e7d388cb3ea475098dcca168cba2635\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n   
         \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"7424952a9ad34bc9a0094ed6df2881ab\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_fee217673faf4424883be8bf33574584\",\n            \"_dom_classes\": [],\n            \"description\": \"Downloading: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            
\"bar_style\": \"success\",\n            \"max\": 3708092,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 3708092,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_2ad1b48139f14387bac9b6046b0d8d60\"\n          }\n        },\n        \"bd39d9cea7f949babdb9048b41f4055f\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_79733c788f914b66a5803f1b71554c26\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 3.71M/3.71M [00:03&lt;00:00, 1.22MB/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_cdc4f3fc6bf34edfa6ed14d1edc1c0da\"\n          }\n        },\n        \"fee217673faf4424883be8bf33574584\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": 
\"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"2ad1b48139f14387bac9b6046b0d8d60\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        
},\n        \"79733c788f914b66a5803f1b71554c26\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"cdc4f3fc6bf34edfa6ed14d1edc1c0da\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n           
 \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"451f6d64f9d14cfe821ef82af4313223\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_d213c156b7f941de86f89704200898cc\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_d2ab70c5a6b24d3582e13bd7300e4b48\",\n              \"IPY_MODEL_d596d132f81a4577ab6eeff01c858f4a\"\n            ]\n          }\n        },\n        \"d213c156b7f941de86f89704200898cc\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            
\"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"d2ab70c5a6b24d3582e13bd7300e4b48\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_e4ef14da423a4e37b23815228dff047c\",\n            \"_dom_classes\": [],\n            \"description\": \"Downloading: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 2423393,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 2423393,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": 
\"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_f489df53fce4489c85248d0a0fc0347b\"\n          }\n        },\n        \"d596d132f81a4577ab6eeff01c858f4a\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_347117e2b4084588af0120de4cba9992\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 2.42M/2.42M [00:00&lt;00:00, 5.19MB/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_f7dde0ddeb954feeb6334f91957b3b4f\"\n          }\n        },\n        \"e4ef14da423a4e37b23815228dff047c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"f489df53fce4489c85248d0a0fc0347b\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            
\"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"347117e2b4084588af0120de4cba9992\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            
\"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"f7dde0ddeb954feeb6334f91957b3b4f\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            
\"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"d85f7d324e534e7fb7665440581927af\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_36a4ac5796544fe5b50e802cd0b4b599\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_65782adbd1e943f39a3d282e4f217daf\",\n              \"IPY_MODEL_d78f6159d9fe4b0e840b2d1912a253d8\"\n            ]\n          }\n        },\n        \"36a4ac5796544fe5b50e802cd0b4b599\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            
\"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"65782adbd1e943f39a3d282e4f217daf\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_db2021d70d584ef987b1580962ad919c\",\n            \"_dom_classes\": [],\n            \"description\": \"Downloading: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 272,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 272,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_2cc9fc8c66aa4381ab3a6e742d0547dc\"\n          }\n        },\n        \"d78f6159d9fe4b0e840b2d1912a253d8\": {\n          
\"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_256273b11d094044a36d0056498cc487\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 272/272 [00:01&lt;00:00, 236B/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_361e88b5998d4df996113c7b6c55bd1e\"\n          }\n        },\n        \"db2021d70d584ef987b1580962ad919c\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"2cc9fc8c66aa4381ab3a6e742d0547dc\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": 
null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"256273b11d094044a36d0056498cc487\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        
\"361e88b5998d4df996113c7b6c55bd1e\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"0236fdbddbcc4049b48713bb17ba4b74\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": 
\"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_878fc2e400354baf84a8a134d9038174\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_e2a4fd0970f94aeb8b841870d95662a8\",\n              \"IPY_MODEL_bc263f4b7ac94cfeaa7609341d5394e2\"\n            ]\n          }\n        },\n        \"878fc2e400354baf84a8a134d9038174\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n  
          \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"e2a4fd0970f94aeb8b841870d95662a8\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_266b8b8d1dda42afa1b5fdafb66af4ac\",\n            \"_dom_classes\": [],\n            \"description\": \"Downloading: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 1140,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 1140,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_dc8604c9207b4a8ca364a817c6516378\"\n          }\n        },\n        \"bc263f4b7ac94cfeaa7609341d5394e2\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_f1958b44d65243a0b4ca5ba2803d3598\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            
\"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 1.14k/1.14k [00:00&lt;00:00, 2.38kB/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_d26580979feb41cdb423d151bd5ebd15\"\n          }\n        },\n        \"266b8b8d1dda42afa1b5fdafb66af4ac\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"dc8604c9207b4a8ca364a817c6516378\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            
\"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"f1958b44d65243a0b4ca5ba2803d3598\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"d26580979feb41cdb423d151bd5ebd15\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": 
null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"e7a58c85b79041eaaeeb9a9c3fa102aa\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n         
   \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_57b95194b9c0404d84ee4a73208e10b0\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_477c9d28d9624f448b75b8c38207fed5\",\n              \"IPY_MODEL_e2e7b8f3477c4fd486a0602377992efe\"\n            ]\n          }\n        },\n        \"57b95194b9c0404d84ee4a73208e10b0\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n    
        \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"477c9d28d9624f448b75b8c38207fed5\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_72755dae20114e74a26fa5f556eed8c8\",\n            \"_dom_classes\": [],\n            \"description\": \"Downloading: 100%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"success\",\n            \"max\": 908,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 908,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_c961071096794f7580d0d37db02e8ff0\"\n          }\n        },\n        \"e2e7b8f3477c4fd486a0602377992efe\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_cf26e32a7c6b48caa2d7727365d33da3\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 908/908 [00:00&lt;00:00, 25.8kB/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n    
        \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_2edbb013778f4cb2a05510875c8b1d66\"\n          }\n        },\n        \"72755dae20114e74a26fa5f556eed8c8\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"c961071096794f7580d0d37db02e8ff0\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            
\"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"cf26e32a7c6b48caa2d7727365d33da3\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"2edbb013778f4cb2a05510875c8b1d66\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            
\"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"b4bb0987873c46f9865e5308e2cd6cab\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HBoxModel\",\n          \"state\": {\n            \"_view_name\": \"HBoxView\",\n            \"_dom_classes\": [],\n            \"_model_name\": \"HBoxModel\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"box_style\": \"\",\n            \"layout\": \"IPY_MODEL_8fa51cc1231146d9be01452d4baedcab\",\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"children\": [\n              \"IPY_MODEL_5123c4c6e7524b4983ca0701394af3a0\",\n              
\"IPY_MODEL_4d97b210440f431bb5440d5b3b49032b\"\n            ]\n          }\n        },\n        \"8fa51cc1231146d9be01452d4baedcab\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        
\"5123c4c6e7524b4983ca0701394af3a0\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"FloatProgressModel\",\n          \"state\": {\n            \"_view_name\": \"ProgressView\",\n            \"style\": \"IPY_MODEL_d5a94e8542ac48babbc228b80e8c5c91\",\n            \"_dom_classes\": [],\n            \"description\": \"Downloading:  63%\",\n            \"_model_name\": \"FloatProgressModel\",\n            \"bar_style\": \"\",\n            \"max\": 1935796948,\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": 1223096320,\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"orientation\": \"horizontal\",\n            \"min\": 0,\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_47f2a197eb0b47d583ec331a89a7657f\"\n          }\n        },\n        \"4d97b210440f431bb5440d5b3b49032b\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"HTMLModel\",\n          \"state\": {\n            \"_view_name\": \"HTMLView\",\n            \"style\": \"IPY_MODEL_c1998ef598ff4359b0de7961665dba6d\",\n            \"_dom_classes\": [],\n            \"description\": \"\",\n            \"_model_name\": \"HTMLModel\",\n            \"placeholder\": \"​\",\n            \"_view_module\": \"@jupyter-widgets/controls\",\n            \"_model_module_version\": \"1.5.0\",\n            \"value\": \" 1.22G/1.94G [00:22&lt;00:13, 52.5MB/s]\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.5.0\",\n            \"description_tooltip\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\",\n            \"layout\": \"IPY_MODEL_ba0a677b24604277981595701dfc4574\"\n          }\n        },\n        \"d5a94e8542ac48babbc228b80e8c5c91\": {\n          
\"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"ProgressStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"ProgressStyleModel\",\n            \"description_width\": \"initial\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"bar_color\": null,\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"47f2a197eb0b47d583ec331a89a7657f\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n       
     \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        },\n        \"c1998ef598ff4359b0de7961665dba6d\": {\n          \"model_module\": \"@jupyter-widgets/controls\",\n          \"model_name\": \"DescriptionStyleModel\",\n          \"state\": {\n            \"_view_name\": \"StyleView\",\n            \"_model_name\": \"DescriptionStyleModel\",\n            \"description_width\": \"\",\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"_model_module_version\": \"1.5.0\",\n            \"_view_count\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"_model_module\": \"@jupyter-widgets/controls\"\n          }\n        },\n        \"ba0a677b24604277981595701dfc4574\": {\n          \"model_module\": \"@jupyter-widgets/base\",\n          \"model_name\": \"LayoutModel\",\n          \"state\": {\n            \"_view_name\": \"LayoutView\",\n            \"grid_template_rows\": null,\n            \"right\": null,\n            \"justify_content\": null,\n            \"_view_module\": \"@jupyter-widgets/base\",\n            \"overflow\": null,\n            \"_model_module_version\": \"1.2.0\",\n            \"_view_count\": null,\n            \"flex_flow\": null,\n            \"width\": null,\n            \"min_width\": null,\n            \"border\": null,\n            \"align_items\": null,\n            \"bottom\": null,\n            \"_model_module\": \"@jupyter-widgets/base\",\n            \"top\": null,\n            \"grid_column\": null,\n            \"overflow_y\": null,\n            \"overflow_x\": null,\n            \"grid_auto_flow\": 
null,\n            \"grid_area\": null,\n            \"grid_template_columns\": null,\n            \"flex\": null,\n            \"_model_name\": \"LayoutModel\",\n            \"justify_items\": null,\n            \"grid_row\": null,\n            \"max_height\": null,\n            \"align_content\": null,\n            \"visibility\": null,\n            \"align_self\": null,\n            \"height\": null,\n            \"min_height\": null,\n            \"padding\": null,\n            \"grid_auto_rows\": null,\n            \"grid_gap\": null,\n            \"max_width\": null,\n            \"order\": null,\n            \"_view_module_version\": \"1.2.0\",\n            \"grid_template_areas\": null,\n            \"object_position\": null,\n            \"object_fit\": null,\n            \"grid_auto_columns\": null,\n            \"margin\": null,\n            \"display\": null,\n            \"left\": null\n          }\n        }\n      }\n    }\n  },\n  \"cells\": [\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"tx6xJha5YIiA\"\n      },\n      \"source\": [\n        \"# DL Translate\\n\",\n        \"\\n\",\n        \"*A deep learning-based translation library built on Huggingface `transformers` and Facebook's `mBART-Large`*\\n\",\n        \"\\n\",\n        \"💻 [GitHub Repository](https://github.com/xhlulu/dl-translate)\\\\\\n\",\n        \"📚 [Documentation](https://git.io/dlt-docs) / [readthedocs](https://dl-translate.readthedocs.io)\\\\\\n\",\n        \"🐍 [PyPi project](https://pypi.org/project/dl-translate/)\"\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"bCjxVhyxYIiD\"\n      },\n      \"source\": [\n        \"## Quickstart\\n\",\n        \"\\n\",\n        \"Install the library with pip:\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"_uuid\": \"8f2839f25d086af736a60e9eeb907d3b93b6e0e5\",\n        \"_cell_guid\": 
\"b1076dfc-b9ad-4769-8c92-a6c4dae69d19\",\n        \"trusted\": true,\n        \"_kg_hide-input\": false,\n        \"_kg_hide-output\": true,\n        \"id\": \"BI3mAoRnYIiF\",\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\"\n        },\n        \"outputId\": \"c5674616-a545-4a8a-9813-b127c1777efa\"\n      },\n      \"source\": [\n        \"!pip install -q dl-translate\"\n      ],\n      \"execution_count\": 1,\n      \"outputs\": [\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\u001b[K     |████████████████████████████████| 1.2MB 9.2MB/s \\n\",\n            \"\\u001b[K     |████████████████████████████████| 2.2MB 29.2MB/s \\n\",\n            \"\\u001b[K     |████████████████████████████████| 870kB 50.4MB/s \\n\",\n            \"\\u001b[K     |████████████████████████████████| 3.3MB 51.1MB/s \\n\",\n            \"\\u001b[?25h  Building wheel for sacremoses (setup.py) ... \\u001b[?25l\\u001b[?25hdone\\n\"\n          ],\n          \"name\": \"stdout\"\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"p1oeb4czYIiG\"\n      },\n      \"source\": [\n        \"To translate some text:\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"trusted\": true,\n        \"colab\": {\n          \"base_uri\": \"https://localhost:8080/\",\n          \"height\": 300,\n          \"referenced_widgets\": [\n            \"9695e0e8562c4104b8e28a25bf05991e\",\n            \"7e7d388cb3ea475098dcca168cba2635\",\n            \"7424952a9ad34bc9a0094ed6df2881ab\",\n            \"bd39d9cea7f949babdb9048b41f4055f\",\n            \"fee217673faf4424883be8bf33574584\",\n            \"2ad1b48139f14387bac9b6046b0d8d60\",\n            \"79733c788f914b66a5803f1b71554c26\",\n            \"cdc4f3fc6bf34edfa6ed14d1edc1c0da\",\n            \"451f6d64f9d14cfe821ef82af4313223\",\n            \"d213c156b7f941de86f89704200898cc\",\n          
  \"d2ab70c5a6b24d3582e13bd7300e4b48\",\n            \"d596d132f81a4577ab6eeff01c858f4a\",\n            \"e4ef14da423a4e37b23815228dff047c\",\n            \"f489df53fce4489c85248d0a0fc0347b\",\n            \"347117e2b4084588af0120de4cba9992\",\n            \"f7dde0ddeb954feeb6334f91957b3b4f\",\n            \"d85f7d324e534e7fb7665440581927af\",\n            \"36a4ac5796544fe5b50e802cd0b4b599\",\n            \"65782adbd1e943f39a3d282e4f217daf\",\n            \"d78f6159d9fe4b0e840b2d1912a253d8\",\n            \"db2021d70d584ef987b1580962ad919c\",\n            \"2cc9fc8c66aa4381ab3a6e742d0547dc\",\n            \"256273b11d094044a36d0056498cc487\",\n            \"361e88b5998d4df996113c7b6c55bd1e\",\n            \"0236fdbddbcc4049b48713bb17ba4b74\",\n            \"878fc2e400354baf84a8a134d9038174\",\n            \"e2a4fd0970f94aeb8b841870d95662a8\",\n            \"bc263f4b7ac94cfeaa7609341d5394e2\",\n            \"266b8b8d1dda42afa1b5fdafb66af4ac\",\n            \"dc8604c9207b4a8ca364a817c6516378\",\n            \"f1958b44d65243a0b4ca5ba2803d3598\",\n            \"d26580979feb41cdb423d151bd5ebd15\",\n            \"e7a58c85b79041eaaeeb9a9c3fa102aa\",\n            \"57b95194b9c0404d84ee4a73208e10b0\",\n            \"477c9d28d9624f448b75b8c38207fed5\",\n            \"e2e7b8f3477c4fd486a0602377992efe\",\n            \"72755dae20114e74a26fa5f556eed8c8\",\n            \"c961071096794f7580d0d37db02e8ff0\",\n            \"cf26e32a7c6b48caa2d7727365d33da3\",\n            \"2edbb013778f4cb2a05510875c8b1d66\",\n            \"b4bb0987873c46f9865e5308e2cd6cab\",\n            \"8fa51cc1231146d9be01452d4baedcab\",\n            \"5123c4c6e7524b4983ca0701394af3a0\",\n            \"4d97b210440f431bb5440d5b3b49032b\",\n            \"d5a94e8542ac48babbc228b80e8c5c91\",\n            \"47f2a197eb0b47d583ec331a89a7657f\",\n            \"c1998ef598ff4359b0de7961665dba6d\",\n            \"ba0a677b24604277981595701dfc4574\"\n          ]\n        },\n        \"id\": \"qdefSjR_YIiG\",\n        
\"outputId\": \"a1002eb7-cceb-45ee-dbb9-6a7329860af1\"\n      },\n      \"source\": [\n        \"import dl_translate as dlt\\n\",\n        \"\\n\",\n        \"mt = dlt.TranslationModel()\\n\",\n        \"\\n\",\n        \"text_hi = \\\"संयुक्त राष्ट्र के प्रमुख का कहना है कि सीरिया में कोई सैन्य समाधान नहीं है\\\"\\n\",\n        \"mt.translate(text_hi, source=dlt.lang.HINDI, target=dlt.lang.ENGLISH)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": [\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"9695e0e8562c4104b8e28a25bf05991e\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=3708092.0, style=ProgressStyle(descript…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"451f6d64f9d14cfe821ef82af4313223\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=2423393.0, style=ProgressStyle(descript…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n          
  \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"d85f7d324e534e7fb7665440581927af\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=272.0, style=ProgressStyle(description_…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"0236fdbddbcc4049b48713bb17ba4b74\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1140.0, style=ProgressStyle(description…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            \"\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"e7a58c85b79041eaaeeb9a9c3fa102aa\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=908.0, style=ProgressStyle(description_…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        },\n        {\n          \"output_type\": \"stream\",\n          \"text\": [\n            
\"\\n\"\n          ],\n          \"name\": \"stdout\"\n        },\n        {\n          \"output_type\": \"display_data\",\n          \"data\": {\n            \"application/vnd.jupyter.widget-view+json\": {\n              \"model_id\": \"b4bb0987873c46f9865e5308e2cd6cab\",\n              \"version_minor\": 0,\n              \"version_major\": 2\n            },\n            \"text/plain\": [\n              \"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1935796948.0, style=ProgressStyle(descr…\"\n            ]\n          },\n          \"metadata\": {\n            \"tags\": []\n          }\n        }\n      ]\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"DDQGpznwYIiH\"\n      },\n      \"source\": [\n        \"Above, you can see that `dlt.lang` contains variables representing each of the 50 available languages with auto-complete support. Alternatively, you can specify the language (e.g. \\\"Arabic\\\") or the language code (e.g. 
\\\"fr\\\" for French):\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"trusted\": true,\n        \"id\": \"yC3LMjmNYIiI\"\n      },\n      \"source\": [\n        \"text_ar = \\\"الأمين العام للأمم المتحدة يقول إنه لا يوجد حل عسكري في سوريا.\\\"\\n\",\n        \"mt.translate(text_ar, source=\\\"Arabic\\\", target=\\\"fr\\\")\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"erptbvbiYIiI\"\n      },\n      \"source\": [\n        \"If you want to verify whether a language is available, you can check it:\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"trusted\": true,\n        \"_kg_hide-output\": false,\n        \"id\": \"saHalYvsYIiJ\"\n      },\n      \"source\": [\n        \"print(mt.available_languages())  # All languages that you can use\\n\",\n        \"print(mt.available_codes())  # Code corresponding to each language accepted\\n\",\n        \"print(mt.get_lang_code_map())  # Dictionary of lang -> code\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"trusted\": true,\n        \"id\": \"lz3Rq5t0YIiJ\"\n      },\n      \"source\": [\n        \"## Usage\\n\",\n        \"\\n\",\n        \"### Selecting a device\\n\",\n        \"\\n\",\n        \"When you load the model, you can specify the device:\\n\",\n        \"```python\\n\",\n        \"mt = dlt.TranslationModel(device=\\\"auto\\\")\\n\",\n        \"```\\n\",\n        \"\\n\",\n        \"By default, the value will be `device=\\\"auto\\\"`, which means it will use a GPU if possible. You can also explicitly set `device=\\\"cpu\\\"` or `device=\\\"gpu\\\"`, or some other strings accepted by [`torch.device()`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.torch.device). 
__In general, it is recommended to use a GPU if you want a reasonable processing time.__\\n\",\n        \"\\n\",\n        \"Let's check what we originally loaded:\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"trusted\": true,\n        \"id\": \"fKAlEmzbYIiJ\"\n      },\n      \"source\": [\n        \"mt.device\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"Nm7hVlSZYIiL\"\n      },\n      \"source\": [\n        \"### Loading from a path\\n\",\n        \"\\n\",\n        \"By default, `dlt.TranslationModel` will download the model from the [huggingface repo](https://huggingface.co/facebook/mbart-large-50-one-to-many-mmt) and cache it. However, you are free to load from a path:\\n\",\n        \"```python\\n\",\n        \"mt = dlt.TranslationModel(\\\"/path/to/your/model/directory/\\\", model_family=\\\"mbart50\\\")\\n\",\n        \"```\\n\",\n        \"Make sure that your tokenizer is also stored in the same directory if you use this approach.\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"### Using a different model\\n\",\n        \"\\n\",\n        \"You can also choose another model that has [a similar format](https://huggingface.co/models?filter=mbart-50). In those cases, it's preferable to specify the model family:\\n\",\n        \"```python\\n\",\n        \"mt = dlt.TranslationModel(\\\"facebook/mbart-large-50-one-to-many-mmt\\\")\\n\",\n        \"mt = dlt.TranslationModel(\\\"facebook/m2m100_1.2B\\\", model_family=\\\"m2m100\\\")\\n\",\n        \"```\\n\",\n        \"Note that the available languages will change if you do this, so you will not be able to leverage `dlt.lang` or `dlt.utils`.\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"### Breaking down into sentences\\n\",\n        \"\\n\",\n        \"It is not recommended to use extremely long texts as it takes more time to process. 
Instead, you can try to break them down into sentences with the help of `nltk`. First install the library with `pip install nltk`, then run:\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"_kg_hide-output\": true,\n        \"trusted\": true,\n        \"id\": \"XkjrydidYIiL\"\n      },\n      \"source\": [\n        \"import nltk\\n\",\n        \"nltk.download(\\\"punkt\\\")\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"trusted\": true,\n        \"id\": \"j-cyjxQCYIiL\"\n      },\n      \"source\": [\n        \"text = \\\"Mr. Smith went to his favorite cafe. There, he met his friend Dr. Doe.\\\"\\n\",\n        \"sents = nltk.tokenize.sent_tokenize(text, \\\"english\\\")  # don't use dlt.lang.ENGLISH\\n\",\n        \"\\\" \\\".join(mt.translate(sents, source=dlt.lang.ENGLISH, target=dlt.lang.FRENCH))\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"u6y8-SthYIiM\"\n      },\n      \"source\": [\n        \"### Setting a `batch_size` and verbosity when calling `dlt.TranslationModel.translate`\\n\",\n        \"\\n\",\n        \"It's possible to set a batch size (i.e. 
the number of elements processed at once) for `mt.translate` and whether you want to see the progress bar or not:\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"trusted\": true,\n        \"id\": \"fcxUFmjAYIiM\"\n      },\n      \"source\": [\n        \"mt.translate(sents, source=dlt.lang.ENGLISH, target=dlt.lang.FRENCH, batch_size=32, verbose=True)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"iS0bfSh_YIiM\"\n      },\n      \"source\": [\n        \"If you set `batch_size=None`, it will compute the entire `text` at once rather than splitting into \\\"chunks\\\". We recommend lowering `batch_size` if you do not have a lot of RAM or VRAM and run into CUDA memory error. Set a higher value if you are using a high-end GPU and the VRAM is not fully utilized.\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"### `dlt.utils` module\\n\",\n        \"\\n\",\n        \"An alternative to `mt.available_languages()` is the `dlt.utils` module. 
You can use it to find out which languages and codes are available:\\n\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"trusted\": true,\n        \"id\": \"U7iS_wKTYIiM\"\n      },\n      \"source\": [\n        \"print(dlt.utils.available_languages('mbart50'))  # All languages that you can use\\n\",\n        \"print(dlt.utils.available_codes('mbart50'))  # Code corresponding to each language accepted\\n\",\n        \"print(dlt.utils.get_lang_code_map('mbart50'))  # Dictionary of lang -> code\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"RFN5AplfYIiM\"\n      },\n      \"source\": [\n        \"## Advanced\\n\",\n        \"\\n\",\n        \"The following section assumes you have knowledge of PyTorch and Huggingface Transformers.\\n\",\n        \"\\n\",\n        \"### Saving and loading\\n\",\n        \"\\n\",\n        \"If you wish to accelerate the loading time of the translation model, you can use `save_obj`:\\n\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"trusted\": true,\n        \"id\": \"UaDQVFlzYIiN\"\n      },\n      \"source\": [\n        \"mt.save_obj(\\\"saved_model\\\")\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"c_uMZ9exYIiN\"\n      },\n      \"source\": [\n        \"\\n\",\n        \"Then later you can reload it with `load_obj`:\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"trusted\": true,\n        \"id\": \"SEkmXUDaYIiN\"\n      },\n      \"source\": [\n        \"%%time\\n\",\n        \"mt = dlt.TranslationModel.load_obj('saved_model')\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        
\"id\": \"j4G9tRL8YIiO\"\n      },\n      \"source\": [\n        \"\\n\",\n        \"**Warning:** Only use this if you are certain the torch module saved in `saved_model/weights.pt` can be correctly loaded. Indeed, it is possible that the `huggingface`, `torch` or some other dependencies change between when you called `save_obj` and `load_obj`, and that might break your code. Thus, it is recommended to only run `load_obj` in the same environment/session as `save_obj`. **Note this method might be deprecated in the future once there's no speed benefit in loading this way.**\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"### Interacting with underlying model and tokenizer\\n\",\n        \"\\n\",\n        \"When initializing `model`, you can pass in arguments for the underlying BART model and tokenizer (which will respectively be passed to `MBartForConditionalGeneration.from_pretrained` and `MBart50TokenizerFast.from_pretrained`):\\n\",\n        \"\\n\",\n        \"```python\\n\",\n        \"mt = dlt.TranslationModel(\\n\",\n        \"    model_options=dict(\\n\",\n        \"        state_dict=...,\\n\",\n        \"        cache_dir=...,\\n\",\n        \"        ...\\n\",\n        \"    ),\\n\",\n        \"    tokenizer_options=dict(\\n\",\n        \"        tokenizer_file=...,\\n\",\n        \"        eos_token=...,\\n\",\n        \"        ...\\n\",\n        \"    )\\n\",\n        \")\\n\",\n        \"```\\n\",\n        \"\\n\",\n        \"You can also access the underlying `transformers` model and `tokenizer`:\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"_kg_hide-output\": true,\n        \"trusted\": true,\n        \"id\": \"UX1lyl_uYIiO\"\n      },\n      \"source\": [\n        \"bart = mt.get_transformers_model()\\n\",\n        \"tokenizer = mt.get_tokenizer()\\n\",\n        \"\\n\",\n        \"print(tokenizer)\\n\",\n        \"print(bart)\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    
},\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"QQPqtEeuYIiO\"\n      },\n      \"source\": [\n        \"See the [huggingface docs](https://huggingface.co/transformers/master/model_doc/mbart.html) for more information.\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"### `bart_model.generate()` keyword arguments\\n\",\n        \"\\n\",\n        \"When running `mt.translate`, you can also give a `generation_options` dictionary that is passed as keyword arguments to the underlying `bart_model.generate()` method:\"\n      ]\n    },\n    {\n      \"cell_type\": \"code\",\n      \"metadata\": {\n        \"trusted\": true,\n        \"id\": \"XuTbvJBWYIiP\"\n      },\n      \"source\": [\n        \"mt.translate(\\n\",\n        \"    sents,\\n\",\n        \"    source=dlt.lang.ENGLISH,\\n\",\n        \"    target=dlt.lang.SPANISH,\\n\",\n        \"    generation_options=dict(num_beams=5, max_length=128)\\n\",\n        \")\"\n      ],\n      \"execution_count\": null,\n      \"outputs\": []\n    },\n    {\n      \"cell_type\": \"markdown\",\n      \"metadata\": {\n        \"id\": \"QvwPa2b1YIiP\"\n      },\n      \"source\": [\n        \"Learn more in the [huggingface docs](https://huggingface.co/transformers/main_classes/model.html#transformers.generation_utils.GenerationMixin.generate).\\n\",\n        \"\\n\",\n        \"\\n\",\n        \"## Acknowledgement\\n\",\n        \"\\n\",\n        \"`dl-translate` is built on top of Huggingface's implementation of two models created by Facebook AI Research.\\n\",\n        \"\\n\",\n        \"1. The multilingual BART finetuned on many-to-many translation of over 50 languages, which is [documented here](https://huggingface.co/transformers/master/model_doc/mbart.html). The original paper was written by Tang et. 
al from Facebook AI Research; you can [find it here](https://arxiv.org/pdf/2008.00401.pdf) and cite it using the following:\\n\",\n        \"    ```\\n\",\n        \"    @article{tang2020multilingual,\\n\",\n        \"        title={Multilingual translation with extensible multilingual pretraining and finetuning},\\n\",\n        \"        author={Tang, Yuqing and Tran, Chau and Li, Xian and Chen, Peng-Jen and Goyal, Naman and Chaudhary, Vishrav and Gu, Jiatao and Fan, Angela},\\n\",\n        \"        journal={arXiv preprint arXiv:2008.00401},\\n\",\n        \"        year={2020}\\n\",\n        \"    }\\n\",\n        \"    ```\\n\",\n        \"2. The transformer model published in [Beyond English-Centric Multilingual Machine Translation](https://arxiv.org/abs/2010.11125) by Fan et. al, which supports over 100 languages. You can cite it here:\\n\",\n        \"   ```\\n\",\n        \"   @misc{fan2020englishcentric,\\n\",\n        \"        title={Beyond English-Centric Multilingual Machine Translation}, \\n\",\n        \"        author={Angela Fan and Shruti Bhosale and Holger Schwenk and Zhiyi Ma and Ahmed El-Kishky and Siddharth Goyal and Mandeep Baines and Onur Celebi and Guillaume Wenzek and Vishrav Chaudhary and Naman Goyal and Tom Birch and Vitaliy Liptchinsky and Sergey Edunov and Edouard Grave and Michael Auli and Armand Joulin},\\n\",\n        \"        year={2020},\\n\",\n        \"        eprint={2010.11125},\\n\",\n        \"        archivePrefix={arXiv},\\n\",\n        \"        primaryClass={cs.CL}\\n\",\n        \"    }\\n\",\n        \"   ```\\n\",\n        \"\\n\",\n        \"`dlt` is a wrapper with useful `utils` to save you time. 
For huggingface's `transformers`, the following snippet is shown as an example:\\n\",\n        \"```python\\n\",\n        \"from transformers import MBartForConditionalGeneration, MBart50TokenizerFast\\n\",\n        \"\\n\",\n        \"article_hi = \\\"संयुक्त राष्ट्र के प्रमुख का कहना है कि सीरिया में कोई सैन्य समाधान नहीं है\\\"\\n\",\n        \"article_ar = \\\"الأمين العام للأمم المتحدة يقول إنه لا يوجد حل عسكري في سوريا.\\\"\\n\",\n        \"\\n\",\n        \"model = MBartForConditionalGeneration.from_pretrained(\\\"facebook/mbart-large-50-many-to-many-mmt\\\")\\n\",\n        \"tokenizer = MBart50TokenizerFast.from_pretrained(\\\"facebook/mbart-large-50-many-to-many-mmt\\\")\\n\",\n        \"\\n\",\n        \"# translate Hindi to French\\n\",\n        \"tokenizer.src_lang = \\\"hi_IN\\\"\\n\",\n        \"encoded_hi = tokenizer(article_hi, return_tensors=\\\"pt\\\")\\n\",\n        \"generated_tokens = model.generate(**encoded_hi, forced_bos_token_id=tokenizer.lang_code_to_id[\\\"fr_XX\\\"])\\n\",\n        \"tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)\\n\",\n        \"# => \\\"Le chef de l 'ONU affirme qu 'il n 'y a pas de solution militaire en Syria.\\\"\\n\",\n        \"\\n\",\n        \"# translate Arabic to English\\n\",\n        \"tokenizer.src_lang = \\\"ar_AR\\\"\\n\",\n        \"encoded_ar = tokenizer(article_ar, return_tensors=\\\"pt\\\")\\n\",\n        \"generated_tokens = model.generate(**encoded_ar, forced_bos_token_id=tokenizer.lang_code_to_id[\\\"en_XX\\\"])\\n\",\n        \"tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)\\n\",\n        \"# => \\\"The Secretary-General of the United Nations says there is no military solution in Syria.\\\"\\n\",\n        \"```\\n\",\n        \"\\n\",\n        \"With `dlt`, you can run:\\n\",\n        \"```python\\n\",\n        \"import dl_translate as dlt\\n\",\n        \"\\n\",\n        \"article_hi = \\\"संयुक्त राष्ट्र के प्रमुख का कहना है कि सीरिया में कोई सैन्य समाधान 
नहीं है\\\"\\n\",\n        \"article_ar = \\\"الأمين العام للأمم المتحدة يقول إنه لا يوجد حل عسكري في سوريا.\\\"\\n\",\n        \"\\n\",\n        \"mt = dlt.TranslationModel()\\n\",\n        \"translated_fr = mt.translate(article_hi, source=dlt.lang.HINDI, target=dlt.lang.FRENCH)\\n\",\n        \"translated_en = mt.translate(article_ar, source=dlt.lang.ARABIC, target=dlt.lang.ENGLISH)\\n\",\n        \"```\\n\",\n        \"\\n\",\n        \"Notice you don't have to think about tokenizers, conditional generation, pretrained models, and regional codes; you can just tell the model what to translate!\\n\",\n        \"\\n\",\n        \"If you are experienced with `huggingface`'s ecosystem, then you should be familiar enough with the example above that you wouldn't need this library. However, if you've never heard of huggingface or mBART, then I hope using this library will give you enough motivation to [learn more about them](https://github.com/huggingface/transformers) :)\"\n      ]\n    }\n  ]\n}"
  },
  {
    "path": "demos/nllb_demo.ipynb",
    "content": "{\"cells\":[{\"cell_type\":\"code\",\"execution_count\":1,\"metadata\":{\"_cell_guid\":\"b1076dfc-b9ad-4769-8c92-a6c4dae69d19\",\"_uuid\":\"8f2839f25d086af736a60e9eeb907d3b93b6e0e5\",\"execution\":{\"iopub.execute_input\":\"2023-07-18T05:15:13.999614Z\",\"iopub.status.busy\":\"2023-07-18T05:15:13.999228Z\",\"iopub.status.idle\":\"2023-07-18T05:15:31.978108Z\",\"shell.execute_reply\":\"2023-07-18T05:15:31.976681Z\",\"shell.execute_reply.started\":\"2023-07-18T05:15:13.999573Z\"},\"trusted\":true},\"outputs\":[],\"source\":[\"!pip install dl-translate==0.3.* -q\"]},{\"cell_type\":\"code\",\"execution_count\":null,\"metadata\":{\"execution\":{\"iopub.execute_input\":\"2023-07-18T05:15:31.982361Z\",\"iopub.status.busy\":\"2023-07-18T05:15:31.981992Z\",\"iopub.status.idle\":\"2023-07-18T05:16:23.731908Z\",\"shell.execute_reply\":\"2023-07-18T05:16:23.730776Z\",\"shell.execute_reply.started\":\"2023-07-18T05:15:31.982327Z\"},\"trusted\":true},\"outputs\":[],\"source\":[\"import dl_translate as dlt\\n\",\"\\n\",\"mt = dlt.TranslationModel(\\\"nllb200\\\")\"]},{\"cell_type\":\"code\",\"execution_count\":null,\"metadata\":{\"execution\":{\"iopub.execute_input\":\"2023-07-18T05:16:23.734336Z\",\"iopub.status.busy\":\"2023-07-18T05:16:23.733295Z\",\"iopub.status.idle\":\"2023-07-18T05:16:28.025038Z\",\"shell.execute_reply\":\"2023-07-18T05:16:28.023933Z\",\"shell.execute_reply.started\":\"2023-07-18T05:16:23.734293Z\"},\"trusted\":true},\"outputs\":[],\"source\":[\"text = \\\"Meta AI has built a single AI model capable of translating across 200 different languages with state-of-the-art quality.\\\"\\n\",\"\\n\",\"# The new translation is much faster than before\\n\",\"%time print(mt.translate(text, source=dlt.lang.nllb200.ENGLISH, 
target=dlt.lang.nllb200.FRENCH))\"]},{\"cell_type\":\"code\",\"execution_count\":4,\"metadata\":{\"execution\":{\"iopub.execute_input\":\"2023-07-18T05:16:28.028919Z\",\"iopub.status.busy\":\"2023-07-18T05:16:28.028286Z\",\"iopub.status.idle\":\"2023-07-18T05:16:28.717521Z\",\"shell.execute_reply\":\"2023-07-18T05:16:28.716343Z\",\"shell.execute_reply.started\":\"2023-07-18T05:16:28.028882Z\"},\"trusted\":true},\"outputs\":[{\"name\":\"stdout\",\"output_type\":\"stream\",\"text\":[\"मेटाएआई एकमेव एआई मॉडलं निर्मितवान्, यत् 200 भिन्नभाषायां अवधीतमतमतमगुणैः अनुवादं कर्तुं समर्थः अस्ति।\\n\"]}],\"source\":[\"# Sanskrit is now available (previously unavailable)\\n\",\"print(mt.translate(text, source=dlt.lang.nllb200.ENGLISH, target=dlt.lang.nllb200.SANSKRIT))\"]},{\"cell_type\":\"code\",\"execution_count\":5,\"metadata\":{\"execution\":{\"iopub.execute_input\":\"2023-07-18T05:16:28.719596Z\",\"iopub.status.busy\":\"2023-07-18T05:16:28.719227Z\",\"iopub.status.idle\":\"2023-07-18T05:16:29.443696Z\",\"shell.execute_reply\":\"2023-07-18T05:16:29.442668Z\",\"shell.execute_reply.started\":\"2023-07-18T05:16:28.719560Z\"},\"trusted\":true},\"outputs\":[{\"name\":\"stdout\",\"output_type\":\"stream\",\"text\":[\"Meta AI hà custruitu un solu mudellu d'AI capace di tradurisce in 200 lingue sfarenti cù qualità di u statu di l'arte.\\n\"]}],\"source\":[\"# Sicilian is now available (previously unavailable)\\n\",\"print(mt.translate(text, source=dlt.lang.nllb200.ENGLISH, target=dlt.lang.nllb200.SICILIAN))\"]},{\"cell_type\":\"code\",\"execution_count\":6,\"metadata\":{\"execution\":{\"iopub.execute_input\":\"2023-07-18T05:16:29.447147Z\",\"iopub.status.busy\":\"2023-07-18T05:16:29.445331Z\",\"iopub.status.idle\":\"2023-07-18T05:16:30.145637Z\",\"shell.execute_reply\":\"2023-07-18T05:16:30.144623Z\",\"shell.execute_reply.started\":\"2023-07-18T05:16:29.447108Z\"},\"trusted\":true},\"outputs\":[{\"name\":\"stdout\",\"output_type\":\"stream\",\"text\":[\"基於Meta AI 建立咗一個 AI 模型 
可以用最先端嘅質量翻譯到 200 個唔同語言\\n\"]}],\"source\":[\"# Yue Chinese is now available (previously unavailable)\\n\",\"print(mt.translate(text, source=dlt.lang.nllb200.ENGLISH, target=dlt.lang.nllb200.YUE_CHINESE))\"]}],\"metadata\":{\"kernelspec\":{\"display_name\":\"Python 3\",\"language\":\"python\",\"name\":\"python3\"},\"language_info\":{\"codemirror_mode\":{\"name\":\"ipython\",\"version\":3},\"file_extension\":\".py\",\"mimetype\":\"text/x-python\",\"name\":\"python\",\"nbconvert_exporter\":\"python\",\"pygments_lexer\":\"ipython3\",\"version\":\"3.10.12\"}},\"nbformat\":4,\"nbformat_minor\":4}\n"
  },
  {
    "path": "dl_translate/__init__.py",
    "content": "from . import lang\nfrom . import utils\nfrom ._translation_model import TranslationModel\n"
  },
  {
    "path": "dl_translate/_pairs.py",
    "content": "# Auto-generated. Do not modify, use scripts/generate_langs.py instead.\n_PAIRS_M2M100 = (\n    (\"Afrikaans\", \"af\"),\n    (\"Amharic\", \"am\"),\n    (\"Arabic\", \"ar\"),\n    (\"Asturian\", \"ast\"),\n    (\"Azerbaijani\", \"az\"),\n    (\"Bashkir\", \"ba\"),\n    (\"Belarusian\", \"be\"),\n    (\"Bulgarian\", \"bg\"),\n    (\"Bengali\", \"bn\"),\n    (\"Breton\", \"br\"),\n    (\"Bosnian\", \"bs\"),\n    (\"Catalan\", \"ca\"),\n    (\"Valencian\", \"ca\"),\n    (\"Cebuano\", \"ceb\"),\n    (\"Czech\", \"cs\"),\n    (\"Welsh\", \"cy\"),\n    (\"Danish\", \"da\"),\n    (\"German\", \"de\"),\n    (\"Greek\", \"el\"),\n    (\"English\", \"en\"),\n    (\"Spanish\", \"es\"),\n    (\"Estonian\", \"et\"),\n    (\"Persian\", \"fa\"),\n    (\"Fulah\", \"ff\"),\n    (\"Finnish\", \"fi\"),\n    (\"French\", \"fr\"),\n    (\"Western Frisian\", \"fy\"),\n    (\"Irish\", \"ga\"),\n    (\"Gaelic\", \"gd\"),\n    (\"Scottish Gaelic\", \"gd\"),\n    (\"Galician\", \"gl\"),\n    (\"Gujarati\", \"gu\"),\n    (\"Hausa\", \"ha\"),\n    (\"Hebrew\", \"he\"),\n    (\"Hindi\", \"hi\"),\n    (\"Croatian\", \"hr\"),\n    (\"Haitian\", \"ht\"),\n    (\"Haitian Creole\", \"ht\"),\n    (\"Hungarian\", \"hu\"),\n    (\"Armenian\", \"hy\"),\n    (\"Indonesian\", \"id\"),\n    (\"Igbo\", \"ig\"),\n    (\"Iloko\", \"ilo\"),\n    (\"Icelandic\", \"is\"),\n    (\"Italian\", \"it\"),\n    (\"Japanese\", \"ja\"),\n    (\"Javanese\", \"jv\"),\n    (\"Georgian\", \"ka\"),\n    (\"Kazakh\", \"kk\"),\n    (\"Khmer\", \"km\"),\n    (\"Central Khmer\", \"km\"),\n    (\"Kannada\", \"kn\"),\n    (\"Korean\", \"ko\"),\n    (\"Luxembourgish\", \"lb\"),\n    (\"Letzeburgesch\", \"lb\"),\n    (\"Ganda\", \"lg\"),\n    (\"Lingala\", \"ln\"),\n    (\"Lao\", \"lo\"),\n    (\"Lithuanian\", \"lt\"),\n    (\"Latvian\", \"lv\"),\n    (\"Malagasy\", \"mg\"),\n    (\"Macedonian\", \"mk\"),\n    (\"Malayalam\", \"ml\"),\n    (\"Mongolian\", \"mn\"),\n    (\"Marathi\", \"mr\"),\n    (\"Malay\", 
\"ms\"),\n    (\"Burmese\", \"my\"),\n    (\"Nepali\", \"ne\"),\n    (\"Dutch\", \"nl\"),\n    (\"Flemish\", \"nl\"),\n    (\"Norwegian\", \"no\"),\n    (\"Northern Sotho\", \"ns\"),\n    (\"Occitan\", \"oc\"),\n    (\"Oriya\", \"or\"),\n    (\"Panjabi\", \"pa\"),\n    (\"Punjabi\", \"pa\"),\n    (\"Polish\", \"pl\"),\n    (\"Pushto\", \"ps\"),\n    (\"Pashto\", \"ps\"),\n    (\"Portuguese\", \"pt\"),\n    (\"Romanian\", \"ro\"),\n    (\"Moldavian\", \"ro\"),\n    (\"Moldovan\", \"ro\"),\n    (\"Russian\", \"ru\"),\n    (\"Sindhi\", \"sd\"),\n    (\"Sinhala\", \"si\"),\n    (\"Sinhalese\", \"si\"),\n    (\"Slovak\", \"sk\"),\n    (\"Slovenian\", \"sl\"),\n    (\"Somali\", \"so\"),\n    (\"Albanian\", \"sq\"),\n    (\"Serbian\", \"sr\"),\n    (\"Swati\", \"ss\"),\n    (\"Sundanese\", \"su\"),\n    (\"Swedish\", \"sv\"),\n    (\"Swahili\", \"sw\"),\n    (\"Tamil\", \"ta\"),\n    (\"Thai\", \"th\"),\n    (\"Tagalog\", \"tl\"),\n    (\"Tswana\", \"tn\"),\n    (\"Turkish\", \"tr\"),\n    (\"Ukrainian\", \"uk\"),\n    (\"Urdu\", \"ur\"),\n    (\"Uzbek\", \"uz\"),\n    (\"Vietnamese\", \"vi\"),\n    (\"Wolof\", \"wo\"),\n    (\"Xhosa\", \"xh\"),\n    (\"Yiddish\", \"yi\"),\n    (\"Yoruba\", \"yo\"),\n    (\"Chinese\", \"zh\"),\n    (\"Zulu\", \"zu\"),\n)\n_PAIRS_MBART50 = (\n    (\"Arabic\", \"ar_AR\"),\n    (\"Czech\", \"cs_CZ\"),\n    (\"German\", \"de_DE\"),\n    (\"English\", \"en_XX\"),\n    (\"Spanish\", \"es_XX\"),\n    (\"Estonian\", \"et_EE\"),\n    (\"Finnish\", \"fi_FI\"),\n    (\"French\", \"fr_XX\"),\n    (\"Gujarati\", \"gu_IN\"),\n    (\"Hindi\", \"hi_IN\"),\n    (\"Italian\", \"it_IT\"),\n    (\"Japanese\", \"ja_XX\"),\n    (\"Kazakh\", \"kk_KZ\"),\n    (\"Korean\", \"ko_KR\"),\n    (\"Lithuanian\", \"lt_LT\"),\n    (\"Latvian\", \"lv_LV\"),\n    (\"Burmese\", \"my_MM\"),\n    (\"Nepali\", \"ne_NP\"),\n    (\"Dutch\", \"nl_XX\"),\n    (\"Romanian\", \"ro_RO\"),\n    (\"Russian\", \"ru_RU\"),\n    (\"Sinhala\", \"si_LK\"),\n    (\"Turkish\", \"tr_TR\"),\n   
 (\"Vietnamese\", \"vi_VN\"),\n    (\"Chinese\", \"zh_CN\"),\n    (\"Afrikaans\", \"af_ZA\"),\n    (\"Azerbaijani\", \"az_AZ\"),\n    (\"Bengali\", \"bn_IN\"),\n    (\"Persian\", \"fa_IR\"),\n    (\"Hebrew\", \"he_IL\"),\n    (\"Croatian\", \"hr_HR\"),\n    (\"Indonesian\", \"id_ID\"),\n    (\"Georgian\", \"ka_GE\"),\n    (\"Khmer\", \"km_KH\"),\n    (\"Macedonian\", \"mk_MK\"),\n    (\"Malayalam\", \"ml_IN\"),\n    (\"Mongolian\", \"mn_MN\"),\n    (\"Marathi\", \"mr_IN\"),\n    (\"Polish\", \"pl_PL\"),\n    (\"Pashto\", \"ps_AF\"),\n    (\"Portuguese\", \"pt_XX\"),\n    (\"Swedish\", \"sv_SE\"),\n    (\"Swahili\", \"sw_KE\"),\n    (\"Tamil\", \"ta_IN\"),\n    (\"Telugu\", \"te_IN\"),\n    (\"Thai\", \"th_TH\"),\n    (\"Tagalog\", \"tl_XX\"),\n    (\"Ukrainian\", \"uk_UA\"),\n    (\"Urdu\", \"ur_PK\"),\n    (\"Xhosa\", \"xh_ZA\"),\n    (\"Galician\", \"gl_ES\"),\n    (\"Slovene\", \"sl_SI\"),\n)\n_PAIRS_NLLB200 = (\n    (\"Acehnese (Arabic script)\", \"ace_Arab\"),\n    (\"Acehnese (Latin script)\", \"ace_Latn\"),\n    (\"Mesopotamian Arabic\", \"acm_Arab\"),\n    (\"Ta'izzi-Adeni Arabic\", \"acq_Arab\"),\n    (\"Tunisian Arabic\", \"aeb_Arab\"),\n    (\"Afrikaans\", \"afr_Latn\"),\n    (\"South Levantine Arabic\", \"ajp_Arab\"),\n    (\"Akan\", \"aka_Latn\"),\n    (\"Amharic\", \"amh_Ethi\"),\n    (\"North Levantine Arabic\", \"apc_Arab\"),\n    (\"Modern Standard Arabic\", \"arb_Arab\"),\n    (\"Modern Standard Arabic (Romanized)\", \"arb_Latn\"),\n    (\"Najdi Arabic\", \"ars_Arab\"),\n    (\"Moroccan Arabic\", \"ary_Arab\"),\n    (\"Egyptian Arabic\", \"arz_Arab\"),\n    (\"Assamese\", \"asm_Beng\"),\n    (\"Asturian\", \"ast_Latn\"),\n    (\"Awadhi\", \"awa_Deva\"),\n    (\"Central Aymara\", \"ayr_Latn\"),\n    (\"South Azerbaijani\", \"azb_Arab\"),\n    (\"North Azerbaijani\", \"azj_Latn\"),\n    (\"Bashkir\", \"bak_Cyrl\"),\n    (\"Bambara\", \"bam_Latn\"),\n    (\"Balinese\", \"ban_Latn\"),\n    (\"Belarusian\", \"bel_Cyrl\"),\n    (\"Bemba\", 
\"bem_Latn\"),\n    (\"Bengali\", \"ben_Beng\"),\n    (\"Bhojpuri\", \"bho_Deva\"),\n    (\"Banjar (Arabic script)\", \"bjn_Arab\"),\n    (\"Banjar (Latin script)\", \"bjn_Latn\"),\n    (\"Standard Tibetan\", \"bod_Tibt\"),\n    (\"Bosnian\", \"bos_Latn\"),\n    (\"Buginese\", \"bug_Latn\"),\n    (\"Bulgarian\", \"bul_Cyrl\"),\n    (\"Catalan\", \"cat_Latn\"),\n    (\"Cebuano\", \"ceb_Latn\"),\n    (\"Czech\", \"ces_Latn\"),\n    (\"Chokwe\", \"cjk_Latn\"),\n    (\"Central Kurdish\", \"ckb_Arab\"),\n    (\"Crimean Tatar\", \"crh_Latn\"),\n    (\"Welsh\", \"cym_Latn\"),\n    (\"Danish\", \"dan_Latn\"),\n    (\"German\", \"deu_Latn\"),\n    (\"Southwestern Dinka\", \"dik_Latn\"),\n    (\"Dyula\", \"dyu_Latn\"),\n    (\"Dzongkha\", \"dzo_Tibt\"),\n    (\"Greek\", \"ell_Grek\"),\n    (\"English\", \"eng_Latn\"),\n    (\"Esperanto\", \"epo_Latn\"),\n    (\"Estonian\", \"est_Latn\"),\n    (\"Basque\", \"eus_Latn\"),\n    (\"Ewe\", \"ewe_Latn\"),\n    (\"Faroese\", \"fao_Latn\"),\n    (\"Fijian\", \"fij_Latn\"),\n    (\"Finnish\", \"fin_Latn\"),\n    (\"Fon\", \"fon_Latn\"),\n    (\"French\", \"fra_Latn\"),\n    (\"Friulian\", \"fur_Latn\"),\n    (\"Nigerian Fulfulde\", \"fuv_Latn\"),\n    (\"Scottish Gaelic\", \"gla_Latn\"),\n    (\"Irish\", \"gle_Latn\"),\n    (\"Galician\", \"glg_Latn\"),\n    (\"Guarani\", \"grn_Latn\"),\n    (\"Gujarati\", \"guj_Gujr\"),\n    (\"Haitian Creole\", \"hat_Latn\"),\n    (\"Hausa\", \"hau_Latn\"),\n    (\"Hebrew\", \"heb_Hebr\"),\n    (\"Hindi\", \"hin_Deva\"),\n    (\"Chhattisgarhi\", \"hne_Deva\"),\n    (\"Croatian\", \"hrv_Latn\"),\n    (\"Hungarian\", \"hun_Latn\"),\n    (\"Armenian\", \"hye_Armn\"),\n    (\"Igbo\", \"ibo_Latn\"),\n    (\"Ilocano\", \"ilo_Latn\"),\n    (\"Indonesian\", \"ind_Latn\"),\n    (\"Icelandic\", \"isl_Latn\"),\n    (\"Italian\", \"ita_Latn\"),\n    (\"Javanese\", \"jav_Latn\"),\n    (\"Japanese\", \"jpn_Jpan\"),\n    (\"Kabyle\", \"kab_Latn\"),\n    (\"Jingpho\", \"kac_Latn\"),\n    (\"Kamba\", 
\"kam_Latn\"),\n    (\"Kannada\", \"kan_Knda\"),\n    (\"Kashmiri (Arabic script)\", \"kas_Arab\"),\n    (\"Kashmiri (Devanagari script)\", \"kas_Deva\"),\n    (\"Georgian\", \"kat_Geor\"),\n    (\"Central Kanuri (Arabic script)\", \"knc_Arab\"),\n    (\"Central Kanuri (Latin script)\", \"knc_Latn\"),\n    (\"Kazakh\", \"kaz_Cyrl\"),\n    (\"Kabiyè\", \"kbp_Latn\"),\n    (\"Kabuverdianu\", \"kea_Latn\"),\n    (\"Khmer\", \"khm_Khmr\"),\n    (\"Kikuyu\", \"kik_Latn\"),\n    (\"Kinyarwanda\", \"kin_Latn\"),\n    (\"Kyrgyz\", \"kir_Cyrl\"),\n    (\"Kimbundu\", \"kmb_Latn\"),\n    (\"Northern Kurdish\", \"kmr_Latn\"),\n    (\"Kikongo\", \"kon_Latn\"),\n    (\"Korean\", \"kor_Hang\"),\n    (\"Lao\", \"lao_Laoo\"),\n    (\"Ligurian\", \"lij_Latn\"),\n    (\"Limburgish\", \"lim_Latn\"),\n    (\"Lingala\", \"lin_Latn\"),\n    (\"Lithuanian\", \"lit_Latn\"),\n    (\"Lombard\", \"lmo_Latn\"),\n    (\"Latgalian\", \"ltg_Latn\"),\n    (\"Luxembourgish\", \"ltz_Latn\"),\n    (\"Luba-Kasai\", \"lua_Latn\"),\n    (\"Ganda\", \"lug_Latn\"),\n    (\"Luo\", \"luo_Latn\"),\n    (\"Mizo\", \"lus_Latn\"),\n    (\"Standard Latvian\", \"lvs_Latn\"),\n    (\"Magahi\", \"mag_Deva\"),\n    (\"Maithili\", \"mai_Deva\"),\n    (\"Malayalam\", \"mal_Mlym\"),\n    (\"Marathi\", \"mar_Deva\"),\n    (\"Minangkabau (Arabic script)\", \"min_Arab\"),\n    (\"Minangkabau (Latin script)\", \"min_Latn\"),\n    (\"Macedonian\", \"mkd_Cyrl\"),\n    (\"Plateau Malagasy\", \"plt_Latn\"),\n    (\"Maltese\", \"mlt_Latn\"),\n    (\"Meitei (Bengali script)\", \"mni_Beng\"),\n    (\"Halh Mongolian\", \"khk_Cyrl\"),\n    (\"Mossi\", \"mos_Latn\"),\n    (\"Maori\", \"mri_Latn\"),\n    (\"Burmese\", \"mya_Mymr\"),\n    (\"Dutch\", \"nld_Latn\"),\n    (\"Norwegian Nynorsk\", \"nno_Latn\"),\n    (\"Norwegian Bokmål\", \"nob_Latn\"),\n    (\"Nepali\", \"npi_Deva\"),\n    (\"Northern Sotho\", \"nso_Latn\"),\n    (\"Nuer\", \"nus_Latn\"),\n    (\"Nyanja\", \"nya_Latn\"),\n    (\"Occitan\", \"oci_Latn\"),\n    (\"West 
Central Oromo\", \"gaz_Latn\"),\n    (\"Odia\", \"ory_Orya\"),\n    (\"Pangasinan\", \"pag_Latn\"),\n    (\"Eastern Panjabi\", \"pan_Guru\"),\n    (\"Papiamento\", \"pap_Latn\"),\n    (\"Western Persian\", \"pes_Arab\"),\n    (\"Polish\", \"pol_Latn\"),\n    (\"Portuguese\", \"por_Latn\"),\n    (\"Dari\", \"prs_Arab\"),\n    (\"Southern Pashto\", \"pbt_Arab\"),\n    (\"Ayacucho Quechua\", \"quy_Latn\"),\n    (\"Romanian\", \"ron_Latn\"),\n    (\"Rundi\", \"run_Latn\"),\n    (\"Russian\", \"rus_Cyrl\"),\n    (\"Sango\", \"sag_Latn\"),\n    (\"Sanskrit\", \"san_Deva\"),\n    (\"Santali\", \"sat_Olck\"),\n    (\"Sicilian\", \"scn_Latn\"),\n    (\"Shan\", \"shn_Mymr\"),\n    (\"Sinhala\", \"sin_Sinh\"),\n    (\"Slovak\", \"slk_Latn\"),\n    (\"Slovenian\", \"slv_Latn\"),\n    (\"Samoan\", \"smo_Latn\"),\n    (\"Shona\", \"sna_Latn\"),\n    (\"Sindhi\", \"snd_Arab\"),\n    (\"Somali\", \"som_Latn\"),\n    (\"Southern Sotho\", \"sot_Latn\"),\n    (\"Spanish\", \"spa_Latn\"),\n    (\"Tosk Albanian\", \"als_Latn\"),\n    (\"Sardinian\", \"srd_Latn\"),\n    (\"Serbian\", \"srp_Cyrl\"),\n    (\"Swati\", \"ssw_Latn\"),\n    (\"Sundanese\", \"sun_Latn\"),\n    (\"Swedish\", \"swe_Latn\"),\n    (\"Swahili\", \"swh_Latn\"),\n    (\"Silesian\", \"szl_Latn\"),\n    (\"Tamil\", \"tam_Taml\"),\n    (\"Tatar\", \"tat_Cyrl\"),\n    (\"Telugu\", \"tel_Telu\"),\n    (\"Tajik\", \"tgk_Cyrl\"),\n    (\"Tagalog\", \"tgl_Latn\"),\n    (\"Thai\", \"tha_Thai\"),\n    (\"Tigrinya\", \"tir_Ethi\"),\n    (\"Tamasheq (Latin script)\", \"taq_Latn\"),\n    (\"Tamasheq (Tifinagh script)\", \"taq_Tfng\"),\n    (\"Tok Pisin\", \"tpi_Latn\"),\n    (\"Tswana\", \"tsn_Latn\"),\n    (\"Tsonga\", \"tso_Latn\"),\n    (\"Turkmen\", \"tuk_Latn\"),\n    (\"Tumbuka\", \"tum_Latn\"),\n    (\"Turkish\", \"tur_Latn\"),\n    (\"Twi\", \"twi_Latn\"),\n    (\"Central Atlas Tamazight\", \"tzm_Tfng\"),\n    (\"Uyghur\", \"uig_Arab\"),\n    (\"Ukrainian\", \"ukr_Cyrl\"),\n    (\"Umbundu\", \"umb_Latn\"),\n    (\"Urdu\", 
\"urd_Arab\"),\n    (\"Northern Uzbek\", \"uzn_Latn\"),\n    (\"Venetian\", \"vec_Latn\"),\n    (\"Vietnamese\", \"vie_Latn\"),\n    (\"Waray\", \"war_Latn\"),\n    (\"Wolof\", \"wol_Latn\"),\n    (\"Xhosa\", \"xho_Latn\"),\n    (\"Eastern Yiddish\", \"ydd_Hebr\"),\n    (\"Yoruba\", \"yor_Latn\"),\n    (\"Yue Chinese\", \"yue_Hant\"),\n    (\"Chinese (Simplified)\", \"zho_Hans\"),\n    (\"Chinese (Traditional)\", \"zho_Hant\"),\n    (\"Standard Malay\", \"zsm_Latn\"),\n    (\"Zulu\", \"zul_Latn\"),\n)\n"
  },
  {
    "path": "dl_translate/_translation_model.py",
    "content": "import os\nimport json\nfrom typing import Union, List, Dict\n\nimport transformers\nimport torch\nfrom tqdm.auto import tqdm\n\nfrom . import utils\nfrom .utils import _infer_model_family, _infer_model_or_path\n\n\ndef _select_device(device_selection):\n    selected = device_selection.lower()\n    if selected == \"auto\":\n        device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n    elif selected == \"cpu\":\n        device = torch.device(\"cpu\")\n    elif selected == \"gpu\":\n        device = torch.device(\"cuda\")\n    else:\n        device = torch.device(selected)\n\n    return device\n\n\ndef _resolve_lang_codes(lang: str, name: str, model_family: str):\n    def error_message(variable, value):\n        return f'Your {variable}=\"{value}\" is not valid. Please run `print(mt.available_languages())` to see which languages are available. Make sure you are using the correct capital letters.'\n\n    # If can't find in the lang -> code mapping, assumes it's already a code.\n    lang_code_map = utils.get_lang_code_map(model_family)\n    if lang in lang_code_map:\n        code = lang_code_map[lang]\n    elif lang.capitalize() in lang_code_map:\n        code = lang_code_map[lang.capitalize()]\n    else:\n        lang_upper = lang.upper()\n        lang_code_map_upper = {k.upper(): v for k, v in lang_code_map.items()}\n\n        if lang_upper in lang_code_map_upper:\n            code = lang_code_map_upper[lang_upper]\n        else:\n            code = lang\n\n    # If the code is not valid, raises an error\n    if code not in utils.available_codes(model_family):\n        raise ValueError(error_message(name, code))\n\n    return code\n\n\ndef _resolve_tokenizer(model_family):\n    di = {\n        \"mbart50\": transformers.MBart50TokenizerFast,\n        \"m2m100\": transformers.M2M100Tokenizer,\n        \"nllb200\": transformers.AutoTokenizer,\n    }\n    if model_family in di:\n        return di[model_family]\n    else:\n        
error_msg = f\"{model_family} is not a valid value for model_family. Please choose model_family to be equal to one of the following values: {list(di.keys())}\"\n        raise ValueError(error_msg)\n\n\ndef _resolve_transformers_model(model_family):\n    di = {\n        \"mbart50\": transformers.MBartForConditionalGeneration,\n        \"m2m100\": transformers.M2M100ForConditionalGeneration,\n        \"nllb200\": transformers.AutoModelForSeq2SeqLM,\n    }\n    if model_family in di:\n        return di[model_family]\n    else:\n        error_msg = f\"{model_family} is not a valid value for model_family. Please choose model_family to be equal to one of the following values: {list(di.keys())}\"\n        raise ValueError(error_msg)\n\n\nclass TranslationModel:\n    def __init__(\n        self,\n        model_or_path: str = \"m2m100\",\n        tokenizer_path: str = None,\n        device: str = \"auto\",\n        model_family: str = None,\n        model_options: dict = None,\n        tokenizer_options: dict = None,\n    ):\n        \"\"\"\n        *Instantiates a multilingual transformer model for translation.*\n\n        {{params}}\n        {{model_or_path}} The path or the name of the model. Equivalent to the first argument of `AutoModel.from_pretrained()`. You can also specify shorthands (\"mbart50\" and \"m2m100\").\n        {{tokenizer_path}} The path to the tokenizer. By default, it will be set to `model_or_path`.\n        {{device}} \"cpu\", \"gpu\" or \"auto\". If it's set to \"auto\", will try to select a GPU when available or else fall back to CPU.\n        {{model_family}} Either \"mbart50\" or \"m2m100\". By default, it will be inferred based on `model_or_path`. 
Needs to be explicitly set if `model_or_path` is a path.\n        {{model_options}} The keyword arguments passed to the model, which is a transformer for conditional generation.\n        {{tokenizer_options}} The keyword arguments passed to the model's tokenizer.\n        \"\"\"\n        model_or_path = _infer_model_or_path(model_or_path)\n        self.model_or_path = model_or_path\n        self.device = _select_device(device)\n\n        # Resolve default values\n        tokenizer_path = tokenizer_path or self.model_or_path\n        model_options = model_options or {}\n        tokenizer_options = tokenizer_options or {}\n        self.model_family = model_family or _infer_model_family(model_or_path)\n\n        # Load the tokenizer\n        TokenizerFast = _resolve_tokenizer(self.model_family)\n        self._tokenizer = TokenizerFast.from_pretrained(\n            tokenizer_path, **tokenizer_options\n        )\n\n        # Load the model either from a saved torch model or from transformers.\n        if model_or_path.endswith(\".pt\"):\n            self._transformers_model = torch.load(\n                model_or_path, map_location=self.device\n            ).eval()\n        else:\n            ModelForConditionalGeneration = _resolve_transformers_model(\n                self.model_family\n            )\n            self._transformers_model = (\n                ModelForConditionalGeneration.from_pretrained(\n                    self.model_or_path, **model_options\n                )\n                .to(self.device)\n                .eval()\n            )\n\n    def translate(\n        self,\n        text: Union[str, List[str]],\n        source: str,\n        target: str,\n        batch_size: int = 32,\n        verbose: bool = False,\n        generation_options: dict = None,\n    ) -> Union[str, List[str]]:\n        \"\"\"\n        *Translates a string or a list of strings from a source to a target language.*\n\n        {{params}}\n        {{text}} The content you want to 
translate.\n        {{source}} The language of the original text.\n        {{target}} The language of the translated text.\n        {{batch_size}} The number of samples to load at once. If set to `None`, it will process everything at once.\n        {{verbose}} Whether to display the progress bar for every batch processed.\n        {{generation_options}} The keyword arguments passed to `model.generate()`, where `model` is the underlying transformers model.\n\n        Note:\n        - Run `print(dlt.utils.available_languages())` to see what's available.\n        - A smaller value is preferred for `batch_size` if your (video) RAM is limited.\n        \"\"\"\n        if generation_options is None:\n            generation_options = {}\n\n        source = _resolve_lang_codes(source, \"source\", self.model_family)\n        target = _resolve_lang_codes(target, \"target\", self.model_family)\n\n        self._tokenizer.src_lang = source\n\n        original_text_type = type(text)\n        if original_text_type is str:\n            text = [text]\n\n        if batch_size is None:\n            batch_size = len(text)\n\n        generation_options.setdefault(\n            \"forced_bos_token_id\", self._tokenizer.convert_tokens_to_ids(target)\n        )\n        generation_options.setdefault(\"max_new_tokens\", 512)\n\n        data_loader = torch.utils.data.DataLoader(text, batch_size=batch_size)\n        output_text = []\n\n        tqdm_iterator = data_loader\n        if verbose is True:\n            tqdm_iterator = tqdm(data_loader)\n        with torch.no_grad():\n            for batch in tqdm_iterator:\n                encoded = self._tokenizer(batch, return_tensors=\"pt\", padding=True)\n                encoded.to(self.device)\n\n                generated_tokens = self._transformers_model.generate(\n                    **encoded, **generation_options\n                ).cpu()\n\n                decoded = self._tokenizer.batch_decode(\n                    generated_tokens, 
skip_special_tokens=True\n                )\n\n                output_text.extend(decoded)\n\n        # If text: str and output_text: List[str], then we should convert output_text to str\n        if original_text_type is str and len(output_text) == 1:\n            output_text = output_text[0]\n\n        return output_text\n\n    def get_transformers_model(self):\n        \"\"\"\n        *Retrieve the underlying mBART transformer model.*\n        \"\"\"\n        return self._transformers_model\n\n    def get_tokenizer(self):\n        \"\"\"\n        *Retrieve the mBART huggingface tokenizer.*\n        \"\"\"\n        return self._tokenizer\n\n    def available_languages(self) -> List[str]:\n        \"\"\"\n        *Returns all the available languages for a given `dlt.TranslationModel`\n        instance.*\n        \"\"\"\n        return utils.available_languages(self.model_family)\n\n    def available_codes(self) -> List[str]:\n        \"\"\"\n        *Returns all the available codes for a given `dlt.TranslationModel`\n        instance.*\n        \"\"\"\n        return utils.available_codes(self.model_family)\n\n    def get_lang_code_map(self) -> Dict[str, str]:\n        \"\"\"\n        *Returns the language -> codes dictionary for a given `dlt.TranslationModel`\n        instance.*\n        \"\"\"\n        return utils.get_lang_code_map(self.model_family)\n\n    def save_obj(self, path: str = \"saved_model\") -> None:\n        \"\"\"\n        *Saves your model as a torch object and save your tokenizer.*\n\n        {{params}}\n        {{path}} The directory where you want to save your model and tokenizer\n        \"\"\"\n        os.makedirs(path, exist_ok=True)\n        torch.save(self._transformers_model, os.path.join(path, \"weights.pt\"))\n        self._tokenizer.save_pretrained(path)\n\n        dlt_config = dict(model_family=self.model_family)\n        json.dump(dlt_config, open(os.path.join(path, \"dlt_config.json\"), \"w\"))\n\n    @classmethod\n    def 
load_obj(cls, path: str = \"saved_model\", **kwargs):\n        \"\"\"\n        *Initialize `dlt.TranslationModel` from the torch object and tokenizer\n        saved with `dlt.TranslationModel.save_obj`*\n\n        {{params}}\n        {{path}} The directory where your torch model and tokenizer are stored\n        \"\"\"\n        config_prev = json.load(open(os.path.join(path, \"dlt_config.json\"), \"rb\"))\n        config_prev.update(kwargs)\n        return cls(\n            model_or_path=os.path.join(path, \"weights.pt\"),\n            tokenizer_path=path,\n            **config_prev,\n        )\n"
  },
  {
    "path": "dl_translate/lang/__init__.py",
    "content": "from .m2m100 import *\nfrom . import m2m100, mbart50, nllb200\n"
  },
  {
    "path": "dl_translate/lang/m2m100.py",
    "content": "# Auto-generated. Do not modify, use scripts/generate_langs.py instead.\nAFRIKAANS = \"Afrikaans\"\nAMHARIC = \"Amharic\"\nARABIC = \"Arabic\"\nASTURIAN = \"Asturian\"\nAZERBAIJANI = \"Azerbaijani\"\nBASHKIR = \"Bashkir\"\nBELARUSIAN = \"Belarusian\"\nBULGARIAN = \"Bulgarian\"\nBENGALI = \"Bengali\"\nBRETON = \"Breton\"\nBOSNIAN = \"Bosnian\"\nCATALAN = \"Catalan\"\nVALENCIAN = \"Valencian\"\nCEBUANO = \"Cebuano\"\nCZECH = \"Czech\"\nWELSH = \"Welsh\"\nDANISH = \"Danish\"\nGERMAN = \"German\"\nGREEK = \"Greek\"\nENGLISH = \"English\"\nSPANISH = \"Spanish\"\nESTONIAN = \"Estonian\"\nPERSIAN = \"Persian\"\nFULAH = \"Fulah\"\nFINNISH = \"Finnish\"\nFRENCH = \"French\"\nWESTERN_FRISIAN = \"Western Frisian\"\nIRISH = \"Irish\"\nGAELIC = \"Gaelic\"\nSCOTTISH_GAELIC = \"Scottish Gaelic\"\nGALICIAN = \"Galician\"\nGUJARATI = \"Gujarati\"\nHAUSA = \"Hausa\"\nHEBREW = \"Hebrew\"\nHINDI = \"Hindi\"\nCROATIAN = \"Croatian\"\nHAITIAN = \"Haitian\"\nHAITIAN_CREOLE = \"Haitian Creole\"\nHUNGARIAN = \"Hungarian\"\nARMENIAN = \"Armenian\"\nINDONESIAN = \"Indonesian\"\nIGBO = \"Igbo\"\nILOKO = \"Iloko\"\nICELANDIC = \"Icelandic\"\nITALIAN = \"Italian\"\nJAPANESE = \"Japanese\"\nJAVANESE = \"Javanese\"\nGEORGIAN = \"Georgian\"\nKAZAKH = \"Kazakh\"\nKHMER = \"Khmer\"\nCENTRAL_KHMER = \"Central Khmer\"\nKANNADA = \"Kannada\"\nKOREAN = \"Korean\"\nLUXEMBOURGISH = \"Luxembourgish\"\nLETZEBURGESCH = \"Letzeburgesch\"\nGANDA = \"Ganda\"\nLINGALA = \"Lingala\"\nLAO = \"Lao\"\nLITHUANIAN = \"Lithuanian\"\nLATVIAN = \"Latvian\"\nMALAGASY = \"Malagasy\"\nMACEDONIAN = \"Macedonian\"\nMALAYALAM = \"Malayalam\"\nMONGOLIAN = \"Mongolian\"\nMARATHI = \"Marathi\"\nMALAY = \"Malay\"\nBURMESE = \"Burmese\"\nNEPALI = \"Nepali\"\nDUTCH = \"Dutch\"\nFLEMISH = \"Flemish\"\nNORWEGIAN = \"Norwegian\"\nNORTHERN_SOTHO = \"Northern Sotho\"\nOCCITAN = \"Occitan\"\nORIYA = \"Oriya\"\nPANJABI = \"Panjabi\"\nPUNJABI = \"Punjabi\"\nPOLISH = \"Polish\"\nPUSHTO = \"Pushto\"\nPASHTO = 
\"Pashto\"\nPORTUGUESE = \"Portuguese\"\nROMANIAN = \"Romanian\"\nMOLDAVIAN = \"Moldavian\"\nMOLDOVAN = \"Moldovan\"\nRUSSIAN = \"Russian\"\nSINDHI = \"Sindhi\"\nSINHALA = \"Sinhala\"\nSINHALESE = \"Sinhalese\"\nSLOVAK = \"Slovak\"\nSLOVENIAN = \"Slovenian\"\nSOMALI = \"Somali\"\nALBANIAN = \"Albanian\"\nSERBIAN = \"Serbian\"\nSWATI = \"Swati\"\nSUNDANESE = \"Sundanese\"\nSWEDISH = \"Swedish\"\nSWAHILI = \"Swahili\"\nTAMIL = \"Tamil\"\nTHAI = \"Thai\"\nTAGALOG = \"Tagalog\"\nTSWANA = \"Tswana\"\nTURKISH = \"Turkish\"\nUKRAINIAN = \"Ukrainian\"\nURDU = \"Urdu\"\nUZBEK = \"Uzbek\"\nVIETNAMESE = \"Vietnamese\"\nWOLOF = \"Wolof\"\nXHOSA = \"Xhosa\"\nYIDDISH = \"Yiddish\"\nYORUBA = \"Yoruba\"\nCHINESE = \"Chinese\"\nZULU = \"Zulu\"\n"
  },
  {
    "path": "dl_translate/lang/mbart50.py",
    "content": "# Auto-generated. Do not modify, use scripts/generate_langs.py instead.\nARABIC = \"Arabic\"\nCZECH = \"Czech\"\nGERMAN = \"German\"\nENGLISH = \"English\"\nSPANISH = \"Spanish\"\nESTONIAN = \"Estonian\"\nFINNISH = \"Finnish\"\nFRENCH = \"French\"\nGUJARATI = \"Gujarati\"\nHINDI = \"Hindi\"\nITALIAN = \"Italian\"\nJAPANESE = \"Japanese\"\nKAZAKH = \"Kazakh\"\nKOREAN = \"Korean\"\nLITHUANIAN = \"Lithuanian\"\nLATVIAN = \"Latvian\"\nBURMESE = \"Burmese\"\nNEPALI = \"Nepali\"\nDUTCH = \"Dutch\"\nROMANIAN = \"Romanian\"\nRUSSIAN = \"Russian\"\nSINHALA = \"Sinhala\"\nTURKISH = \"Turkish\"\nVIETNAMESE = \"Vietnamese\"\nCHINESE = \"Chinese\"\nAFRIKAANS = \"Afrikaans\"\nAZERBAIJANI = \"Azerbaijani\"\nBENGALI = \"Bengali\"\nPERSIAN = \"Persian\"\nHEBREW = \"Hebrew\"\nCROATIAN = \"Croatian\"\nINDONESIAN = \"Indonesian\"\nGEORGIAN = \"Georgian\"\nKHMER = \"Khmer\"\nMACEDONIAN = \"Macedonian\"\nMALAYALAM = \"Malayalam\"\nMONGOLIAN = \"Mongolian\"\nMARATHI = \"Marathi\"\nPOLISH = \"Polish\"\nPASHTO = \"Pashto\"\nPORTUGUESE = \"Portuguese\"\nSWEDISH = \"Swedish\"\nSWAHILI = \"Swahili\"\nTAMIL = \"Tamil\"\nTELUGU = \"Telugu\"\nTHAI = \"Thai\"\nTAGALOG = \"Tagalog\"\nUKRAINIAN = \"Ukrainian\"\nURDU = \"Urdu\"\nXHOSA = \"Xhosa\"\nGALICIAN = \"Galician\"\nSLOVENE = \"Slovene\"\n"
  },
  {
    "path": "dl_translate/lang/nllb200.py",
    "content": "# Auto-generated. Do not modify, use scripts/generate_langs.py instead.\nACEHNESE_ARABIC_SCRIPT = \"Acehnese (Arabic script)\"\nACEHNESE_LATIN_SCRIPT = \"Acehnese (Latin script)\"\nMESOPOTAMIAN_ARABIC = \"Mesopotamian Arabic\"\nTAIZZI_ADENI_ARABIC = \"Ta'izzi-Adeni Arabic\"\nTUNISIAN_ARABIC = \"Tunisian Arabic\"\nAFRIKAANS = \"Afrikaans\"\nSOUTH_LEVANTINE_ARABIC = \"South Levantine Arabic\"\nAKAN = \"Akan\"\nAMHARIC = \"Amharic\"\nNORTH_LEVANTINE_ARABIC = \"North Levantine Arabic\"\nMODERN_STANDARD_ARABIC = \"Modern Standard Arabic\"\nMODERN_STANDARD_ARABIC_ROMANIZED = \"Modern Standard Arabic (Romanized)\"\nNAJDI_ARABIC = \"Najdi Arabic\"\nMOROCCAN_ARABIC = \"Moroccan Arabic\"\nEGYPTIAN_ARABIC = \"Egyptian Arabic\"\nASSAMESE = \"Assamese\"\nASTURIAN = \"Asturian\"\nAWADHI = \"Awadhi\"\nCENTRAL_AYMARA = \"Central Aymara\"\nSOUTH_AZERBAIJANI = \"South Azerbaijani\"\nNORTH_AZERBAIJANI = \"North Azerbaijani\"\nBASHKIR = \"Bashkir\"\nBAMBARA = \"Bambara\"\nBALINESE = \"Balinese\"\nBELARUSIAN = \"Belarusian\"\nBEMBA = \"Bemba\"\nBENGALI = \"Bengali\"\nBHOJPURI = \"Bhojpuri\"\nBANJAR_ARABIC_SCRIPT = \"Banjar (Arabic script)\"\nBANJAR_LATIN_SCRIPT = \"Banjar (Latin script)\"\nSTANDARD_TIBETAN = \"Standard Tibetan\"\nBOSNIAN = \"Bosnian\"\nBUGINESE = \"Buginese\"\nBULGARIAN = \"Bulgarian\"\nCATALAN = \"Catalan\"\nCEBUANO = \"Cebuano\"\nCZECH = \"Czech\"\nCHOKWE = \"Chokwe\"\nCENTRAL_KURDISH = \"Central Kurdish\"\nCRIMEAN_TATAR = \"Crimean Tatar\"\nWELSH = \"Welsh\"\nDANISH = \"Danish\"\nGERMAN = \"German\"\nSOUTHWESTERN_DINKA = \"Southwestern Dinka\"\nDYULA = \"Dyula\"\nDZONGKHA = \"Dzongkha\"\nGREEK = \"Greek\"\nENGLISH = \"English\"\nESPERANTO = \"Esperanto\"\nESTONIAN = \"Estonian\"\nBASQUE = \"Basque\"\nEWE = \"Ewe\"\nFAROESE = \"Faroese\"\nFIJIAN = \"Fijian\"\nFINNISH = \"Finnish\"\nFON = \"Fon\"\nFRENCH = \"French\"\nFRIULIAN = \"Friulian\"\nNIGERIAN_FULFULDE = \"Nigerian Fulfulde\"\nSCOTTISH_GAELIC = \"Scottish Gaelic\"\nIRISH = \"Irish\"\nGALICIAN = 
\"Galician\"\nGUARANI = \"Guarani\"\nGUJARATI = \"Gujarati\"\nHAITIAN_CREOLE = \"Haitian Creole\"\nHAUSA = \"Hausa\"\nHEBREW = \"Hebrew\"\nHINDI = \"Hindi\"\nCHHATTISGARHI = \"Chhattisgarhi\"\nCROATIAN = \"Croatian\"\nHUNGARIAN = \"Hungarian\"\nARMENIAN = \"Armenian\"\nIGBO = \"Igbo\"\nILOCANO = \"Ilocano\"\nINDONESIAN = \"Indonesian\"\nICELANDIC = \"Icelandic\"\nITALIAN = \"Italian\"\nJAVANESE = \"Javanese\"\nJAPANESE = \"Japanese\"\nKABYLE = \"Kabyle\"\nJINGPHO = \"Jingpho\"\nKAMBA = \"Kamba\"\nKANNADA = \"Kannada\"\nKASHMIRI_ARABIC_SCRIPT = \"Kashmiri (Arabic script)\"\nKASHMIRI_DEVANAGARI_SCRIPT = \"Kashmiri (Devanagari script)\"\nGEORGIAN = \"Georgian\"\nCENTRAL_KANURI_ARABIC_SCRIPT = \"Central Kanuri (Arabic script)\"\nCENTRAL_KANURI_LATIN_SCRIPT = \"Central Kanuri (Latin script)\"\nKAZAKH = \"Kazakh\"\nKABIYÈ = \"Kabiyè\"\nKABUVERDIANU = \"Kabuverdianu\"\nKHMER = \"Khmer\"\nKIKUYU = \"Kikuyu\"\nKINYARWANDA = \"Kinyarwanda\"\nKYRGYZ = \"Kyrgyz\"\nKIMBUNDU = \"Kimbundu\"\nNORTHERN_KURDISH = \"Northern Kurdish\"\nKIKONGO = \"Kikongo\"\nKOREAN = \"Korean\"\nLAO = \"Lao\"\nLIGURIAN = \"Ligurian\"\nLIMBURGISH = \"Limburgish\"\nLINGALA = \"Lingala\"\nLITHUANIAN = \"Lithuanian\"\nLOMBARD = \"Lombard\"\nLATGALIAN = \"Latgalian\"\nLUXEMBOURGISH = \"Luxembourgish\"\nLUBA_KASAI = \"Luba-Kasai\"\nGANDA = \"Ganda\"\nLUO = \"Luo\"\nMIZO = \"Mizo\"\nSTANDARD_LATVIAN = \"Standard Latvian\"\nMAGAHI = \"Magahi\"\nMAITHILI = \"Maithili\"\nMALAYALAM = \"Malayalam\"\nMARATHI = \"Marathi\"\nMINANGKABAU_ARABIC_SCRIPT = \"Minangkabau (Arabic script)\"\nMINANGKABAU_LATIN_SCRIPT = \"Minangkabau (Latin script)\"\nMACEDONIAN = \"Macedonian\"\nPLATEAU_MALAGASY = \"Plateau Malagasy\"\nMALTESE = \"Maltese\"\nMEITEI_BENGALI_SCRIPT = \"Meitei (Bengali script)\"\nHALH_MONGOLIAN = \"Halh Mongolian\"\nMOSSI = \"Mossi\"\nMAORI = \"Maori\"\nBURMESE = \"Burmese\"\nDUTCH = \"Dutch\"\nNORWEGIAN_NYNORSK = \"Norwegian Nynorsk\"\nNORWEGIAN_BOKMÅL = \"Norwegian Bokmål\"\nNEPALI = 
\"Nepali\"\nNORTHERN_SOTHO = \"Northern Sotho\"\nNUER = \"Nuer\"\nNYANJA = \"Nyanja\"\nOCCITAN = \"Occitan\"\nWEST_CENTRAL_OROMO = \"West Central Oromo\"\nODIA = \"Odia\"\nPANGASINAN = \"Pangasinan\"\nEASTERN_PANJABI = \"Eastern Panjabi\"\nPAPIAMENTO = \"Papiamento\"\nWESTERN_PERSIAN = \"Western Persian\"\nPOLISH = \"Polish\"\nPORTUGUESE = \"Portuguese\"\nDARI = \"Dari\"\nSOUTHERN_PASHTO = \"Southern Pashto\"\nAYACUCHO_QUECHUA = \"Ayacucho Quechua\"\nROMANIAN = \"Romanian\"\nRUNDI = \"Rundi\"\nRUSSIAN = \"Russian\"\nSANGO = \"Sango\"\nSANSKRIT = \"Sanskrit\"\nSANTALI = \"Santali\"\nSICILIAN = \"Sicilian\"\nSHAN = \"Shan\"\nSINHALA = \"Sinhala\"\nSLOVAK = \"Slovak\"\nSLOVENIAN = \"Slovenian\"\nSAMOAN = \"Samoan\"\nSHONA = \"Shona\"\nSINDHI = \"Sindhi\"\nSOMALI = \"Somali\"\nSOUTHERN_SOTHO = \"Southern Sotho\"\nSPANISH = \"Spanish\"\nTOSK_ALBANIAN = \"Tosk Albanian\"\nSARDINIAN = \"Sardinian\"\nSERBIAN = \"Serbian\"\nSWATI = \"Swati\"\nSUNDANESE = \"Sundanese\"\nSWEDISH = \"Swedish\"\nSWAHILI = \"Swahili\"\nSILESIAN = \"Silesian\"\nTAMIL = \"Tamil\"\nTATAR = \"Tatar\"\nTELUGU = \"Telugu\"\nTAJIK = \"Tajik\"\nTAGALOG = \"Tagalog\"\nTHAI = \"Thai\"\nTIGRINYA = \"Tigrinya\"\nTAMASHEQ_LATIN_SCRIPT = \"Tamasheq (Latin script)\"\nTAMASHEQ_TIFINAGH_SCRIPT = \"Tamasheq (Tifinagh script)\"\nTOK_PISIN = \"Tok Pisin\"\nTSWANA = \"Tswana\"\nTSONGA = \"Tsonga\"\nTURKMEN = \"Turkmen\"\nTUMBUKA = \"Tumbuka\"\nTURKISH = \"Turkish\"\nTWI = \"Twi\"\nCENTRAL_ATLAS_TAMAZIGHT = \"Central Atlas Tamazight\"\nUYGHUR = \"Uyghur\"\nUKRAINIAN = \"Ukrainian\"\nUMBUNDU = \"Umbundu\"\nURDU = \"Urdu\"\nNORTHERN_UZBEK = \"Northern Uzbek\"\nVENETIAN = \"Venetian\"\nVIETNAMESE = \"Vietnamese\"\nWARAY = \"Waray\"\nWOLOF = \"Wolof\"\nXHOSA = \"Xhosa\"\nEASTERN_YIDDISH = \"Eastern Yiddish\"\nYORUBA = \"Yoruba\"\nYUE_CHINESE = \"Yue Chinese\"\nCHINESE_SIMPLIFIED = \"Chinese (Simplified)\"\nCHINESE_TRADITIONAL = \"Chinese (Traditional)\"\nSTANDARD_MALAY = \"Standard Malay\"\nZULU = \"Zulu\"\n"
  },
  {
    "path": "dl_translate/utils.py",
    "content": "from typing import Dict, List\n\nfrom ._pairs import _PAIRS_MBART50, _PAIRS_M2M100, _PAIRS_NLLB200\n\n\ndef _infer_model_family(model_or_path):\n    di = {\n        \"facebook/mbart-large-50-many-to-many-mmt\": \"mbart50\",\n        \"facebook/m2m100_418M\": \"m2m100\",\n        \"facebook/m2m100_1.2B\": \"m2m100\",\n        \"facebook/nllb-200-distilled-600M\": \"nllb200\",\n        \"facebook/nllb-200-distilled-1.3B\": \"nllb200\",\n        \"facebook/nllb-200-1.3B\": \"nllb200\",\n        \"facebook/nllb-200-3.3B\": \"nllb200\",\n    }\n\n    if model_or_path in di:\n        return di[model_or_path]\n    else:\n        error_msg = f'Unable to infer the model_family from \"{model_or_path}\". Try explicitly setting the value of model_family to \"mbart50\" or \"m2m100\".'\n        raise ValueError(error_msg)\n\n\ndef _infer_model_or_path(model_or_path):\n    di = {\n        \"mbart50\": \"facebook/mbart-large-50-many-to-many-mmt\",\n        \"m2m100\": \"facebook/m2m100_418M\",\n        \"m2m100-small\": \"facebook/m2m100_418M\",\n        \"m2m100-medium\": \"facebook/m2m100_1.2B\",\n        \"nllb200\": \"facebook/nllb-200-distilled-600M\",\n        \"nllb200-small\": \"facebook/nllb-200-distilled-600M\",\n        \"nllb200-medium\": \"facebook/nllb-200-distilled-1.3B\",\n        \"nllb200-medium-regular\": \"facebook/nllb-200-1.3B\",\n        \"nllb200-large\": \"facebook/nllb-200-3.3B\",\n    }\n\n    return di.get(model_or_path, model_or_path)\n\n\ndef _weights2pairs():\n    return {\n        \"mbart50\": _PAIRS_MBART50,\n        \"mbart-large-50-many-to-many-mmt\": _PAIRS_MBART50,\n        \"facebook/mbart-large-50-many-to-many-mmt\": _PAIRS_MBART50,\n        \"m2m100\": _PAIRS_M2M100,\n        \"m2m100_418M\": _PAIRS_M2M100,\n        \"m2m100_1.2B\": _PAIRS_M2M100,\n        \"facebook/m2m100_418M\": _PAIRS_M2M100,\n        \"facebook/m2m100_1.2B\": _PAIRS_M2M100,\n        \"nllb200\": _PAIRS_NLLB200,\n        \"nllb-200-distilled\": 
_PAIRS_NLLB200,\n        \"nllb-200-distilled-600M\": _PAIRS_NLLB200,\n        \"nllb-200-distilled-1.3B\": _PAIRS_NLLB200,\n        \"nllb-200-1.3B\": _PAIRS_NLLB200,\n        \"nllb-200-3.3B\": _PAIRS_NLLB200,\n        \"facebook/nllb-200-distilled-600M\": _PAIRS_NLLB200,\n        \"facebook/nllb-200-distilled-1.3B\": _PAIRS_NLLB200,\n        \"facebook/nllb-200-1.3B\": _PAIRS_NLLB200,\n        \"facebook/nllb-200-3.3B\": _PAIRS_NLLB200,\n    }\n\n\ndef _dict_from_weights(weights: str) -> dict:\n    \"\"\"Returns a dictionary of lang, codes, pairs if the provided weights is supported.\"\"\"\n    if weights in _weights2pairs():\n        pairs = _weights2pairs()[weights]\n        return {\n            \"langs\": tuple(pair[0] for pair in pairs),\n            \"codes\": tuple(pair[1] for pair in pairs),\n            \"pairs\": dict(pairs),\n        }\n    elif weights.lower() in _weights2pairs():\n        pairs = _weights2pairs()[weights.lower()]\n        return {\n            \"langs\": tuple(pair[0] for pair in pairs),\n            \"codes\": tuple(pair[1] for pair in pairs),\n            \"pairs\": dict(pairs),\n        }\n\n    else:\n        error_message = f\"Incorrect argument '{weights}' for parameter weights. Please choose from: {list(_weights2pairs().keys())}\"\n        raise ValueError(error_message)\n\n\ndef get_lang_code_map(weights: str = \"m2m100\") -> Dict[str, str]:\n    \"\"\"\n    *Get a dictionary mapping a language -> code for a given model. The code will depend on the model you choose.*\n\n    {{params}}\n    {{weights}} The name of the model you are using. For example, \"mbart50\" is the multilingual BART Large with 50 languages available to use.\n    \"\"\"\n    return _dict_from_weights(weights)[\"pairs\"]\n\n\ndef available_languages(weights: str = \"m2m100\") -> List[str]:\n    \"\"\"\n    *Get all the languages available for a given model.*\n\n    {{params}}\n    {{weights}} The name of the model you are using. 
For example, \"mbart50\" is the multilingual BART Large with 50 languages available to use.\n    \"\"\"\n    return _dict_from_weights(weights)[\"langs\"]\n\n\ndef available_codes(weights: str = \"m2m100\") -> List[str]:\n    \"\"\"\n    *Get all the codes available for a given model. The code format will depend on the model you select.*\n\n    {{params}}\n    {{weights}} The name of the model you are using. For example, \"mbart50\" is the multilingual BART Large with 50 codes available to use.\n    \"\"\"\n    return _dict_from_weights(weights)[\"codes\"]\n"
  },
  {
    "path": "docs/available_languages.md",
    "content": "# Languages Available\n\nThis page gives all the languages available for each model family.\n\n## MBart 50\n\n| Language Name | Code |\n| --- | --- |\n| Arabic | ar_AR |\n| Czech | cs_CZ |\n| German | de_DE |\n| English | en_XX |\n| Spanish | es_XX |\n| Estonian | et_EE |\n| Finnish | fi_FI |\n| French | fr_XX |\n| Gujarati | gu_IN |\n| Hindi | hi_IN |\n| Italian | it_IT |\n| Japanese | ja_XX |\n| Kazakh | kk_KZ |\n| Korean | ko_KR |\n| Lithuanian | lt_LT |\n| Latvian | lv_LV |\n| Burmese | my_MM |\n| Nepali | ne_NP |\n| Dutch | nl_XX |\n| Romanian | ro_RO |\n| Russian | ru_RU |\n| Sinhala | si_LK |\n| Turkish | tr_TR |\n| Vietnamese | vi_VN |\n| Chinese | zh_CN |\n| Afrikaans | af_ZA |\n| Azerbaijani | az_AZ |\n| Bengali | bn_IN |\n| Persian | fa_IR |\n| Hebrew | he_IL |\n| Croatian | hr_HR |\n| Indonesian | id_ID |\n| Georgian | ka_GE |\n| Khmer | km_KH |\n| Macedonian | mk_MK |\n| Malayalam | ml_IN |\n| Mongolian | mn_MN |\n| Marathi | mr_IN |\n| Polish | pl_PL |\n| Pashto | ps_AF |\n| Portuguese | pt_XX |\n| Swedish | sv_SE |\n| Swahili | sw_KE |\n| Tamil | ta_IN |\n| Telugu | te_IN |\n| Thai | th_TH |\n| Tagalog | tl_XX |\n| Ukrainian | uk_UA |\n| Urdu | ur_PK |\n| Xhosa | xh_ZA |\n| Galician | gl_ES |\n| Slovene | sl_SI |\n\n\n## M2M-100\n\n| Language Name | Code |\n| --- | --- |\n| Afrikaans | af |\n| Amharic | am |\n| Arabic | ar |\n| Asturian | ast |\n| Azerbaijani | az |\n| Bashkir | ba |\n| Belarusian | be |\n| Bulgarian | bg |\n| Bengali | bn |\n| Breton | br |\n| Bosnian | bs |\n| Catalan | ca |\n| Valencian | ca |\n| Cebuano | ceb |\n| Czech | cs |\n| Welsh | cy |\n| Danish | da |\n| German | de |\n| Greek | el |\n| English | en |\n| Spanish | es |\n| Estonian | et |\n| Persian | fa |\n| Fulah | ff |\n| Finnish | fi |\n| French | fr |\n| Western Frisian | fy |\n| Irish | ga |\n| Gaelic | gd |\n| Scottish Gaelic | gd |\n| Galician | gl |\n| Gujarati | gu |\n| Hausa | ha |\n| Hebrew | he |\n| Hindi | hi |\n| Croatian | hr |\n| Haitian | 
ht |\n| Haitian Creole | ht |\n| Hungarian | hu |\n| Armenian | hy |\n| Indonesian | id |\n| Igbo | ig |\n| Iloko | ilo |\n| Icelandic | is |\n| Italian | it |\n| Japanese | ja |\n| Javanese | jv |\n| Georgian | ka |\n| Kazakh | kk |\n| Khmer | km |\n| Central Khmer | km |\n| Kannada | kn |\n| Korean | ko |\n| Luxembourgish | lb |\n| Letzeburgesch | lb |\n| Ganda | lg |\n| Lingala | ln |\n| Lao | lo |\n| Lithuanian | lt |\n| Latvian | lv |\n| Malagasy | mg |\n| Macedonian | mk |\n| Malayalam | ml |\n| Mongolian | mn |\n| Marathi | mr |\n| Malay | ms |\n| Burmese | my |\n| Nepali | ne |\n| Dutch | nl |\n| Flemish | nl |\n| Norwegian | no |\n| Northern Sotho | ns |\n| Occitan | oc |\n| Oriya | or |\n| Panjabi | pa |\n| Punjabi | pa |\n| Polish | pl |\n| Pushto | ps |\n| Pashto | ps |\n| Portuguese | pt |\n| Romanian | ro |\n| Moldavian | ro |\n| Moldovan | ro |\n| Russian | ru |\n| Sindhi | sd |\n| Sinhala | si |\n| Sinhalese | si |\n| Slovak | sk |\n| Slovenian | sl |\n| Somali | so |\n| Albanian | sq |\n| Serbian | sr |\n| Swati | ss |\n| Sundanese | su |\n| Swedish | sv |\n| Swahili | sw |\n| Tamil | ta |\n| Thai | th |\n| Tagalog | tl |\n| Tswana | tn |\n| Turkish | tr |\n| Ukrainian | uk |\n| Urdu | ur |\n| Uzbek | uz |\n| Vietnamese | vi |\n| Wolof | wo |\n| Xhosa | xh |\n| Yiddish | yi |\n| Yoruba | yo |\n| Chinese | zh |\n| Zulu | zu |\n\n\n## NLLB-200\n\n| Language Name | Code |\n| --- | --- |\n| Acehnese (Arabic script) | ace_Arab |\n| Acehnese (Latin script) | ace_Latn |\n| Mesopotamian Arabic | acm_Arab |\n| Ta'izzi-Adeni Arabic | acq_Arab |\n| Tunisian Arabic | aeb_Arab |\n| Afrikaans | afr_Latn |\n| South Levantine Arabic | ajp_Arab |\n| Akan | aka_Latn |\n| Amharic | amh_Ethi |\n| North Levantine Arabic | apc_Arab |\n| Modern Standard Arabic | arb_Arab |\n| Modern Standard Arabic (Romanized) | arb_Latn |\n| Najdi Arabic | ars_Arab |\n| Moroccan Arabic | ary_Arab |\n| Egyptian Arabic | arz_Arab |\n| Assamese | asm_Beng |\n| Asturian | ast_Latn |\n| 
Awadhi | awa_Deva |\n| Central Aymara | ayr_Latn |\n| South Azerbaijani | azb_Arab |\n| North Azerbaijani | azj_Latn |\n| Bashkir | bak_Cyrl |\n| Bambara | bam_Latn |\n| Balinese | ban_Latn |\n| Belarusian | bel_Cyrl |\n| Bemba | bem_Latn |\n| Bengali | ben_Beng |\n| Bhojpuri | bho_Deva |\n| Banjar (Arabic script) | bjn_Arab |\n| Banjar (Latin script) | bjn_Latn |\n| Standard Tibetan | bod_Tibt |\n| Bosnian | bos_Latn |\n| Buginese | bug_Latn |\n| Bulgarian | bul_Cyrl |\n| Catalan | cat_Latn |\n| Cebuano | ceb_Latn |\n| Czech | ces_Latn |\n| Chokwe | cjk_Latn |\n| Central Kurdish | ckb_Arab |\n| Crimean Tatar | crh_Latn |\n| Welsh | cym_Latn |\n| Danish | dan_Latn |\n| German | deu_Latn |\n| Southwestern Dinka | dik_Latn |\n| Dyula | dyu_Latn |\n| Dzongkha | dzo_Tibt |\n| Greek | ell_Grek |\n| English | eng_Latn |\n| Esperanto | epo_Latn |\n| Estonian | est_Latn |\n| Basque | eus_Latn |\n| Ewe | ewe_Latn |\n| Faroese | fao_Latn |\n| Fijian | fij_Latn |\n| Finnish | fin_Latn |\n| Fon | fon_Latn |\n| French | fra_Latn |\n| Friulian | fur_Latn |\n| Nigerian Fulfulde | fuv_Latn |\n| Scottish Gaelic | gla_Latn |\n| Irish | gle_Latn |\n| Galician | glg_Latn |\n| Guarani | grn_Latn |\n| Gujarati | guj_Gujr |\n| Haitian Creole | hat_Latn |\n| Hausa | hau_Latn |\n| Hebrew | heb_Hebr |\n| Hindi | hin_Deva |\n| Chhattisgarhi | hne_Deva |\n| Croatian | hrv_Latn |\n| Hungarian | hun_Latn |\n| Armenian | hye_Armn |\n| Igbo | ibo_Latn |\n| Ilocano | ilo_Latn |\n| Indonesian | ind_Latn |\n| Icelandic | isl_Latn |\n| Italian | ita_Latn |\n| Javanese | jav_Latn |\n| Japanese | jpn_Jpan |\n| Kabyle | kab_Latn |\n| Jingpho | kac_Latn |\n| Kamba | kam_Latn |\n| Kannada | kan_Knda |\n| Kashmiri (Arabic script) | kas_Arab |\n| Kashmiri (Devanagari script) | kas_Deva |\n| Georgian | kat_Geor |\n| Central Kanuri (Arabic script) | knc_Arab |\n| Central Kanuri (Latin script) | knc_Latn |\n| Kazakh | kaz_Cyrl |\n| Kabiyè | kbp_Latn |\n| Kabuverdianu | kea_Latn |\n| Khmer | khm_Khmr |\n| 
Kikuyu | kik_Latn |\n| Kinyarwanda | kin_Latn |\n| Kyrgyz | kir_Cyrl |\n| Kimbundu | kmb_Latn |\n| Northern Kurdish | kmr_Latn |\n| Kikongo | kon_Latn |\n| Korean | kor_Hang |\n| Lao | lao_Laoo |\n| Ligurian | lij_Latn |\n| Limburgish | lim_Latn |\n| Lingala | lin_Latn |\n| Lithuanian | lit_Latn |\n| Lombard | lmo_Latn |\n| Latgalian | ltg_Latn |\n| Luxembourgish | ltz_Latn |\n| Luba-Kasai | lua_Latn |\n| Ganda | lug_Latn |\n| Luo | luo_Latn |\n| Mizo | lus_Latn |\n| Standard Latvian | lvs_Latn |\n| Magahi | mag_Deva |\n| Maithili | mai_Deva |\n| Malayalam | mal_Mlym |\n| Marathi | mar_Deva |\n| Minangkabau (Arabic script) | min_Arab |\n| Minangkabau (Latin script) | min_Latn |\n| Macedonian | mkd_Cyrl |\n| Plateau Malagasy | plt_Latn |\n| Maltese | mlt_Latn |\n| Meitei (Bengali script) | mni_Beng |\n| Halh Mongolian | khk_Cyrl |\n| Mossi | mos_Latn |\n| Maori | mri_Latn |\n| Burmese | mya_Mymr |\n| Dutch | nld_Latn |\n| Norwegian Nynorsk | nno_Latn |\n| Norwegian Bokmål | nob_Latn |\n| Nepali | npi_Deva |\n| Northern Sotho | nso_Latn |\n| Nuer | nus_Latn |\n| Nyanja | nya_Latn |\n| Occitan | oci_Latn |\n| West Central Oromo | gaz_Latn |\n| Odia | ory_Orya |\n| Pangasinan | pag_Latn |\n| Eastern Panjabi | pan_Guru |\n| Papiamento | pap_Latn |\n| Western Persian | pes_Arab |\n| Polish | pol_Latn |\n| Portuguese | por_Latn |\n| Dari | prs_Arab |\n| Southern Pashto | pbt_Arab |\n| Ayacucho Quechua | quy_Latn |\n| Romanian | ron_Latn |\n| Rundi | run_Latn |\n| Russian | rus_Cyrl |\n| Sango | sag_Latn |\n| Sanskrit | san_Deva |\n| Santali | sat_Olck |\n| Sicilian | scn_Latn |\n| Shan | shn_Mymr |\n| Sinhala | sin_Sinh |\n| Slovak | slk_Latn |\n| Slovenian | slv_Latn |\n| Samoan | smo_Latn |\n| Shona | sna_Latn |\n| Sindhi | snd_Arab |\n| Somali | som_Latn |\n| Southern Sotho | sot_Latn |\n| Spanish | spa_Latn |\n| Tosk Albanian | als_Latn |\n| Sardinian | srd_Latn |\n| Serbian | srp_Cyrl |\n| Swati | ssw_Latn |\n| Sundanese | sun_Latn |\n| Swedish | swe_Latn |\n| 
Swahili | swh_Latn |\n| Silesian | szl_Latn |\n| Tamil | tam_Taml |\n| Tatar | tat_Cyrl |\n| Telugu | tel_Telu |\n| Tajik | tgk_Cyrl |\n| Tagalog | tgl_Latn |\n| Thai | tha_Thai |\n| Tigrinya | tir_Ethi |\n| Tamasheq (Latin script) | taq_Latn |\n| Tamasheq (Tifinagh script) | taq_Tfng |\n| Tok Pisin | tpi_Latn |\n| Tswana | tsn_Latn |\n| Tsonga | tso_Latn |\n| Turkmen | tuk_Latn |\n| Tumbuka | tum_Latn |\n| Turkish | tur_Latn |\n| Twi | twi_Latn |\n| Central Atlas Tamazight | tzm_Tfng |\n| Uyghur | uig_Arab |\n| Ukrainian | ukr_Cyrl |\n| Umbundu | umb_Latn |\n| Urdu | urd_Arab |\n| Northern Uzbek | uzn_Latn |\n| Venetian | vec_Latn |\n| Vietnamese | vie_Latn |\n| Waray | war_Latn |\n| Wolof | wol_Latn |\n| Xhosa | xho_Latn |\n| Eastern Yiddish | ydd_Hebr |\n| Yoruba | yor_Latn |\n| Yue Chinese | yue_Hant |\n| Chinese (Simplified) | zho_Hans |\n| Chinese (Traditional) | zho_Hant |\n| Standard Malay | zsm_Latn |\n| Zulu | zul_Latn |\n"
  },
  {
    "path": "docs/contributing.md",
    "content": "# Contributions\n\nIf you wish to contribute to the project, please do the following:\n1. Verify if there's an existing similar issue.\n2. If no issue exists, create it.\n3. Once the contribution has been discussed inside the issue, fork this repo.\n4. Before modifying any code, make sure to read the sections below.\n5. Once you are done with your contribution, start a PR and tag a codeowner.\n\n\n## Setup\n\nTo set up the development environment, clone the repo:\n\n```bash\ngit clone https://github.com/xhlulu/dl-translate\ncd dl-translate\n```\n\nCreate a new venv and install the dev dependencies\n```bash\npython -m venv venv\nsource venv/bin/activate\npip install -e .[dev]\n```\n\n## Code linting\n\nTo ensure consistent and readable code, we use `black`. To run:\n\n```bash\npython -m black .\n```\n\n## Running tests\n\nTo run **all** the tests:\n```bash\npython -m pytest tests\n```\n\nFor quick tests, run:\n```bash\npython -m pytest tests/quick\n```\n\n## Documentation\n\nTo re-generate the documentation after the source code was modified:\n```bash\npython scripts/render_references.py\n```\n\nTo run the docs locally, run:\n```\nmkdocs serve -t material\n```\n\nOnce ready, you can build it:\n```\nmkdocs build -t material\n```\n\nOr release it on GitHub Pages:\n```\nmkdocs gh-deploy -t material\n```"
  },
  {
    "path": "docs/index.md",
    "content": "# User Guide\n\nQuick links:\n\n💻 [GitHub Repository](https://github.com/xhlulu/dl-translate)<br>\n📚 [Documentation](https://xhluca.github.io/dl-translate)<br>\n🐍 [PyPi project](https://pypi.org/project/dl-translate/)<br>\n🧪 [Colab Demo](https://colab.research.google.com/github/xhlulu/dl-translate/blob/main/demos/colab_demo.ipynb) / [Kaggle Demo](https://www.kaggle.com/xhlulu/dl-translate-demo/)\n\n\n\n## Quickstart\n\nInstall the library with pip:\n```\npip install dl-translate\n```\n\nTo translate some text:\n\n```python\nimport dl_translate as dlt\n\nmt = dlt.TranslationModel()  # Slow when you load it for the first time\n\ntext_hi = \"संयुक्त राष्ट्र के प्रमुख का कहना है कि सीरिया में कोई सैन्य समाधान नहीं है\"\nmt.translate(text_hi, source=dlt.lang.HINDI, target=dlt.lang.ENGLISH)\n```\n\nAbove, you can see that `dlt.lang` contains variables representing each of the 50 available languages with auto-complete support. Alternatively, you can specify the language (e.g. \"Arabic\") or the language code (e.g. \"fr\" for French):\n```python\ntext_ar = \"الأمين العام للأمم المتحدة يقول إنه لا يوجد حل عسكري في سوريا.\"\nmt.translate(text_ar, source=\"Arabic\", target=\"fr\")\n```\n\nIf you want to verify whether a language is available, you can check it:\n```python\nprint(mt.available_languages())  # All languages that you can use\nprint(mt.available_codes())  # Code corresponding to each language accepted\nprint(mt.get_lang_code_map())  # Dictionary of lang -> code\n```\n\n## Usage\n\n### Selecting a device\n\nWhen you load the model, you can specify the device using the `device` argument. By default, the value will be `device=\"auto\"`, which means it will use a GPU if possible. You can also explicitly set `device=\"cpu\"` or `device=\"gpu\"`, or some other strings accepted by [`torch.device()`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.torch.device). 
__In general, it is recommended to use a GPU if you want a reasonable processing time.__\n\n```python\nmt = dlt.TranslationModel(device=\"auto\")  # Automatically select device\nmt = dlt.TranslationModel(device=\"cpu\")  # Force you to use a CPU\nmt = dlt.TranslationModel(device=\"gpu\")  # Force you to use a GPU\nmt = dlt.TranslationModel(device=\"cuda:2\")  # Use the 3rd GPU available\n```\n\n### Choosing a different model\n\nBy default, the `m2m100` model will be used. However, there are a few options:\n\n* [mBART-50 Large](https://huggingface.co/transformers/master/model_doc/mbart.html):  Allows translations across 50 languages.\n* [m2m100](https://huggingface.co/transformers/model_doc/m2m_100.html): Allows translations across 100 languages.\n* [nllb-200](https://huggingface.co/docs/transformers/model_doc/nllb) (New in v0.3): Allows translations across 200 languages, and is faster than m2m100 (On RTX A6000, we can see speed up of 3x).\n\nHere's an example:\n```python\n# The default option\nmt = dlt.TranslationModel(\"m2m100\")  # Shorthand\nmt = dlt.TranslationModel(\"facebook/m2m100_418M\")  # Huggingface repo\n\n# If you want to use mBART-50 Large\nmt = dlt.TranslationModel(\"mbart50\")\nmt = dlt.TranslationModel(\"facebook/mbart-large-50-many-to-many-mmt\")\n\n# Or NLLB-200 (faster and has 200 languages)\nmt = dlt.TranslationModel(\"nllb200\")\nmt = dlt.TranslationModel(\"facebook/nllb-200-distilled-600M\")\n```\n\nNote that the language code will change depending on the model family. To find out the correct language codes, please read the doc page on available languages or run `mt.available_codes()`.\n\nBy default, `dlt.TranslationModel` will download the model from the huggingface repo for [mbart50](https://huggingface.co/facebook/mbart-large-50-one-to-many-mmt), [m2m100](https://huggingface.co/facebook/m2m100_418M), or [nllb200](https://huggingface.co/facebook/nllb-200-distilled-600M) and cache it. 
It's possible to load the model from a path or a model with a similar format, but you will need to specify the `model_family`:\n```python\nmt = dlt.TranslationModel(\"/path/to/model/directory/\", model_family=\"mbart50\")\nmt = dlt.TranslationModel(\"facebook/m2m100_1.2B\", model_family=\"m2m100\")\nmt = dlt.TranslationModel(\"facebook/nllb-200-distilled-600M\", model_family=\"nllb200\")\n```\n\nNotes:\n* Make sure your tokenizer is also stored in the same directory if you load from a file. \n* The available languages will change if you select a different model, so you will not be able to leverage `dlt.lang` or `dlt.utils`.\n\n### Breaking down into sentences\n\nIt is not recommended to use extremely long texts as it takes more time to process. Instead, you can try to break them down into sentences. Multiple solutions exist for that, including doing it manually and using the `nltk` library.\n\nA quick approach would be to split them by period. However, you have to ensure that there are no periods used for abbreviations (such as `Mr.` or `Dr.`). For example, it will work in the following case:\n```python\ntext = \"Mr Smith went to his favorite cafe. There, he met his friend Dr Doe.\"\nsents = text.split(\".\")\n\".\".join(mt.translate(sents, source=dlt.lang.ENGLISH, target=dlt.lang.FRENCH))\n```\n\n\nFor more complex cases (e.g. where you use periods for abbreviations), you can use `nltk`. First install the library with `pip install nltk`, then run:\n```python\nimport nltk\n\nnltk.download(\"punkt\")\n\ntext = \"Mr. Smith went to his favorite cafe. There, he met his friend Dr. Doe.\"\nsents = nltk.tokenize.sent_tokenize(text, \"english\")  # don't use dlt.lang.ENGLISH\n\" \".join(mt.translate(sents, source=dlt.lang.ENGLISH, target=dlt.lang.FRENCH))\n```\n\n\n\n### Batch size and verbosity when using `translate`\n\nIt's possible to set a batch size (i.e. 
the number of elements processed at once) for `mt.translate` and whether you want to see the progress bar or not:\n\n```python\n...\nmt = dlt.TranslationModel()\nmt.translate(text, source, target, batch_size=32, verbose=True)\n```\n\nIf you set `batch_size=None`, it will compute the entire `text` at once rather than splitting into \"chunks\". We recommend lowering `batch_size` if you do not have a lot of RAM or VRAM and run into CUDA memory error. Set a higher value if you are using a high-end GPU and the VRAM is not fully utilized.\n\n\n### `dlt.utils` module\n\nAn alternative to `mt.available_languages()` is the `dlt.utils` module. You can use it to find out which languages and codes are available:\n\n```python\nprint(dlt.utils.available_languages('mbart50'))  # All languages that you can use\nprint(dlt.utils.available_codes('mbart50'))  # Code corresponding to each language accepted\nprint(dlt.utils.get_lang_code_map('mbart50'))  # Dictionary of lang -> code\nprint(dlt.utils.available_languages('m2m100'))  # write the name of the model family\n```\n\nAt the moment, the following models are accepted:\n- `\"mbart50\"`\n- `\"m2m100\"`\n- `\"nllb200\"`\n\n### Offline usage\n\nUnlike the Google translate or MSFT Translator APIs, this library can be fully used offline. However, you will need to first download the packages and models, and move them to your offline environment to be installed and loaded inside a venv.\n\nFirst, run in your terminal:\n```bash\nmkdir dlt\ncd dlt\nmkdir libraries\npip download -d libraries/ dl-translate\n```\n\nOnce all the required packages are downloaded, you will need to use huggingface hub to download the files. Install it with `pip install huggingface-hub`. 
Then, run inside Python:\n```python\nimport shutil\nimport huggingface_hub as hub\n\ndirname = hub.snapshot_download(\"facebook/m2m100_418M\")\nshutil.copytree(dirname, \"cached_model_m2m100\")  # Copy to a permanent folder\n```\n\nNow, move everything in the `dlt` directory to your offline environment. Create a virtual environment and run the following in terminal:\n```bash\npip install --no-index --find-links libraries/ dl-translate\n```\n\nNow, run inside Python:\n```python\nimport dl_translate as dlt\n\nmt = dlt.TranslationModel(\"cached_model_m2m100\", model_family=\"m2m100\")\n```\n\n## Advanced\n\nThe following section assumes you have knowledge of PyTorch and Huggingface Transformers.\n\n### Saving and loading\n\nIf you wish to accelerate the loading time of the translation model, you can use `save_obj`. Later you can reload it with `load_obj` by specifying the same directory that you are using to save.\n\n```python\nmt = dlt.TranslationModel()\n# ...\nmt.save_obj('saved_model')\n# ...\nmt = dlt.TranslationModel.load_obj('saved_model')\n```\n\n**Warning:** Only use this if you are certain the torch module saved in `saved_model/weights.pt` can be correctly loaded. Indeed, it is possible that the `huggingface`, `torch` or some other dependencies change between when you called `save_obj` and `load_obj`, and that might break your code. Thus, it is recommended to only run `load_obj` in the same environment/session as `save_obj`. 
**Note this method might be deprecated in the future once there's no speed benefit in loading this way.**\n\n\n### Interacting with underlying model and tokenizer\n\nWhen initializing `model`, you can pass in arguments for the underlying BART model and tokenizer (which will respectively be passed to `ModelForConditionalGeneration.from_pretrained` and `TokenizerFast.from_pretrained`):\n\n```python\nmt = dlt.TranslationModel(\n    model_options=dict(\n        state_dict=...,\n        cache_dir=...,\n        ...\n    ),\n    tokenizer_options=dict(\n        tokenizer_file=...,\n        eos_token=...,\n        ...\n    )\n)\n```\n\nYou can also access the underlying `transformers` model and `tokenizer`:\n```python\ntransformers_model = mt.get_transformers_model()\ntokenizer = mt.get_tokenizer()\n```\n\nFor more information about the models themselves, please read the docs on [mBART](https://huggingface.co/transformers/master/model_doc/mbart.html) and [m2m100](https://huggingface.co/transformers/model_doc/m2m_100.html).\n\n\n### Keyword arguments for the `generate()` method of the underlying model\n\nWhen running `mt.translate`, you can also give a `generation_options` dictionary that is passed as keyword arguments to the underlying `mt.get_transformers_model().generate()` method:\n```python\nmt.translate(\n    text,\n    source=dlt.lang.GERMAN,\n    target=dlt.lang.SPANISH,\n    generation_options=dict(num_beams=5, max_length=...)\n)\n```\n\nLearn more in the [huggingface docs](https://huggingface.co/transformers/main_classes/model.html#transformers.generation_utils.GenerationMixin.generate).\n"
  },
  {
    "path": "docs/references.md",
    "content": "# API Reference\n\n\n## dlt.TranslationModel\n\n\n### __init__\n\n```python\ndlt.TranslationModel.__init__(self, model_or_path: str = 'm2m100', tokenizer_path: str = None, device: str = 'auto', model_family: str = None, model_options: dict = None, tokenizer_options: dict = None)\n```\n\n*Instantiates a multilingual transformer model for translation.*\n\n| Parameter | Type | Default | Description |\n|-|-|-|-|\n| **model_or_path** | *str* | `m2m100` | The path or the name of the model. Equivalent to the first argument of `AutoModel.from_pretrained()`. You can also specify shorthands (\"mbart50\" and \"m2m100\").\n| **tokenizer_path** | *str* | *optional* | The path to the tokenizer. By default, it will be set to `model_or_path`.\n| **device** | *str* | `auto` | \"cpu\", \"gpu\" or \"auto\". If it's set to \"auto\", will try to select a GPU when available or else fall back to CPU.\n| **model_family** | *str* | *optional* | Either \"mbart50\" or \"m2m100\". By default, it will be inferred based on `model_or_path`. 
Needs to be explicitly set if `model_or_path` is a path.\n| **model_options** | *dict* | *optional* | The keyword arguments passed to the model, which is a transformer for conditional generation.\n| **tokenizer_options** | *dict* | *optional* | The keyword arguments passed to the model's tokenizer.\n\n<br>\n\n\n### translate\n\n```python\ndlt.TranslationModel.translate(self, text: Union[str, List[str]], source: str, target: str, batch_size: int = 32, verbose: bool = False, generation_options: dict = None) -> Union[str, List[str]]\n```\n\n*Translates a string or a list of strings from a source to a target language.*\n\n| Parameter | Type | Default | Description |\n|-|-|-|-|\n| **text** | *Union[str, List[str]]* | *required* | The content you want to translate.\n| **source** | *str* | *required* | The language of the original text.\n| **target** | *str* | *required* | The language of the translated text.\n| **batch_size** | *int* | `32` | The number of samples to load at once. If set to `None`, it will process everything at once.\n| **verbose** | *bool* | `False` | Whether to display the progress bar for every batch processed.\n| **generation_options** | *dict* | *optional* | The keyword arguments passed to `model.generate()`, where `model` is the underlying transformers model.\n\nNote:\n- Run `print(dlt.utils.available_languages())` to see what's available.\n- A smaller value is preferred for `batch_size` if your (video) RAM is limited.\n\n<br>\n\n\n### get_transformers_model\n\n```python\ndlt.TranslationModel.get_transformers_model(self)\n```\n\n*Retrieve the underlying mBART transformer model.*\n\n<br>\n\n\n### get_tokenizer\n\n```python\ndlt.TranslationModel.get_tokenizer(self)\n```\n\n*Retrieve the mBART huggingface tokenizer.*\n\n<br>\n\n\n### available_codes\n\n```python\ndlt.TranslationModel.available_codes(self) -> List[str]\n```\n\n*Returns all the available codes for a given `dlt.TranslationModel`\ninstance.*\n\n<br>\n\n\n### 
available_languages\n\n```python\ndlt.TranslationModel.available_languages(self) -> List[str]\n```\n\n*Returns all the available languages for a given `dlt.TranslationModel`\ninstance.*\n\n<br>\n\n\n### get_lang_code_map\n\n```python\ndlt.TranslationModel.get_lang_code_map(self) -> Dict[str, str]\n```\n\n*Returns the language -> codes dictionary for a given `dlt.TranslationModel`\ninstance.*\n\n<br>\n\n\n### save_obj\n\n```python\ndlt.TranslationModel.save_obj(self, path: str = 'saved_model') -> None\n```\n\n*Saves your model as a torch object and save your tokenizer.*\n\n| Parameter | Type | Default | Description |\n|-|-|-|-|\n| **path** | *str* | `saved_model` | The directory where you want to save your model and tokenizer\n\n<br>\n\n\n### load_obj\n\n```python\ndlt.TranslationModel.load_obj(path: str = 'saved_model', **kwargs)\n```\n\n*Initialize `dlt.TranslationModel` from the torch object and tokenizer\nsaved with `dlt.TranslationModel.save_obj`*\n\n| Parameter | Type | Default | Description |\n|-|-|-|-|\n| **path** | *str* | `saved_model` | The directory where your torch model and tokenizer are stored\n\n<br>\n\n\n\n<br>\n\n\n## dlt.utils\n\n\n### get_lang_code_map\n\n```python\ndlt.utils.get_lang_code_map(weights: str = 'mbart50') -> Dict[str, str]\n```\n\n*Get a dictionary mapping a language -> code for a given model. The code will depend on the model you choose.*\n\n| Parameter | Type | Default | Description |\n|-|-|-|-|\n| **weights** | *str* | `mbart50` | The name of the model you are using. For example, \"mbart50\" is the multilingual BART Large with 50 languages available to use.\n\n<br>\n\n\n### available_codes\n\n```python\ndlt.utils.available_codes(weights: str = 'mbart50') -> List[str]\n```\n\n*Get all the codes available for a given model. The code format will depend on the model you select.*\n\n| Parameter | Type | Default | Description |\n|-|-|-|-|\n| **weights** | *str* | `mbart50` | The name of the model you are using. 
For example, \"mbart50\" is the multilingual BART Large with 50 codes available to use.\n\n<br>\n\n\n### available_languages\n\n```python\ndlt.utils.available_languages(weights: str = 'mbart50') -> List[str]\n```\n\n*Get all the languages available for a given model.*\n\n| Parameter | Type | Default | Description |\n|-|-|-|-|\n| **weights** | *str* | `mbart50` | The name of the model you are using. For example, \"mbart50\" is the multilingual BART Large with 50 languages available to use.\n\n<br>\n\n\n\n<br>\n\n"
  },
  {
    "path": "docs/requirements.txt",
    "content": "mkdocs\nmkdocs-material\njinja2<3.1.0\n"
  },
  {
    "path": "mkdocs-rtd.yml",
    "content": "site_name: DL Translate\nrepo_url: https://github.com/xhluca/dl-translate\nedit_uri: blob/main/docs/\nnav:\n  - index.md\n  - references.md\n  - contributing.md\n  - available_languages.md\ntheme: readthedocs\n"
  },
  {
    "path": "mkdocs.yml",
    "content": "site_name: DL Translate\nrepo_url: https://github.com/xhluca/dl-translate\nnav:\n  - index.md\n  - references.md\n  - contributing.md\n  - available_languages.md\ntheme: material\nmarkdown_extensions:\n  - pymdownx.highlight\n  - pymdownx.superfences\n"
  },
  {
    "path": "scripts/generate_langs.py",
    "content": "import json\nimport os\n\n\ndef name_to_var(lang_name):\n    return (\n        lang_name.upper()\n        .replace(\" \", \"_\")\n        .replace(\"(\", \"\")\n        .replace(\")\", \"\")\n        .replace(\"-\", \"_\")\n        .replace(\"'\", \"\")\n    )\n\n\ndef load_json(name):\n    filepath = os.path.join(os.path.dirname(__file__), \"langs_coverage\", f\"{name}.json\")\n    return json.loads(open(filepath).read())\n\n\nauto_gen_comment = f\"# Auto-generated. Do not modify, use {__file__} instead.\\n\"\n\nname2json = {}\n\nfor name in [\"m2m100\", \"mbart50\", \"nllb200\"]:\n    name2json[name] = lang2code = load_json(name)\n\n    with open(f\"./dl_translate/lang/{name}.py\", \"w\") as f:\n        f.write(auto_gen_comment)\n        for lang, code in lang2code.items():\n            f.write(f'{name_to_var(lang)} = \"{lang}\"\\n')\n\n\nwith open(\"./dl_translate/_pairs.py\", \"w\") as f:\n    f.write(auto_gen_comment)\n\n    for name, lang2code in name2json.items():\n        f.write(f\"_PAIRS_{name.upper()} = {tuple(lang2code.items())}\\n\")\n"
  },
  {
    "path": "scripts/langs_coverage/m2m100.json",
    "content": "{\n    \"Afrikaans\": \"af\",\n    \"Amharic\": \"am\",\n    \"Arabic\": \"ar\",\n    \"Asturian\": \"ast\",\n    \"Azerbaijani\": \"az\",\n    \"Bashkir\": \"ba\",\n    \"Belarusian\": \"be\",\n    \"Bulgarian\": \"bg\",\n    \"Bengali\": \"bn\",\n    \"Breton\": \"br\",\n    \"Bosnian\": \"bs\",\n    \"Catalan\": \"ca\",\n    \"Valencian\": \"ca\",\n    \"Cebuano\": \"ceb\",\n    \"Czech\": \"cs\",\n    \"Welsh\": \"cy\",\n    \"Danish\": \"da\",\n    \"German\": \"de\",\n    \"Greek\": \"el\",\n    \"English\": \"en\",\n    \"Spanish\": \"es\",\n    \"Estonian\": \"et\",\n    \"Persian\": \"fa\",\n    \"Fulah\": \"ff\",\n    \"Finnish\": \"fi\",\n    \"French\": \"fr\",\n    \"Western Frisian\": \"fy\",\n    \"Irish\": \"ga\",\n    \"Gaelic\": \"gd\",\n    \"Scottish Gaelic\": \"gd\",\n    \"Galician\": \"gl\",\n    \"Gujarati\": \"gu\",\n    \"Hausa\": \"ha\",\n    \"Hebrew\": \"he\",\n    \"Hindi\": \"hi\",\n    \"Croatian\": \"hr\",\n    \"Haitian\": \"ht\",\n    \"Haitian Creole\": \"ht\",\n    \"Hungarian\": \"hu\",\n    \"Armenian\": \"hy\",\n    \"Indonesian\": \"id\",\n    \"Igbo\": \"ig\",\n    \"Iloko\": \"ilo\",\n    \"Icelandic\": \"is\",\n    \"Italian\": \"it\",\n    \"Japanese\": \"ja\",\n    \"Javanese\": \"jv\",\n    \"Georgian\": \"ka\",\n    \"Kazakh\": \"kk\",\n    \"Khmer\": \"km\",\n    \"Central Khmer\": \"km\",\n    \"Kannada\": \"kn\",\n    \"Korean\": \"ko\",\n    \"Luxembourgish\": \"lb\",\n    \"Letzeburgesch\": \"lb\",\n    \"Ganda\": \"lg\",\n    \"Lingala\": \"ln\",\n    \"Lao\": \"lo\",\n    \"Lithuanian\": \"lt\",\n    \"Latvian\": \"lv\",\n    \"Malagasy\": \"mg\",\n    \"Macedonian\": \"mk\",\n    \"Malayalam\": \"ml\",\n    \"Mongolian\": \"mn\",\n    \"Marathi\": \"mr\",\n    \"Malay\": \"ms\",\n    \"Burmese\": \"my\",\n    \"Nepali\": \"ne\",\n    \"Dutch\": \"nl\",\n    \"Flemish\": \"nl\",\n    \"Norwegian\": \"no\",\n    \"Northern Sotho\": \"ns\",\n    \"Occitan\": \"oc\",\n    \"Oriya\": \"or\",\n    
\"Panjabi\": \"pa\",\n    \"Punjabi\": \"pa\",\n    \"Polish\": \"pl\",\n    \"Pushto\": \"ps\",\n    \"Pashto\": \"ps\",\n    \"Portuguese\": \"pt\",\n    \"Romanian\": \"ro\",\n    \"Moldavian\": \"ro\",\n    \"Moldovan\": \"ro\",\n    \"Russian\": \"ru\",\n    \"Sindhi\": \"sd\",\n    \"Sinhala\": \"si\",\n    \"Sinhalese\": \"si\",\n    \"Slovak\": \"sk\",\n    \"Slovenian\": \"sl\",\n    \"Somali\": \"so\",\n    \"Albanian\": \"sq\",\n    \"Serbian\": \"sr\",\n    \"Swati\": \"ss\",\n    \"Sundanese\": \"su\",\n    \"Swedish\": \"sv\",\n    \"Swahili\": \"sw\",\n    \"Tamil\": \"ta\",\n    \"Thai\": \"th\",\n    \"Tagalog\": \"tl\",\n    \"Tswana\": \"tn\",\n    \"Turkish\": \"tr\",\n    \"Ukrainian\": \"uk\",\n    \"Urdu\": \"ur\",\n    \"Uzbek\": \"uz\",\n    \"Vietnamese\": \"vi\",\n    \"Wolof\": \"wo\",\n    \"Xhosa\": \"xh\",\n    \"Yiddish\": \"yi\",\n    \"Yoruba\": \"yo\",\n    \"Chinese\": \"zh\",\n    \"Zulu\": \"zu\"\n}"
  },
  {
    "path": "scripts/langs_coverage/mbart50.json",
    "content": "{\n    \"Arabic\": \"ar_AR\",\n    \"Czech\": \"cs_CZ\",\n    \"German\": \"de_DE\",\n    \"English\": \"en_XX\",\n    \"Spanish\": \"es_XX\",\n    \"Estonian\": \"et_EE\",\n    \"Finnish\": \"fi_FI\",\n    \"French\": \"fr_XX\",\n    \"Gujarati\": \"gu_IN\",\n    \"Hindi\": \"hi_IN\",\n    \"Italian\": \"it_IT\",\n    \"Japanese\": \"ja_XX\",\n    \"Kazakh\": \"kk_KZ\",\n    \"Korean\": \"ko_KR\",\n    \"Lithuanian\": \"lt_LT\",\n    \"Latvian\": \"lv_LV\",\n    \"Burmese\": \"my_MM\",\n    \"Nepali\": \"ne_NP\",\n    \"Dutch\": \"nl_XX\",\n    \"Romanian\": \"ro_RO\",\n    \"Russian\": \"ru_RU\",\n    \"Sinhala\": \"si_LK\",\n    \"Turkish\": \"tr_TR\",\n    \"Vietnamese\": \"vi_VN\",\n    \"Chinese\": \"zh_CN\",\n    \"Afrikaans\": \"af_ZA\",\n    \"Azerbaijani\": \"az_AZ\",\n    \"Bengali\": \"bn_IN\",\n    \"Persian\": \"fa_IR\",\n    \"Hebrew\": \"he_IL\",\n    \"Croatian\": \"hr_HR\",\n    \"Indonesian\": \"id_ID\",\n    \"Georgian\": \"ka_GE\",\n    \"Khmer\": \"km_KH\",\n    \"Macedonian\": \"mk_MK\",\n    \"Malayalam\": \"ml_IN\",\n    \"Mongolian\": \"mn_MN\",\n    \"Marathi\": \"mr_IN\",\n    \"Polish\": \"pl_PL\",\n    \"Pashto\": \"ps_AF\",\n    \"Portuguese\": \"pt_XX\",\n    \"Swedish\": \"sv_SE\",\n    \"Swahili\": \"sw_KE\",\n    \"Tamil\": \"ta_IN\",\n    \"Telugu\": \"te_IN\",\n    \"Thai\": \"th_TH\",\n    \"Tagalog\": \"tl_XX\",\n    \"Ukrainian\": \"uk_UA\",\n    \"Urdu\": \"ur_PK\",\n    \"Xhosa\": \"xh_ZA\",\n    \"Galician\": \"gl_ES\",\n    \"Slovene\": \"sl_SI\"\n}"
  },
  {
    "path": "scripts/langs_coverage/nllb200.json",
    "content": "{\n    \"Acehnese (Arabic script)\": \"ace_Arab\",\n    \"Acehnese (Latin script)\": \"ace_Latn\",\n    \"Mesopotamian Arabic\": \"acm_Arab\",\n    \"Ta'izzi-Adeni Arabic\": \"acq_Arab\",\n    \"Tunisian Arabic\": \"aeb_Arab\",\n    \"Afrikaans\": \"afr_Latn\",\n    \"South Levantine Arabic\": \"ajp_Arab\",\n    \"Akan\": \"aka_Latn\",\n    \"Amharic\": \"amh_Ethi\",\n    \"North Levantine Arabic\": \"apc_Arab\",\n    \"Modern Standard Arabic\": \"arb_Arab\",\n    \"Modern Standard Arabic (Romanized)\": \"arb_Latn\",\n    \"Najdi Arabic\": \"ars_Arab\",\n    \"Moroccan Arabic\": \"ary_Arab\",\n    \"Egyptian Arabic\": \"arz_Arab\",\n    \"Assamese\": \"asm_Beng\",\n    \"Asturian\": \"ast_Latn\",\n    \"Awadhi\": \"awa_Deva\",\n    \"Central Aymara\": \"ayr_Latn\",\n    \"South Azerbaijani\": \"azb_Arab\",\n    \"North Azerbaijani\": \"azj_Latn\",\n    \"Bashkir\": \"bak_Cyrl\",\n    \"Bambara\": \"bam_Latn\",\n    \"Balinese\": \"ban_Latn\",\n    \"Belarusian\": \"bel_Cyrl\",\n    \"Bemba\": \"bem_Latn\",\n    \"Bengali\": \"ben_Beng\",\n    \"Bhojpuri\": \"bho_Deva\",\n    \"Banjar (Arabic script)\": \"bjn_Arab\",\n    \"Banjar (Latin script)\": \"bjn_Latn\",\n    \"Standard Tibetan\": \"bod_Tibt\",\n    \"Bosnian\": \"bos_Latn\",\n    \"Buginese\": \"bug_Latn\",\n    \"Bulgarian\": \"bul_Cyrl\",\n    \"Catalan\": \"cat_Latn\",\n    \"Cebuano\": \"ceb_Latn\",\n    \"Czech\": \"ces_Latn\",\n    \"Chokwe\": \"cjk_Latn\",\n    \"Central Kurdish\": \"ckb_Arab\",\n    \"Crimean Tatar\": \"crh_Latn\",\n    \"Welsh\": \"cym_Latn\",\n    \"Danish\": \"dan_Latn\",\n    \"German\": \"deu_Latn\",\n    \"Southwestern Dinka\": \"dik_Latn\",\n    \"Dyula\": \"dyu_Latn\",\n    \"Dzongkha\": \"dzo_Tibt\",\n    \"Greek\": \"ell_Grek\",\n    \"English\": \"eng_Latn\",\n    \"Esperanto\": \"epo_Latn\",\n    \"Estonian\": \"est_Latn\",\n    \"Basque\": \"eus_Latn\",\n    \"Ewe\": \"ewe_Latn\",\n    \"Faroese\": \"fao_Latn\",\n    \"Fijian\": \"fij_Latn\",\n    
\"Finnish\": \"fin_Latn\",\n    \"Fon\": \"fon_Latn\",\n    \"French\": \"fra_Latn\",\n    \"Friulian\": \"fur_Latn\",\n    \"Nigerian Fulfulde\": \"fuv_Latn\",\n    \"Scottish Gaelic\": \"gla_Latn\",\n    \"Irish\": \"gle_Latn\",\n    \"Galician\": \"glg_Latn\",\n    \"Guarani\": \"grn_Latn\",\n    \"Gujarati\": \"guj_Gujr\",\n    \"Haitian Creole\": \"hat_Latn\",\n    \"Hausa\": \"hau_Latn\",\n    \"Hebrew\": \"heb_Hebr\",\n    \"Hindi\": \"hin_Deva\",\n    \"Chhattisgarhi\": \"hne_Deva\",\n    \"Croatian\": \"hrv_Latn\",\n    \"Hungarian\": \"hun_Latn\",\n    \"Armenian\": \"hye_Armn\",\n    \"Igbo\": \"ibo_Latn\",\n    \"Ilocano\": \"ilo_Latn\",\n    \"Indonesian\": \"ind_Latn\",\n    \"Icelandic\": \"isl_Latn\",\n    \"Italian\": \"ita_Latn\",\n    \"Javanese\": \"jav_Latn\",\n    \"Japanese\": \"jpn_Jpan\",\n    \"Kabyle\": \"kab_Latn\",\n    \"Jingpho\": \"kac_Latn\",\n    \"Kamba\": \"kam_Latn\",\n    \"Kannada\": \"kan_Knda\",\n    \"Kashmiri (Arabic script)\": \"kas_Arab\",\n    \"Kashmiri (Devanagari script)\": \"kas_Deva\",\n    \"Georgian\": \"kat_Geor\",\n    \"Central Kanuri (Arabic script)\": \"knc_Arab\",\n    \"Central Kanuri (Latin script)\": \"knc_Latn\",\n    \"Kazakh\": \"kaz_Cyrl\",\n    \"Kabiyè\": \"kbp_Latn\",\n    \"Kabuverdianu\": \"kea_Latn\",\n    \"Khmer\": \"khm_Khmr\",\n    \"Kikuyu\": \"kik_Latn\",\n    \"Kinyarwanda\": \"kin_Latn\",\n    \"Kyrgyz\": \"kir_Cyrl\",\n    \"Kimbundu\": \"kmb_Latn\",\n    \"Northern Kurdish\": \"kmr_Latn\",\n    \"Kikongo\": \"kon_Latn\",\n    \"Korean\": \"kor_Hang\",\n    \"Lao\": \"lao_Laoo\",\n    \"Ligurian\": \"lij_Latn\",\n    \"Limburgish\": \"lim_Latn\",\n    \"Lingala\": \"lin_Latn\",\n    \"Lithuanian\": \"lit_Latn\",\n    \"Lombard\": \"lmo_Latn\",\n    \"Latgalian\": \"ltg_Latn\",\n    \"Luxembourgish\": \"ltz_Latn\",\n    \"Luba-Kasai\": \"lua_Latn\",\n    \"Ganda\": \"lug_Latn\",\n    \"Luo\": \"luo_Latn\",\n    \"Mizo\": \"lus_Latn\",\n    \"Standard Latvian\": \"lvs_Latn\",\n    
\"Magahi\": \"mag_Deva\",\n    \"Maithili\": \"mai_Deva\",\n    \"Malayalam\": \"mal_Mlym\",\n    \"Marathi\": \"mar_Deva\",\n    \"Minangkabau (Arabic script)\": \"min_Arab\",\n    \"Minangkabau (Latin script)\": \"min_Latn\",\n    \"Macedonian\": \"mkd_Cyrl\",\n    \"Plateau Malagasy\": \"plt_Latn\",\n    \"Maltese\": \"mlt_Latn\",\n    \"Meitei (Bengali script)\": \"mni_Beng\",\n    \"Halh Mongolian\": \"khk_Cyrl\",\n    \"Mossi\": \"mos_Latn\",\n    \"Maori\": \"mri_Latn\",\n    \"Burmese\": \"mya_Mymr\",\n    \"Dutch\": \"nld_Latn\",\n    \"Norwegian Nynorsk\": \"nno_Latn\",\n    \"Norwegian Bokmål\": \"nob_Latn\",\n    \"Nepali\": \"npi_Deva\",\n    \"Northern Sotho\": \"nso_Latn\",\n    \"Nuer\": \"nus_Latn\",\n    \"Nyanja\": \"nya_Latn\",\n    \"Occitan\": \"oci_Latn\",\n    \"West Central Oromo\": \"gaz_Latn\",\n    \"Odia\": \"ory_Orya\",\n    \"Pangasinan\": \"pag_Latn\",\n    \"Eastern Panjabi\": \"pan_Guru\",\n    \"Papiamento\": \"pap_Latn\",\n    \"Western Persian\": \"pes_Arab\",\n    \"Polish\": \"pol_Latn\",\n    \"Portuguese\": \"por_Latn\",\n    \"Dari\": \"prs_Arab\",\n    \"Southern Pashto\": \"pbt_Arab\",\n    \"Ayacucho Quechua\": \"quy_Latn\",\n    \"Romanian\": \"ron_Latn\",\n    \"Rundi\": \"run_Latn\",\n    \"Russian\": \"rus_Cyrl\",\n    \"Sango\": \"sag_Latn\",\n    \"Sanskrit\": \"san_Deva\",\n    \"Santali\": \"sat_Olck\",\n    \"Sicilian\": \"scn_Latn\",\n    \"Shan\": \"shn_Mymr\",\n    \"Sinhala\": \"sin_Sinh\",\n    \"Slovak\": \"slk_Latn\",\n    \"Slovenian\": \"slv_Latn\",\n    \"Samoan\": \"smo_Latn\",\n    \"Shona\": \"sna_Latn\",\n    \"Sindhi\": \"snd_Arab\",\n    \"Somali\": \"som_Latn\",\n    \"Southern Sotho\": \"sot_Latn\",\n    \"Spanish\": \"spa_Latn\",\n    \"Tosk Albanian\": \"als_Latn\",\n    \"Sardinian\": \"srd_Latn\",\n    \"Serbian\": \"srp_Cyrl\",\n    \"Swati\": \"ssw_Latn\",\n    \"Sundanese\": \"sun_Latn\",\n    \"Swedish\": \"swe_Latn\",\n    \"Swahili\": \"swh_Latn\",\n    \"Silesian\": \"szl_Latn\",\n   
 \"Tamil\": \"tam_Taml\",\n    \"Tatar\": \"tat_Cyrl\",\n    \"Telugu\": \"tel_Telu\",\n    \"Tajik\": \"tgk_Cyrl\",\n    \"Tagalog\": \"tgl_Latn\",\n    \"Thai\": \"tha_Thai\",\n    \"Tigrinya\": \"tir_Ethi\",\n    \"Tamasheq (Latin script)\": \"taq_Latn\",\n    \"Tamasheq (Tifinagh script)\": \"taq_Tfng\",\n    \"Tok Pisin\": \"tpi_Latn\",\n    \"Tswana\": \"tsn_Latn\",\n    \"Tsonga\": \"tso_Latn\",\n    \"Turkmen\": \"tuk_Latn\",\n    \"Tumbuka\": \"tum_Latn\",\n    \"Turkish\": \"tur_Latn\",\n    \"Twi\": \"twi_Latn\",\n    \"Central Atlas Tamazight\": \"tzm_Tfng\",\n    \"Uyghur\": \"uig_Arab\",\n    \"Ukrainian\": \"ukr_Cyrl\",\n    \"Umbundu\": \"umb_Latn\",\n    \"Urdu\": \"urd_Arab\",\n    \"Northern Uzbek\": \"uzn_Latn\",\n    \"Venetian\": \"vec_Latn\",\n    \"Vietnamese\": \"vie_Latn\",\n    \"Waray\": \"war_Latn\",\n    \"Wolof\": \"wol_Latn\",\n    \"Xhosa\": \"xho_Latn\",\n    \"Eastern Yiddish\": \"ydd_Hebr\",\n    \"Yoruba\": \"yor_Latn\",\n    \"Yue Chinese\": \"yue_Hant\",\n    \"Chinese (Simplified)\": \"zho_Hans\",\n    \"Chinese (Traditional)\": \"zho_Hant\",\n    \"Standard Malay\": \"zsm_Latn\",\n    \"Zulu\": \"zul_Latn\"\n}"
  },
  {
    "path": "scripts/render_available_langs.py",
    "content": "import os\nimport json\n\nfrom jinja2 import Template\n\n\ndef load_json(name):\n    filepath = os.path.join(os.path.dirname(__file__), \"langs_coverage\", f\"{name}.json\")\n    return json.loads(open(filepath).read())\n\n\ntemplate_values = {}\nfor name in [\"m2m100\", \"mbart50\", \"nllb200\"]:\n    content = \"\"\n    di = load_json(name)\n\n    content += \"| Language Name | Code |\\n\"\n    content += \"| --- | --- |\\n\"\n    for key, val in di.items():\n        content += f\"| {key} | {val} |\\n\"\n\n    template_values[name] = content\n\n\ntemplate_path = os.path.join(\n    os.path.dirname(__file__), \"templates\", \"available_languages.md.jinja2\"\n)\nsave_path = os.path.join(\n    os.path.dirname(__file__), \"..\", \"docs\", \"available_languages.md\"\n)\n\nwith open(template_path) as f:\n    template = Template(f.read())\n\nrendered = template.render(template_values)\n\nwith open(save_path, \"w\") as f:\n    f.write(rendered)\n"
  },
  {
    "path": "scripts/render_references.py",
    "content": "import os\nfrom typing import NamedTuple, List, Optional, Any, NamedTuple\nimport inspect\n\nfrom jinja2 import Template\n\nimport dl_translate as dlt\n\n\ntype2str = {\n    int: \"int\",\n    float: \"float\",\n    str: \"str\",\n    bool: \"bool\",\n    dict: \"dict\",\n    inspect._empty: \"unspecified\",\n    Optional[Any]: \"optional\",\n}\n\ndefault2str = {inspect._empty: \"*required*\", None: \"*optional*\"}\n\n\ndef preprocess_annot(annotation):\n    annotation = type2str.get(annotation, str(annotation))\n\n    return annotation.replace(\"typing.\", \"\")\n\n\ndef preprocess_default(default):\n    default = default2str.get(default, f\"`{default}`\")\n    return default\n\n\nclass FunctionReference:\n    def __init__(self, function, modname=None):\n        self.func = function\n\n        if modname is None:\n            self.modname = inspect.getmodule(self.func).__name__.replace(\n                \"dl_translate\", \"dlt\"\n            )\n        else:\n            self.modname = modname\n\n    @property\n    def name(self):\n        return self.func.__name__\n\n    @property\n    def signature(self):\n        return inspect.signature(self.func)\n\n    @property\n    def sig_desc(self):\n        return self.modname + \".\" + self.name + str(self.signature)\n\n    @property\n    def doc(self):\n        doc_template = Template(inspect.getdoc(self.func))\n        kwargs = {\"params\": \"| Parameter | Type | Default | Description |\\n|-|-|-|-|\"}\n        for arg_name, param in self.signature.parameters.items():\n            if arg_name == \"self\":\n                continue\n            annot = preprocess_annot(param.annotation)\n            default = preprocess_default(param.default)\n\n            kwargs[arg_name] = f\"| **{arg_name}** | *{annot}* | {default} |\"\n\n        return doc_template.render(**kwargs)\n\n\nclass ModuleReferences(NamedTuple):\n    name: str\n    funcs: List[FunctionReference]\n\n\ntemplate_path = os.path.join(\n    
os.path.dirname(__file__), \"templates\", \"references.md.jinja2\"\n)\nsave_path = os.path.join(os.path.dirname(__file__), \"..\", \"docs\", \"references.md\")\n\nwith open(template_path) as f:\n    template = Template(f.read())\n\n\nrendered = template.render(\n    modules=[\n        ModuleReferences(\n            \"dlt.TranslationModel\",\n            [\n                FunctionReference(\n                    dlt.TranslationModel.__init__, \"dlt.TranslationModel\"\n                ),\n                FunctionReference(\n                    dlt.TranslationModel.translate, \"dlt.TranslationModel\"\n                ),\n                FunctionReference(\n                    dlt.TranslationModel.get_transformers_model, \"dlt.TranslationModel\"\n                ),\n                FunctionReference(\n                    dlt.TranslationModel.get_tokenizer, \"dlt.TranslationModel\"\n                ),\n                FunctionReference(\n                    dlt.TranslationModel.available_codes, \"dlt.TranslationModel\"\n                ),\n                FunctionReference(\n                    dlt.TranslationModel.available_languages, \"dlt.TranslationModel\"\n                ),\n                FunctionReference(\n                    dlt.TranslationModel.get_lang_code_map, \"dlt.TranslationModel\"\n                ),\n                FunctionReference(\n                    dlt.TranslationModel.save_obj, \"dlt.TranslationModel\"\n                ),\n                FunctionReference(\n                    dlt.TranslationModel.load_obj, \"dlt.TranslationModel\"\n                ),\n            ],\n        ),\n        ModuleReferences(\n            \"dlt.utils\",\n            [\n                FunctionReference(dlt.utils.get_lang_code_map),\n                FunctionReference(dlt.utils.available_codes),\n                FunctionReference(dlt.utils.available_languages),\n            ],\n        ),\n    ]\n)\n\nwith open(save_path, \"w\") as f:\n    f.write(rendered)\n"
  },
  {
    "path": "scripts/templates/available_languages.md.jinja2",
    "content": "# Languages Available\n\nThis page gives all the languages available for each model family.\n\n## MBart 50\n\n{{mbart50}}\n\n## M2M-100\n\n{{m2m100}}\n\n## NLLB-200\n\n{{nllb200}}"
  },
  {
    "path": "scripts/templates/references.md.jinja2",
    "content": "# API Reference\n\n{% for module in modules %}\n## {{module.name}}\n\n{% for func in module.funcs %}\n### {{ func.name }}\n\n```python\n{{ func.sig_desc }}\n```\n\n{{ func.doc }}\n\n<br>\n\n{% endfor %}\n\n<br>\n\n{% endfor %}"
  },
  {
    "path": "setup.py",
    "content": "import setuptools\n\nwith open(\"README.md\", \"r\", encoding=\"utf-8\") as fh:\n    long_description = fh.read()\n\nsetuptools.setup(\n    name=\"dl-translate\",\n    version=\"0.3.1\",\n    author=\"Xing Han Lu\",\n    author_email=\"github@xinghanlu.com\",\n    description=\"A deep learning-based translation library built on Huggingface transformers\",\n    long_description=long_description,\n    long_description_content_type=\"text/markdown\",\n    url=\"https://github.com/xhlulu/dl-translate\",\n    classifiers=[\n        \"Programming Language :: Python :: 3\",\n        \"License :: OSI Approved :: MIT License\",\n        \"Operating System :: OS Independent\",\n    ],\n    packages=setuptools.find_packages(),\n    python_requires=\">=3.7\",\n    install_requires=[\n        \"transformers>=4.30.2\",\n        \"torch>=2.0.0\",\n        \"sentencepiece\",\n        \"protobuf\",\n        \"tqdm\",\n    ],\n    extras_require={\"dev\": [\"pytest\", \"black\", \"jinja2\", \"mkdocs\", \"mkdocs-material\"]},\n)\n"
  },
  {
    "path": "tests/long/test_save_load.py",
    "content": "import os\n\nimport dl_translate as dlt\n\n\ndef test_save():\n    mt = dlt.TranslationModel()\n    mt.save_obj(\"saved_model\")\n    assert os.path.exists(\"saved_model/weights.pt\")\n    assert os.path.exists(\"saved_model/tokenizer_config.json\")\n\n\ndef test_load():\n    mt = dlt.TranslationModel.load_obj(\"saved_model\")\n    assert isinstance(mt, dlt.TranslationModel)\n"
  },
  {
    "path": "tests/long/test_translate.py",
    "content": "import dl_translate as dlt\n\n\ndef test_translate():\n    mt = dlt.TranslationModel()\n\n    msg_en = \"Hello everyone, how are you?\"\n\n    assert (\n        mt.translate(msg_en, source=\"English\", target=\"Spanish\")\n        == \"Hola a todos, ¿cómo estás?\"\n    )\n\n    fr_1 = mt.translate(msg_en, source=\"English\", target=\"French\")\n    ch = mt.translate(msg_en, source=\"English\", target=\"Chinese\")\n    fr_2 = mt.translate([msg_en, msg_en + msg_en], source=\"English\", target=\"French\")\n\n    assert fr_1 == fr_2[0]\n    assert ch != fr_1\n\n\ndef test_mbart50():\n    mt = dlt.TranslationModel(\"mbart50\")\n\n    msg_en = \"Hello everyone, how are you?\"\n\n    fr_1 = mt.translate(msg_en, source=\"English\", target=\"French\")\n    ch = mt.translate(msg_en, source=\"English\", target=\"Chinese\")\n    fr_2 = mt.translate([msg_en, msg_en + msg_en], source=\"English\", target=\"French\")\n\n    assert fr_1 == fr_2[0]\n    assert ch != fr_1\n"
  },
  {
    "path": "tests/quick/test_lang.py",
    "content": "import dl_translate as dlt\nfrom dl_translate._pairs import _PAIRS_MBART50, _PAIRS_M2M100\n\n\ndef test_lang():\n    for l, _ in _PAIRS_M2M100:\n        assert getattr(dlt.lang, l.upper().replace(\" \", \"_\")) == l\n\n\ndef test_lang_m2m100():\n    for l, _ in _PAIRS_M2M100:\n        assert getattr(dlt.lang.m2m100, l.upper().replace(\" \", \"_\")) == l\n\n\ndef test_lang_mbart50():\n    for l, _ in _PAIRS_MBART50:\n        assert getattr(dlt.lang.mbart50, l.upper().replace(\" \", \"_\")) == l\n"
  },
  {
    "path": "tests/quick/test_translation_model.py",
    "content": "import pytest\nimport torch\n\nimport dl_translate as dlt\nfrom dl_translate._translation_model import (\n    _resolve_lang_codes,\n    _select_device,\n    _infer_model_or_path,\n    _infer_model_family,\n)\n\n\ndef test_resolve_lang_codes_mbart50():\n    sources = [dlt.lang.FRENCH, \"fr_XX\", \"French\"]\n    targets = [dlt.lang.ENGLISH, \"en_XX\", \"English\"]\n\n    for source, target in zip(sources, targets):\n        s = _resolve_lang_codes(source, \"source\", \"mbart50\")\n        t = _resolve_lang_codes(target, \"target\", \"mbart50\")\n        assert s == \"fr_XX\"\n        assert t == \"en_XX\"\n\n\ndef test_resolve_lang_codes_m2m100():\n    sources = [dlt.lang.m2m100.FRENCH, \"fr\", \"French\"]\n    targets = [dlt.lang.m2m100.ENGLISH, \"en\", \"English\"]\n\n    for source, target in zip(sources, targets):\n        s = _resolve_lang_codes(source, \"source\", \"m2m100\")\n        t = _resolve_lang_codes(target, \"target\", \"m2m100\")\n        assert s == \"fr\"\n        assert t == \"en\"\n\n\ndef test_resolve_lang_codes_nllb200():\n    sources = [dlt.lang.nllb200.FRENCH, \"fra_Latn\", \"French\"]\n    targets = [dlt.lang.nllb200.ENGLISH, \"eng_Latn\", \"English\"]\n\n    for source, target in zip(sources, targets):\n        s = _resolve_lang_codes(source, \"source\", \"nllb200\")\n        t = _resolve_lang_codes(target, \"target\", \"nllb200\")\n        assert s == \"fra_Latn\"\n        assert t == \"eng_Latn\"\n\n    sources = [\"Central Kanuri (Latin script)\"]\n    targets = [\"Ta'izzi-Adeni Arabic\"]\n    for source, target in zip(sources, targets):\n        s = _resolve_lang_codes(source, \"source\", \"nllb200\")\n        t = _resolve_lang_codes(target, \"target\", \"nllb200\")\n        assert s == \"knc_Latn\"\n        assert t == \"acq_Arab\"\n\n\ndef test_select_device():\n    assert _select_device(\"cpu\") == torch.device(\"cpu\")\n    assert _select_device(\"gpu\") == torch.device(\"cuda\")\n    assert _select_device(\"cuda:0\") == torch.device(\"cuda\", index=0)\n\n    if torch.cuda.is_available():\n        assert _select_device(\"auto\") == torch.device(\"cuda\")\n    else:\n        assert _select_device(\"auto\") == torch.device(\"cpu\")\n\n\ndef test_infer_model_or_path():\n    assert _infer_model_or_path(\"mbart50\") == \"facebook/mbart-large-50-many-to-many-mmt\"\n    assert _infer_model_or_path(\"m2m100\") == \"facebook/m2m100_418M\"\n    assert _infer_model_or_path(\"m2m100-small\") == \"facebook/m2m100_418M\"\n    assert _infer_model_or_path(\"m2m100-medium\") == \"facebook/m2m100_1.2B\"\n\n    assert _infer_model_or_path(\"non-existing-value\") == \"non-existing-value\"\n\n\ndef test_infer_model_family():\n    assert _infer_model_family(\"facebook/mbart-large-50-many-to-many-mmt\") == \"mbart50\"\n    assert _infer_model_family(\"facebook/m2m100_418M\") == \"m2m100\"\n    assert _infer_model_family(\"facebook/m2m100_1.2B\") == \"m2m100\"\n\n    with pytest.raises(ValueError):\n        _infer_model_family(\"non-existing-value\")\n"
  },
  {
    "path": "tests/quick/test_utils.py",
    "content": "import pytest\n\nfrom dl_translate import utils\nfrom dl_translate._pairs import _PAIRS_MBART50, _PAIRS_M2M100, _PAIRS_NLLB200\n\n\ndef test_dict_from_weights():\n    weights = [\n        \"mbart50\",\n        \"mbart-large-50-many-to-many-mmt\",\n        \"facebook/mbart-large-50-many-to-many-mmt\",\n        \"m2m100\",\n        \"m2m100_418M\",\n        \"m2m100_1.2B\",\n        \"facebook/m2m100_418M\",\n        \"facebook/m2m100_1.2B\",\n    ]\n\n    valid_keys = [\"langs\", \"codes\", \"pairs\"]\n\n    for w in weights:\n        assert type(utils._dict_from_weights(w)) is dict\n\n        keys = utils._dict_from_weights(w).keys()\n        for key in valid_keys:\n            assert key in keys\n\n\ndef test_dict_from_weights_exception():\n    with pytest.raises(ValueError):\n        utils._dict_from_weights(\"incorrect\")\n\n\ndef test_available_languages():\n    assert utils.available_languages() == utils.available_languages(\"m2m100\")\n\n    langs = utils.available_languages()\n\n    for lang, _ in _PAIRS_M2M100:\n        assert lang in langs\n\n    langs = utils.available_languages(\"mbart50\")\n\n    for lang, _ in _PAIRS_MBART50:\n        assert lang in langs\n\n    langs = utils.available_languages(\"nllb200\")\n\n    for lang, _ in _PAIRS_NLLB200:\n        assert lang in langs\n\n\ndef test_available_codes():\n    assert utils.available_codes() == utils.available_codes(\"m2m100\")\n\n    codes = utils.available_codes()\n\n    for _, code in _PAIRS_M2M100:\n        assert code in codes\n\n    codes = utils.available_codes(\"mbart50\")\n\n    for _, code in _PAIRS_MBART50:\n        assert code in codes\n\n    codes = utils.available_codes(\"nllb200\")\n\n    for _, code in _PAIRS_NLLB200:\n        assert code in codes\n"
  }
]