Showing preview only (1,745K chars total). Download the full file or copy to clipboard to get everything.
Repository: nebuly-ai/optimate
Branch: main
Commit: a6d302f912b4
Files: 306
Total size: 1.6 MB
Directory structure:
gitextract_7q29s3ew/
├── .gitignore
├── CITATION.cff
├── CODE_OF_CONDUCT.md
├── README.md
├── monitoring/
│ └── nebuly/
│ └── __init__.py
└── optimization/
├── .github/
│ └── workflows/
│ └── tests.yml
├── chatllama/
│ ├── LICENSE
│ ├── README.md
│ ├── artifacts/
│ │ ├── config/
│ │ │ ├── config.yaml
│ │ │ ├── ds_config.json
│ │ │ └── peft_config.yaml
│ │ ├── datasets/
│ │ │ ├── actor_dataset.json
│ │ │ ├── reward_dataset.json
│ │ │ └── rlhf_dataset.json
│ │ ├── download_dataset.py
│ │ ├── extend_rlhf_dataset.py
│ │ ├── generate_actor_dataset.py
│ │ ├── generate_rewards.py
│ │ ├── main.py
│ │ └── templates.json
│ ├── chatllama/
│ │ ├── __init__.py
│ │ ├── langchain_modules/
│ │ │ ├── __init__.py
│ │ │ └── prompt_templates.py
│ │ ├── llama_model.py
│ │ └── rlhf/
│ │ ├── __init__.py
│ │ ├── actor.py
│ │ ├── config.py
│ │ ├── dataset.py
│ │ ├── model_list.py
│ │ ├── model_loader.py
│ │ ├── reward.py
│ │ ├── trainer.py
│ │ └── utils.py
│ └── setup.py
├── cloud_surfer/
│ └── README.md
├── forward_forward/
│ ├── README.md
│ ├── forward_forward/
│ │ ├── __init__.py
│ │ ├── api/
│ │ │ ├── __init__.py
│ │ │ └── functions.py
│ │ ├── app.py
│ │ ├── operations/
│ │ │ ├── __init__.py
│ │ │ ├── build_models.py
│ │ │ ├── data.py
│ │ │ ├── fetch_operations.py
│ │ │ └── trainers.py
│ │ ├── root_op.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── labels.py
│ │ ├── modules.py
│ │ └── utils.py
│ ├── requirements.txt
│ └── setup.py
├── large_speedster/
│ └── README.md
├── nebullvm/
│ ├── .pre-commit-config.yaml
│ ├── CONTRIBUTING.md
│ ├── Dockerfile
│ ├── LICENSE
│ ├── MANIFEST.in
│ ├── README.md
│ ├── azure-pipelines.yml
│ ├── docker_build.sh
│ ├── docs/
│ │ ├── Makefile
│ │ ├── README.md
│ │ ├── conf.py
│ │ ├── index.rst
│ │ ├── modules/
│ │ │ ├── api.rst
│ │ │ ├── converters.rst
│ │ │ ├── index.rst
│ │ │ ├── inference_learners.rst
│ │ │ ├── installers.rst
│ │ │ └── optimizers.rst
│ │ └── requirements-docs.txt
│ ├── nebullvm/
│ │ ├── __init__.py
│ │ ├── api/
│ │ │ └── __init__.py
│ │ ├── apps/
│ │ │ ├── __init__.py
│ │ │ └── base.py
│ │ ├── config.py
│ │ ├── core/
│ │ │ ├── __init__.py
│ │ │ ├── models.py
│ │ │ ├── tests/
│ │ │ │ ├── __init__.py
│ │ │ │ └── test_models.py
│ │ │ └── types.py
│ │ ├── installers/
│ │ │ ├── __init__.py
│ │ │ ├── auto_installer.py
│ │ │ ├── install_bladedisc.sh
│ │ │ ├── install_fastertransformer.sh
│ │ │ ├── install_tensor_rt.sh
│ │ │ ├── install_tvm.sh
│ │ │ ├── install_tvm_prerequisites.sh
│ │ │ ├── installers.py
│ │ │ ├── tests/
│ │ │ │ ├── __init__.py
│ │ │ │ └── test_install_frameworks.py
│ │ │ └── tvm_installers/
│ │ │ ├── arm/
│ │ │ │ └── config.cmake
│ │ │ ├── arm_cuda/
│ │ │ │ └── config.cmake
│ │ │ ├── x86/
│ │ │ │ └── config.cmake
│ │ │ └── x86_cuda/
│ │ │ └── config.cmake
│ │ ├── operations/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── conversions/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── converters.py
│ │ │ │ ├── huggingface.py
│ │ │ │ ├── pytorch.py
│ │ │ │ ├── tensorflow.py
│ │ │ │ └── utils.py
│ │ │ ├── fetch_operations/
│ │ │ │ ├── __init__.py
│ │ │ │ └── local.py
│ │ │ ├── inference_learners/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── blade_disc.py
│ │ │ │ ├── builders.py
│ │ │ │ ├── deepsparse.py
│ │ │ │ ├── faster_transformer.py
│ │ │ │ ├── huggingface.py
│ │ │ │ ├── neural_compressor.py
│ │ │ │ ├── onnx.py
│ │ │ │ ├── openvino.py
│ │ │ │ ├── tensor_rt.py
│ │ │ │ ├── tensorflow.py
│ │ │ │ ├── torch_dynamo.py
│ │ │ │ ├── torch_neuron.py
│ │ │ │ ├── torch_xla.py
│ │ │ │ ├── torchscript.py
│ │ │ │ ├── tvm.py
│ │ │ │ └── utils.py
│ │ │ ├── measures/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── measures.py
│ │ │ │ └── utils.py
│ │ │ └── optimizations/
│ │ │ ├── __init__.py
│ │ │ ├── compilers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── deepsparse.py
│ │ │ │ ├── faster_transformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── bert/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── checkpoint_quantization.py
│ │ │ │ │ │ └── modeling_bert.py
│ │ │ │ │ └── gpt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── utils/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── gpt_decoder.py
│ │ │ │ │ └── huggingface_gpt_convert.py
│ │ │ │ ├── intel_neural_compressor.py
│ │ │ │ ├── onnxruntime.py
│ │ │ │ ├── openvino.py
│ │ │ │ ├── quantizations/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── intel_neural_compressor.py
│ │ │ │ │ ├── onnx.py
│ │ │ │ │ ├── openvino.py
│ │ │ │ │ ├── pytorch.py
│ │ │ │ │ ├── tensor_rt.py
│ │ │ │ │ ├── tensorflow.py
│ │ │ │ │ ├── tvm.py
│ │ │ │ │ └── utils.py
│ │ │ │ ├── tensor_rt.py
│ │ │ │ ├── tensorflow.py
│ │ │ │ ├── torch_dynamo.py
│ │ │ │ ├── torch_neuron.py
│ │ │ │ ├── torch_xla.py
│ │ │ │ ├── torchscript.py
│ │ │ │ ├── tvm.py
│ │ │ │ └── utils.py
│ │ │ ├── compressors/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── intel.py
│ │ │ │ ├── scripts/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── neural_magic_training.py
│ │ │ │ └── sparseml.py
│ │ │ ├── optimize_inference.py
│ │ │ ├── optimizers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ └── optimizers.py
│ │ │ ├── tests/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── test_deepsparse.py
│ │ │ │ ├── test_intel_neural_compressor.py
│ │ │ │ ├── test_onnxruntime.py
│ │ │ │ ├── test_openvino.py
│ │ │ │ ├── test_tensor_rt.py
│ │ │ │ ├── test_tensorflow.py
│ │ │ │ ├── test_torch_dynamo.py
│ │ │ │ ├── test_torchscript.py
│ │ │ │ ├── test_tvm.py
│ │ │ │ └── utils.py
│ │ │ └── utils.py
│ │ ├── optional_modules/
│ │ │ ├── __init__.py
│ │ │ ├── blade_disc.py
│ │ │ ├── deepsparse.py
│ │ │ ├── diffusers.py
│ │ │ ├── dummy.py
│ │ │ ├── huggingface.py
│ │ │ ├── neural_compressor.py
│ │ │ ├── onnx.py
│ │ │ ├── onnxruntime.py
│ │ │ ├── onnxsim.py
│ │ │ ├── openvino.py
│ │ │ ├── tensor_rt.py
│ │ │ ├── tensorflow.py
│ │ │ ├── torch.py
│ │ │ ├── torch_neuron.py
│ │ │ ├── torch_tensorrt.py
│ │ │ ├── torch_xla.py
│ │ │ ├── tvm.py
│ │ │ └── utils.py
│ │ └── tools/
│ │ ├── __init__.py
│ │ ├── adapters.py
│ │ ├── benchmark.py
│ │ ├── data.py
│ │ ├── diffusers.py
│ │ ├── feedback_collector.py
│ │ ├── hardware_utils.py
│ │ ├── huggingface.py
│ │ ├── logger.py
│ │ ├── onnx.py
│ │ ├── pytorch.py
│ │ ├── tests/
│ │ │ ├── __init__.py
│ │ │ ├── test_data.py
│ │ │ ├── test_hardware_utils.py
│ │ │ └── test_utils.py
│ │ ├── tf.py
│ │ ├── transformations.py
│ │ ├── utils.py
│ │ └── venv.py
│ ├── nebullvm.toml
│ ├── requirements-dev.txt
│ ├── requirements.txt
│ └── setup.py
├── open_alpha_tensor/
│ ├── README.md
│ ├── config.json
│ ├── main.py
│ ├── open_alpha_tensor/
│ │ ├── __init__.py
│ │ ├── api/
│ │ │ ├── __init__.py
│ │ │ └── functions.py
│ │ ├── config.py
│ │ ├── core/
│ │ │ ├── __init__.py
│ │ │ ├── actors/
│ │ │ │ ├── __init__.py
│ │ │ │ └── stage.py
│ │ │ ├── data/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── basis_change.py
│ │ │ │ ├── dataset.py
│ │ │ │ ├── generation.py
│ │ │ │ └── utils.py
│ │ │ ├── modules/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── alpha_tensor.py
│ │ │ │ ├── attention.py
│ │ │ │ ├── extras.py
│ │ │ │ ├── heads.py
│ │ │ │ └── torso.py
│ │ │ └── training.py
│ │ ├── operations/
│ │ │ ├── __init__.py
│ │ │ ├── checkpoint_op.py
│ │ │ ├── model_op.py
│ │ │ └── training_op.py
│ │ └── root_op.py
│ ├── resources/
│ │ └── open_alpha_tensor.md
│ └── setup.py
├── optimate/
│ └── README.md
└── speedster/
├── README.md
├── docs/
│ └── en/
│ ├── docs/
│ │ ├── advanced_options.md
│ │ ├── benchmarks.md
│ │ ├── getting_started/
│ │ │ ├── diffusers_getting_started.md
│ │ │ ├── hf_getting_started.md
│ │ │ ├── onnx_getting_started.md
│ │ │ ├── pytorch_getting_started.md
│ │ │ └── tf_getting_started.md
│ │ ├── hardware.md
│ │ ├── installation.md
│ │ ├── key_concepts.md
│ │ ├── notebooks.md
│ │ ├── overview.md
│ │ └── telemetry.md
│ └── mkdocs.yaml
├── notebooks/
│ ├── README.md
│ ├── diffusers/
│ │ ├── Accelerate_Stable_Diffusion_with_Speedster.ipynb
│ │ └── Readme.md
│ ├── huggingface/
│ │ ├── Accelerate_Hugging_Face_PyTorch_BERT_with_Speedster.ipynb
│ │ ├── Accelerate_Hugging_Face_PyTorch_DistilBERT_with_Speedster.ipynb
│ │ ├── Accelerate_Hugging_Face_PyTorch_GPT2_with_Speedster.ipynb
│ │ ├── Accelerate_Hugging_Face_PyTorch_T5_with_Speedster.ipynb
│ │ ├── Accelerate_Hugging_Face_TensorFlow_BERT_with_Speedster.ipynb
│ │ ├── Readme.md
│ │ └── faster_transformer_bert.py
│ ├── onnx/
│ │ ├── Accelerate_ONNX_ResNet50_with_Speedster.ipynb
│ │ └── Readme.md
│ ├── pytorch/
│ │ ├── Accelerate_PyTorch_ResNet50_with_Speedster.ipynb
│ │ ├── Accelerate_PyTorch_ViT_with_Speedster.ipynb
│ │ ├── Accelerate_PyTorch_YOLOv5_with_Speedster.ipynb
│ │ ├── Accelerate_PyTorch_YOLOv8_with_Speedster.ipynb
│ │ ├── Accelerate_fast_ai_Resnet34_with_Speedster.ipynb
│ │ └── Readme.md
│ └── tensorflow/
│ ├── Accelerate_Tensorflow_ResNet50_with_Speedster.ipynb
│ └── Readme.md
├── requirements.txt
├── setup.py
├── speedster/
│ ├── __init__.py
│ ├── api/
│ │ ├── __init__.py
│ │ ├── functions.py
│ │ └── tests/
│ │ ├── __init__.py
│ │ ├── test_huggingface.py
│ │ ├── test_onnx.py
│ │ ├── test_pytorch.py
│ │ ├── test_tensorflow.py
│ │ └── utils.py
│ ├── root_op.py
│ ├── speedster.py
│ ├── tests/
│ │ ├── __init__.py
│ │ └── test_root_op.py
│ └── utils.py
└── speedster.toml
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
optimization/nebullvm/docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
.idea
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# MacOS DS_Store
.DS_Store
# Pickle folder
.pkl_memoize_py3
# Folder where optimized models are stored
optimized_model
# Config file for tests coverage
.coveragerc
================================================
FILE: CITATION.cff
================================================
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
authors:
- family-names: "Nebuly"
given-names: "S.r.l"
- family-names: "Fiori"
given-names: "Diego"
orcid: "https://orcid.org/0000-0003-1910-0565"
- family-names: "Sofi"
given-names: "Valerio"
orcid: "https://orcid.org/0000-0001-5978-897X"
title: "nebullvm"
version: 0.4.3
date-released: 2022-10-10
url: "https://github.com/nebuly-ai/nebullvm"
================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, religion, or sexual identity
and orientation.
We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.
## Our Standards
Examples of behavior that contributes to a positive environment for our
community include:
* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
* Focusing on what is best not just for us as individuals, but for the
overall community
Examples of unacceptable behavior include:
* The use of sexualized language or imagery, and sexual attention or
advances of any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email
address, without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.
Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.
## Scope
This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
social@nebuly.ai.
All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the
reporter of any incident.
## Enforcement Guidelines
Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:
### 1. Correction
**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.
**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.
### 2. Warning
**Community Impact**: A violation through a single incident or series
of actions.
**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or
permanent ban.
### 3. Temporary Ban
**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.
**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.
### 4. Permanent Ban
**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.
**Consequence**: A permanent ban from any sort of public interaction within
the community.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.0, available at
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
Community Impact Guidelines were inspired by [Mozilla's code of conduct
enforcement ladder](https://github.com/mozilla/diversity).
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see the FAQ at
https://www.contributor-covenant.org/faq. Translations are available at
https://www.contributor-covenant.org/translations.
================================================
FILE: README.md
================================================
# OptiMate
**[Legacy]**
This repository is now in a legacy phase and is no longer actively maintained. Although the source code is still available in the Git history, there will be no additional updates or official support.
**[About Nebuly]**
Our team is fully committed to creating the best user-experience platform for LLMs so that companies can understand user behavior at scale as users interact with their LLM-based products.
- To learn more about how to get started, visit our [official documentation](https://docs.nebuly.com/welcome/overview)
- If you need enterprise support, please contact us [here](https://www.nebuly.com/nebuly-book-a-demo)
**[About optimate]**
We have open-sourced a couple of internal projects to the community, but we are not currently maintaining them. Optimate is a collection of libraries designed to help you optimize your AI models. It is an open-source project developed by Nebuly AI but is **not actively maintained**.
The tools available to assist you in your optimization are:
✅ [Speedster](https://github.com/nebuly-ai/optimate/tree/main/optimization/speedster): reduce inference costs by leveraging SOTA optimization techniques that best couple your AI models with the underlying hardware (GPUs and CPUs)
✅ [Nos](https://github.com/nebuly-ai/nos): reduce infrastructure costs by leveraging real-time dynamic partitioning and elastic quotas to maximize the utilization of your Kubernetes GPU cluster
✅ [ChatLLaMA](https://github.com/nebuly-ai/optimate/tree/main/optimization/chatllama): reduce hardware and data costs by leveraging fine-tuning optimization techniques and RLHF alignment
================================================
FILE: monitoring/nebuly/__init__.py
================================================
================================================
FILE: optimization/.github/workflows/tests.yml
================================================
name: Run tests
on:
push:
branches:
- "main"
paths-ignore:
- ".github/**"
- "*.md"
- "docs/**"
- "notebooks/**"
pull_request:
branches:
- "main"
paths-ignore:
- ".github/**"
- "*.md"
- "docs/**"
- "notebooks/**"
jobs:
test_on_ubuntu_cpu:
runs-on: ubuntu-20.04
strategy:
matrix:
# Run in all these versions of Python
python-version: [ 3.8, 3.9, "3.10" ]
steps:
# Checkout the latest code from the repo
- name: Checkout repo
uses: actions/checkout@v2
# Setup which version of Python to use
- name: Set Up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
# Display the Python version being used
- name: Display Python version
run: python -c "import sys; print(sys.version)"
# Install nebullvm
- name: Install nebullvm
run: |
python -m pip install --upgrade pip
pip install .
# Install Speedster
- name: Install Speedster
run: |
cd apps/accelerate/speedster
pip install .
cd ../../..
# Install PyTorch
- name: Install PyTorch
run: python -m pip install torch==2.0.0
# Install compilers except tvm
- name: Install deep learning compilers
run: python -m nebullvm.installers.auto_installer --compilers all
# Install requirements for testing
- name: Install requirements for testing
run: pip install -r "requirements-dev.txt"
# Run api tests
- name: Run api tests
run: |
export SPEEDSTER_DISABLE_TELEMETRY=1
cd apps/accelerate/speedster
pytest
cd ../../..
# Run components tests
- name: Run components tests
run: |
cd nebullvm
pytest
cd ../
# test_on_windows_cpu:
# runs-on: windows-latest
#
# strategy:
# matrix:
# # Run in all these versions of Python
# python-version: [ 3.8, 3.9, "3.10" ]
#
# steps:
# # Checkout the latest code from the repo
# - name: Checkout repo
# uses: actions/checkout@v2
# # Setup which version of Python to use
# - name: Set Up Python ${{ matrix.python-version }}
# uses: actions/setup-python@v2
# with:
# python-version: ${{ matrix.python-version }}
# # Display the Python version being used
# - name: Display Python version
# run: python -c "import sys; print(sys.version)"
# # Install nebullvm
# - name: Install nebullvm
# run: |
# python -m pip install --upgrade pip
# pip install .
# # Install Speedster
# - name: Install Speedster
# run: |
# cd apps/accelerate/speedster
# pip install .
# cd ../../..
# - name: Install PyTorch
# run: python -m pip install torch==2.0.0
# # Install compilers except tvm
# - name: Install deep learning compilers
# run: python -m nebullvm.installers.auto_installer --compilers all
# # Install requirements for testing
# - name: Install requirements for testing
# run: pip install -r "requirements-dev.txt"
# # Run api tests
# - name: Run api tests
# run: |
# $env:SPEEDSTER_DISABLE_TELEMETRY=1
# cd apps/accelerate/speedster
# pytest
# cd ../../..
# # Run components tests
# - name: Run components tests
# run: |
# cd nebullvm
# pytest
# cd ../
#
================================================
FILE: optimization/chatllama/LICENSE
================================================
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
<program> Copyright (C) <year> <name of author>
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<http://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<http://www.gnu.org/philosophy/why-not-lgpl.html>.
================================================
FILE: optimization/chatllama/README.md
================================================
# **🦙 ChatLLaMA**
> :warning: Please note this library does NOT contain LLaMA’s weights; to access the weights, you need to apply to Meta's form.
`ChatLLaMA` 🦙 is a library that allows you to efficiently leverage LLM fine-tuning capabilities using your own data and the least amount of compute possible.
Its purpose is to give developers peace of mind, by abstracting the efforts required for computational optimization and for the collection of large amounts of data.
If you like the project, please show your support by [leaving a star ⭐](https://github.com/nebuly-ai/nebullvm/stargazers).
## Quick install
You can install the package with pip:
```bash
pip install chatllama-py
```
Then you need to install the Llama models cloned from [Meta's repository](https://github.com/facebookresearch/llama):
```bash
git clone https://github.com/facebookresearch/llama.git
cd llama
pip install -r requirements.txt
pip install -e .
```
Follow the instructions in the Llama repository to download the model weights and tokenizer.
## What can ChatLLaMA help with?
`ChatLLaMA` 🦙 has been designed to help developers with various use cases, all related to RLHF training and optimized inference. These are some of the use cases that better resonate with our community wishlist:
- I want to train an efficient ChatGPT-like assistant on my local hardware infrastructure using a limited amount of data;
- I want to create my own personalized version of ChatGPT-like assistant without costs getting out of control;
- I want to understand which model architecture (LLaMA, OPT, GPTJ, etc.) best fits my requirements in terms of hardware, compute budget, and performance.
## Getting started
In this Getting Started we will set up a local RLHF training that will allow you to create your own ChatGPT-like assistant. In this example, we used OPT-1.3B, used open-source datasets wherever possible, and ran the training on an NVIDIA A100. If you want to use other models or hardware, we recommend reading the [supported models](#supported-models), [hardware requirements](#hardware-requirements) and [dataset preparation](#dataset-preparation) sections. In this example, we ran a few epochs of the training; this took a few hours. Any feedback on total training time, on any hardware, would be greatly appreciated. Please share your experience with our community on our Discord channel.
To quickly get you started, we will focus on 3 key steps:
1. Download YAML files to customize your training process. Please note that all the parameters of the library can be managed in the [`config.yaml`](https://github.com/nebuly-ai/nebullvm/blob/main/apps/accelerate/chatllama/artifacts/config/config.yaml);
2. Prepare the 3 datasets needed to train the actor model, the reward model and perform RLHF;
3. Train the models on your local infrastructure.
<details>
<summary>1 - YAML download </summary>
First, let’s get the artifacts for running ChatLLaMA. The artifacts contain:
- [`config.yaml`](https://github.com/nebuly-ai/nebullvm/blob/main/apps/accelerate/chatllama/artifacts/config/config.yaml): config file for model and data set. This allows you to 1) select the model you prefer (LLaMA, OPT, BLOOM, etc) 2) change all the hyperparameters of the training process;
- [`ds_config.json`](https://github.com/nebuly-ai/nebullvm/blob/main/apps/accelerate/chatllama/artifacts/config/ds_config.json): config file to define DeepSpeed training parameters;
- [`peft_config.yaml`](https://github.com/nebuly-ai/nebullvm/blob/main/apps/accelerate/chatllama/artifacts/config/peft_config.yaml): config file to define PEFT parameters; PEFT is used for efficient training with Hugging Face models. It can be used for setting the LoRA parameters, such as rank and precision;
- [`templates.json`](https://github.com/nebuly-ai/nebullvm/blob/main/apps/accelerate/chatllama/artifacts/templates.json): synthetic data generation templates that can be used to personalize the creation of the dataset. The templates are used for feeding LLMs during the data generation. Note that the [`templates.json`](https://github.com/nebuly-ai/nebullvm/blob/main/apps/accelerate/chatllama/artifacts/templates.json) file contains a dictionary having as *keys* the training steps (`actor`, `reward`, `rlhf`) and as *values* a string containing the personalization requests of the user. For more details see the [dataset preparation](#dataset-preparation) section;
- [`main.py`](https://github.com/nebuly-ai/nebullvm/blob/main/apps/accelerate/chatllama/artifacts/main.py): file to train the model.
```bash
wget -O artifacts.zip https://nbllabartifacts.blob.core.windows.net/chatllama/artifacts.zip\?sp\=r\&st\=2023-03-08T14:53:24Z\&se\=2100-03-08T22:53:24Z\&spr\=https\&sv\=2021-06-08\&sr\=b\&sig\=jqr%2B2ZkR0SW9RjV0pDOdQ%2BDulLXLjbZ36vmNd4XxxyQ%3D
unzip artifacts.zip
```
Once you have run the command above, you will find all the artifacts in the [`artifacts/`](https://github.com/nebuly-ai/nebullvm/tree/main/apps/accelerate/chatllama/artifacts) directory. Now you can move on to the next section regarding the dataset preparation.
</details>
<details>
<summary> 2 - Dataset preparation </summary>
Before training the model, we need to prepare 3 datasets:
- `actor_training_data`: this is the JSON dataset used in the supervised fine-tuning. It consists of examples of unlabelled conversations, e.g. collection of prompts and responses;
- `rlhf_training_data`: this is the JSON dataset used for RLHF training. It consists of a collection of possible input user prompts;
- `reward_training_data`: this is the JSON dataset used to train the reward model. It consists of responses with associated scores.
In this example, we are using only publicly available datasets and synthetic generation; if you want to use your own data instead, please see the [Dataset preparation](#dataset-preparation) section.
First, let’s download the `actor_training_data` and the `rlhf_training_data`:
```bash
python artifacts/download_dataset.py ARLHF --path ./datasets --number_of_samples 200
```
Finally, let’s create the `reward_training_data` using `davinci-003` for synthetic data generation.
```bash
export OPENAI_API_KEY=YOUR_API_KEY
python artifacts/generate_rewards.py ./datasets/reward_training_data.json
```
> :warning: Creating the `reward_training_data` with `davinci-003` is not free, i.e. it costs a few $$. If you prefer avoiding external paid APIs, we suggest using HuggingFace’s models (e.g. flan_t5_xl) as described in more detail in the [Supported models](#supported-models) section.
>
> :warning: if using OpenAI's API, please be aware of OpenAI's terms of use stating that it is forbidden to "use the Services to develop foundation models or other large scale models that compete with OpenAI".
At this point, we have successfully created the 3 datasets. We can therefore move on to the final section and start the training.
</details>
<details>
<summary> 3 - Training </summary>
You can train the 3 models in separate steps:
- Train the Reward Model
```bash
python artifacts/main.py artifacts/config/config.yaml --type REWARD
```
- Pre-Train the Actor Model
```bash
python artifacts/main.py artifacts/config/config.yaml --type ACTOR
```
- Training the Actor with reinforcement learning.
```bash
python artifacts/main.py artifacts/config/config.yaml --type RL
```
or, equivalently, the 3 trainings can also be pipelined using the flag ALL.
```bash
python artifacts/main.py artifacts/config/config.yaml --type ALL
```
Note that the path to the datasets and the training hyper-parameters of the training process are specified in the [`config.yaml`](https://github.com/nebuly-ai/nebullvm/blob/main/apps/accelerate/chatllama/artifacts/config/config.yaml) file.
</details>
## Contributing and Roadmap
As an open source project in a rapidly evolving field, we welcome contributions of all kinds, including new features, improved infrastructure, and better documentation. If you're interested in contributing, please see our [Roadmap page](https://github.com/users/nebuly-ai/projects/1/views/1) for more information on how to get involved.
You can participate in the following ways:
1. Submit an issue or PR on GitHub
2. Join our [Discord group](https://discord.gg/77d5kGSa8e) to chat
## Supported models
<details><summary><b><i> Actor models </i></b></summary>
We support models that can be run efficiently with a limited amount of compute, such as LLaMA and 🤗 transformers. These are the models with less than 20B parameters currently supported :
- LLaMA: 7B and 13B, please note this library does NOT contain LLaMA’s weights; to access the weights, you need to apply to Meta's [form](https://forms.gle/jk851eBVbX1m5TAv5).
- GPTJ: 6B
- GPTNeoX: 1.3B, 20B
- **(⚠️WIP)** Flan-T5: 80M, 259M, 780M, 3B, 11B
- OPT: 125M, 359M, 1.3B, 2.7B, 6.7B, 13B
- BLOOM: 560M, 1.1B, 1.7B, 3B, 7.1B
- BLOOMZ: 560M, 1.1B, 1.7B, 3B, 7.1B
- Galactica: 125M, 1.3B, 6.7B
</details>
<details><summary><b><i> Reward models </i></b></summary>
We suggest using models under 6B from 🤗 transformers:
- GPT2: 124M, 355M, 774M, 1.5B
- OPT: 125M, 359M, 1.3B, 2.7B
- GPTJ: 6B
- BLOOMZ: 560M, 1.1B, 1.7B, 3B
- **(⚠️WIP)** OpenAssistant [pre-trained reward models](https://huggingface.co/OpenAssistant/reward-model-deberta-v3-large-v2)
</details>
<details>
<summary><b><i> Synthetic data generation models </i></b></summary>
We support both APIs from OpenAI and 🤗 transformers:
- OpenAI: text-davinci-003, gpt-3.5-turbo **(⚠️WIP)**
- HuggingFace: Flan-T5 (3B and 11B)
> :warning: if using OpenAI's API, please be aware of OpenAI's terms of use stating that it is forbidden to "use the Services to develop foundation models or other large scale models that compete with OpenAI".
If you need support for different models, please open an issue and we will get to work.
</details>
## Hardware requirements
<details><summary><b><i> Training </i></b></summary>
Larger actor models require more powerful hardware. Here is a rough hardware recommendation table, suggesting the right type of hardware for different actor model sizes:
- 125M to 1.3B → 1x Nvidia 3090/4090
- 1.3B to 3B → 1x Nvidia A100 (80Gb)
- 3B with DeepSpeed CPU off-loading → 1x Nvidia 3090/4090
- 3B to 7B with DeepSpeed ZeRO → 4x Nvidia T4
- 3B to 13B → 4x Nvidia A100 (80Gb)
- 13B to 20B with DeepSpeed ZeRO → 4x Nvidia A100 (80Gb)
- 13B to 20B → 8x Nvidia A100 (80Gb)
</details>
<details><summary><b><i> Inference </i></b></summary>
**(⚠️WIP)** When it comes to inference optimization, ChatLLaMA will support the following optimization techniques:
- [ ] DeepSpeed ZeRO
- [ ] FlexGen
- [ ] HF Accelerate
- [ ] PyTorch Vanilla
</details>
Please note that inference optimization has yet to be implemented. If you would like to contribute, please see the **issue roadmap**, community contributions are always welcome 😊.
## Dataset preparation
To successfully train a ChatLLaMA assistant, you need 3 different datasets: `actor_training_data`, `rlhf_training_data` and `reward_training_data`.
<details>
<summary> Dataset for supervised fine-tuning of the actor model </summary>
The `actor_training_data` is a collection of prompts with the associated responses as highlighted below:
```json
[
{
"user_input": "here the input of the user",
"completion": "here the model completion"
}
]
```
ChatLLaMA supports 4 different options to prepare the `actor_training_data`:
* <details><summary> Use 100% synthetic data </summary>
The dataset can be synthetically generated by running the following command:
```bash
python artifacts/generate_actor_dataset.py
```
> :warning: Note that this command will require a subscription to OpenAI. Generating the full dataset with `davinci-003` could cost approximately ~200$.
>
> :warning: if using OpenAI's API, please be aware of OpenAI's terms of use stating that it is forbidden to "use the Services to develop foundation models or other large scale models that compete with OpenAI".
Alternatively, you can generate the dataset for free using 🤗 transformers as described in the section [Supported models](#supported-models).
</details>
* <details><summary> Use one of the open source datasets with assistant interactions </summary>
Currently, we support:
- [Anthropic HH RLHF](https://huggingface.co/datasets/Anthropic/hh-rlhf): this dataset consists of structured question/answer pairs with an LLM chatbot that includes selected and rejected answers;
- [Stanford Human Preferences Dataset (SHP)](https://huggingface.co/datasets/stanfordnlp/SHP): this dataset is curated from selected "ask" subreddits, and includes questions that span a wide range of question/answer pairs based on the most upvoted responses. Please note that, unlike HH RLHF, this dataset is not intended to reduce harassment by selecting the ideal chatbot response, but instead weights the most helpful human responses.
The datasets can be downloaded running the following command:
```bash
python artifacts/download_dataset.py <dataset_name> --path <path_to_folder_for_download> --number_of_samples <N>
```
Where:
- `<dataset_name>` could be "SHP" for the StanfordNLP/SHP dataset or "ARLHF" for the Anthropic/hh-rlhf dataset;
- `<path_to_folder_for_download>` is the folder path to where the datasets are going to be created;
- `<N>` is the number of samples of which the reward_dataset.json is composed.
</details>
* <details><summary> Use 100% personalized dataset </summary>
The user provides their own personalized full dataset. Datasets must be JSON files with the following format:
```
[
{
"user_input": "here the input of the user",
"completion": "here the model completion"
}
]
```
Where the list contains multiple dictionaries, and each dictionary corresponds to a data sample. We suggest using more than 1000 data samples to run the actor training.
</details>
* <details><summary> (⚠️WIP) Create the full dataset augmenting few custom data samples </summary>
The dataset can be generated synthetically from a few prompt+response examples provided by the user (few =>10).
</details>
</details>
<details>
<summary> Dataset for RLHF </summary>
The dataset for RLHF consists just of prompt examples:
```json
[
{
"user_input": "here the example of user input"
}
]
```
It can be provided in 2 different ways:
* <details><summary> Few examples provided by the user and dataset synthetically expanded using LLM </summary>
You need to add the key `rlhf` to the [`templates.json`](https://github.com/nebuly-ai/nebullvm/blob/main/apps/accelerate/chatllama/artifacts/templates.json) file with the information about the task you want to perform and extra context needed by the LLM for the generation. Here is an example of template:
```json
{
"rlhf": "Here is the template for the generating RLHF prompts. The task we want to perform is ..."
}
```
*Note that all templates must be saved in a single JSON file named [`templates.json`](https://github.com/nebuly-ai/nebullvm/blob/main/apps/accelerate/chatllama/artifacts/templates.json)*
</details>
* <details><summary> The user provides the full dataset with possible interactions with the model </summary>
The dataset needs to contain more than 1000 prompt examples:
```json
[
{
"user_input": "here the example of user input"
}
]
```
The file must be named `rlhf_training_data.json`.
</details>
</details>
<details>
<summary><b> Dataset to train the reward model </b></summary>
The `reward_training_data` is a collection of i) prompts, ii) completions and iii) scores of the completions assigned according to the user feedback (the Human Feedback in RLHF).
```json
[{
"user_input": "...",
"completion": "...",
"score": 1
},
...
]
```
We support 3 different options to prepare the `reward_training_data`:
- Fully Synthetic Score Generation
In this case the reward dataset can be synthetically scored using a LLM as Human Feedback. We recommend the `reward_training_data` having at least 100 data samples.
```json
[{
"user_input": "...",
"completion": "...",
"score": null
},
...
]
```
A LLM model is used to assign the score to each entry.
The LLM needs a prompt template containing all the instructions to evaluate the generated text. To do this, you should add the key `reward` to the [`templates.json`](https://github.com/nebuly-ai/nebullvm/blob/main/apps/accelerate/chatllama/artifacts/templates.json) file. Here is an example:
```json
{
"reward": "Here is the template for the reward model. The rules are:\n\n1.Rule 1\n\n2. Rule 2"
}
```
If no template is provided the default one is used. You can find the default template in `artifacts/generate_rewards.py`. Note that all templates must be saved in a single JSON file named [`templates.json`](https://github.com/nebuly-ai/nebullvm/blob/main/apps/accelerate/chatllama/artifacts/templates.json).
Once you have the unlabelled dataset, you can generate the scores by running the following command:
```bash
python artifacts/generate_rewards.py <dataset_path> --model <model_to_use> --temperature <t> --max_tokens <n> --reward_template <path_to_file.json>
```
Where:
- `<dataset_path>` path to the reward dataset to be scored;
- `<model_to_use>` model to use for the reward. Default and suggested text-davinci-003 (More to come);
- `<temperature>` temperature used to score the model; temperature=0.1;
- `<max_tokens>` max_tokens of the generation;
- `<reward_template>` is the path to the [`templates.json`](https://github.com/nebuly-ai/nebullvm/blob/main/apps/accelerate/chatllama/artifacts/templates.json) file containing the template to be used for generating the reward. If no path is provided, the default template will be used.
- The user provides their personalized full dataset
Datasets must be JSON files in the following format:
```json
[
{
"user_input": "here type the user input",
"completion": "here type the completion",
"score": 4.0
},
{
"user_input": "here type the user input",
"completion": "random garbage",
"score": 0.0
}
]
```
Note that at least 100 data samples are required in this case. The file must be named `reward_training_data.json`
- **(⚠️WIP)** Few examples provided by the user and dataset synthetically expanded using LLM
</details>
# License
See the [LICENSE](https://github.com/nebuly-ai/nebullvm/blob/main/apps/accelerate/chatllama/LICENSE) file.
================================================
FILE: optimization/chatllama/artifacts/config/config.yaml
================================================
---
trainer_config:
# learning rates
actor_lr: 0.000005
critic_lr: 0.000009
# PPO Hyperparameters
actor_eps_clip: 0.2
critic_eps_clip: 0.2
beta_s: 0.02
# coefficient for the discounted rewards
gamma_discounted: 1
# path to examples to be sampled (training dataset) see rlhf_dataset.json
examples_path: "./datasets/rlhf_training_data.json"
# number of episodes and generation performed for each episode
# in the train() method
num_episodes: 100
max_timesteps: 32
# number of timesteps after which the learn() method is called
# (to update the weights)
update_timesteps: 32
# number of example sampled at each timestep
num_examples: 1
# batch and epochs for the training
batch_size: 1
epochs: 1
# number of episodes after which update the checkpoints in RL training
checkpoint_steps: 1000
# here specify the name of the actor_rl checkpoint from which resume
# during actor RL training. If null load the last one.
checkpoint_name: null
actor_config:
model: "facebook/opt-1.3b"
model_folder: "./models"
tokenizer_path: "path-to-tokenizer"
train_dataset_path: "./datasets/actor_training_data.json"
validation_dataset_path: null
# freeze the model embeddings during training
froze_embeddings: True
# use fairscale layers to build the model instead of vanilla pytorch
# only for llama
use_fairscale: False
# max sequence length for the actor (i.e. prompt + completion) it depends on
# the model used.
max_sequence_length: 2048
# max tokens generated by the actor (completion only)
max_tokens: 2048
# minimum number of tokens generated by the actor
min_tokens: 100
# additional prompt tokens to be used for template or as safety
additonal_prompt_tokens: 20
# temperature for the actor
temperature: 0.1
batch_size: 2
# number of iterations between prints
iteration_per_print: 1
lr: 0.000009
epochs: 1
# number of backpropagation after saving the checkpoints
checkpoint_steps: 5000
# number of checkpoints to keep while removing the older
# (keep memory consumption of checkpoints reasonable)
n_checkpoints_to_keep: 5
# here specify the name of the actor checkpoint from which resume
# during actor training. If null load the last one.
checkpoint_name: null
# deepspeed settings
deepspeed_enable: False
deepspeed_config_path: "./artifacts/config/ds_config.json"
# accelerate settings
accelerate_enable: False
# use_peft - the parameters of PEFT can be modified in the peft_config.yaml
peft_enable: False
peft_config_path: "./artifacts/config/peft_config.yaml"
reward_config:
# model to be chosen are gpt2-large, bart-base, longformer-base-4096
# more can be simply added in the reward.py __init__()
model: "facebook/opt-125m"
model_folder: "./models"
# hidden size of the additional ffw head to produce the scores
model_head_hidden_size: 2048
max_sequence_length: 2048
train_dataset_path: "./datasets/reward_training_data.json"
validation_dataset_path: null
batch_size: 8
epochs: 1
iteration_per_print: 1
# steps after which the checkpoint are saved
checkpoint_steps: 10000
# here specify the name of the reward checkpoint from which resume
# during reward training. If null load the last one.
checkpoint_name: null
lr: 0.000009
# deepspeed settings
deepspeed_enable: False
deepspeed_config_path: "./artifacts/config/ds_config.json"
# accelerate settings
accelerate_enable: False
critic_config:
# model to be chosen are gp2-large, bart-base, longformer-base-4096
# more can be simply added in the reward.py __init__()
model: "facebook/opt-125m"
# hidden size of the additional ffw head to produce the scores
model_head_hidden_size: 2048
max_sequence_length: 2048
model_folder: "./models"
# here specify the name of the critic checkpoint from which resume
# during critic training. If null load the last one.
checkpoint_name: null
================================================
FILE: optimization/chatllama/artifacts/config/ds_config.json
================================================
{
"train_batch_size": 8,
"gradient_accumulation_steps": 1,
"optimizer": {
"type": "Adam",
"params": {
"lr": 0.00015
}
},
"fp16": {
"enabled": false,
"auto_cast": false,
"loss_scale": 0,
"initial_scale_power": 16,
"loss_scale_window": 1000,
"hysteresis": 2,
"min_loss_scale": 1
},
"zero_optimization": {
"stage": 2,
"allgather_partitions": true,
"allgather_bucket_size": 5e8,
"overlap_comm": false,
"reduce_scatter": true,
"reduce_bucket_size": 5e8,
"contiguous_gradients" : true,
"offload_param": {
"device": "cpu",
"nvme_path": "/local_nvme",
"pin_memory": true,
"buffer_count": 5,
"buffer_size": 1e8,
"max_in_cpu": 1e9
},
"offload_optimizer": {
"device": "cpu",
"nvme_path": "/local_nvme",
"pin_memory": true,
"buffer_count": 4,
"fast_init": false
},
"stage3_max_live_parameters" : 1e9,
"stage3_max_reuse_distance" : 1e9,
"stage3_prefetch_bucket_size" : 5e8,
"stage3_param_persistence_threshold" : 1e6,
"sub_group_size" : 1e12,
"elastic_checkpoint" : true,
"stage3_gather_16bit_weights_on_model_save": true,
"ignore_unused_parameters": true,
"round_robin_gradients": true
}
}
================================================
FILE: optimization/chatllama/artifacts/config/peft_config.yaml
================================================
---
inference_mode: False
r: 8
lora_alpha: 32
lora_dropout: 0.1
================================================
FILE: optimization/chatllama/artifacts/datasets/actor_dataset.json
================================================
[
{
"user_input": "here the input of the user",
"completion": "here the model completion"
}
]
================================================
FILE: optimization/chatllama/artifacts/datasets/reward_dataset.json
================================================
[
{
"user_input": "here type the user input",
"completion": "here type the completion",
"score": 4.0
},
{
"user_input": "here type the user input",
"completion": "if score is null, it can be evaluated by davinci using reward_trainer.distill()",
"score": null
}
]
================================================
FILE: optimization/chatllama/artifacts/datasets/rlhf_dataset.json
================================================
[
{
"user_input": "here the example of user input"
}
]
================================================
FILE: optimization/chatllama/artifacts/download_dataset.py
================================================
import argparse
import os
from chatllama.rlhf.dataset import AnthropicRLHF, StanfordNLPSHPDataset
if __name__ == "__main__":
    # Command-line entry point: download one of the supported public
    # datasets and save it in the requested folder.
    parser = argparse.ArgumentParser(
        prog="download_dataset.py",
        description="Download a public dataset for ChatLLaMA training",
    )
    parser.add_argument(
        "dataset_name",
        help=(
            "Dataset to download. SHP: stanfordnlp/SHP or "
            "ARLHF: Anthropic/hh-rlhf"
        ),
        choices=["SHP", "ARLHF"],
    )
    parser.add_argument(
        "-p",
        "--path",
        help="Specify the path for the dataset",
        default="./datasets",
    )
    parser.add_argument(
        "-n",
        "--number_of_samples",
        help="Specify the number of samples for the reward dataset",
        # let argparse validate the value and report a clean usage error
        type=int,
        default=200,
    )
    args = parser.parse_args()

    # makedirs (unlike mkdir) also creates missing parent folders
    os.makedirs(args.path, exist_ok=True)

    # `choices` above guarantees that the name is one of these keys
    dataset_classes = {
        "SHP": StanfordNLPSHPDataset,
        "ARLHF": AnthropicRLHF,
    }
    dataset = dataset_classes[args.dataset_name]()
    dataset.save_dataset(args.path, args.number_of_samples)
================================================
FILE: optimization/chatllama/artifacts/extend_rlhf_dataset.py
================================================
import os.path
import numpy as np
from langchain import OpenAI, LLMChain, PromptTemplate
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
def _get_template_and_variables(prompt: str, with_examples: bool):
if with_examples:
template = prompt + "\n\nExample: {example}"
variables = ["example"]
else:
template = prompt
variables = []
return template, variables
def use_langchain_model(
    user_prompt: str,
    model_name: str,
    temperature: float = 0.7,
    max_tokens: int = 2048,
    with_examples: bool = False,
) -> LLMChain:
    """Build a LangChain ``LLMChain`` around an OpenAI model.

    Args:
        user_prompt: Prompt text used as the template of the chain.
        model_name: Name of the OpenAI model.
        temperature: Sampling temperature forwarded to ``OpenAI``.
        max_tokens: Token limit forwarded to ``OpenAI``.
        with_examples: Whether the template gets an ``{example}`` slot.

    Returns:
        The chain combining the LLM with the prompt template.
    """
    openai_llm = OpenAI(
        model_name=model_name,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    template_text, variable_names = _get_template_and_variables(
        user_prompt, with_examples=with_examples
    )
    chain_prompt = PromptTemplate(
        template=template_text, input_variables=variable_names
    )
    return LLMChain(llm=openai_llm, prompt=chain_prompt)
class HuggingFaceChain:
    """Small chain-like wrapper around a Hugging Face seq2seq model.

    It exposes a ``run(**kwargs)`` method so it can be used in place of the
    LangChain chain returned by ``use_langchain_model``.
    """

    def __init__(
        self, model_name: str, user_prompt: str, with_examples: bool = False
    ):
        """Load model and tokenizer and prepare the prompt template.

        Args:
            model_name: Identifier passed to ``from_pretrained``.
            user_prompt: Prompt text used as the template.
            with_examples: Whether the template gets an ``{example}`` slot.
        """
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        template_text, variable_names = _get_template_and_variables(
            user_prompt, with_examples=with_examples
        )
        self.prompt = template_text
        self.input_variables = variable_names

    def run(self, **kwargs):
        """Fill the template with ``kwargs`` and return the decoded output.

        Only the first generated sequence is decoded.
        """
        filled_prompt = self.prompt.format(**kwargs)
        encoded = self.tokenizer.encode(filled_prompt, return_tensors="pt")
        generated = self.model.generate(
            encoded, max_length=100, num_beams=5, early_stopping=True
        )
        first_sequence = generated[0]
        return self.tokenizer.decode(first_sequence, skip_special_tokens=True)
def use_huggingface_model(
    user_prompt: str,
    model_name: str,
    with_examples: bool = False,
) -> HuggingFaceChain:
    """Create a ``HuggingFaceChain`` for the given model and prompt.

    Mirrors ``use_langchain_model`` so both backends are built the same way.
    """
    hf_chain = HuggingFaceChain(
        model_name=model_name,
        user_prompt=user_prompt,
        with_examples=with_examples,
    )
    return hf_chain
def main():
    """Extend the RLHF prompt dataset with LLM-generated prompts.

    Reads ``rlhf_training_data.json`` from ``--data_dir`` and the ``rlhf``
    entry of the ``--templates`` JSON file, generates ``--num_prompts`` new
    prompts (each one seeded with a randomly drawn example) and writes the
    enlarged list back to the same dataset file.

    Raises:
        ValueError: if the templates file has no ``rlhf`` entry, or if the
            model name contains neither "davinci" nor "t5".
    """
    import json
    from argparse import ArgumentParser

    parser = ArgumentParser()
    parser.add_argument(
        "--model",
        type=str,
        help="Model name.",
        default="google/flan-t5-xl",
    )
    # Both paths are needed below; requiring them gives a clear usage error
    # instead of a TypeError when one is omitted.
    parser.add_argument(
        "--templates", type=str, required=True, help="Path to templates."
    )
    parser.add_argument("--num_prompts", type=int, default=1000)
    parser.add_argument(
        "--data_dir",
        type=str,
        required=True,
        help="Path where data are stored",
    )
    args = parser.parse_args()

    model_name = args.model
    dataset_path = os.path.join(args.data_dir, "rlhf_training_data.json")

    with open(dataset_path, "r") as f:
        examples = json.load(f)

    with open(args.templates, "r") as f:
        templates = json.load(f)

    user_prompt = templates.get("rlhf")
    if user_prompt is None:
        raise ValueError("No rlhf template found.")

    if "davinci" in model_name:
        chain = use_langchain_model(
            user_prompt, model_name, with_examples=True
        )
    else:
        if "t5" not in model_name:
            raise ValueError("Only Flan-t5 models are supported for HF.")
        chain = use_huggingface_model(
            user_prompt, model_name, with_examples=True
        )

    # Generated prompts are appended to `examples`, so later iterations may
    # also draw a previously generated prompt as their seed.
    for _ in range(args.num_prompts):
        example = np.random.choice(examples)
        new_example = chain.run(example=example["user_input"])
        examples.append({"user_input": new_example})

    with open(dataset_path, "w") as f:
        json.dump(examples, f)


if __name__ == "__main__":
    main()
================================================
FILE: optimization/chatllama/artifacts/generate_actor_dataset.py
================================================
from langchain import OpenAI, LLMChain, PromptTemplate
from langchain.chains.conversation.memory import (
ConversationBufferWindowMemory,
)
from chatllama.langchain_modules.prompt_templates import (
PERSON_CHATBOT_TEMPLATE,
AI_CHATBOT_TEMPLATE,
)
# Number of Human/AI exchanges generated for every conversation.
CONVERSATION_LENGTH = 20


def create_conversation(human_agent: LLMChain, bot_agent: LLMChain):
    """Let the two agents talk to each other and return the transcript.

    Args:
        human_agent: Chain playing the human; called with ``chatbot_input``.
        bot_agent: Chain playing the assistant; called with ``human_input``.

    Returns:
        The transcript as newline-joined ``Human: ...`` / ``AI: ...`` lines.
    """
    turns = []
    # The human agent speaks first, so its first input is an empty reply.
    last_bot_reply = ""
    for _ in range(CONVERSATION_LENGTH):
        human_turn = human_agent.run(chatbot_input=last_bot_reply)
        last_bot_reply = bot_agent.run(human_input=human_turn)
        turns += [f"Human: {human_turn}", f"AI: {last_bot_reply}"]
    return "\n".join(turns)
def build_agents():
    """Create the simulated human and the chatbot agents.

    Both agents share one ``OpenAI`` LLM and each gets its own
    ``ConversationBufferWindowMemory(k=4)``.

    Returns:
        A ``(human_agent, bot_agent)`` tuple of ``LLMChain`` objects.
    """
    # be aware that too long completions will not fit the sequence length
    # of possible critic or reward models ...
    shared_llm = OpenAI(max_tokens=2048, temperature=0.7)

    def _make_agent(template_spec):
        # one chain per role, each with a separate memory instance
        return LLMChain(
            llm=shared_llm,
            prompt=PromptTemplate(**template_spec),
            memory=ConversationBufferWindowMemory(k=4),
        )

    person = _make_agent(PERSON_CHATBOT_TEMPLATE)
    assistant = _make_agent(AI_CHATBOT_TEMPLATE)
    return person, assistant
def get_sub_conversations(conversation: str, system_prompt: str):
    """Split a transcript into one training sample per AI reply.

    For every ``AI:`` reply, the sample's ``user_input`` is ``system_prompt``
    followed by everything that precedes the reply, and ``completion`` is the
    reply text up to the next ``Human:`` marker, stripped of whitespace.

    Returns:
        A list of ``{"user_input": ..., "completion": ...}`` dictionaries.
    """
    segments = conversation.split("AI:")
    samples = []
    for turn, reply_segment in enumerate(segments[1:], start=1):
        context = system_prompt + "AI:".join(segments[:turn])
        reply, _, _ = reply_segment.partition("Human:")
        samples.append({"user_input": context, "completion": reply.strip()})
    return samples
def main():
    """Generate synthetic conversations and build the actor dataset.

    Each conversation is written to ``conversation_<i>.txt`` inside
    ``--output_dir``; the transcripts are then read back, split into
    samples and saved together as ``actor_training_data.json``.
    """
    import json
    import os
    from argparse import ArgumentParser

    cli = ArgumentParser()
    cli.add_argument("--num_conversations", type=int, default=1000)
    cli.add_argument("--output_dir", type=str, default="conversations")
    cli.add_argument("--templates", type=str, default=None)
    options = cli.parse_args()

    # The optional "actor" template is prepended to every user input.
    system_prompt = ""
    if options.templates is not None:
        with open(options.templates, "r") as f:
            loaded_templates = json.load(f)
        system_prompt = loaded_templates["actor"]

    out_dir = options.output_dir
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    def transcript_path(index):
        return os.path.join(out_dir, f"conversation_{index}.txt")

    # first pass: generate the conversations and store the raw transcripts
    for index in range(options.num_conversations):
        human_agent, bot_agent = build_agents()
        transcript = create_conversation(human_agent, bot_agent)
        with open(transcript_path(index), "w") as f:
            f.write(transcript)

    # convert the conversations to a single json file
    samples = []
    for index in range(options.num_conversations):
        with open(transcript_path(index), "r") as f:
            transcript = f.read()
        samples.extend(get_sub_conversations(transcript, system_prompt))

    dataset_file = os.path.join(out_dir, "actor_training_data.json")
    with open(dataset_file, "w") as f:
        json.dump(samples, f)


if __name__ == "__main__":
    main()
================================================
FILE: optimization/chatllama/artifacts/generate_rewards.py
================================================
import argparse
import json
from langchain import OpenAI, LLMChain, PromptTemplate
class ScoreGenerator:
    """Assign scores to a reward dataset using an OpenAI LLM via LangChain."""

    def __init__(
        self,
        llm_model: str,
        llm_temperature: float,
        llm_max_tokens: int,
        reward_template: dict,
    ) -> None:
        """Create the LLM chain used for scoring.

        Args:
            llm_model: Name of the OpenAI model.
            llm_temperature: Sampling temperature of the model.
            llm_max_tokens: Token limit of the model; also used to skip
                samples whose prompt is estimated to be too long.
            reward_template: Keyword arguments for ``PromptTemplate``
                (``template`` and ``input_variables``).
        """
        self.llm_max_tokens = llm_max_tokens
        self.llm_temperature = llm_temperature
        self.llm_model = llm_model

        # initialize LLM and LangChain
        openai_llm = OpenAI(
            model_name=llm_model,
            temperature=llm_temperature,
            max_tokens=llm_max_tokens,
        )

        # Customize your own reward template by changing the
        # prompt_template
        prompt_template = PromptTemplate(**reward_template)
        print(prompt_template)
        self.llm = LLMChain(llm=openai_llm, prompt=prompt_template)

    def distill(
        self,
        dataset_path: str,
    ) -> None:
        """Score every unscored sample and save the dataset in place.

        Samples that already have a score are left untouched. Samples whose
        prompt is too long, or for which the LLM answer is not a number,
        stay unscored and are removed from the saved dataset.

        Args:
            dataset_path: Path to the JSON reward dataset; it is overwritten
                with the scored samples.
        """
        print("Assigning scores to the reward dataset...")

        # load the dataset
        with open(dataset_path, "r") as f:
            train_data = json.load(f)

        # for each element of the dataset, assign a score.
        for i, data in enumerate(train_data):
            if data.get("score", None) is not None:
                continue

            user_input = data["user_input"]
            completion = data["completion"]
            print(
                f"#### Data {i}:\n"
                f"#### User_input:\n {user_input}\n"
                f"#### Completion:\n {completion}\n"
            )

            # rough token estimate: number of words divided by 0.75
            prompt_tokens = user_input + completion + self.llm.prompt.template
            prompt_len = int(len(prompt_tokens.split(" ")) / 0.75)
            # 80% of the max length as safety margin
            if prompt_len > self.llm_max_tokens * 0.8:
                print(
                    f"The prompt of the data {i} is too long\n"
                    f"tokens: {prompt_len}\n"
                    f"max_tokens: {self.llm_max_tokens * 0.8}"
                )
                continue

            score = self.llm.run(
                user_input=user_input,
                completion=completion,
            ).strip()
            # TODO: extract from score the float value with a regex
            try:
                score = float(score)
            except ValueError:
                print(
                    f"The score returned by the LLM for the "
                    f"data {i} is not a float:\n{score}"
                )
                continue
            data["score"] = score
            print(f"### Score: {score} \n\n")

        # remove all the data that have no score; compare against None so
        # that a legitimate score of 0 is not discarded
        train_data = [
            data for data in train_data if data.get("score", None) is not None
        ]

        # save the dataset back
        print("Writing the updated dataset back to disk ... ")
        with open(dataset_path, "w") as f:
            json.dump(train_data, f)

        print("Score Assignment Completed")
if __name__ == "__main__":
    # Default prompt used to score a (user_input, completion) pair when no
    # custom "reward" template is supplied. Each fragment ends with a space
    # or a newline so the concatenated sentences do not run together.
    REWARD_TEMPLATE = dict(
        template=(
            "You have to evaluate the following chat with a score "
            "between 0 and 5.\n"
            "You MUST evaluate: text quality, content quality and "
            "coherence.\n"
            "You MUST return only the number that represents your "
            "judgment.\n"
            "The input of the user is: {user_input}\n"
            "The output of the chatbot is: {completion}\n"
            "The score is:\n"
        ),
        input_variables=["user_input", "completion"],
    )

    # Setup argument parser
    parser = argparse.ArgumentParser(
        prog="generate_rewards.py",
        description="Generate rewards using LangChain and LLMs",
    )

    parser.add_argument("dataset_path", help="Path to the dataset")
    parser.add_argument(
        "-m",
        "--model",
        help="Specify the model to be used",
        default="text-davinci-003",
    )
    # `type=` keeps command-line values numeric; without it they arrive as
    # strings and break the arithmetic on max_tokens in ScoreGenerator.
    parser.add_argument(
        "-t",
        "--temperature",
        help="Specify the temperature of the score assignment",
        type=float,
        default=0.5,
    )
    parser.add_argument(
        "-k",
        "--max_tokens",
        help="Specify the max tokens of the score assignment",
        type=int,
        default=2048,
    )
    parser.add_argument(
        "-r",
        "--reward_template",
        help=(
            "Path to the templates JSON file containing the reward "
            "template to be used"
        ),
        default=None,
    )

    # parse arguments
    args = parser.parse_args()

    rw_template = REWARD_TEMPLATE
    if args.reward_template:
        try:
            # documented usage: a path to the templates.json file
            with open(args.reward_template, "r") as f:
                templates = json.load(f)
        except OSError:
            # backward compatibility: the value used to be parsed as an
            # inline JSON string
            templates = json.loads(args.reward_template)

        custom_template = templates.get("reward", None)
        if isinstance(custom_template, str):
            # templates.json stores the template as plain text; wrap it in
            # the keyword arguments expected by PromptTemplate
            rw_template = dict(
                template=custom_template,
                input_variables=REWARD_TEMPLATE["input_variables"],
            )
        elif custom_template is not None:
            rw_template = custom_template

    score_generator = ScoreGenerator(
        args.model, args.temperature, args.max_tokens, rw_template
    )

    score_generator.distill(args.dataset_path)
================================================
FILE: optimization/chatllama/artifacts/main.py
================================================
import argparse
from chatllama.rlhf.actor import ActorTrainer
from chatllama.rlhf.config import Config
from chatllama.rlhf.dataset import BaseDataset
from chatllama.rlhf.reward import RewardTrainer
from chatllama.rlhf.trainer import RLTrainer
# Command-line entry point: run one training stage (or the whole pipeline)
# described by a config.yaml file.

# Setup argument parser
parser = argparse.ArgumentParser(
    prog="main.py", description="RLHF Training of ChatBots"
)

parser.add_argument("configfile", help="Path to config.yaml file")
parser.add_argument(
    "-t",
    "--type",
    help=(
        "Specify the training type. "
        "RL: Training of the model using RL. "
        "ACTOR: Training of the actor model. "
        "REWARD: Training of the reward model. "
        "ALL: The whole pipeline with the three training steps"
    ),
    default="ALL",
    choices=["ALL", "RL", "ACTOR", "REWARD"],
)
parser.add_argument(
    "-a", "--actor", help="Specify actor model by name", default=None
)
parser.add_argument(
    "-r", "--reward", help="Specify reward model by name", default=None
)
parser.add_argument(
    "--local_rank",
    help="Local rank parameter for deepspeed",
    default=None,
)

# parse arguments
args = parser.parse_args()

# load config.yaml with all the project informations
config = Config(args.configfile)

# overwrite config if specified differently
if args.actor is not None:
    config.actor.model = args.actor
if args.reward is not None:
    config.reward.model = args.reward

# perform the desired training
if args.type == "RL":
    # the actor sequence length is clamped to the shortest one among the
    # actor, reward and critic models
    max_seq = min(
        config.actor.max_sequence_length,
        config.reward.max_sequence_length,
        config.critic.max_sequence_length,
    )
    config.actor.max_sequence_length = max_seq
    BaseDataset.clean_dataset(config)
    rlhf_trainer = RLTrainer(config)
    rlhf_trainer.train()
elif args.type == "ACTOR":
    BaseDataset.clean_dataset(config.actor)
    actor_trainer = ActorTrainer(config.actor)
    actor_trainer.train()
elif args.type == "REWARD":
    BaseDataset.clean_dataset(config.reward)
    reward_trainer = RewardTrainer(config.reward)
    reward_trainer.train()
elif args.type == "ALL":
    # NOTE(review): unlike the single-stage branches, this one neither
    # cleans the datasets nor clamps the actor sequence length - confirm
    # whether that is intended.
    reward_trainer = RewardTrainer(config.reward)
    reward_trainer.train()
    actor_trainer = ActorTrainer(config.actor)
    actor_trainer.train()
    rlhf_trainer = RLTrainer(config)
    rlhf_trainer.train()
================================================
FILE: optimization/chatllama/artifacts/templates.json
================================================
{
"rlhf": "You are an AI assistant used to generate possible prompts instructions for a chatbot, here is an example of conversation."
}
================================================
FILE: optimization/chatllama/chatllama/__init__.py
================================================
================================================
FILE: optimization/chatllama/chatllama/langchain_modules/__init__.py
================================================
================================================
FILE: optimization/chatllama/chatllama/langchain_modules/prompt_templates.py
================================================
# Prompt used to ask an LLM for a 0-5 score of a completion. The adjacent
# string fragments are concatenated, so each one ends with a space or a
# newline to keep the words from running together.
REWARD_TEMPLATE = dict(
    template=(
        "You have to evaluate the following chat with a score "
        "between 0 and 5.\n"
        "You MUST evaluate: text quality, content quality and "
        "coherence.\n"
        "You MUST return only the number that represents your "
        "judgment.\n"
        "The assignment is:\n{user_input}\n"
        "The completion is:\n{completion}\n"
    ),
    input_variables=["user_input", "completion"],
)
AI_CHATBOT_TEMPLATE = dict(
template=(
"Assistant is a large language model trained by Meta and Nebuly.ai\n"
"Assistant is designed to be able to assist with a wide range of "
"tasks, from answering simple questions to providing in-depth "
"explanations and discussions on a wide range of topics. As a "
"language model, Assistant is able to generate human-like text "
"based on the input it receives, allowing it to engage in "
"natural-sounding conversations and provide responses that are "
"coherent and relevant to the topic at hand.\n\n"
"Assistant is constantly learning and improving, and its capabilities "
"are constantly evolving. It is able to process and understand large "
"amounts of text, and can use this knowledge to provide accurate and "
"informative responses to a wide range of questions. Additionally, "
"Assistant is able to generate its own text based on the input it "
"receives, allowing it to engage in discussions and provide "
"explanations and descriptions on a wide range of topics.\n\n"
"Overall, Assistant is a powerful tool that can help with a wide "
"range of tasks and provide valuable insights and information on a "
"wide range of topics. Whether you need help with a specific "
"question or just want to have a conversation about a particular "
"topic, Assistant is here to assist.\n\n{history}\n\n"
"Human: {human_input}\n"
"Assistant:"
),
input_variables=["history", "human_input"],
)
# Prompt that makes the model play the human side of a conversation with the
# chatbot. Placeholders: {history} (previous turns) and {chatbot_input}
# (last chatbot message). Each fragment carries its own trailing space
# because adjacent string literals are joined without a separator.
PERSON_CHATBOT_TEMPLATE = dict(
    template=(
        "You are a human chatting with a chatbot. The chatbot is a large "
        "language model trained by Meta and Nebuly-ai\n"
        "The chatbot is designed to be able to assist you with a wide range "
        "of tasks, from answering simple questions to providing in-depth "
        "explanations and discussions on a wide range of topics. You are a "
        "human and you are testing the chatbot. Ask the chatbot questions and "
        "see how it responds. You can also ask the chatbot to tell you a "
        "story."
        "\n\n{history}\n\n"
        "Chatbot: {chatbot_input}\n"
        "Human:"
    ),
    input_variables=["history", "chatbot_input"],
)
================================================
FILE: optimization/chatllama/chatllama/llama_model.py
================================================
# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms
# of the GNU General Public License version 3.
import json
import math
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Tuple, List, Union, Optional
import deepspeed
import torch
import torch.distributed
import torch.nn.functional as F
import fairscale.nn.model_parallel.initialize as fs_init
from fairscale.nn.model_parallel.initialize import initialize_model_parallel
from fairscale.nn.model_parallel.layers import (
ParallelEmbedding,
RowParallelLinear,
ColumnParallelLinear,
)
from torch import nn
from transformers import AutoTokenizer
from llama import Tokenizer
from llama.generation import sample_top_p
class MyTokenizer:
    """Hugging Face tokenizer wrapped so that it exposes the same attributes
    and methods as Meta's tokenizer. Only meant for testing purposes.
    """

    def __init__(self, model_path: Optional[str] = None):
        # fall back to the gpt2 tokenizer when no path is provided
        source = "gpt2" if model_path is None else model_path
        self.sp_model = AutoTokenizer.from_pretrained(source)
        self.n_words = self.sp_model.vocab_size
        self.bos_id = self.sp_model.bos_token_id
        self.eos_id = self.sp_model.eos_token_id
        # the eos token is reused as padding token
        self.pad_id = self.sp_model.eos_token_id

    def encode(
        self,
        s: str,
        bos: bool = True,
        eos: bool = True,
        truncation: bool = True,
    ) -> List[int]:
        """Return the token ids of `s`, optionally wrapped in bos/eos ids."""
        token_ids = list(self.sp_model.encode(s, truncation=truncation))
        prefix = [self.bos_id] if bos else []
        suffix = [self.eos_id] if eos else []
        return prefix + token_ids + suffix

    def decode(self, t: List[int]) -> str:
        """Return the text decoded from the token ids `t`."""
        return self.sp_model.decode(torch.as_tensor(t))
class HFLikeTokenizer:
    """Wrapper that makes a LLaMA-style tokenizer callable like a Hugging
    Face tokenizer: calling it returns a dict with `input_ids` and
    `attention_mask` tensors.
    """

    def __init__(self, tokenizer: Tokenizer):
        self.tokenizer = tokenizer
        # assign attributes from real tokenizer to masked one
        self.pad_id = self.tokenizer.pad_id
        self.eos_id = self.tokenizer.eos_id
        self.bos_id = self.tokenizer.bos_id
        # mask attribute to be similar to hugging face
        self.eos_token_id = self.tokenizer.eos_id
        self.pad_token_id = self.tokenizer.pad_id
        # to match hugging face attribute
        # NOTE(review): same value as the assignment just above (redundant)
        self.pad_token_id = self.pad_id

    def create_sequence_mask(self, tokens: torch.Tensor) -> torch.Tensor:
        """Return a tensor shaped like `tokens` holding 0 where the token is
        the pad, bos or eos id and 1 everywhere else.
        """
        mask = torch.where(
            tokens == self.tokenizer.pad_id,
            torch.zeros_like(tokens),
            torch.ones_like(tokens),
        )
        mask = torch.where(
            tokens == self.tokenizer.bos_id, torch.zeros_like(tokens), mask
        )
        mask = torch.where(
            tokens == self.tokenizer.eos_id, torch.zeros_like(tokens), mask
        )
        return mask

    def __call__(self, texts: Union[List[str], str], *args, **kwargs):
        """Tokenize a single string or a list of strings.

        Extra positional and keyword arguments are accepted but ignored.

        Returns:
            dict: `input_ids` (long tensor of token ids) and
                `attention_mask`. For a single string both are 1-D and the
                mask is all ones; for a list both are 2-D
                (len(texts), longest encoded length).
        """
        if isinstance(texts, str):
            text = self.tokenizer.encode(texts, bos=True, eos=True)
            tokens = torch.tensor(text).long()
            mask = torch.ones_like(tokens)
        else:
            texts = [
                self.tokenizer.encode(text, bos=True, eos=True)
                for text in texts
            ]
            max_len = max(len(text) for text in texts)
            # start from a tensor full of pad ids and right-align every
            # sequence, i.e. the padding ends up on the left
            tokens = torch.full(
                (len(texts), max_len), self.tokenizer.pad_id
            ).long()
            for i, text in enumerate(texts):
                tokens[i, -len(text) :] = torch.tensor(  # noqa E203
                    text
                ).long()
            # TODO: decide how eos and bos should be handled - i need to mask
            # them? or not?
            mask = self.create_sequence_mask(tokens)
            for i in range(tokens.shape[0]):
                # tokens of row i that are neither pad, bos nor eos
                current_tokens = tokens[i, mask[i] == 1]
                # copy them into the slice that ends one position before the
                # last column; the last column itself is left untouched
                # NOTE(review): presumably keeps the trailing eos in place
                # while overwriting the bos - confirm the intent
                tokens[
                    i, -len(current_tokens) - 1 : -1  # noqa E203
                ] = current_tokens
            # recompute the mask on the shifted tokens
            mask = self.create_sequence_mask(tokens)
            # convert `pad_id` from -1 to 0, otherwise embedding will cause out
            # of bounds.
            tokens = torch.where(
                tokens == self.tokenizer.pad_id,
                torch.zeros_like(tokens),
                tokens,
            )
        output = {
            "input_ids": tokens,
            "attention_mask": mask,
        }
        return output

    def decode(self, tokens):
        """Decode `tokens` with the wrapped tokenizer."""
        return self.tokenizer.decode(tokens)
@dataclass
class ModelArgs:
    """This class is a modification of the ModelArgs class implemented in
    the LLaMA repo. The class has been modified for training, since the
    original one just supports inference.
    """

    # embedding / hidden size; the per-head size is dim // n_heads
    dim: int = 512
    # number of TransformerBlock layers stacked in the Transformer
    n_layers: int = 8
    # number of attention heads
    n_heads: int = 8
    # defined later by tokenizer
    vocab_size: int = -1
    # make SwiGLU hidden layer size multiple of large power of 2
    multiple_of: int = 256
    # epsilon passed to every RMSNorm layer
    norm_eps: float = 1e-5
    # first dimension of the pre-allocated key/value caches
    max_batch_size: int = 32
    # second dimension of the key/value caches; the rotary frequency table
    # is precomputed for twice this many positions
    max_seq_len: int = 1024
    # added attributes
    # when True the token embedding parameters get requires_grad = False
    froze_embeddings: bool = True
    # when True fairscale model-parallel layers are used instead of the
    # plain torch.nn ones
    use_fairscale: bool = True
class RMSNorm(torch.nn.Module):
    """Root-mean-square layer normalization with a learnable per-feature
    scale, adapted from the RMSNorm class of the LLaMA repo so that it can
    be used for training.
    """

    def __init__(self, dim: int, eps: float = 1e-6):
        super().__init__()
        self.eps = eps
        # learnable scale, initialised to one
        self.weight = nn.Parameter(torch.ones(dim))

    def _norm(self, x):
        # divide by the root mean square computed over the last dimension
        mean_square = x.pow(2).mean(-1, keepdim=True)
        return x * torch.rsqrt(mean_square + self.eps)

    def forward(self, x):
        # normalise in float32, then cast back to the input dtype
        normalized = self._norm(x.float())
        return normalized.type_as(x) * self.weight
def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0):
    """Precompute the rotary-embedding table as unit complex numbers.

    Args:
        dim (int): Size of one attention head.
        end (int): Number of positions to precompute.
        theta (float): Base of the geometric frequency progression.

    Returns:
        torch.Tensor: complex64 tensor of shape (end, dim // 2) whose entry
            [p, k] is exp(i * p * theta ** (-2k / dim)).
    """
    half = dim // 2
    exponents = torch.arange(0, dim, 2)[:half].float() / dim
    inv_freq = 1.0 / (theta**exponents)
    positions = torch.arange(end, device=inv_freq.device)  # type: ignore
    angles = torch.outer(positions, inv_freq).float()  # type: ignore
    # unit modulus, phase given by position * frequency
    return torch.polar(torch.ones_like(angles), angles)  # complex64
def reshape_for_broadcast(freqs_cis: torch.Tensor, x: torch.Tensor):
ndim = x.ndim
assert 0 <= 1 < ndim
assert freqs_cis.shape == (x.shape[1], x.shape[-1])
shape = [
d if i == 1 or i == ndim - 1 else 1 for i, d in enumerate(x.shape)
]
return freqs_cis.view(*shape)
def apply_rotary_emb(
    xq: torch.Tensor,
    xk: torch.Tensor,
    freqs_cis: torch.Tensor,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Apply the rotary position embedding to queries and keys.

    The last dimension of `xq` and `xk` is read as consecutive
    (real, imaginary) pairs, multiplied by `freqs_cis` and flattened back.
    The results are cast back to the dtype of the respective input.
    """

    def _as_complex(t: torch.Tensor) -> torch.Tensor:
        # pair up the last dimension and view it as complex numbers
        return torch.view_as_complex(t.float().reshape(*t.shape[:-1], -1, 2))

    q_complex = _as_complex(xq)
    k_complex = _as_complex(xk)
    rotation = reshape_for_broadcast(freqs_cis, q_complex)
    q_rotated = torch.view_as_real(q_complex * rotation).flatten(3)
    k_rotated = torch.view_as_real(k_complex * rotation).flatten(3)
    return q_rotated.type_as(xq), k_rotated.type_as(xk)
class Attention(nn.Module):
    """This class is a modification of the Attention class implemented in
    the LLaMA repo. The class has been modified for training, since the
    original one just supports inference.

    The key/value cache is external: when `cache_k`/`cache_v` are passed to
    `forward` they are updated, used to compute the attention and returned;
    when they are omitted (training) no cache is involved.
    """

    def __init__(self, args: ModelArgs):
        super().__init__()
        if args.use_fairscale:
            # heads are split across the model-parallel ranks
            self.n_local_heads = (
                args.n_heads // fs_init.get_model_parallel_world_size()
            )
        else:
            self.n_local_heads = args.n_heads
        self.head_dim = args.dim // args.n_heads
        if args.use_fairscale:
            self.wq = ColumnParallelLinear(
                args.dim,
                args.n_heads * self.head_dim,
                bias=False,
                gather_output=False,
                init_method=lambda x: x,
            )
            self.wk = ColumnParallelLinear(
                args.dim,
                args.n_heads * self.head_dim,
                bias=False,
                gather_output=False,
                init_method=lambda x: x,
            )
            self.wv = ColumnParallelLinear(
                args.dim,
                args.n_heads * self.head_dim,
                bias=False,
                gather_output=False,
                init_method=lambda x: x,
            )
            self.wo = RowParallelLinear(
                args.n_heads * self.head_dim,
                args.dim,
                bias=False,
                input_is_parallel=True,
                init_method=lambda x: x,
            )
        else:
            self.wq = nn.Linear(
                args.dim, args.n_heads * self.head_dim, bias=False
            )
            self.wk = nn.Linear(
                args.dim, args.n_heads * self.head_dim, bias=False
            )
            self.wv = nn.Linear(
                args.dim, args.n_heads * self.head_dim, bias=False
            )
            self.wo = nn.Linear(
                args.n_heads * self.head_dim, args.dim, bias=False
            )
        self.dim_cache = (
            args.max_batch_size,
            args.max_seq_len,
            self.n_local_heads,
            self.head_dim,
        )
        # Kept only so that the attributes still exist for external code;
        # `forward` works exclusively on the caches it receives.
        self.cache_k = torch.zeros(self.dim_cache).cuda()
        self.cache_v = torch.zeros(self.dim_cache).cuda()

    def forward(
        self,
        x: torch.Tensor,
        kv_mask: torch.Tensor,
        freqs_cis: torch.Tensor,
        cache_k: Optional[torch.Tensor] = None,
        cache_v: Optional[torch.Tensor] = None,
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[torch.Tensor]]:
        """Compute self-attention over `x`.

        Args:
            x (torch.Tensor): Input of shape (bsz, seqlen, dim).
            kv_mask (torch.Tensor): Additive mask summed to the attention
                scores before the softmax; skipped when None.
            freqs_cis (torch.Tensor): Rotary embedding table for the
                positions of `x`.
            cache_k (Optional[torch.Tensor]): External key cache; must be
                given together with `cache_v`.
            cache_v (Optional[torch.Tensor]): External value cache.

        Returns:
            Tuple: the attention output and the (updated) key and value
                caches, or `None, None` when no cache was given.

        Raises:
            ValueError: If only one of `cache_k` and `cache_v` is given.
        """
        start_pos = 0  # Temporary
        bsz, seqlen, _ = x.shape
        xq, xk, xv = self.wq(x), self.wk(x), self.wv(x)
        xq = xq.view(bsz, seqlen, self.n_local_heads, self.head_dim)
        xk = xk.view(bsz, seqlen, self.n_local_heads, self.head_dim)
        xv = xv.view(bsz, seqlen, self.n_local_heads, self.head_dim)
        xq, xk = apply_rotary_emb(xq, xk, freqs_cis=freqs_cis)
        # Modified code to allow training, caching is not good for training
        if (cache_k is None) != (cache_v is None):
            raise ValueError(
                "cache_k and cache_v must be both provided or both None"
            )
        use_cache = cache_k is not None
        if not use_cache:
            keys = xk
            values = xv
        else:
            # `Tensor.to` is not in-place: the result must be kept, otherwise
            # the cache silently stays on its original device/dtype.
            cache_k = cache_k.to(device=xk.device, dtype=xk.dtype)
            cache_v = cache_v.to(device=xv.device, dtype=xv.dtype)
            end_pos = start_pos + seqlen
            cache_k[:bsz, start_pos:end_pos] = xk
            cache_v[:bsz, start_pos:end_pos] = xv
            # read back from the cache that was just written, not from the
            # zero-initialised tensors owned by the module
            keys = cache_k[:bsz, :end_pos]
            values = cache_v[:bsz, :end_pos]
        xq = xq.transpose(1, 2)
        keys = keys.transpose(1, 2)
        values = values.transpose(1, 2)
        scores = torch.matmul(xq, keys.transpose(2, 3)) / math.sqrt(
            self.head_dim
        )
        if kv_mask is not None:
            scores = scores + kv_mask
        # softmax in float32 for numerical stability, then back to xq dtype
        scores = F.softmax(scores.float(), dim=-1).type_as(xq)
        output = torch.matmul(scores, values)
        output = output.transpose(1, 2).contiguous().view(bsz, seqlen, -1)
        if not use_cache:
            return self.wo(output), None, None
        return self.wo(output), cache_k, cache_v
class FeedForward(nn.Module):
    """SwiGLU feed-forward layer, adapted from the FeedForward class of the
    LLaMA repo so that it can be used for training.

    The effective hidden size is 2/3 of `hidden_dim`, rounded up to the next
    multiple of `multiple_of`.
    """

    def __init__(
        self, dim: int, hidden_dim: int, multiple_of: int, use_fairscale: bool
    ):
        super().__init__()
        reduced = int(2 * hidden_dim / 3)
        # ceil-divide by `multiple_of`, then scale back
        hidden_dim = multiple_of * -(-reduced // multiple_of)
        if not use_fairscale:
            self.w1 = nn.Linear(dim, hidden_dim, bias=False)
            self.w2 = nn.Linear(hidden_dim, dim, bias=False)
            self.w3 = nn.Linear(dim, hidden_dim, bias=False)
            return
        # model-parallel version of the same three projections
        self.w1 = ColumnParallelLinear(
            dim,
            hidden_dim,
            bias=False,
            gather_output=False,
            init_method=lambda x: x,
        )
        self.w2 = RowParallelLinear(
            hidden_dim,
            dim,
            bias=False,
            input_is_parallel=True,
            init_method=lambda x: x,
        )
        self.w3 = ColumnParallelLinear(
            dim,
            hidden_dim,
            bias=False,
            gather_output=False,
            init_method=lambda x: x,
        )

    def forward(self, x):
        gate = F.silu(self.w1(x))
        projected = self.w3(x)
        return self.w2(gate * projected)
class TransformerBlock(nn.Module):
    """Pre-norm transformer layer (attention + feed-forward, both with a
    residual connection), adapted from the TransformerBlock class of the
    LLaMA repo so that it can be used for training.
    """

    def __init__(self, layer_id: int, args: ModelArgs):
        super().__init__()
        self.layer_id = layer_id
        self.use_fairscale = args.use_fairscale
        self.n_heads = args.n_heads
        self.dim = args.dim
        self.head_dim = args.dim // args.n_heads
        # sub-modules are created in the same order as in the original code
        self.attention = Attention(args)
        self.feed_forward = FeedForward(
            dim=args.dim,
            hidden_dim=4 * args.dim,
            multiple_of=args.multiple_of,
            use_fairscale=args.use_fairscale,
        )
        self.attention_norm = RMSNorm(args.dim, eps=args.norm_eps)
        self.ffn_norm = RMSNorm(args.dim, eps=args.norm_eps)

    def forward(
        self,
        x: torch.Tensor,
        attention_mask: torch.Tensor,
        freqs_cis: torch.Tensor,
        cache_k: Optional[torch.Tensor] = None,
        cache_v: Optional[torch.Tensor] = None,
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[torch.Tensor]]:
        """Run the layer on `x` and return the output together with the key
        and value caches returned by the attention module.
        """
        # modified from the original code to enable an external cache
        if self.use_fairscale:
            local_heads = (
                self.n_heads // fs_init.get_model_parallel_world_size()
            )
        else:
            local_heads = self.n_heads
        # add a head dimension to the mask and repeat it for each local head
        per_head_mask = attention_mask.unsqueeze(1).expand(
            -1, local_heads, -1, -1
        )
        normed_input = self.attention_norm(x)
        attn_out, cache_k, cache_v = self.attention.forward(
            normed_input, per_head_mask, freqs_cis, cache_k, cache_v
        )
        hidden = x + attn_out
        ffn_out = self.feed_forward.forward(self.ffn_norm(hidden))
        return hidden + ffn_out, cache_k, cache_v
class Transformer(nn.Module):
    """This class is a modification of the Transformer class implemented in
    the LLaMA repo. The class has been modified for training, since the
    original one just supports inference. The generate method was inspired by
    the generate function you can find in `llama.generation`.

    When the module is not in training mode a per-layer key/value cache
    (`self.cache_k` / `self.cache_v`) is passed to every layer and replaced
    with the tensors the layer returns.
    """

    def __init__(self, params: ModelArgs):
        super().__init__()
        self.params = params
        self.vocab_size = params.vocab_size
        self.n_layers = params.n_layers
        if params.use_fairscale:
            self.n_local_heads = (
                params.n_heads // fs_init.get_model_parallel_world_size()
            )
        else:
            self.n_local_heads = params.n_heads
        self.head_dim = params.dim // params.n_heads
        dim = (
            params.max_batch_size,
            params.max_seq_len,
            self.n_local_heads,
            self.head_dim,
        )
        # one key cache and one value cache per layer (plain tensors, not
        # registered buffers)
        self.cache_k = [torch.zeros(dim) for _ in range(self.n_layers)]
        self.cache_v = [torch.zeros(dim) for _ in range(self.n_layers)]
        if params.use_fairscale:
            self.tok_embeddings = ParallelEmbedding(
                params.vocab_size, params.dim, init_method=lambda x: x
            )
        else:
            self.tok_embeddings = nn.Embedding(params.vocab_size, params.dim)
        if params.froze_embeddings:
            for param in self.tok_embeddings.parameters():
                param.requires_grad = False
        self.layers = torch.nn.ModuleList()
        for layer_id in range(params.n_layers):
            self.layers.append(TransformerBlock(layer_id, params))
        self.norm = RMSNorm(params.dim, eps=params.norm_eps)
        if params.use_fairscale:
            self.output = ColumnParallelLinear(
                params.dim,
                params.vocab_size,
                bias=False,
                init_method=lambda x: x,
            )
        else:
            self.output = nn.Linear(params.dim, params.vocab_size, bias=False)
        # TODO: How too modify this for training?
        self.freqs_cis = precompute_freqs_cis(
            self.params.dim // self.params.n_heads, self.params.max_seq_len * 2
        )

    def forward(
        self, tokens: torch.Tensor, attention_mask: torch.Tensor
    ) -> torch.Tensor:
        """Return the logits for `tokens`; the mask is detached first."""
        attention_mask = attention_mask.detach()
        logits = self._forward(tokens, attention_mask)
        return logits

    def _forward(
        self, tokens: torch.Tensor, attention_mask: torch.Tensor
    ) -> torch.Tensor:
        """Compute the logits for every position of `tokens`.

        Args:
            tokens (torch.Tensor): Token ids of shape (bsz, seqlen).
            attention_mask (torch.Tensor): Mask of shape (bsz, seqlen) with
                1 on the positions to attend to (bool masks are accepted).

        Returns:
            torch.Tensor: Output of the final projection applied to the
                normalised hidden states.
        """
        _bsz, seqlen = tokens.shape
        h = self.tok_embeddings(tokens)
        self.freqs_cis = self.freqs_cis.to(h.device)
        # TEMPORARY FIX, need to understand how to manage the positioning
        # embedding and the batch size with the current padding and masking.
        start_pos = 1
        freqs_cis = self.freqs_cis[start_pos : start_pos + seqlen]  # noqa E203
        # mask has size (bsz, seqlen). It should be transformed in
        # (bsz, seqlen, seqlen)
        # if the mask is a boolean tensor, convert it to int
        if attention_mask.dtype == torch.bool:
            attention_mask = attention_mask.long()
        kv_mask = attention_mask[:, None, :].expand(_bsz, seqlen, seqlen)
        # causal mask: a position can only attend to itself and the past
        kv_mask = torch.tril(kv_mask, diagonal=0)
        # turn it into an additive mask: 0 where attention is allowed and
        # the smallest int64 where it is not
        kv_mask = 1 - kv_mask
        kv_mask = (
            torch.where(
                kv_mask == 1, kv_mask.new_tensor(-9223372036854775808), kv_mask
            )
            .detach()
            .long()
        )
        for i, layer in enumerate(self.layers):
            if self.training:
                # no caching while training
                h, _, _ = layer(h, kv_mask, freqs_cis)
            else:
                h, cache_k, cache_v = layer(
                    h, kv_mask, freqs_cis, self.cache_k[i], self.cache_v[i]
                )
                self.cache_k[i] = cache_k.detach()
                self.cache_v[i] = cache_v.detach()
        h = self.norm(h)
        output = self.output(h)
        return output

    @torch.no_grad()
    def generate(
        self,
        input_ids: torch.Tensor,
        attention_mask: torch.Tensor,
        max_new_tokens: int,
        temperature: float,
        top_p: float = 1.0,
        no_repeat_ngram_size=None,
    ):
        """Autoregressively extend `input_ids` by `max_new_tokens` tokens.

        Args:
            input_ids (torch.Tensor): Prompt token ids, shape (bsz, seqlen).
            attention_mask (torch.Tensor): Mask of the prompt, same shape.
            max_new_tokens (int): Number of tokens to generate.
            temperature (float): Sampling temperature; when it is not
                positive the most likely token is picked instead.
            top_p (float): Threshold passed to `sample_top_p`.
            no_repeat_ngram_size: Accepted for signature compatibility with
                Hugging Face `generate`; not used here.

        Returns:
            torch.Tensor: Prompt followed by the generated tokens, shape
                (bsz, seqlen + max_new_tokens).
        """
        for _ in range(max_new_tokens):
            logits = self._forward(input_ids, attention_mask)[:, -1, :]
            if temperature > 0:
                probs = torch.softmax(logits / temperature, dim=-1)
                next_token = sample_top_p(probs, top_p)
            else:
                next_token = torch.argmax(logits, dim=-1)
            next_token = next_token.reshape(-1)
            input_ids = torch.cat([input_ids, next_token.unsqueeze(1)], dim=1)
            attention_mask = torch.cat(
                [attention_mask, torch.ones_like(next_token).unsqueeze(1)],
                dim=1,
            )
        # `input_ids` already holds prompt + generated tokens: appending the
        # generated tokens once more would duplicate them in the output.
        return input_ids
def setup_model_parallel() -> Tuple[int, int]:
    """Initialise torch.distributed (nccl) and fairscale model parallelism.

    The local rank and the world size are read from the `LOCAL_RANK` and
    `WORLD_SIZE` environment variables (-1 when unset).

    Returns:
        Tuple[int, int]: local rank and world size.
    """
    rank = int(os.environ.get("LOCAL_RANK", -1))
    size = int(os.environ.get("WORLD_SIZE", -1))
    print("local_rank:", rank, "world_size:", size)
    torch.distributed.init_process_group("nccl")
    initialize_model_parallel(size)
    torch.cuda.set_device(rank)
    # seed must be the same in all processes
    torch.manual_seed(1)
    return rank, size
def setup_model_deepspeed() -> Tuple[int, int]:
    """Initialise the DeepSpeed distributed backend.

    The local rank and the world size are read from the `LOCAL_RANK` and
    `WORLD_SIZE` environment variables (-1 when unset).

    Returns:
        Tuple[int, int]: local rank and world size.
    """
    rank = int(os.environ.get("LOCAL_RANK", -1))
    size = int(os.environ.get("WORLD_SIZE", -1))
    deepspeed.init_distributed()
    torch.cuda.set_device(rank)
    # seed must be the same in all processes
    torch.manual_seed(1)
    return rank, size
def load_checkpoints(
    ckpt_dir: str, local_rank: int, world_size: int
) -> Tuple[dict, dict]:
    """Load the checkpoint shard of this rank and the model parameters.

    Args:
        ckpt_dir (str): Folder with the `*.pth` shards and `params.json`.
        local_rank (int): Index of the shard to load, in sorted order.
        world_size (int): Must match the number of shards in the folder.

    Returns:
        Tuple[dict, dict]: the checkpoint loaded on CPU and the content of
            `params.json`.
    """
    folder = Path(ckpt_dir)
    shards = sorted(folder.glob("*.pth"))
    assert world_size == len(shards), (
        f"Loading a checkpoint for MP={len(shards)} but world "
        f"size is {world_size}"
    )
    shard_path = shards[local_rank]
    print("Loading")
    state = torch.load(shard_path, map_location="cpu")
    with open(folder / "params.json", "r") as f:
        model_params = json.load(f)
    return state, model_params
def load_model(
    ckpt_dir: str,
    tokenizer_path: str,
    local_rank: int,
    world_size: int,
    froze_embeddings: bool,
    use_fairscale: bool,
    max_batch_size: int = 32,
) -> Tuple[Transformer, HFLikeTokenizer]:
    """Build the LLaMA model from a checkpoint folder together with its
    tokenizer.

    Args:
        ckpt_dir (str): Folder with the checkpoint shards and `params.json`.
        tokenizer_path (str): Path of the tokenizer model.
        local_rank (int): Rank of this process, selects the shard to load.
        world_size (int): Number of processes (must match the shards).
        froze_embeddings (bool): Whether the embeddings are frozen.
        use_fairscale (bool): Whether to use fairscale parallel layers.
        max_batch_size (int): Batch size used to size the key/value caches.

    Returns:
        Tuple[Transformer, HFLikeTokenizer]: the model and the tokenizer
            wrapped to expose a Hugging Face-like interface.
    """
    checkpoint, params = load_checkpoints(ckpt_dir, local_rank, world_size)
    model_args: ModelArgs = ModelArgs(
        max_seq_len=1024, max_batch_size=max_batch_size, **params
    )
    model_args.froze_embeddings = froze_embeddings
    model_args.use_fairscale = use_fairscale
    tokenizer = Tokenizer(model_path=tokenizer_path)
    model_args.vocab_size = tokenizer.n_words
    # the model is instantiated directly in half precision on the GPU
    torch.set_default_tensor_type(torch.cuda.HalfTensor)
    try:
        model = Transformer(model_args)
    finally:
        # always restore the global default, even if the constructor fails,
        # otherwise every tensor created afterwards would be CUDA half
        torch.set_default_tensor_type(torch.FloatTensor)
    # strict=False: keys missing from / unexpected in the checkpoint are
    # tolerated
    model.load_state_dict(checkpoint, strict=False)
    return model, HFLikeTokenizer(tokenizer)
def load_tokenizer(tokenizer_path: str):
    """Return the LLaMA `Tokenizer` built from the model at `tokenizer_path`."""
    return Tokenizer(model_path=tokenizer_path)
def load_tokenizer_test(tokenizer_path: Optional[str] = None):
    """Return the Hugging Face based `MyTokenizer` used for testing."""
    return MyTokenizer(model_path=tokenizer_path)
def load_model_test(
    ckpt_dir: str,
    tokenizer_path: str,
    local_rank: int,
    world_size: int,
    froze_embeddings: bool,
    use_fairscale: bool,
    max_batch_size: int = 32,
) -> Tuple[Transformer, HFLikeTokenizer]:
    """Build a randomly initialised model with the default `ModelArgs` and a
    Hugging Face tokenizer, for testing.

    `ckpt_dir`, `local_rank`, `world_size` and `max_batch_size` are not used;
    they only mirror the signature of `load_model`.
    """
    # test the model with hf tokenizer
    hf_tokenizer = MyTokenizer(model_path=tokenizer_path)
    model_args = ModelArgs(
        vocab_size=hf_tokenizer.n_words,
        froze_embeddings=froze_embeddings,
        use_fairscale=use_fairscale,
    )
    model = Transformer(model_args).cuda()
    return model, HFLikeTokenizer(hf_tokenizer)
================================================
FILE: optimization/chatllama/chatllama/rlhf/__init__.py
================================================
"""RLHF implementation inspired by Lucidrains' implementation."""
================================================
FILE: optimization/chatllama/chatllama/rlhf/actor.py
================================================
import json
import yaml
import os
import shutil
import deepspeed
import torch
from accelerate import Accelerator
from beartype import beartype
from beartype.typing import Tuple
from einops import rearrange
from peft import get_peft_model, LoraConfig, TaskType
from torch.utils.data import DataLoader, Dataset
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
)
from chatllama.rlhf.config import ConfigActor
from chatllama.rlhf.model_list import (
hf_models_causal_lm,
llama_models,
hf_models,
)
from chatllama.rlhf.model_loader import ModelLoader
from chatllama.rlhf.utils import TrainingStats
class ActorModel(torch.nn.Module):
    """Actor model that generates the augmented prompt from the initial
    user_input. The aim is to train this model to generate better prompts.

    Attributes:
        model: The underlying language model (LLaMA or Hugging Face)
        tokenizer: The tokenizer matching the model
        config (ConfigActor): Configuration for the actor model

    Methods:
        load: Load the model from a path
        save: Save the model to a path
        forward: Compute the action logits for a given sequence.
        generate: Generate a sequence from a given prompt
    """

    def __init__(self, config: ConfigActor) -> None:
        super().__init__()
        # save config
        self.config = config
        # initialize the self.model
        if config.model in llama_models:
            # llama module might not be present when HF models are used
            from chatllama.llama_model import (
                load_model,
                setup_model_parallel,
            )  # noqa

            local_rank, world_size = setup_model_parallel()
            # use load_model_test for testing
            self.model, self.tokenizer = load_model(
                ckpt_dir=config.model_folder,
                tokenizer_path=config.tokenizer_path,
                local_rank=local_rank,
                world_size=world_size,
                froze_embeddings=config.froze_embeddings,
                use_fairscale=config.use_fairscale,
                max_batch_size=config.batch_size,
            )
        elif config.model in hf_models_causal_lm:
            self.tokenizer = self.load_tokenizer(config)
            self.model = AutoModelForCausalLM.from_pretrained(
                config.model,
            )
            # Setup PEFT model
            if config.peft_enable:
                # check that the peft config exist
                if os.path.exists(config.peft_config_path):
                    # Read the peft config from yaml
                    with open(config.peft_config_path, "r") as c:
                        config_peft = yaml.safe_load(c)
                else:
                    raise ValueError(
                        f"PEFT config {config.peft_config_path} not found"
                    )
                print(config_peft)
                # define lora config for peft
                peft_config = LoraConfig(
                    task_type=TaskType.CAUSAL_LM, **config_peft
                )
                # create peft model
                self.model = get_peft_model(
                    model=self.model,
                    peft_config=peft_config,
                )
            self.model.to(config.device)
        else:
            raise ValueError(f"Model {config.model} not supported")
        # load the model from model_folder
        self.load()

    @beartype
    def load(self) -> None:
        """Load the model weights from the path given by `ModelLoader`, if
        a saved model is found there.
        """
        # check if there is a model to load
        path = ModelLoader.check_model_path(
            config=self.config,
            is_checkpoint=False,
            current_epoch=None,
        )
        # if there is a model to load
        if path is not None:
            # load the model
            print("Loading ...")
            # load on CPU: `load_state_dict` copies the values into the
            # existing parameters, so the file can be read even when the
            # device it was saved from is not available
            model_dict = torch.load(path, map_location="cpu")
            # the weights are stored under "state_dict" or under "model"
            self.model.load_state_dict(
                model_dict.get("state_dict") or model_dict.get("model")
            )

    @beartype
    def save(self) -> None:
        """Save the model weights to the path given by `ModelLoader`."""
        # get the path to save the model
        _, _, path = ModelLoader.get_model_path(
            config=self.config,
            is_checkpoint=False,
            current_epoch=None,
        )
        # save the model
        print(f"Saving model to {path} ...")
        torch.save(
            {"state_dict": self.model.state_dict()},
            path,
        )

    @staticmethod
    def load_tokenizer(config: ConfigActor):
        """Load the tokenizer from the model name

        Raises:
            ValueError: If `config.model` is neither a supported Hugging
                Face model nor a LLaMA model.
        """
        if config.model in hf_models:
            # load the tokenizer from HF
            tokenizer = AutoTokenizer.from_pretrained(
                config.model,
                padding_side="left",
                padding=True,
                truncation=True,
                model_max_length=config.max_sequence_length,
            )
            # add eos token if not present
            if tokenizer.eos_token is None:
                tokenizer.eos_token = "</s>"
                tokenizer.eos_token_id = 2  # OPT eos-token-id
            # add pad token if not present
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token
                tokenizer.pad_token_id = tokenizer.eos_token_id
        elif config.model in llama_models:
            # llama module might not be present when HF models are used
            from chatllama.llama_model import (
                load_tokenizer,
            )  # noqa

            tokenizer = load_tokenizer(config.tokenizer_path)
        else:
            # without this branch `tokenizer` would be unbound below
            raise ValueError(f"Model {config.model} not supported")
        return tokenizer

    def parameters(self, recurse: bool = True):
        """Return the parameters of the wrapped model"""
        return self.model.parameters(recurse)

    @beartype
    def forward(
        self, sequences: torch.Tensor, sequences_mask: torch.Tensor
    ) -> torch.Tensor:
        """Generate logits to have probability distribution over the vocabulary
        of the actions

        Args:
            sequences (torch.Tensor): Sequences of states and actions used to
                compute token logits for the whole list of sequences
            sequences_mask (torch.Tensor): Mask for the sequences attention

        Returns:
            logits (torch.Tensor): Logits for the actions taken
        """
        model_output = self.model.forward(
            sequences, attention_mask=sequences_mask
        )
        # need to return logits for the actions
        # (HF causal LM models wrap them in an output object)
        if self.config.model in hf_models_causal_lm:
            model_output = model_output.logits
        if self.config.debug:
            print("ActorModel.forward")
            print("model_output_logits shape", model_output.shape)
            print("model_output logits", model_output)
        return model_output

    @beartype
    @torch.no_grad()
    def generate(
        self, states: torch.Tensor, state_mask: torch.Tensor
    ) -> Tuple:
        """Generate actions and sequences=[states, actions] from state
        (i.e. input of the prompt generator model)

        Args:
            states (torch.Tensor): the input of the user
            state_mask (torch.Tensor): Mask for the state input (for padding)

        Returns:
            actions (torch.Tensor): Actions generated from the state
            sequences (torch.Tensor): Sequences generated from the
                state as [states, actions]

        Raises:
            ValueError: If fewer than `min_tokens` tokens can be generated
                without exceeding `max_sequence_length`.
        """
        # temperature for the actor
        temperature = self.config.temperature
        # max sequence length for the actor (i.e. prompt + completion)
        max_sequence_length = self.config.max_sequence_length
        # max and min number of tokens to generate
        max_tokens = self.config.max_tokens
        min_tokens = self.config.min_tokens
        # max generation possible given the state and the max sequence length
        max_generation_possible = max_sequence_length - states.shape[1]
        if max_generation_possible < min_tokens:
            raise ValueError(
                f"The prompt is too long w.r.t the "
                f"model sequence length \n"
                f"max_sequence_length={max_sequence_length}\n"
                f"state_length={states.shape[1]}\n"
                f"min_tokens={min_tokens}\n"
                f"max_tokens={max_tokens}\n"
                f"max_generation_possible={max_generation_possible}\n"
            )
        # take the minimum the max_tokens and the max_generation_possible
        max_completion = min(max_tokens, max_generation_possible)
        sequences = self.model.generate(
            input_ids=states,
            attention_mask=state_mask,
            temperature=temperature,
            max_new_tokens=max_completion,
            no_repeat_ngram_size=3,
        )
        # everything after the prompt is the generated action
        actions = sequences[:, states.shape[1] :]  # noqa E203
        if self.config.debug:
            print(
                f"input length {states.shape[1]} \n"
                f"max sequence length {max_sequence_length} \n"
                f"max completion {max_completion} \n"
                f"generated sequence {sequences.shape[1]} \n"
            )
            print("ActorModel.generate")
            print("state", states)
            print("state shape", states.shape)
            print("sequence shape", sequences.shape)
            print("sequence", sequences)
            print("actions shape", actions.shape)
            print("actions", actions)
        return actions, sequences
class ActorDataset(Dataset):
    """Dataset for the pretraining of the actor model
    read a json file with the following format:
    [
        {
            "user_input": "..."
            "completion": "..."
        },
        ...
    ]
    Where:
        user_input: the input of the user
        completion: the completion expected for that input

    Every item of the dataset is the string `user_input + completion`.
    """

    def __init__(
        self,
        path: str,
    ) -> None:
        self.path = path
        # JSON files are UTF-8: do not depend on the platform default
        # encoding, which breaks non-ASCII samples on some systems
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
        self.data = [d["user_input"] + d["completion"] for d in data]

    def __getitem__(self, idx):
        return self.data[idx]

    def __len__(
        self,
    ):
        return len(self.data)
class ActorTrainer:
"""Used to pre-train the actor model to generate better prompts.
Args:
config (ConfigActor): Configuration for the actor model
Attributes:
config (ConfigActor): Configuration for the actor model
model (ActorModel): Actor model
loss_function (torch.nn.CrossEntropyLoss): Loss function
optimizer (torch.optim.Adam): Optimizer
validation_flag (bool): Flag to indicate if the validation dataset
is provided
train_dataset (ActorDataset): Training dataset
train_dataloader (DataLoader): Training dataloader
validation_dataset (ActorDataset): Validation dataset
validation_dataloader (DataLoader): Validation dataloader
scheduler (torch.optim.lr_scheduler): Learning rate scheduler
training_stats (TrainingStats): Training statistics
model_engine (ModelEngine): Model engine for deepspeed training
accelerator (Accelerator): Accelerator for accelerate training
Methods:
train: Train the actor model
load_checkpoint: Load a checkpoint
save_checkpoint: Save a checkpoint
"""
def __init__(self, config: ConfigActor) -> None:
    """Build model, optimizer, data loaders, scheduler and, when enabled,
    the DeepSpeed engine or the Accelerate wrappers.

    Args:
        config (ConfigActor): Configuration for the actor model

    Raises:
        ValueError: If both DeepSpeed and Accelerate are enabled, or if
            DeepSpeed is enabled without a valid config path.
    """
    # store config
    self.config = config
    # load the model
    self.actor = ActorModel(config)
    # define loss function
    self.loss_function = torch.nn.CrossEntropyLoss()
    # define optimizer
    self.optimizer = torch.optim.AdamW(
        self.actor.parameters(), lr=config.lr, weight_decay=1e-5
    )
    # check if validation dataset is provided
    self.validation_flag = False
    if config.validation_dataset_path is not None:
        self.validation_flag = True
    # create dataset and dataloaders
    self.train_dataset = ActorDataset(config.train_dataset_path)
    self.train_dataloader = DataLoader(
        self.train_dataset, batch_size=config.batch_size
    )
    if self.validation_flag:
        self.eval_dataset = ActorDataset(config.validation_dataset_path)
        self.validation_dataloader = DataLoader(
            self.eval_dataset, batch_size=config.batch_size
        )
    # define scheduler for the learning rate
    # learning rate is decreased until 10% of the initial value
    # T_0 must be a positive integer: clamp it to 1 so that a dataset
    # smaller than one batch does not make the scheduler raise
    self.scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        self.optimizer,
        T_0=max(1, len(self.train_dataset) // config.batch_size),
        T_mult=1,
        eta_min=config.lr * 0.1,
    )
    # define training statistics
    stat_path = ModelLoader.get_training_stats_path(config)
    self.training_stats = TrainingStats(stat_path)
    # consistency check between accelerate and deepspeed
    if config.accelerate_enable and config.deepspeed_enable:
        raise ValueError(
            "Both DeepSpeed and Accelerate are enabled for the Actor. "
            "Please choose one of them."
        )
    # initialize deepspeed
    self.model_engine = None
    if config.deepspeed_enable is True:
        if config.deepspeed_config_path is None:
            raise ValueError(
                "DeepSpeed config path is None, but deepspeed is enabled"
            )
        if os.path.exists(config.deepspeed_config_path) is False:
            raise ValueError(
                f"DeepSpeed config path {config.deepspeed_config_path} "
                f"does not exist"
            )
        # deepspeed returns its own optimizer and data loader, which
        # replace the ones created above
        (
            self.model_engine,
            self.optimizer,
            self.train_dataloader,
            _,
        ) = deepspeed.initialize(
            args=None,
            model=self.actor,
            model_parameters=self.actor.parameters(),
            training_data=self.train_dataset,
            config=self.config.deepspeed_config_path,
        )
        print("Training with DeepSpeed")
    # initialize accelerate
    self.accelerator = None
    if config.accelerate_enable is True:
        self.accelerator = Accelerator()
        (
            self.actor,
            self.optimizer,
            self.train_dataloader,
            self.scheduler,
        ) = self.accelerator.prepare(
            self.actor,
            self.optimizer,
            self.train_dataloader,
            self.scheduler,
        )
        print("Training with Accelerate")
@beartype
def save_checkpoint(
    self,
    current_epoch: int,
    current_step: int,
    max_epochs: int,
    max_steps: int,
) -> None:
    """Write a checkpoint for the current training position and prune the
    old ones.

    Args:
        current_epoch (int): Current epoch
        current_step (int): Current step
        max_epochs (int): Maximum number of epochs
        max_steps (int): Maximum number of steps
    """
    print(
        f"Saving checkpoint for epoch {current_epoch + 1}, "
        f"step {current_step + 1} ..."
    )
    # ask the model loader where this checkpoint has to be stored
    model_folder, model_name, path = ModelLoader.get_model_path(
        config=self.config,
        is_checkpoint=True,
        current_epoch=current_epoch,
        current_step=current_step,
        max_epochs=max_epochs,
        max_steps=max_steps,
    )
    use_deepspeed = self.config.deepspeed_enable
    # an existing checkpoint with the same path is replaced: deepspeed
    # checkpoints are removed as directory trees, the others as files
    if os.path.exists(path):
        remove = shutil.rmtree if use_deepspeed else os.remove
        remove(path)
    if use_deepspeed:
        # deepspeed writes model and optimizer state by itself
        self.model_engine.save_checkpoint(
            path,
            client_state={"epoch": current_epoch, "step": current_step},
        )
    else:
        payload = {
            "state_dict": self.actor.model.state_dict(),
            "optim_state_dict": self.optimizer.state_dict(),
            "training_stats": self.training_stats,
            "epoch": current_epoch,
            "step": current_step,
        }
        torch.save(payload, path)
    # keep only the configured number of checkpoints
    ModelLoader.delete_old_checkpoints(
        model_folder, model_name, self.config.n_checkpoints_to_keep
    )
@beartype
def load_checkpoint(
    self,
) -> Tuple[int, int]:
    """Load the latest checkpoint, if any, and restore the training state.

    The checkpoint location is looked up with
    ``ModelLoader.check_model_path``. In DeepSpeed mode the engine loads
    the checkpoint and the epoch/step counters are read from its client
    state. Otherwise the file is read with ``torch.load`` and the model
    weights, optimizer state and training statistics are restored.

    Returns:
        Tuple[int, int]: Epoch and step from which training must resume.
            ``(0, 0)`` is returned when no checkpoint is found or when
            the checkpoint cannot be loaded.
    """
    print("Looking for checkpoints...")
    # look for a checkpoint
    path = ModelLoader.check_model_path(
        config=self.config,
        is_checkpoint=True,
        current_epoch=None,
    )
    if path is None:
        return 0, 0

    print("Loading ...")
    corrupted_message = (
        "Checkpoint corrupted! "
        "Try to remove the last checkpoint. "
        "Now Starting from epoch 0, step 0"
    )
    if self.config.deepspeed_enable:
        # try to load the checkpoint
        try:
            _, client_state = self.model_engine.load_checkpoint(path)
        except Exception:
            print(corrupted_message)
            return 0, 0
        # the engine reports a failed load by returning no client state
        if client_state is None:
            print(corrupted_message)
            return 0, 0
        # load epoch and step to resume loops
        epoch = client_state["epoch"]
        step = client_state["step"]
    else:
        # try to load the checkpoint
        try:
            checkpoint = torch.load(path)
        except Exception:
            print(corrupted_message)
            return 0, 0
        # assign the checkpoint content to the trainer
        epoch = checkpoint["epoch"]
        self.actor.model.load_state_dict(checkpoint["state_dict"])
        self.optimizer.load_state_dict(checkpoint["optim_state_dict"])
        # restore the statistics on the attribute used by the trainer
        self.training_stats = checkpoint["training_stats"]
        step = checkpoint["step"]
    # the saved step was already trained: resume from the next one
    return epoch, step + 1
def add_eos_token(
    self, tokens: torch.Tensor, mask: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Append one EOS column to a batch of token ids and to its mask.

    The column is added only when the batch is shorter than
    ``config.max_sequence_length``; otherwise both tensors are returned
    untouched.

    Args:
        tokens (torch.Tensor): 2-D tensor of token ids (batch, sequence)
        mask (torch.Tensor): mask paired with ``tokens``

    Returns:
        Tuple[torch.Tensor, torch.Tensor]: tokens and mask, each extended
            by one column when there was room for it. The new token
            column holds the tokenizer EOS id, the new mask column ones.
    """
    batch_size, seq_len = tokens.shape
    eos_id = self.actor.tokenizer.eos_token_id

    # nothing can be appended once the maximum length is reached
    if self.config.max_sequence_length - seq_len < 1:
        return tokens, mask

    eos_column = torch.full(
        (batch_size, 1), eos_id, dtype=torch.long, device=tokens.device
    )
    ones_column = torch.ones(
        (batch_size, 1), dtype=torch.long, device=mask.device
    )
    extended_tokens = torch.cat([tokens, eos_column], dim=1)
    extended_mask = torch.cat([mask, ones_column], dim=1)
    return extended_tokens, extended_mask
def train(
    self,
) -> None:
    """Train the actor model, validating after every epoch if enabled.

    Training resumes from the position returned by ``load_checkpoint``;
    when it starts from scratch the training statistics are cleared.
    Every batch is tokenized, extended with an EOS token and split into
    next-token inputs and targets. The optimization step is delegated to
    DeepSpeed, Accelerate or plain PyTorch depending on the config. A
    checkpoint is saved every ``config.checkpoint_steps`` training steps
    and the model is saved once all the epochs are completed.
    """
    print("Start Actor Model Pretraining")

    # get config parameters
    if self.config.deepspeed_enable:
        batch_size = self.train_dataloader.batch_size
    else:
        batch_size = self.config.batch_size
    epochs = self.config.epochs
    device = self.config.device
    checkpoint_steps = self.config.checkpoint_steps

    # compute the number of iterations
    n_iter = int(len(self.train_dataset) / batch_size)

    # load model_checkpoint
    start_epoch, start_step = self.load_checkpoint()
    if start_epoch == 0 and start_step == 0:
        self.training_stats.clear()

    # counter for the checkpoint
    cnt_checkpoint = 1

    # training loop
    for epoch in range(start_epoch, epochs):
        self.actor.train()
        for i, input_text in enumerate(self.train_dataloader):
            # skip the first steps if we are resuming training
            if i < start_step:
                continue

            # tokenize input
            with torch.no_grad():
                input_tokenized = self.actor.tokenizer(
                    input_text,
                    return_tensors="pt",
                    truncation=True,
                    padding=True,
                )

                # split tokens and mask
                input_tokenized_id = input_tokenized["input_ids"]
                input_tokenized_mask = input_tokenized["attention_mask"]

                # add eos token
                (
                    input_tokenized_id,
                    input_tokenized_mask,
                ) = self.add_eos_token(
                    input_tokenized_id,
                    input_tokenized_mask,
                )

                # split into input and output: the target is the input
                # shifted by one position
                training_output = input_tokenized_id[:, 1:]
                training_input = input_tokenized_id[:, :-1]
                attention_mask = input_tokenized_mask[:, :-1]

                # move to device
                training_output = training_output.to(device)
                training_input = training_input.to(device)
                attention_mask = attention_mask.to(device)

            # forward pass
            if self.config.deepspeed_enable:
                est_output = self.model_engine(
                    training_input, attention_mask
                )
            else:
                est_output = self.actor(training_input, attention_mask)

            # compute loss
            est_output = rearrange(est_output, "b s v -> (b s) v")
            training_output = rearrange(training_output, "b s -> (b s)")
            loss = self.loss_function(est_output, training_output)
            self.training_stats.training_loss.append(loss.item())

            # backward pass
            if self.config.deepspeed_enable:
                self.model_engine.backward(loss)
                self.model_engine.step()
            elif self.config.accelerate_enable:
                self.optimizer.zero_grad()
                self.accelerator.backward(loss)
                self.optimizer.step()
                self.scheduler.step()
            else:
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                self.scheduler.step()

            # print progress
            if i % self.config.iteration_per_print == 0:
                print(
                    f"Epoch: {epoch+1}/{epochs}, "
                    f"Iteration: {i+1}/{n_iter}, "
                    f"Training Loss: {loss}"
                )

            # save checkpoint periodically
            if cnt_checkpoint % checkpoint_steps == 0:
                self.save_checkpoint(epoch, i, epochs, n_iter)
                self.training_stats.save()
                cnt_checkpoint = 1
            else:
                cnt_checkpoint += 1

        # Validation
        if self.validation_flag:
            self.actor.eval()
            # progress is reported against the validation batches,
            # not against the training ones
            n_val_iter = len(self.validation_dataloader)
            with torch.no_grad():
                for i, input_text in enumerate(self.validation_dataloader):
                    # tokenize input
                    input_tokenized = self.actor.tokenizer(
                        input_text, return_tensors="pt", padding=True
                    )
                    validation_output = input_tokenized["input_ids"][:, 1:]
                    validation_input = input_tokenized["input_ids"][:, :-1]
                    attention_mask = input_tokenized["attention_mask"][
                        :, :-1
                    ]

                    # move to device, as done for the training batches
                    validation_output = validation_output.to(device)
                    validation_input = validation_input.to(device)
                    attention_mask = attention_mask.to(device)

                    # forward pass
                    est_output = self.actor.forward(
                        validation_input, attention_mask
                    )
                    validation_output = rearrange(
                        validation_output, "b s -> (b s)"
                    )

                    # compute loss
                    est_output = rearrange(est_output, "b s v -> (b s) v")
                    loss = self.loss_function(
                        est_output, validation_output
                    )
                    self.training_stats.validation_loss.append(loss.item())

                    # print progress
                    if i % self.config.iteration_per_print == 0:
                        print(
                            f"Epoch: {epoch+1}/{epochs}, "
                            f"Iteration: {i+1}/{n_val_iter}, "
                            f"Validation Loss: {loss}"
                        )

        # reset start_step after training is resumed
        start_step = 0

    # save the model
    self.actor.save()
    print("Training Finished ")
================================================
FILE: optimization/chatllama/chatllama/rlhf/config.py
================================================
import yaml
import os
from dataclasses import dataclass
import torch
from beartype import beartype
from beartype.typing import Optional
@dataclass
class ConfigReward:
    """Settings of the reward model (also reused for the critic).

    Attributes:
        device (torch.device): Device to be used for the reward model
        model (str): Model to be used for the reward model
        model_folder (str): Path to the folder where models are stored
            (used to load / store finetuned models or checkpoints)
        model_head_hidden_size (int): Hidden size of the reward model head
        max_sequence_length (int): Max sequence length of the reward model
        train_dataset_path (Optional[str]): Path to the training dataset.
            Default to None. To be specified only for the reward model
            training.
        validation_dataset_path (Optional[str]): Path to the validation
            dataset. Default to None. To be specified only for the reward
            model training.
        batch_size (Optional[int]): Batch size to train the reward model.
            Default to None. To be specified only for the reward model
            training.
        epochs (Optional[int]): Number of epochs to train the reward
            model. Default to None. To be specified only for the reward
            model training.
        iteration_per_print (Optional[int]): Number of iterations between
            prints of the training loss. Default to None. To be specified
            only for the reward model training.
        checkpoint_steps (Optional[int]): Number of steps (backProp) to
            interleave checkpoints. Default to None. To be specified only
            for the reward model training.
        checkpoint_name (Optional[str]): Name of the checkpoint. Default
            to None.
        lr (Optional[float]): Learning rate for the reward model. Default
            to None.
        llm_enable (Optional[bool]): Enable reward model distillation.
            Default to False.
        llm_model (Optional[str]): Model to be used for the reward model
            distillation. Default to "text-davinci-003".
        llm_temperature (Optional[float]): Temperature for the reward
            model distillation. Default to 0.9.
        llm_max_tokens (Optional[int]): Max tokens for the reward model
            distillation. Default to 64.
        deepspeed_enable (bool): Enable deepspeed for the reward model
            training. Default to False.
        deepspeed_config_path (Optional[str]): Path to the deepspeed
            config file. Default to None.
        is_reward (bool): True if the model is a reward model, False if
            it is a critic. Default to True.
        accelerate_enable (bool): Enable accelerate for the reward model.
            Default to False.
        debug (bool): Enable prints for debugging. Default to False.
    """

    # --- required: model definition ---
    device: torch.device
    model: str
    model_folder: str
    model_head_hidden_size: int
    max_sequence_length: int

    # --- optional: training ---
    train_dataset_path: Optional[str] = None
    validation_dataset_path: Optional[str] = None
    batch_size: Optional[int] = None
    epochs: Optional[int] = None
    iteration_per_print: Optional[int] = None
    checkpoint_steps: Optional[int] = None
    checkpoint_name: Optional[str] = None
    lr: Optional[float] = None

    # --- optional: distillation ---
    llm_enable: Optional[bool] = False
    llm_model: Optional[str] = "text-davinci-003"
    llm_temperature: Optional[float] = 0.9
    llm_max_tokens: Optional[int] = 64

    # --- optional: training backends ---
    deepspeed_enable: bool = False
    deepspeed_config_path: Optional[str] = None

    # critic specific parameters
    is_reward: bool = True
    accelerate_enable: bool = False
    debug: bool = False


# the critic shares the reward configuration; the alias only exists
# for naming consistency
ConfigCritic = ConfigReward
@dataclass
class ConfigActor:
    """Settings of the actor model.

    Every field is required except ``checkpoint_name`` and ``debug``.

    Attributes:
        model (str): Model to be used for the actor
        model_folder (str): Path to the folder where models are stored
            (used to load / store finetuned models or checkpoints)
        tokenizer_path (str): Path to the folder where tokenizers are
            stored
        train_dataset_path (str): Path to the training dataset
        validation_dataset_path (Optional[str]): Path to the validation
            dataset
        froze_embeddings (bool): Freeze the embeddings of the actor
        use_fairscale (bool): Use fairscale modules for the actor instead
            of pytorch native modules
        max_sequence_length (int): Max sequence length for the actor
        max_tokens (int): Max tokens for actor generation
        min_tokens (int): Min tokens for actor generation
        additonal_prompt_tokens (int): Number of tokens kept as a safety
            margin to avoid too large sequences and to add a template to
            the dataset
        temperature (float): Temperature for the actor
        batch_size (int): Batch size to train the actor
        iteration_per_print (int): Number of iterations between prints of
            the training loss
        lr (float): Learning rate for the actor
        epochs (int): Number of epochs to train the actor
        checkpoint_steps (int): Number of steps (backProp) to interleave
            checkpoints
        n_checkpoints_to_keep (int): Number of checkpoints to keep for
            the actor
        deepspeed_enable (bool): Enable deepspeed for the actor
        deepspeed_config_path (Optional[str]): Path to the deepspeed
            config file
        accelerate_enable (bool): Enable accelerate for the actor
        device (torch.device): Device to be used for the actor
        peft_enable (bool): Enable peft for the actor
        peft_config_path (str): Path to the peft config file
        checkpoint_name (Optional[str]): Name of the checkpoint. Default
            to None.
        debug (bool): Enable prints for debugging. Default to False.
    """

    # --- model and data locations ---
    model: str
    model_folder: str
    tokenizer_path: str
    train_dataset_path: str
    validation_dataset_path: Optional[str]

    # --- model behaviour ---
    froze_embeddings: bool
    use_fairscale: bool
    max_sequence_length: int
    max_tokens: int
    min_tokens: int
    additonal_prompt_tokens: int
    temperature: float

    # --- training loop ---
    batch_size: int
    iteration_per_print: int
    lr: float
    epochs: int
    checkpoint_steps: int
    n_checkpoints_to_keep: int

    # --- training backends ---
    deepspeed_enable: bool
    deepspeed_config_path: Optional[str]
    accelerate_enable: bool
    device: torch.device
    peft_enable: bool
    peft_config_path: str

    # --- optional ---
    checkpoint_name: Optional[str] = None
    debug: bool = False
@dataclass
class ConfigTrainer:
    """Config parameters for the trainer, used to configure the reinforcement
    learning training loop

    Attributes:
        actor_lr (float): Learning rate for the actor when training with
            reinforcement learning
        critic_lr (float): Learning rate for the critic when training with
            reinforcement learning
        actor_eps_clip (float): Epsilon clip for the actor
        critic_eps_clip (float): Epsilon clip for the critic
        beta_s (float): Beta for the actor and critic
        gamma_discounted (float): Coefficient for the discounted rewards.
        examples_path (str): Path to the examples dataset
        num_episodes (int): Number of episodes, each episode consists of
            a number of timesteps that are used to generate examples
            stored in the memory buffer.
        max_timesteps (int): Max timesteps for the actor and critic.
            For each timestep a set of examples are sampled and used to
            generate a completion and a reward.
        update_timesteps (int): Number of timesteps to update the actor
            and critic
        num_examples (int): Number of examples to generate for the actor
            and critic. For each iteration of timestep, num_examples are
            sampled from the prompt dataset, processed and stored in the
            memory buffer.
        batch_size (int): Batch size to train the actor and critic.
            This batch is used to aggregate the memory from the memory
            buffer for the actual training of the actor and critic models.
        epochs (int): Number of epochs to train the actor and critic.
        checkpoint_steps (int): Number of episodes to interleave
            checkpoints.
        device (torch.device): Device to be used for the actor and critic
        checkpoint_name (Optional[str]): Name of the checkpoint. Default
            to None.
        debug (bool): Enable prints for debugging. Default to False.
    """

    # learning rates are fractional values, hence float and not int
    actor_lr: float
    critic_lr: float
    actor_eps_clip: float
    critic_eps_clip: float
    beta_s: float
    gamma_discounted: float
    examples_path: str
    num_episodes: int
    max_timesteps: int
    update_timesteps: int
    num_examples: int
    batch_size: int
    epochs: int
    checkpoint_steps: int
    device: torch.device
    checkpoint_name: Optional[str] = None
    debug: bool = False
class Config:
    """Store the config parameters for the whole pipeline

    The parameters are read from a yaml file that must provide the
    ``trainer_config``, ``actor_config``, ``critic_config`` and
    ``reward_config`` sections. ``device`` and ``debug`` are injected in
    every section before the matching config object is built.

    Args:
        path (str): Path to the config.yaml file.
        device (Optional[torch.device]): Device to be used by all the
            models. Default to None. If None, CUDA is selected and a
            ValueError is raised when CUDA is not available.
        debug (Optional[bool]): Enable prints for debugging. Default to
            False.

    Attributes:
        trainer (ConfigTrainer): Config parameters for the trainer
        actor (ConfigActor): Config parameters for the actor
        critic (ConfigCritic): Config parameters for the critic
        reward (ConfigReward): Config parameters for the reward

    Raises:
        ValueError: If no device is given and CUDA is not available, or
            if ``path`` does not point to an existing file.
    """

    @beartype
    def __init__(
        self,
        path: str,
        device: Optional[torch.device] = None,
        debug: Optional[bool] = False,
    ) -> None:
        # if not specified use the device available
        if device is None:
            if not torch.cuda.is_available():
                raise ValueError("No GPU available")
            device = torch.device("cuda")
            print(f"Current device used :{str(device)}")

        if path is None or not os.path.exists(path):
            raise ValueError("Path to the config.yaml is not valid")

        # Read the config from yaml
        with open(path, "r") as c:
            config = yaml.safe_load(c)

        # fetch every section up front, so a missing one is reported
        # before any config object is built
        trainer_dict, actor_dict, critic_dict, reward_dict = (
            config[section]
            for section in (
                "trainer_config",
                "actor_config",
                "critic_config",
                "reward_config",
            )
        )

        def with_runtime_options(section_dict):
            # runtime options are not part of the yaml file
            section_dict["device"] = device
            section_dict["debug"] = debug
            return section_dict

        self.trainer = ConfigTrainer(**with_runtime_options(trainer_dict))
        self.actor = ConfigActor(**with_runtime_options(actor_dict))

        # the critic reuses the reward config class, flagged as critic
        self.critic = ConfigCritic(**with_runtime_options(critic_dict))
        self.critic.is_reward = False

        self.reward = ConfigReward(**with_runtime_options(reward_dict))
================================================
FILE: optimization/chatllama/chatllama/rlhf/dataset.py
================================================
import json
import os
import numpy as np
from beartype.typing import Dict, List, Union
from datasets import load_dataset
from chatllama.rlhf.config import Config, ConfigActor, ConfigReward
from chatllama.rlhf.reward import RewardModel, CriticModel
from chatllama.rlhf.actor import ActorModel
ConfigType = Union[Config, ConfigActor, ConfigReward]
class BaseDataset:
    """Helpers shared by the dataset classes: sorting, sampling and
    removal of the examples that are too long for the models."""

    def __init__(
        self,
    ) -> None:
        pass

    @staticmethod
    def sort_conversation(
        conversations: List[Dict],
        only_input: bool = False,
        reverse: bool = True,
        shuffle: bool = True,
    ) -> List[Dict]:
        """Sort the conversations by length of user_input + completion
        or by length of user_input only

        Lengths are measured in characters.

        Args:
            conversations (List[Dict]): list of conversations
            only_input (bool, optional): sort by length of user_input only.
                Defaults to False.
            reverse (bool, optional): sort in descending order.
                Defaults to True.
            shuffle (bool, optional): shuffle the dataset leaving only the
                first 10 samples sorted. Defaults to True.

        Returns:
            List[Dict]: sorted list of conversations
        """
        # define the sorting function
        if only_input is True:

            def sort_fun(x):
                return len(x["user_input"])

        else:

            def sort_fun(x):
                return len(x["user_input"]) + len(x["completion"])

        # sort
        conversations = sorted(
            conversations,
            key=sort_fun,
            reverse=reverse,
        )

        # shuffle everything but the first 10 elements
        if shuffle is True:
            head, tail = conversations[:10], conversations[10:]
            conversations = (
                head
                + np.random.choice(
                    tail,
                    size=len(tail),
                    replace=False,
                ).tolist()
            )
        return conversations

    @staticmethod
    def take_n_samples(
        conversations: List[Dict],
        n: int,
    ) -> List[Dict]:
        """Take N samples from the dataset

        Args:
            conversations (List[Dict]): list of conversations
            n (int): number of samples to take randomly. When it is larger
                than the dataset, the whole dataset is returned in random
                order.

        Returns:
            List[Dict]: list of the sampled conversations
        """
        # sampling without replacement cannot return more than available
        n = min(n, len(conversations))
        if n == 0:
            return []
        # sample N number of index from 0 to len(conversations)
        indexes = np.random.choice(len(conversations), size=n, replace=False)
        # take the samples
        return [conversations[i] for i in indexes]

    @staticmethod
    def clean_dataset(config: "ConfigType"):
        """Clean the datasets by removing too long examples

        The Reward Dataset constraints are:
            - user_input + completion < Reward model max sequence length
        The Actor Dataset constraints are:
            - user_input + completion < Actor model max sequence length
        The RLHF Training Dataset constraints are:
            - user_input + min_completion < Actor model max sequence length
            - user_input + min_completion < Critic model max sequence length
            - user_input + min_completion < Reward model max sequence length

        The examples are sorted by length (in characters, longest first)
        and removed from the top until the first one that satisfies the
        constraints is found. When something is removed the dataset file
        is rewritten, with the longest examples first and the rest
        shuffled.

        Args:
            config (ConfigType): Config, ConfigActor or ConfigReward

        Raises:
            ValueError: if the config type is not one of the supported
                ones
        """
        # each check is (tokenizer, max sequence length, extra tokens to
        # reserve on top of the tokenized text)
        if isinstance(config, Config):
            print("Start cleaning the dataset for RLHF")
            # dataset
            dataset_path = config.trainer.examples_path
            # tokenizers
            r_tokenizer = RewardModel.load_tokenizer(config.reward)
            a_tokenizer = ActorModel.load_tokenizer(config.actor)
            c_tokenizer = CriticModel.load_tokenizer(config.critic)
            # min completion + safety tokens
            reserved = (
                config.actor.min_tokens
                + config.actor.additonal_prompt_tokens
            )
            checks = [
                (a_tokenizer, config.actor.max_sequence_length, reserved),
                (r_tokenizer, config.reward.max_sequence_length, reserved),
                (c_tokenizer, config.critic.max_sequence_length, reserved),
            ]
            # only the prompt is given for RLHF
            only_input = True
        elif isinstance(config, ConfigActor):
            print("Start cleaning the dataset for Actor")
            dataset_path = config.train_dataset_path
            a_tokenizer = ActorModel.load_tokenizer(config)
            checks = [
                (
                    a_tokenizer,
                    config.max_sequence_length,
                    config.additonal_prompt_tokens,
                )
            ]
            only_input = False
        elif isinstance(config, ConfigReward):
            print("Start cleaning the dataset for Reward")
            dataset_path = config.train_dataset_path
            r_tokenizer = RewardModel.load_tokenizer(config)
            checks = [(r_tokenizer, config.max_sequence_length, 0)]
            only_input = False
        else:
            raise ValueError(
                "Config type not recognized during dataset cleaning"
            )

        # if there is no dataset there is nothing to clean
        if not os.path.exists(dataset_path):
            print(
                f"Dataset not found at {dataset_path}"
                f" Skipping cleaning of the dataset"
            )
            return

        # load the dataset
        with open(dataset_path, "r") as f:
            conversations = json.load(f)

        # sort in descending order - longest first. The shuffle must be
        # disabled here: the removal below relies on the whole list being
        # ordered, not only on its first elements
        conversations = BaseDataset.sort_conversation(
            conversations,
            only_input=only_input,
            reverse=True,
            shuffle=False,
        )
        old_len = len(conversations)

        def is_too_long(conversation: Dict) -> bool:
            # get the text to be tokenized
            text = conversation["user_input"]
            if not only_input:
                text = text + conversation["completion"]
            return any(
                len(tokenizer.encode(text, truncation=False)) + extra
                > max_seq_len
                for tokenizer, max_seq_len, extra in checks
            )

        # since the dataset is ordered by length, skip the first elements
        # until an example that is not too long is found
        first_valid = 0
        while first_valid < old_len and is_too_long(
            conversations[first_valid]
        ):
            first_valid += 1
        conversations = conversations[first_valid:]

        # if the number of examples has changed
        if len(conversations) != old_len:
            print("Number of examples before cleaning: ", old_len)
            print("Number of examples after cleaning: ", len(conversations))
            # store the dataset with the usual ordering:
            # longest examples first, the rest shuffled
            conversations = BaseDataset.sort_conversation(
                conversations,
                only_input=only_input,
                reverse=True,
            )
            # remove the old dataset
            os.remove(dataset_path)
            # save the new dataset
            with open(dataset_path, "w") as f:
                json.dump(conversations, f, indent=4)
        else:
            print("Dataset is already clean")
class StanfordNLPSHPDataset(BaseDataset):
    """Class for Stanford NLP SHP dataset from HuggingFace"""

    def __init__(
        self,
    ) -> None:
        print("Download the dataset")
        self.dataset = load_dataset("stanfordnlp/SHP")
        print("Download Completed")

    def reformat_dataset(self, data: List) -> List[Dict]:
        """Reformat the dataset to the format required by RLHF

        For every example the reference answer with the highest score is
        kept as completion (answer B is taken when the scores are equal).

        Args:
            data (List): dataset from HuggingFace

        Returns:
            List[Dict]: reformatted dataset, one dictionary per example
                with the keys "user_input", "completion" and "score"
        """
        # initialize conversations
        conversations = []

        # loop over the dataset
        for d in data:
            if d["score_A"] > d["score_B"]:
                response = d["human_ref_A"]
            else:
                response = d["human_ref_B"]

            # compose user_input template; trailing newlines are removed
            # from the history before appending the separator
            history = d["history"].rstrip("\n")
            user_input = "Human: " + history + "\n\n##\n\n"

            # compose completion template
            completion = "Assistant: " + response

            conversations.append(
                {
                    "user_input": user_input,
                    "completion": completion,
                    "score": None,
                }
            )
        return conversations

    def save_dataset(
        self, dataset_folder: str, number_of_samples: int, reverse: bool = True
    ) -> None:
        """Save the dataset in the format required by RLHF

        Three files are written in ``dataset_folder``:
        actor_training_data.json, reward_training_data.json and
        rlhf_training_data.json.

        Args:
            dataset_folder (str): path to the folder where the dataset
                will be saved
            number_of_samples (int): number of samples to take from the
                dataset for the reward training data
            reverse (bool, optional): sort the dataset in descending order.
                Defaults to True.
        """
        print("Generate datasets for RLHF")

        # take the train and test dataset to create the finetuning dataset
        conversations = self.reformat_dataset(self.dataset["train"])
        conversations.extend(self.reformat_dataset(self.dataset["test"]))

        # sort conversations by length of user_input + completion
        conversations = self.sort_conversation(conversations, reverse=reverse)

        # save actor training data
        with open(f"{dataset_folder}/actor_training_data.json", "w") as f:
            json.dump(conversations, f, indent=4)

        # take N samples and sort them
        conversations = self.take_n_samples(conversations, number_of_samples)
        conversations = self.sort_conversation(conversations, reverse=reverse)

        # save reward training data
        with open(f"{dataset_folder}/reward_training_data.json", "w") as f:
            json.dump(conversations, f, indent=4)

        # take the validation dataset for rlhf
        conversations = self.reformat_dataset(self.dataset["validation"])

        # sort the validation dataset
        conversations = self.sort_conversation(
            conversations,
            only_input=True,
            reverse=reverse,
        )

        # save rlhf training data
        with open(f"{dataset_folder}/rlhf_training_data.json", "w") as f:
            json.dump(conversations, f, indent=4)

        print("Generation Completed")
class AnthropicRLHF(BaseDataset):
    """Class for the Anthropic hh-rlhf dataset from HuggingFace"""

    def __init__(
        self,
    ) -> None:
        print("Download the dataset")
        self.dataset = load_dataset("Anthropic/hh-rlhf")
        print("Download Completed")

    def reformat_dataset(self, data: List) -> List[Dict]:
        """Reformat the dataset to the format required by RLHF

        The "chosen" text of every example is split on "Assistant:": the
        text after the last occurrence becomes the completion, everything
        before it becomes the user input.

        Args:
            data (List): dataset from HuggingFace

        Returns:
            List[Dict]: reformatted dataset, one dictionary per example
                with the keys "user_input", "completion" and "score"
        """
        reformatted = []
        for example in data:
            *earlier_parts, last_answer = example["chosen"].split(
                "Assistant:"
            )

            # rebuild the conversation that precedes the last answer;
            # without any "Assistant:" marker the whole text is used
            if earlier_parts:
                history = "Assistant:".join(earlier_parts)
            else:
                history = last_answer

            # drop the trailing newlines before adding the separator
            history = history.rstrip("\n")

            reformatted.append(
                {
                    "user_input": history + "\n\n##\n\n",
                    "completion": "Assistant: " + last_answer,
                    "score": None,
                }
            )
        return reformatted

    def save_dataset(
        self, dataset_folder: str, number_of_samples: int, reverse: bool = True
    ) -> None:
        """Save the dataset in the format required by RLHF

        Three files are written in ``dataset_folder``:
        actor_training_data.json, reward_training_data.json and
        rlhf_training_data.json.

        Args:
            dataset_folder (str): path to the folder where the dataset
                will be saved
            number_of_samples (int): number of samples to take from the
                dataset for the reward training data
            reverse (bool, optional): sort the dataset in descending order.
                Defaults to True.
        """

        def dump(records: List[Dict], file_path: str) -> None:
            with open(file_path, "w") as f:
                json.dump(records, f, indent=4)

        print("Generate datasets for RLHF")

        # actor dataset: the whole train split
        actor_data = self.sort_conversation(
            self.reformat_dataset(self.dataset["train"]), reverse=reverse
        )
        dump(actor_data, f"{dataset_folder}/actor_training_data.json")

        # reward dataset: a random subset of the actor dataset
        reward_data = self.take_n_samples(actor_data, number_of_samples)
        reward_data = self.sort_conversation(reward_data, reverse=reverse)
        dump(reward_data, f"{dataset_folder}/reward_training_data.json")

        # rlhf dataset: the test split, sorted by length of user_input
        rlhf_data = self.sort_conversation(
            self.reformat_dataset(self.dataset["test"]),
            only_input=True,
            reverse=reverse,
        )
        dump(rlhf_data, f"{dataset_folder}/rlhf_training_data.json")

        print("Generation Completed")
================================================
FILE: optimization/chatllama/chatllama/rlhf/model_list.py
================================================
# llama models
llama_models = ["llama-7B", "llama-13B", "llama-33B", "llama-65B"]

# HF Models

# encoder-decoder models TODO: still not supported
hf_models_seq_2_seq = [
    "google/flan-t5-xxl",
    "google/flan-t5-xl",
    "google/flan-t5-large",
    "google/flan-t5-base",
    "google/flan-t5-small",
]

# decoder only TODO: codegen is still broken
hf_models_causal_lm = [
    "facebook/opt-125m",
    "facebook/opt-1.3b",
    "facebook/opt-2.7b",
    "facebook/opt-6.7b",
    # NOTE(review): OPT seems to be published as 13b rather than 11b;
    # confirm this identifier against the model hub
    "facebook/opt-11b",
    "facebook/galactica-125m",
    "facebook/galactica-1.3b",
    "facebook/galactica-6.7b",
    "bigscience/bloom-560m",
    "bigscience/bloomz-560m",
    "bigscience/bloom-1b1",
    "bigscience/bloomz-1b1",
    "bigscience/bloom-1b7",
    "bigscience/bloomz-1b7",
    "bigscience/bloom-3b",
    "bigscience/bloomz-3b",
    "bigscience/bloom-7b1",
    "bigscience/bloomz-7b1",
    # this entry used to be listed twice
    "EleutherAI/gpt-neo-1.3B",
    "EleutherAI/gpt-neox-20b",
    "EleutherAI/gpt-j-6B",
    "gpt2",
    "gpt2-large",
    "gpt2-xl",
    "benjamin/gerpt2",
    "benjamin/gerpt2-large",
    "Salesforce/codegen-350M-mono",
    "Salesforce/codegen-2B-mono",
    "Salesforce/codegen-6B-mono",
    "Salesforce/codegen-16B-mono",
]

# create a list of all the models from hf
hf_models = hf_models_seq_2_seq + hf_models_causal_lm
================================================
FILE: optimization/chatllama/chatllama/rlhf/model_loader.py
================================================
import os
import shutil
from beartype.typing import Union, Optional, Tuple
from chatllama.rlhf.config import (
Config,
ConfigActor,
ConfigCritic,
ConfigReward,
)
from chatllama.rlhf.model_list import hf_models
ConfigType = Union[Config, ConfigActor, ConfigCritic, ConfigReward]
class ModelLoader:
"""Class to load and save models and their checkpoints during training."""
def __init__(
self,
) -> None:
pass
@staticmethod
def get_training_stats_path(config: ConfigType) -> str:
    """Method to get the path to the training stats file.

    The file is named "training_stats.json" and lives in the model folder
    returned by ``get_model_path`` for checkpoints.

    Args:
        config (ConfigType): the config object

    Returns:
        str: path to the training stats file
    """
    # only the folder is needed, model name and model path are ignored
    model_folder, _, _ = ModelLoader.get_model_path(
        config, is_checkpoint=True
    )
    return os.path.join(model_folder, "training_stats.json")
@staticmethod
def look_for_last_checkpoint(
model_folder: str,
model_name: str,
) -> Optional[str]:
"""Method to look for the last checkpoint in the model folder
checkpoint are saved as {model_name}_epoch_{current_epoch}.pt
Args:
model_folder (str): the folder where the checkpoints are saved
model_name (str): the name of the model
"""
# remove .pt to model name
model_name = model_name.split(".")[0]
checkpoints = [
f for f in os.listdir(model_folder) if f.startswith(model_name)
]
if len(checkpoints) == 0:
return None
else:
checkpoints = sorted(checkpoints)
# get last checkpoint
last_checkpoint = checkpoints[-1]
return last_checkpoint
@staticmethod
def look_for_checkpoint_by_name(
model_folder: str,
checkpoint_name: str,
) -> Optional[str]:
"""Method to look for a particular checkpoint in the model folder
checkpoint are saved as
{model_name}_epoch_{current_epoch}_steps_{current_steps}.pt
Args:
model_folder (str): the folder where the checkpoints are saved
checkpoint_name (str): the name of the checkpoint
"""
# look for a file named checkpoint_name in the model folder
path = os.path.join(model_folder, checkpoint_name)
if os.path.exists(path):
return checkpoint_name
else:
return None
@staticmethod
def get_checkpoint_name(config: ConfigType) -> str:
    """Return the checkpoint name stored in the config.

    For the global Config the name is read from its trainer section, for
    any other config from the config itself.

    Args:
        config (ConfigType): the config object
    """
    source = config.trainer if isinstance(config, Config) else config
    return source.checkpoint_name
@staticmethod
def get_base_model_folder_from_config(config: ConfigType) -> str:
    """Return the base folder where the models of the config are stored.

    For the global Config the folder of its actor section is used.

    Args:
        config (ConfigType): the config object

    Raises:
        ValueError: if the config type is not recognized
    """
    if isinstance(config, (ConfigActor, ConfigReward)):
        return config.model_folder
    if isinstance(config, Config):
        return config.actor.model_folder
    raise ValueError("Config type not recognized during saving or loading")
@staticmethod
def get_model_type_from_config(config: ConfigType) -> str:
    """Return the model type ("reward", "critic", "actor" or "actor_rl").

    Args:
        config (ConfigType): the config object. A ``ConfigReward`` is used
            for both the reward and the critic model, the ``is_reward``
            flag tells them apart. A ``ConfigActor`` maps to "actor" and a
            full ``Config`` to "actor_rl".

    Raises:
        ValueError: if the config is of none of the supported types.
            Without this the method would silently return None and the
            failure would only surface later, when the value is used to
            build a path.
    """
    if isinstance(config, ConfigReward):
        # ad-hoc flag from config to distinguish reward and critic
        return "reward" if config.is_reward else "critic"
    if isinstance(config, ConfigActor):
        return "actor"
    if isinstance(config, Config):
        return "actor_rl"
    raise ValueError(
        "Config type not recognized during saving or loading"
    )
@staticmethod
def get_model_name_from_config(config: ConfigType) -> str:
    """Return the model name used to build file names.

    The name is read from ``config.actor.model`` for a full ``Config`` and
    from ``config.model`` for reward and actor configs. Names listed in
    ``hf_models`` are reduced to their last path component.

    Args:
        config (ConfigType): the config object

    Raises:
        ValueError: if no model name could be read from the config
    """
    if isinstance(config, Config):
        name = config.actor.model
    elif isinstance(config, (ConfigReward, ConfigActor)):
        name = config.model
    else:
        name = None
    if name in hf_models:
        # keep only what follows the last path separator
        return os.path.basename(name)
    if name is None:
        raise ValueError("Model name not found")
    return name
@staticmethod
def delete_old_checkpoints(
model_folder: str, model_name: str, n_ckp_to_keep: int = 5
):
"""Method to discard old checkpoints, keeping only the last
n_ckp_to_keep
Args:
model_folder (str): the folder where the checkpoints are saved
model_name (str): the name of the model
n_ckp_to_keep (int): the number of checkpoints to keep
"""
# remove .pt to model name
model_name = model_name.split(".")[0]
checkpoints = [
f for f in os.listdir(model_folder) if f.startswith(model_name)
]
if len(checkpoints) == 0:
return
else:
checkpoints = sorted(checkpoints)
# check if the number of checkpoint is greater than 5
if len(checkpoints) > n_ckp_to_keep:
for c in checkpoints[:-n_ckp_to_keep]:
checkpoint_path = os.path.join(model_folder, c)
os.remove(checkpoint_path)
@staticmethod
def get_model_path(
    config: ConfigType,
    is_checkpoint: bool = False,
    current_epoch: Optional[int] = None,
    current_step: Optional[int] = None,
    max_epochs: int = 1_000_000_000,
    max_steps: int = 1_000_000_000,
) -> Tuple[str, str, Optional[str]]:
    """Build folder, file name and path of a model or checkpoint file.

    The folder is created if it does not exist yet. The layout is:
    -- model_folder: the trained models, one folder per model type
        -- actor
        -- critic
        -- reward
        -- actor_rl
        -- checkpoints: the checkpoints saved during training, one
            folder per model type
            -- actor
            -- critic
            -- reward
            -- actor_rl

    Args:
        config (ConfigType): the config object, contains info of the model
        is_checkpoint (bool): if True, the path is for a checkpoint
        current_epoch (Optional[int]): the current epoch, used to create
            the checkpoint name. If is_checkpoint is True and
            current_epoch is None, only the folder and the plain model
            name are meaningful.
        current_step (Optional[int]): the current step, appended to the
            checkpoint name after the epoch when provided.
        max_epochs (int): the maximum number of epochs, used to size the
            zero padding of the checkpoint name.
        max_steps (int): the maximum number of steps, used to size the
            zero padding of the checkpoint name.

    Returns:
        model_folder (str): the folder where the model is saved
        model_name (str): the file name of the model
        path (Optional[str]): the path to the model. If is_checkpoint is
            True and current_epoch is None, return None
    """
    # <base folder>[/checkpoints]/<model type>
    folder_parts = [ModelLoader.get_base_model_folder_from_config(config)]
    if is_checkpoint:
        folder_parts.append("checkpoints")
    folder_parts.append(ModelLoader.get_model_type_from_config(config))
    model_folder = os.path.join(*folder_parts)

    if not os.path.exists(model_folder):
        os.makedirs(model_folder, exist_ok=True)
        print(f"Model folder does not exist. Creating it: {model_folder}")

    base_name = ModelLoader.get_model_name_from_config(config)
    epoch_known = current_epoch is not None

    if is_checkpoint and epoch_known:
        # Left-pad the counters with zeros to a common width so that the
        # alphabetical order of the file names is also the temporal one.
        width = max(len(str(max_epochs)), len(str(max_steps)))
        padded_epoch = str(current_epoch).rjust(width, "0")
        tag = f"_epoch_{padded_epoch}"
        if current_step is not None:
            padded_step = str(current_step).rjust(width, "0")
            tag = f"{tag}_step_{padded_step}"
        model_name = f"{base_name}{tag}.pt"
    else:
        model_name = f"{base_name}.pt"

    # A checkpoint without an epoch cannot be mapped to a file: only the
    # folder and the plain model name can be returned.
    if is_checkpoint and not epoch_known:
        return model_folder, model_name, None
    return model_folder, model_name, os.path.join(model_folder, model_name)
@staticmethod
def check_model_path(
    config: ConfigType,
    is_checkpoint: bool = False,
    current_epoch: Optional[int] = None,
    current_step: Optional[int] = None,
) -> Optional[str]:
    """Check if the model path exists, to load models or checkpoints.

    Args:
        config (ConfigType): the config object, contains info of the model
        is_checkpoint (bool): if True, the path is for a checkpoint
        current_epoch (Optional[int]): the current epoch.
            If is_checkpoint is True and current_epoch is None, the
            checkpoint named in the config is looked up or, when no name
            is configured, the last checkpoint in the folder.
        current_step (Optional[int]): the current step, forwarded to
            ``ModelLoader.get_model_path`` together with the epoch to
            build the checkpoint file name.

    Returns:
        path (Optional[str]): the path to the model or checkpoint file if
            it exists on disk, None otherwise. The outcome of the search
            is also printed.
    """
    model_folder, model_name, path = ModelLoader.get_model_path(
        config,
        is_checkpoint=is_checkpoint,
        current_epoch=current_epoch,
        current_step=current_step,
    )

    # Looking for a checkpoint without knowing its epoch.
    if is_checkpoint and current_epoch is None:
        # If the checkpoint is specified by name use it
        checkpoint_name = ModelLoader.get_checkpoint_name(config)
        if checkpoint_name is not None:
            checkpoint = ModelLoader.look_for_checkpoint_by_name(
                model_folder, checkpoint_name
            )
        else:
            checkpoint = ModelLoader.look_for_last_checkpoint(
                model_folder, model_name
            )
        if checkpoint is not None:
            path = os.path.join(model_folder, checkpoint)

    if path is not None and not os.path.exists(path):
        path = None

    if path is None:
        if not is_checkpoint:
            print(
                f"No previous model found at "
                f"{model_folder} for model {model_name}"
            )
            return None
        checkpoint_name = ModelLoader.get_checkpoint_name(config)
        if checkpoint_name is not None:
            # Use the resolved name: a full Config does not expose
            # "checkpoint_name" as a direct attribute.
            print(
                f"No checkpoint found at {model_folder} "
                f"with name {checkpoint_name}"
            )
        else:
            print(
                f"No previous checkpoint found at "
                f"{model_folder} for {model_name}"
            )
        return None

    if not is_checkpoint:
        print(f"Found model at {path}")
        return path

    # The name is modelname_epoch_00000001_step_00000001.pt
    # or modelname_epoch_00000001.pt. Parse the file name only, so that
    # "_epoch_" or "_step_" inside a folder name cannot interfere.
    file_name = os.path.basename(path)
    epoch = None
    step = None
    try:
        if "_step_" in file_name:
            epoch = int(file_name.split("_epoch_")[-1].split("_")[0])
            step = int(file_name.split("_step_")[-1].split(".")[0])
        else:
            epoch = int(file_name.split("_epoch_")[-1].split(".")[0])
    except ValueError:
        # A checkpoint selected by a custom name does not have to follow
        # the naming convention; it is still a valid checkpoint.
        epoch = None
        step = None

    if epoch is None:
        print(f"Found checkpoint at {path}")
    elif step is None:
        print(f"Found checkpoint for epoch {epoch + 1} ...")
    else:
        print(
            f"Found checkpoint for epoch {epoch + 1},"
            f" step {step + 1}..."
        )
    return path
@staticmethod
def init_critic_from_reward(config: ConfigCritic) -> None:
    """Initialize the critic from the reward model.

    If no critic model file is found, the reward model file is looked up
    and, when it exists, copied to the critic model path. To look up the
    reward model the ``is_reward`` flag of the config is temporarily set
    to True and reset to False afterwards.

    Args:
        config (ConfigCritic): the config of the critic model

    Raises:
        ValueError: if the config is flagged as a reward model config
    """
    if config.is_reward is True:
        raise ValueError(
            "The config should work for the Critic model,"
            "but the config seems to be for the Reward model"
        )
    # check that the critic folder is empty
    path = ModelLoader.check_model_path(config)
    _, _, critic_path = ModelLoader.get_model_path(config)
    if path is None:
        print("Initializing Critic from Reward model...")
        config.is_reward = True
        try:
            path = ModelLoader.check_model_path(config)
            if path is not None:
                _, _, reward_path = ModelLoader.get_model_path(config)
                # copy the file in reward_path to critic_path
                shutil.copy(reward_path, critic_path)
            else:
                print("Critic Model remains uninitialized")
        finally:
            # Always restore the flag, even if the lookup or the copy
            # fails, so the caller never keeps a critic config that is
            # marked as a reward config.
            config.is_reward = False
================================================
FILE: optimization/chatllama/chatllama/rlhf/reward.py
================================================
import json
import shutil
import os
import deepspeed
import torch
from accelerate import Accelerator
from beartype import beartype
from beartype.typing import Iterable, Tuple
from einops.layers.torch import Rearrange
from torch.utils.data import Dataset, DataLoader
from transformers import (
AutoModel,
AutoTokenizer,
)
from chatllama.rlhf.config import ConfigReward
from chatllama.rlhf.model_list import hf_models
from chatllama.rlhf.model_loader import ModelLoader
from chatllama.rlhf.utils import TrainingStats
class RewardModel(torch.nn.Module):
    """Model to be trained to predict the reward for RL,
    or to be used as Critic in RL. It is a Language Model with a head
    that predicts the reward (a scalar) for a given sequence of tokens.

    Attributes:
        model (torch.nn.Module): Language model loaded with
            ``AutoModel.from_pretrained``
        tokenizer: Tokenizer loaded with ``AutoTokenizer.from_pretrained``
        head (torch.nn.Module): Head mapping each hidden state to a scalar
        config (ConfigReward): Config parameters for the reward model

    Methods:
        load_tokenizer: Load the tokenizer for the reward model
        forward: Forward pass of the model (used by the critic)
        save: Save the model
        load: Load the model
        get_reward: Get the reward for a given input (used by the reward
            model)
        parameters: Return the parameters of the reward model
    """

    def __init__(self, config: ConfigReward) -> None:
        """Build the language model and the head, then load saved weights.

        Args:
            config (ConfigReward): Config parameters for the reward model

        Raises:
            ValueError: if ``config.model`` is not listed in ``hf_models``
        """
        super().__init__()

        # store config
        self.config = config

        # initialize the self.model
        head_hidden_size = config.model_head_hidden_size
        if config.model in hf_models:
            self.tokenizer = self.load_tokenizer(config)
            self.model = AutoModel.from_pretrained(config.model)
            head_dim = self.model.config.hidden_size
            if config.model.startswith("gpt2"):
                head_dim = self.model.config.n_embd
            self.head = torch.nn.Sequential(
                torch.nn.Linear(head_dim, head_hidden_size),
                torch.nn.ReLU(),
                torch.nn.Linear(head_hidden_size, 1),
                # drop the trailing dimension of size 1
                Rearrange("... 1 -> ..."),
            )
        else:
            raise ValueError(f"Model {config.model} not supported")

        # load the model
        self.load()

        # NOTE: the language model is not frozen, both the language model
        # and the head are returned by parameters() for training.

        # move model to device
        self.model.to(config.device)
        self.head.to(config.device)

    @staticmethod
    def load_tokenizer(config: ConfigReward):
        """Load the tokenizer of ``config.model`` from HF.

        The tokenizer pads on the left. If it has no eos token, "</s>" is
        set; if it has no pad token, the eos token is used for padding.

        Args:
            config (ConfigReward): Config parameters for the reward model
        """
        # load tokenizer from HF
        tokenizer = AutoTokenizer.from_pretrained(
            config.model,
            padding_side="left",
            padding=True,
            truncation=True,
            model_max_length=config.max_sequence_length,
        )
        # add eos token if not present
        if tokenizer.eos_token is None:
            tokenizer.eos_token = "</s>"
            tokenizer.eos_token_id = 2  # OPT eos token id
        # add pad token if not present
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
            tokenizer.pad_token_id = tokenizer.eos_token_id
        return tokenizer

    @beartype
    def load(self) -> None:
        """Load the weights of the model and of the head, if available.

        Nothing is loaded when ``ModelLoader.check_model_path`` finds no
        saved model for the config.

        Raises:
            KeyError: if the saved file holds no language model weights
                (neither under "state_dict" nor under "model") or no
                "head" entry
        """
        # look for a pretrained model
        path = ModelLoader.check_model_path(
            config=self.config,
            is_checkpoint=False,
            current_epoch=None,
        )
        # nothing saved yet: keep the freshly initialized weights
        if path is None:
            return

        # load the model from the path
        print("Loading ...")
        # Map the tensors to the CPU so that a file saved on a GPU can be
        # read on a machine without one; load_state_dict copies the
        # values into the existing parameters and __init__ moves the
        # modules to config.device afterwards.
        model_dict = torch.load(path, map_location="cpu")
        # save() stores the language model under "model"; "state_dict"
        # is accepted as well and takes precedence when present
        model_state = model_dict.get("state_dict") or model_dict.get(
            "model"
        )
        if model_state is None:
            raise KeyError(
                f"No 'state_dict' or 'model' entry found in {path}"
            )
        self.model.load_state_dict(model_state)
        self.head.load_state_dict(model_dict["head"])

    @beartype
    def save(self) -> None:
        """Save the weights of the model and of the head.

        The file is a dict with the keys "model" and "head", written to
        the path returned by ``ModelLoader.get_model_path``.
        """
        # get the path to save the model
        _, _, path = ModelLoader.get_model_path(
            config=self.config,
            is_checkpoint=False,
            current_epoch=None,
        )
        # save the model
        print(f"Saving model to {path} ...")
        torch.save(
            {"model": self.model.state_dict(), "head": self.head.state_dict()},
            path,
        )

    @beartype
    def parameters(
        self,
    ) -> Iterable[torch.nn.Parameter]:
        """Return the parameters of the reward model

        The parameters of the language model come first, followed by the
        parameters of the head.
        """
        yield from self.model.parameters()
        yield from self.head.parameters()

    @beartype
    def forward(
        self, output_sequence: torch.Tensor, output_sequence_mask: torch.Tensor
    ) -> torch.Tensor:
        """Generate the sequence of rewards for the given output sequence
        what is the quality of the output sequence tokens?

        Args:
            output_sequence (torch.Tensor): The sequence of tokens to be
                evaluated
            output_sequence_mask (torch.Tensor): Mask for the attention

        Returns:
            torch.Tensor: Rewards for the given output sequence, obtained
                by applying the head to the last hidden state of the
                language model
        """
        output = self.model(
            output_sequence, attention_mask=output_sequence_mask
        )
        # What if the output_sequence is longer than the max context of
        # the model?
        rewards = self.head(output.last_hidden_state)
        if self.config.debug:
            print("RewardModel.forward")
            print("output_sequence.shape", output_sequence.shape)
            print("output_sequence", output_sequence)
            print("reward.shape", rewards.shape)
            print("reward", rewards)
        return rewards

    @beartype
    def get_reward(
        self, output_sequence: torch.Tensor, output_sequence_mask: torch.Tensor
    ) -> torch.Tensor:
        """Get the reward for the given output sequence

        Args:
            output_sequence (torch.Tensor): The concatenation of initial
                input and actor output as tokens
            output_sequence_mask (torch.Tensor): Mask for the attention

        Returns:
            torch.Tensor: the rewards computed by ``forward``, taken at
                the last index of the second dimension

        Raises:
            ValueError: if the second dimension of ``output_sequence`` is
                larger than ``config.max_sequence_length``
        """
        if output_sequence.shape[1] > self.config.max_sequence_length:
            raise ValueError(
                f"Output sequence is too long: {output_sequence.shape[1]}"
                f" > {self.config.max_sequence_length}"
            )
        rewards = self.forward(output_sequence, output_sequence_mask)
        return rewards[:, -1]
# The critic uses the very same class as the reward model; the alias
# exists just to keep namings consistent.
CriticModel = RewardModel
class RewardDataset(Dataset):
    """Dataset class for the reward model

    read a json file with the following format:
    [
        {
            "user_input": "...",
            "completion": "...",
            "score": ...
        },
        ...
    ]
    Where:
        user_input: the initial input of the user
        completion: the completion generated by the model
        score: the score given by the user to the completion (or by the
            LLM). A missing score (null or empty string) is replaced by
            the default 2.5.
    """

    def __init__(self, path: str) -> None:
        """Read all the samples of the json file at ``path`` in memory."""
        print(f"Loading dataset from {path}")
        # explicit encoding: do not depend on the locale of the machine
        with open(path, "r", encoding="utf-8") as f:
            self.data = list(json.load(f))
        print(f"Loaded {len(self.data)} samples")

    def __getitem__(self, idx: int):
        """Return the sample at ``idx`` as a (text, score) tuple.

        The text is the user input directly followed by the completion,
        the score is a float.
        """
        sample = self.data[idx]
        raw_score = sample["score"]
        # Only a missing score falls back to the default. A plain
        # truthiness test would also replace a legitimate score of 0.
        if raw_score is None or raw_score == "":
            score = 2.5
        else:
            score = float(raw_score)
        return (sample["user_input"] + sample["completion"], score)

    def __len__(
        self,
    ):
        """Return the number of samples."""
        return len(self.data)
class RewardTrainer:
"""Class to train the reward model
Args:
config (ConfigModel): Config parameters for the model
Attributes:
model (RewardModel): Reward model
config (ConfigModel): Config parameters for the model
optimizer (torch.optim): Optimizer for the model
loss_function (torch.nn): Loss function for the model
validation_flag (bool): Flag to indicate if the validation dataset
is available
train_dataset (RewardDataset): Dataset for training
validation_dataset (RewardDataset): Dataset for validation
train_dataloader (DataLoader): Dataloader for training
validation_dataloader (DataLoader): Dataloader for validation
scheduler (torch.optim.lr_scheduler): Scheduler for the optimizer
training_stats (List[Dict]): List of dictionaries with the training
statistics
model_engine (ModelEngine): Model engine to train the model
using deepspeed
accelerator (Accelerator): Accelerator to train the model using
accelerate by HF.
Methods:
train: Train the reward model
save_checkpoints: Save the checkpoints of the model
load_checkpoints: Load the checkpoints of the model
"""
def __init__(self, config: ConfigReward) -> None:
# save the config
self.config = config
# load the model
self.reward = RewardModel(config)
# optimizer
self.optimizer = torch.optim.AdamW(
self.reward.parameters(), lr=config.lr
)
# loss function
self.loss_function = torch.nn.MSELoss()
# check validation dataset
self.validation_flag = False
if config.validation_dataset_path is not None:
self.validation_flag = True
# create dataset and dataloaders
self.train_dataset = RewardDataset(config.train_dataset_path)
self.train_dataloader = DataLoader(
self.train_dataset, batch_size=config.batch_size
)
if self.validation_flag:
self.eval_dataset = RewardDataset(config.validation_dataset_path)
self.validation_dataloader = DataLoader(
self.eval_dataset, batch_size=config.batch_size
)
# intilize scheduler - learning rate will drop to 10% of the initial
# value
self.scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
self.optimizer,
T_0=len(self.train_dataset) // config.batch_size,
T_mult=1,
eta_min=config.lr * 0.1,
last_epoch=-1,
)
# initialize training stats
stats_path = ModelLoader.get_training_stats_path(config)
self.training_stats = TrainingStats(stats_path)
# consistency check between accelerate and deepspeed
if config.accelerate_enable and config.deepspeed_enable:
raise ValueError(
"Both DeepSpeed and Accelerate are enabled for the Reward."
"Please choose one of them."
)
# initialize deepspeed
self.model_engine = None
if config.deepspeed_enable is True:
if config.deepspeed_config_path is None:
raise ValueError(
"DeepSpeed config path is None, but deepspeed is enabled"
)
if os.path.exists(config.deepspeed_config_path) is False:
raise ValueError(
f"DeepSpeed config path {config.deepspeed_config_path}"
f"does not exist"
)
(
self.model_engine,
self.optimizer,
self.train_dataloader,
self.scheduler,
) = deepspeed.initialize(
args=None,
model=self.reward,
model_parameters=self.reward.parameters(),
training_data=self.train_dataset,
config=self.config.deepspeed_config_path,
)
print("Training with DeepSpeed")
# initialize accelerate
self.accelerator = None
if config.accelerate_enable is True:
self.accelerator = Accelerator()
(
self.reward,
self.optimizer,
self.train_dataloader,
self.scheduler,
) = self.accelerator.prepare(
self.reward,
self.optimizer,
self.train_dataloader,
self.scheduler,
)
print("Training wit
gitextract_7q29s3ew/
├── .gitignore
├── CITATION.cff
├── CODE_OF_CONDUCT.md
├── README.md
├── monitoring/
│ └── nebuly/
│ └── __init__.py
└── optimization/
├── .github/
│ └── workflows/
│ └── tests.yml
├── chatllama/
│ ├── LICENSE
│ ├── README.md
│ ├── artifacts/
│ │ ├── config/
│ │ │ ├── config.yaml
│ │ │ ├── ds_config.json
│ │ │ └── peft_config.yaml
│ │ ├── datasets/
│ │ │ ├── actor_dataset.json
│ │ │ ├── reward_dataset.json
│ │ │ └── rlhf_dataset.json
│ │ ├── download_dataset.py
│ │ ├── extend_rlhf_dataset.py
│ │ ├── generate_actor_dataset.py
│ │ ├── generate_rewards.py
│ │ ├── main.py
│ │ └── templates.json
│ ├── chatllama/
│ │ ├── __init__.py
│ │ ├── langchain_modules/
│ │ │ ├── __init__.py
│ │ │ └── prompt_templates.py
│ │ ├── llama_model.py
│ │ └── rlhf/
│ │ ├── __init__.py
│ │ ├── actor.py
│ │ ├── config.py
│ │ ├── dataset.py
│ │ ├── model_list.py
│ │ ├── model_loader.py
│ │ ├── reward.py
│ │ ├── trainer.py
│ │ └── utils.py
│ └── setup.py
├── cloud_surfer/
│ └── README.md
├── forward_forward/
│ ├── README.md
│ ├── forward_forward/
│ │ ├── __init__.py
│ │ ├── api/
│ │ │ ├── __init__.py
│ │ │ └── functions.py
│ │ ├── app.py
│ │ ├── operations/
│ │ │ ├── __init__.py
│ │ │ ├── build_models.py
│ │ │ ├── data.py
│ │ │ ├── fetch_operations.py
│ │ │ └── trainers.py
│ │ ├── root_op.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── labels.py
│ │ ├── modules.py
│ │ └── utils.py
│ ├── requirements.txt
│ └── setup.py
├── large_speedster/
│ └── README.md
├── nebullvm/
│ ├── .pre-commit-config.yaml
│ ├── CONTRIBUTING.md
│ ├── Dockerfile
│ ├── LICENSE
│ ├── MANIFEST.in
│ ├── README.md
│ ├── azure-pipelines.yml
│ ├── docker_build.sh
│ ├── docs/
│ │ ├── Makefile
│ │ ├── README.md
│ │ ├── conf.py
│ │ ├── index.rst
│ │ ├── modules/
│ │ │ ├── api.rst
│ │ │ ├── converters.rst
│ │ │ ├── index.rst
│ │ │ ├── inference_learners.rst
│ │ │ ├── installers.rst
│ │ │ └── optimizers.rst
│ │ └── requirements-docs.txt
│ ├── nebullvm/
│ │ ├── __init__.py
│ │ ├── api/
│ │ │ └── __init__.py
│ │ ├── apps/
│ │ │ ├── __init__.py
│ │ │ └── base.py
│ │ ├── config.py
│ │ ├── core/
│ │ │ ├── __init__.py
│ │ │ ├── models.py
│ │ │ ├── tests/
│ │ │ │ ├── __init__.py
│ │ │ │ └── test_models.py
│ │ │ └── types.py
│ │ ├── installers/
│ │ │ ├── __init__.py
│ │ │ ├── auto_installer.py
│ │ │ ├── install_bladedisc.sh
│ │ │ ├── install_fastertransformer.sh
│ │ │ ├── install_tensor_rt.sh
│ │ │ ├── install_tvm.sh
│ │ │ ├── install_tvm_prerequisites.sh
│ │ │ ├── installers.py
│ │ │ ├── tests/
│ │ │ │ ├── __init__.py
│ │ │ │ └── test_install_frameworks.py
│ │ │ └── tvm_installers/
│ │ │ ├── arm/
│ │ │ │ └── config.cmake
│ │ │ ├── arm_cuda/
│ │ │ │ └── config.cmake
│ │ │ ├── x86/
│ │ │ │ └── config.cmake
│ │ │ └── x86_cuda/
│ │ │ └── config.cmake
│ │ ├── operations/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── conversions/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── converters.py
│ │ │ │ ├── huggingface.py
│ │ │ │ ├── pytorch.py
│ │ │ │ ├── tensorflow.py
│ │ │ │ └── utils.py
│ │ │ ├── fetch_operations/
│ │ │ │ ├── __init__.py
│ │ │ │ └── local.py
│ │ │ ├── inference_learners/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── blade_disc.py
│ │ │ │ ├── builders.py
│ │ │ │ ├── deepsparse.py
│ │ │ │ ├── faster_transformer.py
│ │ │ │ ├── huggingface.py
│ │ │ │ ├── neural_compressor.py
│ │ │ │ ├── onnx.py
│ │ │ │ ├── openvino.py
│ │ │ │ ├── tensor_rt.py
│ │ │ │ ├── tensorflow.py
│ │ │ │ ├── torch_dynamo.py
│ │ │ │ ├── torch_neuron.py
│ │ │ │ ├── torch_xla.py
│ │ │ │ ├── torchscript.py
│ │ │ │ ├── tvm.py
│ │ │ │ └── utils.py
│ │ │ ├── measures/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── measures.py
│ │ │ │ └── utils.py
│ │ │ └── optimizations/
│ │ │ ├── __init__.py
│ │ │ ├── compilers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── deepsparse.py
│ │ │ │ ├── faster_transformer/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── bert/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── checkpoint_quantization.py
│ │ │ │ │ │ └── modeling_bert.py
│ │ │ │ │ └── gpt/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── utils/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── gpt_decoder.py
│ │ │ │ │ └── huggingface_gpt_convert.py
│ │ │ │ ├── intel_neural_compressor.py
│ │ │ │ ├── onnxruntime.py
│ │ │ │ ├── openvino.py
│ │ │ │ ├── quantizations/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── intel_neural_compressor.py
│ │ │ │ │ ├── onnx.py
│ │ │ │ │ ├── openvino.py
│ │ │ │ │ ├── pytorch.py
│ │ │ │ │ ├── tensor_rt.py
│ │ │ │ │ ├── tensorflow.py
│ │ │ │ │ ├── tvm.py
│ │ │ │ │ └── utils.py
│ │ │ │ ├── tensor_rt.py
│ │ │ │ ├── tensorflow.py
│ │ │ │ ├── torch_dynamo.py
│ │ │ │ ├── torch_neuron.py
│ │ │ │ ├── torch_xla.py
│ │ │ │ ├── torchscript.py
│ │ │ │ ├── tvm.py
│ │ │ │ └── utils.py
│ │ │ ├── compressors/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── intel.py
│ │ │ │ ├── scripts/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── neural_magic_training.py
│ │ │ │ └── sparseml.py
│ │ │ ├── optimize_inference.py
│ │ │ ├── optimizers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ └── optimizers.py
│ │ │ ├── tests/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── test_deepsparse.py
│ │ │ │ ├── test_intel_neural_compressor.py
│ │ │ │ ├── test_onnxruntime.py
│ │ │ │ ├── test_openvino.py
│ │ │ │ ├── test_tensor_rt.py
│ │ │ │ ├── test_tensorflow.py
│ │ │ │ ├── test_torch_dynamo.py
│ │ │ │ ├── test_torchscript.py
│ │ │ │ ├── test_tvm.py
│ │ │ │ └── utils.py
│ │ │ └── utils.py
│ │ ├── optional_modules/
│ │ │ ├── __init__.py
│ │ │ ├── blade_disc.py
│ │ │ ├── deepsparse.py
│ │ │ ├── diffusers.py
│ │ │ ├── dummy.py
│ │ │ ├── huggingface.py
│ │ │ ├── neural_compressor.py
│ │ │ ├── onnx.py
│ │ │ ├── onnxruntime.py
│ │ │ ├── onnxsim.py
│ │ │ ├── openvino.py
│ │ │ ├── tensor_rt.py
│ │ │ ├── tensorflow.py
│ │ │ ├── torch.py
│ │ │ ├── torch_neuron.py
│ │ │ ├── torch_tensorrt.py
│ │ │ ├── torch_xla.py
│ │ │ ├── tvm.py
│ │ │ └── utils.py
│ │ └── tools/
│ │ ├── __init__.py
│ │ ├── adapters.py
│ │ ├── benchmark.py
│ │ ├── data.py
│ │ ├── diffusers.py
│ │ ├── feedback_collector.py
│ │ ├── hardware_utils.py
│ │ ├── huggingface.py
│ │ ├── logger.py
│ │ ├── onnx.py
│ │ ├── pytorch.py
│ │ ├── tests/
│ │ │ ├── __init__.py
│ │ │ ├── test_data.py
│ │ │ ├── test_hardware_utils.py
│ │ │ └── test_utils.py
│ │ ├── tf.py
│ │ ├── transformations.py
│ │ ├── utils.py
│ │ └── venv.py
│ ├── nebullvm.toml
│ ├── requirements-dev.txt
│ ├── requirements.txt
│ └── setup.py
├── open_alpha_tensor/
│ ├── README.md
│ ├── config.json
│ ├── main.py
│ ├── open_alpha_tensor/
│ │ ├── __init__.py
│ │ ├── api/
│ │ │ ├── __init__.py
│ │ │ └── functions.py
│ │ ├── config.py
│ │ ├── core/
│ │ │ ├── __init__.py
│ │ │ ├── actors/
│ │ │ │ ├── __init__.py
│ │ │ │ └── stage.py
│ │ │ ├── data/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── basis_change.py
│ │ │ │ ├── dataset.py
│ │ │ │ ├── generation.py
│ │ │ │ └── utils.py
│ │ │ ├── modules/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── alpha_tensor.py
│ │ │ │ ├── attention.py
│ │ │ │ ├── extras.py
│ │ │ │ ├── heads.py
│ │ │ │ └── torso.py
│ │ │ └── training.py
│ │ ├── operations/
│ │ │ ├── __init__.py
│ │ │ ├── checkpoint_op.py
│ │ │ ├── model_op.py
│ │ │ └── training_op.py
│ │ └── root_op.py
│ ├── resources/
│ │ └── open_alpha_tensor.md
│ └── setup.py
├── optimate/
│ └── README.md
└── speedster/
├── README.md
├── docs/
│ └── en/
│ ├── docs/
│ │ ├── advanced_options.md
│ │ ├── benchmarks.md
│ │ ├── getting_started/
│ │ │ ├── diffusers_getting_started.md
│ │ │ ├── hf_getting_started.md
│ │ │ ├── onnx_getting_started.md
│ │ │ ├── pytorch_getting_started.md
│ │ │ └── tf_getting_started.md
│ │ ├── hardware.md
│ │ ├── installation.md
│ │ ├── key_concepts.md
│ │ ├── notebooks.md
│ │ ├── overview.md
│ │ └── telemetry.md
│ └── mkdocs.yaml
├── notebooks/
│ ├── README.md
│ ├── diffusers/
│ │ ├── Accelerate_Stable_Diffusion_with_Speedster.ipynb
│ │ └── Readme.md
│ ├── huggingface/
│ │ ├── Accelerate_Hugging_Face_PyTorch_BERT_with_Speedster.ipynb
│ │ ├── Accelerate_Hugging_Face_PyTorch_DistilBERT_with_Speedster.ipynb
│ │ ├── Accelerate_Hugging_Face_PyTorch_GPT2_with_Speedster.ipynb
│ │ ├── Accelerate_Hugging_Face_PyTorch_T5_with_Speedster.ipynb
│ │ ├── Accelerate_Hugging_Face_TensorFlow_BERT_with_Speedster.ipynb
│ │ ├── Readme.md
│ │ └── faster_transformer_bert.py
│ ├── onnx/
│ │ ├── Accelerate_ONNX_ResNet50_with_Speedster.ipynb
│ │ └── Readme.md
│ ├── pytorch/
│ │ ├── Accelerate_PyTorch_ResNet50_with_Speedster.ipynb
│ │ ├── Accelerate_PyTorch_ViT_with_Speedster.ipynb
│ │ ├── Accelerate_PyTorch_YOLOv5_with_Speedster.ipynb
│ │ ├── Accelerate_PyTorch_YOLOv8_with_Speedster.ipynb
│ │ ├── Accelerate_fast_ai_Resnet34_with_Speedster.ipynb
│ │ └── Readme.md
│ └── tensorflow/
│ ├── Accelerate_Tensorflow_ResNet50_with_Speedster.ipynb
│ └── Readme.md
├── requirements.txt
├── setup.py
├── speedster/
│ ├── __init__.py
│ ├── api/
│ │ ├── __init__.py
│ │ ├── functions.py
│ │ └── tests/
│ │ ├── __init__.py
│ │ ├── test_huggingface.py
│ │ ├── test_onnx.py
│ │ ├── test_pytorch.py
│ │ ├── test_tensorflow.py
│ │ └── utils.py
│ ├── root_op.py
│ ├── speedster.py
│ ├── tests/
│ │ ├── __init__.py
│ │ └── test_root_op.py
│ └── utils.py
└── speedster.toml
SYMBOL INDEX (1553 symbols across 149 files)
FILE: optimization/chatllama/artifacts/extend_rlhf_dataset.py
function _get_template_and_variables (line 8) | def _get_template_and_variables(prompt: str, with_examples: bool):
function use_langchain_model (line 18) | def use_langchain_model(
class HuggingFaceChain (line 39) | class HuggingFaceChain:
method __init__ (line 40) | def __init__(
method run (line 49) | def run(self, **kwargs):
function use_huggingface_model (line 58) | def use_huggingface_model(
function main (line 68) | def main():
FILE: optimization/chatllama/artifacts/generate_actor_dataset.py
function create_conversation (line 15) | def create_conversation(human_agent: LLMChain, bot_agent: LLMChain):
function build_agents (line 27) | def build_agents():
function get_sub_conversations (line 46) | def get_sub_conversations(conversation: str, system_prompt: str):
function main (line 58) | def main():
FILE: optimization/chatllama/artifacts/generate_rewards.py
class ScoreGenerator (line 7) | class ScoreGenerator:
method __init__ (line 8) | def __init__(
method distill (line 33) | def distill(
FILE: optimization/chatllama/chatllama/llama_model.py
class MyTokenizer (line 30) | class MyTokenizer:
method __init__ (line 35) | def __init__(self, model_path: Optional[str] = None):
method encode (line 47) | def encode(
method decode (line 62) | def decode(self, t: List[int]) -> str:
class HFLikeTokenizer (line 68) | class HFLikeTokenizer:
method __init__ (line 69) | def __init__(self, tokenizer: Tokenizer):
method create_sequence_mask (line 84) | def create_sequence_mask(self, tokens: torch.Tensor) -> torch.Tensor:
method __call__ (line 98) | def __call__(self, texts: Union[List[str], str], *args, **kwargs):
method decode (line 140) | def decode(self, tokens):
class ModelArgs (line 145) | class ModelArgs:
class RMSNorm (line 168) | class RMSNorm(torch.nn.Module):
method __init__ (line 174) | def __init__(self, dim: int, eps: float = 1e-6):
method _norm (line 179) | def _norm(self, x):
method forward (line 182) | def forward(self, x):
function precompute_freqs_cis (line 187) | def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0):
function reshape_for_broadcast (line 197) | def reshape_for_broadcast(freqs_cis: torch.Tensor, x: torch.Tensor):
function apply_rotary_emb (line 207) | def apply_rotary_emb(
class Attention (line 220) | class Attention(nn.Module):
method __init__ (line 226) | def __init__(self, args: ModelArgs):
method forward (line 290) | def forward(
class FeedForward (line 342) | class FeedForward(nn.Module):
method __init__ (line 348) | def __init__(
method forward (line 384) | def forward(self, x):
class TransformerBlock (line 388) | class TransformerBlock(nn.Module):
method __init__ (line 394) | def __init__(self, layer_id: int, args: ModelArgs):
method forward (line 411) | def forward(
class Transformer (line 438) | class Transformer(nn.Module):
method __init__ (line 445) | def __init__(self, params: ModelArgs):
method forward (line 499) | def forward(
method _forward (line 506) | def _forward(
method generate (line 550) | def generate(
function setup_model_parallel (line 580) | def setup_model_parallel() -> Tuple[int, int]:
function setup_model_deepspeed (line 594) | def setup_model_deepspeed() -> Tuple[int, int]:
function load_checkpoints (line 606) | def load_checkpoints(
function load_model (line 622) | def load_model(
function load_tokenizer (line 649) | def load_tokenizer(tokenizer_path: str):
function load_tokenizer_test (line 654) | def load_tokenizer_test(tokenizer_path: Optional[str] = None):
function load_model_test (line 659) | def load_model_test(
FILE: optimization/chatllama/chatllama/rlhf/actor.py
class ActorModel (line 30) | class ActorModel(torch.nn.Module):
method __init__ (line 46) | def __init__(self, config: ConfigActor) -> None:
method load (line 112) | def load(self) -> None:
method save (line 130) | def save(self) -> None:
method load_tokenizer (line 147) | def load_tokenizer(config: ConfigActor):
method parameters (line 178) | def parameters(self):
method forward (line 183) | def forward(
method generate (line 211) | def generate(
class ActorDataset (line 277) | class ActorDataset(Dataset):
method __init__ (line 292) | def __init__(
method __getitem__ (line 301) | def __getitem__(self, idx):
method __len__ (line 304) | def __len__(
class ActorTrainer (line 310) | class ActorTrainer:
method __init__ (line 338) | def __init__(self, config: ConfigActor) -> None:
method save_checkpoint (line 434) | def save_checkpoint(
method load_checkpoint (line 497) | def load_checkpoint(
method add_eos_token (line 554) | def add_eos_token(
method train (line 589) | def train(
FILE: optimization/chatllama/chatllama/rlhf/config.py
class ConfigReward (line 11) | class ConfigReward:
class ConfigActor (line 91) | class ConfigActor:
class ConfigTrainer (line 166) | class ConfigTrainer:
class Config (line 221) | class Config:
method __init__ (line 250) | def __init__(
FILE: optimization/chatllama/chatllama/rlhf/dataset.py
class BaseDataset (line 16) | class BaseDataset:
method __init__ (line 17) | def __init__(
method sort_conversation (line 23) | def sort_conversation(
method take_n_samples (line 77) | def take_n_samples(
method clean_dataset (line 98) | def clean_dataset(config: ConfigType):
class StanfordNLPSHPDataset (line 248) | class StanfordNLPSHPDataset(BaseDataset):
method __init__ (line 251) | def __init__(
method reformat_dataset (line 258) | def reformat_dataset(self, data: List) -> List[Dict]:
method save_dataset (line 293) | def save_dataset(
class AnthropicRLHF (line 343) | class AnthropicRLHF(BaseDataset):
method __init__ (line 344) | def __init__(
method reformat_dataset (line 352) | def reformat_dataset(self, data: List) -> List[Dict]:
method save_dataset (line 388) | def save_dataset(
FILE: optimization/chatllama/chatllama/rlhf/model_loader.py
class ModelLoader (line 17) | class ModelLoader:
method __init__ (line 20) | def __init__(
method get_training_stats_path (line 26) | def get_training_stats_path(config: ConfigType) -> str:
method look_for_last_checkpoint (line 39) | def look_for_last_checkpoint(
method look_for_checkpoint_by_name (line 64) | def look_for_checkpoint_by_name(
method get_checkpoint_name (line 84) | def get_checkpoint_name(config: ConfigType) -> str:
method get_base_model_folder_from_config (line 91) | def get_base_model_folder_from_config(config: ConfigType) -> str:
method get_model_type_from_config (line 102) | def get_model_type_from_config(config: ConfigType) -> str:
method get_model_name_from_config (line 116) | def get_model_name_from_config(config: ConfigType) -> str:
method delete_old_checkpoints (line 131) | def delete_old_checkpoints(
method get_model_path (line 159) | def get_model_path(
method check_model_path (line 253) | def check_model_path(
method init_critic_from_reward (line 338) | def init_critic_from_reward(config: ConfigCritic) -> None:
FILE: optimization/chatllama/chatllama/rlhf/reward.py
class RewardModel (line 23) | class RewardModel(torch.nn.Module):
method __init__ (line 44) | def __init__(self, config: ConfigReward) -> None:
method load_tokenizer (line 79) | def load_tokenizer(config: ConfigReward):
method load (line 101) | def load(self) -> None:
method save (line 120) | def save(self) -> None:
method parameters (line 137) | def parameters(
method forward (line 147) | def forward(
method get_reward (line 177) | def get_reward(
class RewardDataset (line 200) | class RewardDataset(Dataset):
method __init__ (line 217) | def __init__(self, path: str) -> None:
method __getitem__ (line 223) | def __getitem__(self, idx: int):
method __len__ (line 234) | def __len__(
class RewardTrainer (line 240) | class RewardTrainer:
method __init__ (line 272) | def __init__(self, config: ConfigReward) -> None:
method save_checkpoint (line 370) | def save_checkpoint(
method load_checkpoint (line 429) | def load_checkpoint(
method train (line 490) | def train(
FILE: optimization/chatllama/chatllama/rlhf/trainer.py
function change_tokenization (line 62) | def change_tokenization(tokens, tokenizer1, tokenizer2):
function check_model_family (line 106) | def check_model_family(config1: ConfigType, config2: ConfigType) -> bool:
class ActorCritic (line 141) | class ActorCritic(torch.nn.Module):
method __init__ (line 161) | def __init__(self, config: Config) -> None:
method load (line 178) | def load(self) -> None:
method save (line 188) | def save(self) -> None:
method save_deepspeed (line 223) | def save_deepspeed(
method forward (line 247) | def forward(
method generate (line 313) | def generate(
class ExperienceDataset (line 430) | class ExperienceDataset(Dataset):
method __init__ (line 433) | def __init__(
method __len__ (line 441) | def __len__(
method __getitem__ (line 446) | def __getitem__(self, idx) -> Tuple:
class ExamplesSampler (line 464) | class ExamplesSampler:
method __init__ (line 478) | def __init__(
method sample (line 487) | def sample(self, n: int) -> List:
class RLTrainer (line 496) | class RLTrainer:
method __init__ (line 522) | def __init__(
method initialize_deepspeed_model (line 624) | def initialize_deepspeed_model(
method save_checkpoint (line 648) | def save_checkpoint(
method load_checkpoint (line 724) | def load_checkpoint(
method learn (line 809) | def learn(self, memories: Deque[Memory]) -> None:
method train (line 1056) | def train(
FILE: optimization/chatllama/chatllama/rlhf/utils.py
class TrainingStats (line 7) | class TrainingStats:
method __init__ (line 18) | def __init__(self, path: str):
method plot (line 31) | def plot(self):
method save (line 59) | def save(
method load (line 82) | def load(
method clear (line 94) | def clear(
class ConversationLog (line 107) | class ConversationLog:
method __init__ (line 113) | def __init__(self, path: str):
method append (line 120) | def append(
method save (line 146) | def save(self):
method load (line 158) | def load(self):
method clear (line 162) | def clear(self):
method show (line 169) | def show(self, current_iteration: int = None):
FILE: optimization/forward_forward/forward_forward/api/functions.py
function train_with_forward_forward_algorithm (line 9) | def train_with_forward_forward_algorithm(
FILE: optimization/forward_forward/forward_forward/app.py
class ForwardForwardApp (line 6) | class ForwardForwardApp(App):
method __init__ (line 7) | def __init__(self):
method execute (line 11) | def execute(self, *args, **kwargs):
FILE: optimization/forward_forward/forward_forward/operations/build_models.py
class BaseModelBuildOperation (line 14) | class BaseModelBuildOperation(Operation, ABC):
method __init__ (line 15) | def __init__(self):
method execute (line 20) | def execute(
method get_result (line 32) | def get_result(self):
class FCNetFFProgressiveBuildOperation (line 36) | class FCNetFFProgressiveBuildOperation(BaseModelBuildOperation):
method __init__ (line 37) | def __init__(self):
method execute (line 40) | def execute(
class RecurrentFCNetFFBuildOperation (line 65) | class RecurrentFCNetFFBuildOperation(BaseModelBuildOperation):
method __init__ (line 66) | def __init__(self):
method execute (line 69) | def execute(
class LMFFNetBuildOperation (line 89) | class LMFFNetBuildOperation(BaseModelBuildOperation):
method __init__ (line 90) | def __init__(self):
method execute (line 93) | def execute(
FILE: optimization/forward_forward/forward_forward/operations/data.py
class MNISTDataLoaderOperation (line 10) | class MNISTDataLoaderOperation(Operation):
method __init__ (line 13) | def __init__(self):
method get_result (line 18) | def get_result(self) -> Any:
method execute (line 24) | def execute(self, batch_size: int, shuffle: bool):
function download_fables (line 58) | def download_fables():
function get_fables (line 65) | def get_fables():
function tokenize (line 108) | def tokenize(fable, max_len=100):
function get_tokenized_fables (line 117) | def get_tokenized_fables():
function get_dataloader (line 130) | def get_dataloader(batch_size=32, test_size=0.2, shuffle=True):
class AesopFablesDataLoaderOperation (line 144) | class AesopFablesDataLoaderOperation(Operation):
method __init__ (line 147) | def __init__(self):
method get_result (line 152) | def get_result(self) -> Any:
method execute (line 158) | def execute(self, batch_size: int, shuffle: bool):
FILE: optimization/forward_forward/forward_forward/operations/fetch_operations.py
class FetchTrainingDataFromLocal (line 7) | class FetchTrainingDataFromLocal(Operation):
method get_result (line 8) | def get_result(self) -> Any:
method execute (line 11) | def execute(self, train_data: DataLoader, test_data: DataLoader):
method get_train_data (line 15) | def get_train_data(self) -> DataLoader:
method get_test_data (line 18) | def get_test_data(self) -> DataLoader:
FILE: optimization/forward_forward/forward_forward/operations/trainers.py
class BaseForwardForwardTrainer (line 21) | class BaseForwardForwardTrainer(Operation, ABC):
method __init__ (line 22) | def __init__(self):
method get_result (line 31) | def get_result(self):
method execute (line 35) | def execute(
method _train (line 63) | def _train(self, *args, **kwargs):
class ForwardForwardTrainer (line 67) | class ForwardForwardTrainer(BaseForwardForwardTrainer):
method _train (line 68) | def _train(self, epochs: int, theta: float, device: str, **kwargs):
class RecurrentForwardForwardTrainer (line 111) | class RecurrentForwardForwardTrainer(BaseForwardForwardTrainer):
method _train (line 112) | def _train(self, epochs: int, theta: float, device: str, **kwargs):
class NLPForwardForwardTrainer (line 150) | class NLPForwardForwardTrainer(BaseForwardForwardTrainer):
method _train (line 151) | def _train(
FILE: optimization/forward_forward/forward_forward/root_op.py
class ForwardForwardModelType (line 21) | class ForwardForwardModelType(Enum):
class ForwardForwardRootOp (line 27) | class ForwardForwardRootOp(Operation):
method __init__ (line 28) | def __init__(self, model_type: ForwardForwardModelType):
method execute (line 44) | def execute(
method get_result (line 92) | def get_result(self):
FILE: optimization/forward_forward/forward_forward/utils/labels.py
class LabelsInjector (line 6) | class LabelsInjector:
method __init__ (line 7) | def __init__(self, labels: List):
method inject_train (line 20) | def inject_train(self, input_image: torch.Tensor, labels: torch.Tensor):
method inject_eval (line 45) | def inject_eval(self, input_image: torch.Tensor):
function select_random_different_label (line 58) | def select_random_different_label(labels: torch.Tensor, n_classes: int):
FILE: optimization/forward_forward/forward_forward/utils/modules.py
function loss_fn (line 10) | def loss_fn(y, theta, sign):
function probabilistic_loss_fn (line 19) | def probabilistic_loss_fn(y, theta, sign):
function alternative_loss_fn (line 29) | def alternative_loss_fn(y, theta, sign):
class BaseFFLayer (line 40) | class BaseFFLayer(torch.nn.Module, ABC):
method ff_train (line 42) | def ff_train(
method positive_eval (line 48) | def positive_eval(self, input_tensor: torch.Tensor, theta: float):
method requires_training (line 52) | def requires_training(self):
class FFLayer (line 56) | class FFLayer(BaseFFLayer):
method __init__ (line 59) | def __init__(
method forward (line 78) | def forward(self, x):
method ff_train (line 81) | def ff_train(
method positive_eval (line 110) | def positive_eval(self, input_tensor: torch.Tensor, theta: float):
class FFNormalization (line 116) | class FFNormalization(BaseFFLayer):
method __init__ (line 117) | def __init__(self):
method forward (line 120) | def forward(self, x):
method ff_train (line 127) | def ff_train(
method positive_eval (line 135) | def positive_eval(self, input_tensor: torch.Tensor, theta: float):
method requires_training (line 144) | def requires_training(self):
class LinearReLU (line 148) | class LinearReLU(torch.nn.Module):
method __init__ (line 149) | def __init__(self, in_features, out_features):
method forward (line 154) | def forward(self, x):
class FCNetFFProgressive (line 158) | class FCNetFFProgressive(BaseFFLayer):
method __init__ (line 164) | def __init__(
method forward (line 186) | def forward(self, x):
method progressive_train (line 191) | def progressive_train(self, dl: torch.utils.data.DataLoader, theta: fl...
method ff_train (line 231) | def ff_train(
method positive_eval (line 248) | def positive_eval(self, input_tensor: torch.Tensor, theta: float):
method device (line 260) | def device(self):
class NormLinearReLU (line 264) | class NormLinearReLU(torch.nn.Module):
method __init__ (line 265) | def __init__(self, in_features, out_features):
method forward (line 270) | def forward(self, x):
class RecurrentFFLayer (line 274) | class RecurrentFFLayer(BaseFFLayer):
method __init__ (line 275) | def __init__(
method forward (line 289) | def forward(self, x_prev, x_same, x_next):
method ff_train (line 295) | def ff_train(
method positive_eval (line 315) | def positive_eval(
class RecurrentProjectionFFLayer (line 327) | class RecurrentProjectionFFLayer(BaseFFLayer):
method __init__ (line 328) | def __init__(
method forward (line 343) | def forward(self, x: torch.Tensor):
method ff_train (line 346) | def ff_train(
method positive_eval (line 364) | def positive_eval(self, x: torch.Tensor, theta: float):
class RecurrentProjectedSoftmaxFFLayer (line 370) | class RecurrentProjectedSoftmaxFFLayer(BaseFFLayer):
method __init__ (line 371) | def __init__(
method forward (line 388) | def forward(self, x: torch.Tensor):
method ff_train (line 394) | def ff_train(
method positive_eval (line 412) | def positive_eval(self, x: torch.Tensor, theta: float):
class RecurrentFCNetFF (line 418) | class RecurrentFCNetFF(BaseFFLayer):
method __init__ (line 421) | def __init__(
method device (line 468) | def device(self):
method bottom_up (line 472) | def bottom_up(self, x: torch.Tensor, y: torch.Tensor):
method forward (line 502) | def forward(self, x: torch.Tensor, prev_states: List[torch.Tensor]):
method ff_train (line 517) | def ff_train(
method positive_eval (line 585) | def positive_eval(self, input_tensor: torch.Tensor, theta: float):
class LMFFLinearSoftmax (line 634) | class LMFFLinearSoftmax(BaseFFLayer):
method __init__ (line 635) | def __init__(
method forward (line 651) | def forward(self, x: torch.Tensor):
method ff_train (line 657) | def ff_train(
method positive_eval (line 681) | def positive_eval(self, x: torch.Tensor):
class LMFFNet (line 686) | class LMFFNet(BaseFFLayer):
method __init__ (line 687) | def __init__(
method forward (line 727) | def forward(self, input_tensor: torch.Tensor):
method ff_train (line 737) | def ff_train(
method LM_ff_train (line 781) | def LM_ff_train(self, input_tensor: torch.Tensor, theta: float):
method positive_eval (line 803) | def positive_eval(self, input_tensor: torch.Tensor, theta: float):
FILE: optimization/forward_forward/forward_forward/utils/utils.py
class ProgressiveTrainingDataset (line 6) | class ProgressiveTrainingDataset(torch.utils.data.Dataset):
method __init__ (line 9) | def __init__(self, dataset_generator: Generator):
method __getitem__ (line 17) | def __getitem__(self, index):
method __len__ (line 20) | def __len__(self):
function compute_perplexity (line 24) | def compute_perplexity(tensor: torch.Tensor):
FILE: optimization/nebullvm/nebullvm/apps/base.py
class App (line 4) | class App(abc.ABC):
method __init__ (line 5) | def __init__(self):
method execute (line 9) | def execute(self, **kwargs):
FILE: optimization/nebullvm/nebullvm/core/models.py
class DeepLearningFramework (line 13) | class DeepLearningFramework(Enum):
class QuantizationType (line 19) | class QuantizationType(Enum):
class Status (line 25) | class Status(Enum):
class DeviceType (line 30) | class DeviceType(Enum):
class DataType (line 37) | class DataType(str, Enum):
method from_framework_format (line 44) | def from_framework_format(
method to_torch_format (line 56) | def to_torch_format(self):
method to_tf_format (line 63) | def to_tf_format(self):
method to_numpy_format (line 70) | def to_numpy_format(self):
class ModelCompiler (line 78) | class ModelCompiler(Enum):
class ModelCompressor (line 99) | class ModelCompressor(Enum):
class OptimizationTime (line 104) | class OptimizationTime(Enum):
class HardwareSetup (line 110) | class HardwareSetup:
class OptimizedModel (line 118) | class OptimizedModel:
class OriginalModel (line 129) | class OriginalModel:
class BenchmarkOriginalModelResult (line 139) | class BenchmarkOriginalModelResult:
class OptimizeInferenceResult (line 147) | class OptimizeInferenceResult:
method metric_drop (line 155) | def metric_drop(self) -> Optional[float]:
method latency_improvement_rate (line 161) | def latency_improvement_rate(self) -> Optional[float]:
method throughput_improvement_rate (line 172) | def throughput_improvement_rate(self) -> Optional[float]:
method size_improvement_rate (line 180) | def size_improvement_rate(self) -> Optional[float]:
class InputInfo (line 188) | class InputInfo:
method __init__ (line 201) | def __init__(self, size: Tuple[int, ...], dtype: str, **extra_info):
method __getattr__ (line 206) | def __getattr__(self, item):
method dict (line 209) | def dict(self):
class DynamicAxisInfo (line 216) | class DynamicAxisInfo:
method dict (line 220) | def dict(self):
method retrieve_output_dim (line 225) | def retrieve_output_dim(
class ModelParams (line 243) | class ModelParams:
method __post_init__ (line 250) | def __post_init__(self):
method dict (line 259) | def dict(self):
method input_sizes (line 274) | def input_sizes(self):
class Device (line 279) | class Device:
method __init__ (line 280) | def __init__(self, type: DeviceType, idx: int = 0):
method from_str (line 285) | def from_str(cls, string: str) -> "Device":
method to_torch_format (line 299) | def to_torch_format(self) -> str:
method to_tf_format (line 307) | def to_tf_format(self) -> str:
method get_total_memory (line 313) | def get_total_memory(self) -> int:
method get_free_memory (line 335) | def get_free_memory(self) -> int:
FILE: optimization/nebullvm/nebullvm/core/tests/test_models.py
class TestOptimizeInferenceResult (line 7) | class TestOptimizeInferenceResult(unittest.TestCase):
method test_latency_improvement_rate__optimized_model_is_none (line 8) | def test_latency_improvement_rate__optimized_model_is_none(self):
method test_latency_improvement_rate__optimized_latency_is_zero (line 16) | def test_latency_improvement_rate__optimized_latency_is_zero(self):
method test_latency_improvement_rate__original_latency_is_zero (line 26) | def test_latency_improvement_rate__original_latency_is_zero(self):
method test_latency_improvement_rate__rate_gt_1 (line 36) | def test_latency_improvement_rate__rate_gt_1(self):
method test_latency_improvement_rate__rate_lt_1 (line 46) | def test_latency_improvement_rate__rate_lt_1(self):
method test_th_improvement_rate__optimized_model_is_none (line 56) | def test_th_improvement_rate__optimized_model_is_none(self):
method test_th_improvement_rate__optimized_th_is_zero (line 64) | def test_th_improvement_rate__optimized_th_is_zero(self):
method test_th_improvement_rate__original_th_is_zero (line 74) | def test_th_improvement_rate__original_th_is_zero(self):
method test_th_improvement_rate__rate_gt_1 (line 84) | def test_th_improvement_rate__rate_gt_1(self):
method test_th_improvement_rate__rate_lt_1 (line 94) | def test_th_improvement_rate__rate_lt_1(self):
method test_size_improvement_rate__optimized_model_is_none (line 104) | def test_size_improvement_rate__optimized_model_is_none(self):
method test_size_improvement_rate__optimized_size_is_zero (line 112) | def test_size_improvement_rate__optimized_size_is_zero(self):
method test_size_improvement_rate__original_size_is_zero (line 122) | def test_size_improvement_rate__original_size_is_zero(self):
method test_size_improvement_rate__rate_gt_1 (line 132) | def test_size_improvement_rate__rate_gt_1(self):
method test_size_improvement_rate__rate_lt_1 (line 142) | def test_size_improvement_rate__rate_lt_1(self):
method test_metric_drop__optimized_model_is_none (line 152) | def test_metric_drop__optimized_model_is_none(self):
method test_metric_drop (line 160) | def test_metric_drop(self):
FILE: optimization/nebullvm/nebullvm/installers/auto_installer.py
function select_frameworks_to_install (line 47) | def select_frameworks_to_install(
function select_compilers_to_install (line 96) | def select_compilers_to_install(
function auto_install_libraries (line 137) | def auto_install_libraries(
function main (line 160) | def main():
FILE: optimization/nebullvm/nebullvm/installers/installers.py
function get_cpu_arch (line 25) | def get_cpu_arch():
function _get_os (line 33) | def _get_os():
function install_tvm (line 37) | def install_tvm(
function install_bladedisc (line 82) | def install_bladedisc():
function install_torch_tensor_rt (line 100) | def install_torch_tensor_rt():
function install_tf2onnx (line 159) | def install_tf2onnx():
function install_tensor_rt (line 182) | def install_tensor_rt():
function install_openvino (line 205) | def install_openvino(with_optimization: bool = True):
function install_onnxruntime (line 246) | def install_onnxruntime():
function install_deepsparse (line 268) | def install_deepsparse():
function install_intel_neural_compressor (line 303) | def install_intel_neural_compressor():
function install_onnx_simplifier (line 327) | def install_onnx_simplifier():
function install_faster_transformer (line 343) | def install_faster_transformer(
class BaseInstaller (line 384) | class BaseInstaller(ABC):
method __init__ (line 385) | def __init__(self, module_list: List[str]):
method install_compilers (line 388) | def install_compilers(
method install_dependencies (line 418) | def install_dependencies(include_framework: List[str]):
method check_framework (line 422) | def check_framework():
method install_framework (line 426) | def install_framework():
class PytorchInstaller (line 430) | class PytorchInstaller(BaseInstaller):
method install_dependencies (line 432) | def install_dependencies(include_framework: List[str]):
method check_framework (line 436) | def check_framework():
method install_framework (line 456) | def install_framework():
class TensorflowInstaller (line 468) | class TensorflowInstaller(BaseInstaller):
method install_dependencies (line 470) | def install_dependencies(include_framework: List[str]):
method check_framework (line 475) | def check_framework():
method install_framework (line 493) | def install_framework():
class ONNXInstaller (line 515) | class ONNXInstaller(BaseInstaller):
method install_dependencies (line 517) | def install_dependencies(include_framework: List[str]):
method check_framework (line 524) | def check_framework():
method install_framework (line 542) | def install_framework():
class HuggingFaceInstaller (line 558) | class HuggingFaceInstaller(BaseInstaller):
method install_dependencies (line 560) | def install_dependencies(include_framework: List[str]):
method check_framework (line 564) | def check_framework():
method install_framework (line 573) | def install_framework():
class DiffusersInstaller (line 585) | class DiffusersInstaller(BaseInstaller):
method install_dependencies (line 587) | def install_dependencies(include_framework: List[str]):
method check_framework (line 608) | def check_framework():
method install_framework (line 620) | def install_framework():
FILE: optimization/nebullvm/nebullvm/installers/tests/test_install_frameworks.py
function test_install_default_option (line 7) | def test_install_default_option():
function test_install_torch_full (line 24) | def test_install_torch_full():
function test_install_torch_base (line 35) | def test_install_torch_base():
function test_install_tensorflow_full (line 46) | def test_install_tensorflow_full():
function test_install_tensorflow_base (line 57) | def test_install_tensorflow_base():
function test_install_onnx_full (line 68) | def test_install_onnx_full():
function test_install_onnx_base (line 79) | def test_install_onnx_base():
function test_install_diffusers_full (line 90) | def test_install_diffusers_full():
function test_install_huggingface_full (line 101) | def test_install_huggingface_full():
function test_install_huggingface_full_tf (line 112) | def test_install_huggingface_full_tf():
function test_install_huggingface_full_torch (line 123) | def test_install_huggingface_full_torch():
function test_install_huggingface_tf (line 134) | def test_install_huggingface_tf():
function test_install_huggingface_torch (line 145) | def test_install_huggingface_torch():
function test_install_huggingface_compilers_all (line 156) | def test_install_huggingface_compilers_all():
function test_install_huggingface_torch_compilers_all (line 167) | def test_install_huggingface_torch_compilers_all():
function test_install_torch_compilers_all (line 184) | def test_install_torch_compilers_all():
function test_install_torch_compilers_deepsparse (line 201) | def test_install_torch_compilers_deepsparse():
function test_install_torch_compilers_invalid (line 212) | def test_install_torch_compilers_invalid():
function test_install_torch_onnx_compilers_all (line 223) | def test_install_torch_onnx_compilers_all():
function test_install_tensorflow_compilers_all (line 241) | def test_install_tensorflow_compilers_all():
FILE: optimization/nebullvm/nebullvm/operations/base.py
class Operation (line 11) | class Operation(abc.ABC):
method __init__ (line 12) | def __init__(self):
method set_feedback_collector (line 19) | def set_feedback_collector(self, feedback_collector: FeedbackCollector):
method execute (line 26) | def execute(self, **kwargs):
method state (line 30) | def state(self) -> Dict[str, any]:
method to (line 33) | def to(self, device: Union[str, Device]):
FILE: optimization/nebullvm/nebullvm/operations/conversions/converters.py
class Converter (line 15) | class Converter(Operation, abc.ABC):
method __init__ (line 21) | def __init__(self, model_name: Optional[str] = None):
method set_state (line 30) | def set_state(
method get_result (line 37) | def get_result(self) -> List:
class PytorchConverter (line 41) | class PytorchConverter(Converter):
method execute (line 44) | def execute(
method onnx_conversion (line 60) | def onnx_conversion(self, save_path, model_params):
method tensorflow_conversion (line 74) | def tensorflow_conversion(self):
class TensorflowConverter (line 79) | class TensorflowConverter(Converter):
method execute (line 82) | def execute(
method onnx_conversion (line 98) | def onnx_conversion(self, save_path, model_params):
method pytorch_conversion (line 110) | def pytorch_conversion(self):
class ONNXConverter (line 115) | class ONNXConverter(Converter):
method execute (line 118) | def execute(self, save_path, model_params):
method tensorflow_conversion (line 132) | def tensorflow_conversion(self):
method pytorch_conversion (line 136) | def pytorch_conversion(self):
FILE: optimization/nebullvm/nebullvm/operations/conversions/huggingface.py
class _HFTextDataset (line 26) | class _HFTextDataset(Sequence):
method __init__ (line 27) | def __init__(
method __getitem__ (line 47) | def __getitem__(self, item: int):
method __len__ (line 61) | def __len__(self):
class _HFDictDataset (line 65) | class _HFDictDataset(Sequence):
method __init__ (line 66) | def __init__(
method __getitem__ (line 76) | def __getitem__(self, item: int):
method __len__ (line 90) | def __len__(self):
method _concatenate (line 94) | def _concatenate(mini_batch, key):
function convert_hf_model (line 103) | def convert_hf_model(
FILE: optimization/nebullvm/nebullvm/operations/conversions/pytorch.py
function convert_torch_to_onnx (line 16) | def convert_torch_to_onnx(
FILE: optimization/nebullvm/nebullvm/operations/conversions/tensorflow.py
function convert_tf_to_onnx (line 15) | def convert_tf_to_onnx(
function convert_tf_saved_model_to_onnx (line 44) | def convert_tf_saved_model_to_onnx(
function convert_keras_to_onnx (line 78) | def convert_keras_to_onnx(
FILE: optimization/nebullvm/nebullvm/operations/conversions/utils.py
function get_conversion_op (line 10) | def get_conversion_op(framework: DeepLearningFramework) -> Converter:
FILE: optimization/nebullvm/nebullvm/operations/fetch_operations/local.py
class FetchModelFromLocal (line 6) | class FetchModelFromLocal(Operation):
method execute (line 7) | def execute(self, model: Any):
method get_model (line 10) | def get_model(self) -> any:
method get_result (line 13) | def get_result(self) -> Any:
class FetchDataFromLocal (line 17) | class FetchDataFromLocal(Operation):
method execute (line 18) | def execute(self, data: Union[Iterable, Sequence]):
method get_data (line 21) | def get_data(self) -> any:
method get_result (line 24) | def get_result(self) -> Any:
FILE: optimization/nebullvm/nebullvm/operations/inference_learners/base.py
class BuildInferenceLearner (line 26) | class BuildInferenceLearner(Operation, ABC):
method __init__ (line 27) | def __init__(self):
method execute (line 32) | def execute(self, **kwargs):
method get_result (line 35) | def get_result(self) -> Any:
class BaseInferenceLearner (line 40) | class BaseInferenceLearner(ABC):
method name (line 51) | def name(self) -> str:
method __post_init__ (line 54) | def __post_init__(self, input_data):
method _store_file (line 60) | def _store_file(self, file_path: Union[str, Path]):
method _store_dir (line 63) | def _store_dir(self, dir_path: Union[str, Path]):
method __del__ (line 75) | def __del__(self, shutil=shutil):
method predict_from_files (line 81) | def predict_from_files(
method predict_from_listified_tensors (line 100) | def predict_from_listified_tensors(self, *listified_tensors: List):
method list2tensor (line 123) | def list2tensor(self, listified_tensor: List) -> Any:
method tensor2list (line 134) | def tensor2list(self, tensor: Any) -> List:
method _read_file (line 145) | def _read_file(self, input_file: str) -> Any:
method _save_file (line 155) | def _save_file(self, prediction: Any, output_file: str):
method predict (line 164) | def predict(self, *args, **kwargs) -> Any:
method run (line 175) | def run(self, *args, **kwargs) -> Any:
method forward (line 179) | def forward(self, *args, **kwargs):
method __call__ (line 183) | def __call__(self, *args, **kwargs):
method save (line 188) | def save(self, path: Union[str, Path], **kwargs):
method load (line 197) | def load(cls, path: Union[Path, str], **kwargs):
method get_size (line 209) | def get_size(self):
method free_gpu_memory (line 214) | def free_gpu_memory(self):
method get_inputs_example (line 219) | def get_inputs_example(self):
method output_format (line 227) | def output_format(self):
method input_format (line 232) | def input_format(self):
class LearnerMetadata (line 236) | class LearnerMetadata:
method __init__ (line 257) | def __init__(
method __getitem__ (line 279) | def __getitem__(self, item):
method from_model (line 289) | def from_model(cls, model: BaseInferenceLearner, **kwargs):
method from_dict (line 316) | def from_dict(cls, dictionary: Dict):
method to_dict (line 337) | def to_dict(self) -> Dict:
method read (line 355) | def read(cls, path: Union[Path, str]):
method save (line 369) | def save(self, path: Union[Path, str]):
method load_model (line 381) | def load_model(
class PytorchBaseInferenceLearner (line 403) | class PytorchBaseInferenceLearner(BaseInferenceLearner, ABC):
method input_format (line 405) | def input_format(self):
method output_format (line 409) | def output_format(self):
method list2tensor (line 412) | def list2tensor(self, listified_tensor: List) -> torch.Tensor:
method tensor2list (line 423) | def tensor2list(self, tensor: torch.Tensor) -> List:
method free_gpu_memory (line 434) | def free_gpu_memory(self):
method set_model_on_gpu (line 438) | def set_model_on_gpu(self):
method _read_file (line 442) | def _read_file(self, input_file: Union[str, Path]) -> torch.Tensor:
method _save_file (line 446) | def _save_file(
method get_inputs_example (line 451) | def get_inputs_example(self, random=False):
method get_size (line 461) | def get_size(self):
class TensorflowBaseInferenceLearner (line 478) | class TensorflowBaseInferenceLearner(BaseInferenceLearner, ABC):
method input_format (line 480) | def input_format(self):
method output_format (line 484) | def output_format(self):
method free_gpu_memory (line 487) | def free_gpu_memory(self):
method set_model_on_gpu (line 491) | def set_model_on_gpu(self):
method list2tensor (line 494) | def list2tensor(self, listified_tensor: List) -> tf.Tensor:
method tensor2list (line 505) | def tensor2list(self, tensor: tf.Tensor) -> List:
method _read_file (line 516) | def _read_file(self, input_file: Union[str, Path]) -> tf.Tensor:
method _save_file (line 521) | def _save_file(self, prediction: tf.Tensor, output_file: Union[str, Pa...
method get_inputs_example (line 524) | def get_inputs_example(self, random=False):
class NumpyBaseInferenceLearner (line 535) | class NumpyBaseInferenceLearner(BaseInferenceLearner, ABC):
method input_format (line 537) | def input_format(self):
method output_format (line 541) | def output_format(self):
method list2tensor (line 544) | def list2tensor(self, listified_tensor: List) -> np.ndarray:
method tensor2list (line 555) | def tensor2list(self, tensor: np.ndarray) -> List:
method _read_file (line 566) | def _read_file(self, input_file: Union[str, Path]) -> np.ndarray:
method _save_file (line 570) | def _save_file(
method get_inputs_example (line 575) | def get_inputs_example(self, random=False):
class InferenceLearnerWrapper (line 586) | class InferenceLearnerWrapper(BaseInferenceLearner, ABC):
method __init__ (line 602) | def __init__(self, core_inference_learner: BaseInferenceLearner):
method list2tensor (line 608) | def list2tensor(self, listified_tensor: List) -> Any:
method tensor2list (line 611) | def tensor2list(self, tensor: Any) -> List:
method _read_file (line 614) | def _read_file(self, input_file: str) -> Any:
method _save_file (line 617) | def _save_file(self, prediction: Any, output_file: str):
method save (line 620) | def save(self, path: Union[str, Path], **kwargs):
method _get_extra_metadata_kwargs (line 629) | def _get_extra_metadata_kwargs(self) -> Dict:
method _save_wrapper_extra_info (line 632) | def _save_wrapper_extra_info(self):
method _convert_metadata_to_inputs (line 636) | def _convert_metadata_to_inputs(metadata: LearnerMetadata) -> Dict:
method _load_wrapper_extra_info (line 640) | def _load_wrapper_extra_info(builder_inputs: Dict) -> Dict:
method load (line 644) | def load(cls, path: Union[Path, str], **kwargs):
method free_gpu_memory (line 655) | def free_gpu_memory(self):
method get_inputs_example (line 658) | def get_inputs_example(self):
method output_format (line 662) | def output_format(self):
method input_format (line 666) | def input_format(self):
FILE: optimization/nebullvm/nebullvm/operations/inference_learners/blade_disc.py
class BladeDISCInferenceLearner (line 12) | class BladeDISCInferenceLearner(TorchScriptInferenceLearner):
method from_torch_model (line 16) | def from_torch_model(
FILE: optimization/nebullvm/nebullvm/operations/inference_learners/builders.py
class TorchScriptBuildInferenceLearner (line 64) | class TorchScriptBuildInferenceLearner(BuildInferenceLearner):
method execute (line 65) | def execute(
class TorchXLABuildInferenceLearner (line 80) | class TorchXLABuildInferenceLearner(BuildInferenceLearner):
method execute (line 81) | def execute(
class TorchNeuronBuildInferenceLearner (line 96) | class TorchNeuronBuildInferenceLearner(BuildInferenceLearner):
method execute (line 97) | def execute(
class TorchDynamoBuildInferenceLearner (line 112) | class TorchDynamoBuildInferenceLearner(BuildInferenceLearner):
method execute (line 113) | def execute(
class TensorflowBuildInferenceLearner (line 128) | class TensorflowBuildInferenceLearner(BuildInferenceLearner):
method execute (line 129) | def execute(
class TFLiteBuildInferenceLearner (line 144) | class TFLiteBuildInferenceLearner(BuildInferenceLearner):
method execute (line 145) | def execute(
class DeepSparseBuildInferenceLearner (line 160) | class DeepSparseBuildInferenceLearner(BuildInferenceLearner):
method execute (line 161) | def execute(
class ONNXBuildInferenceLearner (line 179) | class ONNXBuildInferenceLearner(BuildInferenceLearner):
method execute (line 180) | def execute(
class OpenVINOBuildInferenceLearner (line 203) | class OpenVINOBuildInferenceLearner(BuildInferenceLearner):
method execute (line 204) | def execute(
class PyTorchTensorRTBuildInferenceLearner (line 223) | class PyTorchTensorRTBuildInferenceLearner(BuildInferenceLearner):
method execute (line 224) | def execute(
class ONNXTensorRTBuildInferenceLearner (line 239) | class ONNXTensorRTBuildInferenceLearner(BuildInferenceLearner):
method execute (line 240) | def execute(
class IntelNeuralCompressorBuildInferenceLearner (line 270) | class IntelNeuralCompressorBuildInferenceLearner(BuildInferenceLearner):
method execute (line 271) | def execute(
class PyTorchApacheTVMBuildInferenceLearner (line 288) | class PyTorchApacheTVMBuildInferenceLearner(BuildInferenceLearner):
method execute (line 289) | def execute(
class ONNXApacheTVMBuildInferenceLearner (line 321) | class ONNXApacheTVMBuildInferenceLearner(BuildInferenceLearner):
method execute (line 322) | def execute(
class FasterTransformerBuildInferenceLearner (line 360) | class FasterTransformerBuildInferenceLearner(BuildInferenceLearner):
method execute (line 361) | def execute(
FILE: optimization/nebullvm/nebullvm/operations/inference_learners/deepsparse.py
class DeepSparseInferenceLearner (line 22) | class DeepSparseInferenceLearner(BaseInferenceLearner, ABC):
method __init__ (line 38) | def __init__(
method get_size (line 61) | def get_size(self):
method save (line 64) | def save(self, path: Union[str, Path], **kwargs):
method free_gpu_memory (line 86) | def free_gpu_memory(self):
method load (line 90) | def load(cls, path: Union[Path, str], **kwargs):
method _predict_arrays (line 124) | def _predict_arrays(self, input_arrays: Generator[np.ndarray, None, No...
class PytorchDeepSparseInferenceLearner (line 130) | class PytorchDeepSparseInferenceLearner(
method run (line 146) | def run(self, *input_tensors: torch.Tensor) -> Tuple[torch.Tensor]:
FILE: optimization/nebullvm/nebullvm/operations/inference_learners/faster_transformer.py
class FasterTransformerInferenceLearner (line 6) | class FasterTransformerInferenceLearner(TorchScriptInferenceLearner):
FILE: optimization/nebullvm/nebullvm/operations/inference_learners/huggingface.py
class HuggingFaceInferenceLearner (line 19) | class HuggingFaceInferenceLearner(InferenceLearnerWrapper):
method name (line 40) | def name(self) -> str:
method __init__ (line 43) | def __init__(
method _save_wrapper_extra_info (line 55) | def _save_wrapper_extra_info(self):
method get_size (line 58) | def get_size(self):
method _load_wrapper_extra_info (line 62) | def _load_wrapper_extra_info(builder_inputs: Dict) -> Dict:
method run (line 65) | def run(self, *args, **kwargs) -> Any:
method _get_extra_metadata_kwargs (line 92) | def _get_extra_metadata_kwargs(self) -> Dict:
method _convert_metadata_to_inputs (line 108) | def _convert_metadata_to_inputs(metadata: LearnerMetadata) -> Dict:
class DiffusionInferenceLearner (line 128) | class DiffusionInferenceLearner(BaseInferenceLearner, ABC):
method name (line 130) | def name(self) -> str:
method __init__ (line 133) | def __init__(self, pipeline: StableDiffusionPipeline):
method __call__ (line 136) | def __call__(self, *args, **kwargs):
method run (line 139) | def run(self, *args, **kwargs) -> Any:
method save (line 142) | def save(self, path: Union[str, Path], **kwargs):
method load (line 146) | def load(
method get_size (line 162) | def get_size(self):
method free_gpu_memory (line 175) | def free_gpu_memory(self):
method get_inputs_example (line 178) | def get_inputs_example(self):
method output_format (line 182) | def output_format(self):
method input_format (line 186) | def input_format(self):
method list2tensor (line 189) | def list2tensor(self, listified_tensor: List) -> Any:
FILE: optimization/nebullvm/nebullvm/operations/inference_learners/neural_compressor.py
class NeuralCompressorInferenceLearner (line 34) | class NeuralCompressorInferenceLearner(BaseInferenceLearner, ABC):
method __init__ (line 45) | def __init__(
method get_size (line 57) | def get_size(self):
method save (line 62) | def save(self, path: Union[str, Path], **kwargs):
method load (line 81) | def load(cls, path: Union[Path, str], **kwargs):
class PytorchNeuralCompressorInferenceLearner (line 147) | class PytorchNeuralCompressorInferenceLearner(
method free_gpu_memory (line 158) | def free_gpu_memory(self):
method run (line 163) | def run(self, *input_tensors: torch.Tensor) -> Tuple[torch.Tensor]:
FILE: optimization/nebullvm/nebullvm/operations/inference_learners/onnx.py
function _running_on_intel_cpu (line 40) | def _running_on_intel_cpu(use_gpu):
function _get_ort_session_options (line 49) | def _get_ort_session_options(use_gpu) -> ort.SessionOptions:
class ONNXInferenceLearner (line 67) | class ONNXInferenceLearner(BaseInferenceLearner, ABC):
method __init__ (line 82) | def __init__(
method _setup_tensorrt (line 114) | def _setup_tensorrt(quantization_type: QuantizationType, device: Device):
method get_size (line 153) | def get_size(self):
method free_gpu_memory (line 160) | def free_gpu_memory(self):
method set_model_on_gpu (line 164) | def set_model_on_gpu(self):
method save (line 187) | def save(self, path: Union[str, Path], **kwargs):
method load (line 228) | def load(cls, path: Union[Path, str], **kwargs):
method _predict_arrays (line 269) | def _predict_arrays(self, input_arrays: Generator[np.ndarray, None, No...
class PytorchONNXInferenceLearner (line 278) | class PytorchONNXInferenceLearner(
method run (line 293) | def run(self, *input_tensors: torch.Tensor) -> Tuple[torch.Tensor]:
class TensorflowONNXInferenceLearner (line 324) | class TensorflowONNXInferenceLearner(
method run (line 339) | def run(self, *input_tensors: tf.Tensor) -> Tuple[tf.Tensor, ...]:
class NumpyONNXInferenceLearner (line 370) | class NumpyONNXInferenceLearner(
method run (line 385) | def run(self, *input_tensors: np.ndarray) -> Tuple[np.ndarray, ...]:
FILE: optimization/nebullvm/nebullvm/operations/inference_learners/openvino.py
class OpenVinoInferenceLearner (line 31) | class OpenVinoInferenceLearner(BaseInferenceLearner, ABC):
method __init__ (line 52) | def __init__(
method load (line 73) | def load(cls, path: Union[Path, str], **kwargs):
method get_size (line 106) | def get_size(self):
method free_gpu_memory (line 109) | def free_gpu_memory(self):
method from_model_name (line 113) | def from_model_name(
method _get_dynamic_shape (line 173) | def _get_dynamic_shape(
method _get_metadata (line 209) | def _get_metadata(self, **kwargs) -> LearnerMetadata:
method save (line 217) | def save(self, path: Union[str, Path], **kwargs):
method _predict_array (line 238) | def _predict_array(
class PytorchOpenVinoInferenceLearner (line 259) | class PytorchOpenVinoInferenceLearner(
method run (line 280) | def run(self, *input_tensors: torch.Tensor) -> Tuple[torch.Tensor, ...]:
class TensorflowOpenVinoInferenceLearner (line 308) | class TensorflowOpenVinoInferenceLearner(
method run (line 330) | def run(self, *input_tensors: tf.Tensor) -> Tuple[tf.Tensor, ...]:
class NumpyOpenVinoInferenceLearner (line 357) | class NumpyOpenVinoInferenceLearner(
method run (line 379) | def run(self, *input_tensors: np.ndarray) -> Tuple[np.ndarray, ...]:
FILE: optimization/nebullvm/nebullvm/operations/inference_learners/tensor_rt.py
class ONNXTensorRTInferenceLearner (line 35) | class ONNXTensorRTInferenceLearner(BaseInferenceLearner, ABC):
method __init__ (line 54) | def __init__(
method _get_metadata (line 75) | def _get_metadata(self, **kwargs) -> LearnerMetadata:
method _synchronize_stream (line 82) | def _synchronize_stream(self):
method stream_ptr (line 86) | def stream_ptr(self):
method _get_default_cuda_stream (line 90) | def _get_default_cuda_stream() -> Any:
method check_env (line 94) | def check_env(use_gpu):
method _set_cuda_env (line 101) | def _set_cuda_env(self, use_gpu):
method from_engine_path (line 109) | def from_engine_path(
method _predict_tensors (line 171) | def _predict_tensors(
method get_size (line 200) | def get_size(self):
method free_gpu_memory (line 203) | def free_gpu_memory(self):
method save (line 207) | def save(self, path: Union[str, Path], **kwargs):
method load (line 226) | def load(cls, path: Union[Path, str], **kwargs):
class PytorchTensorRTInferenceLearner (line 257) | class PytorchTensorRTInferenceLearner(PytorchBaseInferenceLearner):
method __init__ (line 261) | def __init__(
method get_size (line 277) | def get_size(self):
method run (line 286) | def run(self, *input_tensors: torch.Tensor) -> Tuple[torch.Tensor, ...]:
method save (line 305) | def save(self, path: Union[str, Path], **kwargs):
method load (line 313) | def load(cls, path: Union[Path, str], **kwargs):
class PytorchONNXTensorRTInferenceLearner (line 328) | class PytorchONNXTensorRTInferenceLearner(
method _synchronize_stream (line 348) | def _synchronize_stream(self):
method _get_default_cuda_stream (line 352) | def _get_default_cuda_stream() -> Any:
method stream_ptr (line 356) | def stream_ptr(self):
method run (line 359) | def run(self, *input_tensors: torch.Tensor) -> Tuple[torch.Tensor, ...]:
class BaseArrayONNXTensorRTInferenceLearner (line 433) | class BaseArrayONNXTensorRTInferenceLearner(ONNXTensorRTInferenceLearner...
method _synchronize_stream (line 438) | def _synchronize_stream(self):
method _get_default_cuda_stream (line 442) | def _get_default_cuda_stream() -> Any:
method stream_ptr (line 446) | def stream_ptr(self):
method _convert_to_array_and_free_memory (line 450) | def _convert_to_array_and_free_memory(cuda_array) -> np.ndarray:
method _predict_array (line 455) | def _predict_array(
class TensorflowONNXTensorRTInferenceLearner (line 505) | class TensorflowONNXTensorRTInferenceLearner(
method run (line 526) | def run(self, *input_tensors: tf.Tensor) -> Tuple[tf.Tensor, ...]:
class NumpyONNXTensorRTInferenceLearner (line 560) | class NumpyONNXTensorRTInferenceLearner(
method run (line 581) | def run(self, *input_tensors: np.ndarray) -> Tuple[np.ndarray, ...]:
FILE: optimization/nebullvm/nebullvm/operations/inference_learners/tensorflow.py
class TensorflowBackendInferenceLearner (line 14) | class TensorflowBackendInferenceLearner(TensorflowBaseInferenceLearner):
method __init__ (line 17) | def __init__(self, tf_model: tf.Module, device: Device, **kwargs):
method get_size (line 23) | def get_size(self):
method run (line 26) | def run(self, *input_tensors: tf.Tensor) -> Tuple[tf.Tensor, ...]:
method save (line 35) | def save(self, path: Union[str, Path], **kwargs):
method load (line 43) | def load(cls, path: Union[Path, str], **kwargs):
class TFLiteBackendInferenceLearner (line 60) | class TFLiteBackendInferenceLearner(TensorflowBaseInferenceLearner):
method __init__ (line 63) | def __init__(self, tflite_file: bytes, device: Device, **kwargs):
method get_size (line 69) | def get_size(self):
method free_gpu_memory (line 72) | def free_gpu_memory(self):
method run (line 77) | def run(self, *input_tensors: tf.Tensor):
method save (line 97) | def save(self, path: Union[str, Path], **kwargs):
method load (line 107) | def load(cls, path: Union[Path, str], **kwargs):
FILE: optimization/nebullvm/nebullvm/operations/inference_learners/torch_dynamo.py
class TorchDynamoInferenceLearner (line 9) | class TorchDynamoInferenceLearner(TorchScriptInferenceLearner):
method save (line 12) | def save(self, path: Union[str, Path], **kwargs):
method load (line 19) | def load(cls, path: Union[Path, str], **kwargs):
FILE: optimization/nebullvm/nebullvm/operations/inference_learners/torch_neuron.py
class TorchNeuronInferenceLearner (line 10) | class TorchNeuronInferenceLearner(TorchScriptInferenceLearner):
method get_size (line 13) | def get_size(self):
FILE: optimization/nebullvm/nebullvm/operations/inference_learners/torch_xla.py
class TorchXLAInferenceLearner (line 18) | class TorchXLAInferenceLearner(PytorchBaseInferenceLearner):
method __init__ (line 22) | def __init__(self, torch_model: torch.nn.Module, device: Device, **kwa...
method run (line 30) | def run(self, *input_tensors: torch.Tensor) -> Tuple[torch.Tensor, ...]:
method get_size (line 43) | def get_size(self):
method save (line 59) | def save(self, path: Union[str, Path], **kwargs):
method load (line 68) | def load(cls, path: Union[Path, str], **kwargs):
FILE: optimization/nebullvm/nebullvm/operations/inference_learners/torchscript.py
class TorchScriptInferenceLearner (line 19) | class TorchScriptInferenceLearner(PytorchBaseInferenceLearner):
method __init__ (line 23) | def __init__(self, torch_model: ScriptModule, device: Device, **kwargs):
method run (line 31) | def run(self, *input_tensors: torch.Tensor) -> Tuple[torch.Tensor, ...]:
method save (line 45) | def save(self, path: Union[str, Path], **kwargs):
method load (line 54) | def load(cls, path: Union[Path, str], **kwargs):
method from_torch_model (line 69) | def from_torch_model(
FILE: optimization/nebullvm/nebullvm/operations/inference_learners/tvm.py
class ApacheTVMInferenceLearner (line 35) | class ApacheTVMInferenceLearner(BaseInferenceLearner, ABC):
method __init__ (line 57) | def __init__(
method get_size (line 79) | def get_size(self):
method _has_half_precision_transformation (line 88) | def _has_half_precision_transformation(self):
method _predict_array (line 94) | def _predict_array(
method free_gpu_memory (line 117) | def free_gpu_memory(self):
method save (line 121) | def save(self, path: Union[str, Path], **kwargs):
method load (line 142) | def load(cls, path: Union[Path, str], **kwargs):
method from_runtime_module (line 177) | def from_runtime_module(
class BaseArrayApacheTVMInferenceLearner (line 218) | class BaseArrayApacheTVMInferenceLearner(ApacheTVMInferenceLearner, ABC):
method _inner_predict (line 223) | def _inner_predict(
class PytorchApacheTVMInferenceLearner (line 273) | class PytorchApacheTVMInferenceLearner(
method run (line 294) | def run(self, *input_tensors: torch.Tensor) -> Tuple[torch.Tensor, ...]:
method _convert_device (line 328) | def _convert_device(device: Any):
class TensorflowApacheTVMInferenceLearner (line 334) | class TensorflowApacheTVMInferenceLearner(
method run (line 356) | def run(self, *input_tensors: tf.Tensor) -> Tuple[tf.Tensor, ...]:
class NumpyApacheTVMInferenceLearner (line 386) | class NumpyApacheTVMInferenceLearner(
method run (line 408) | def run(self, *input_tensors: np.ndarray) -> Tuple[np.ndarray, ...]:
FILE: optimization/nebullvm/nebullvm/operations/inference_learners/utils.py
function load_model (line 9) | def load_model(path: Union[Path, str], pipe: StableDiffusionPipeline = N...
function save_model (line 30) | def save_model(model: Any, path: Union[Path, str]):
FILE: optimization/nebullvm/nebullvm/operations/measures/base.py
class Measure (line 6) | class Measure(Operation, abc.ABC):
method __init__ (line 7) | def __init__(self):
method execute (line 12) | def execute(self, **kwargs):
FILE: optimization/nebullvm/nebullvm/operations/measures/measures.py
class MetricDropMeasure (line 36) | class MetricDropMeasure(Measure):
method __init__ (line 37) | def __init__(self):
method execute (line 41) | def execute(
method get_result (line 72) | def get_result(self) -> Tuple[bool, float]:
class LatencyOriginalModelMeasure (line 76) | class LatencyOriginalModelMeasure(Measure):
method __init__ (line 77) | def __init__(self):
method execute (line 81) | def execute(
FILE: optimization/nebullvm/nebullvm/operations/measures/utils.py
function compute_torch_latency (line 20) | def compute_torch_latency(
function compute_tf_latency (line 62) | def compute_tf_latency(
function compute_onnx_latency (line 97) | def compute_onnx_latency(
function compute_optimized_running_time (line 153) | def compute_optimized_running_time(
function compute_relative_difference (line 203) | def compute_relative_difference(
function compute_accuracy_drop (line 228) | def compute_accuracy_drop(tensor_1: Any, tensor_2: Any, y: Any) -> float:
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/base.py
class Compiler (line 8) | class Compiler(Operation, abc.ABC):
method __init__ (line 11) | def __init__(self):
method execute (line 16) | def execute(self, **kwargs):
method _compile_model (line 20) | def _compile_model(self, **kwargs) -> Any:
method _quantize_model (line 24) | def _quantize_model(self, **kwargs) -> Any:
method get_result (line 27) | def get_result(self) -> Any:
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/deepsparse.py
class DeepSparseCompiler (line 19) | class DeepSparseCompiler(Compiler):
method __init__ (line 25) | def __init__(self):
method execute (line 29) | def execute(
method _compile_model (line 66) | def _compile_model(
method _quantize_model (line 82) | def _quantize_model(**kwargs):
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/faster_transformer/__init__.py
function detect_and_swap_model (line 31) | def detect_and_swap_model(model, data_type="fp16", remove_padding=False):
class FasterTransformerCompiler (line 48) | class FasterTransformerCompiler(TorchScriptCompiler):
method _compile_model (line 58) | def _compile_model(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/faster_transformer/bert/__init__.py
function swap_bert_encoder (line 25) | def swap_bert_encoder(model, data_type, lib_path, remove_padding=False):
function swap_model (line 53) | def swap_model(
function detect_and_swap_bert_model (line 64) | def detect_and_swap_bert_model(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/faster_transformer/bert/checkpoint_quantization.py
function checkpoint_quantization (line 29) | def checkpoint_quantization(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/faster_transformer/bert/modeling_bert.py
class EncoderWeights (line 35) | class EncoderWeights(object):
method __init__ (line 36) | def __init__(
method listed_weights (line 162) | def listed_weights(self):
method to_cuda (line 722) | def to_cuda(self):
method to_half (line 736) | def to_half(self):
method to_bfloat16 (line 744) | def to_bfloat16(self):
method to_int8 (line 752) | def to_int8(self, sparse=False, ths_path="./lib/libth_transformer.so"):
class CustomEncoder (line 861) | class CustomEncoder(torch.nn.Module):
method __init__ (line 862) | def __init__(
method forward (line 965) | def forward(self, hidden_states, attention_mask, sequence_lengths):
class HuggingFaceEncoder (line 970) | class HuggingFaceEncoder(torch.nn.Module):
method __init__ (line 971) | def __init__(self, layer_num, head_num, head_size, weights=None):
method forward (line 993) | def forward(self, hidden_states, attention_mask):
class BertModel (line 1022) | class BertModel(BertPreTrainedModel):
method __init__ (line 1023) | def __init__(self, config):
method forward (line 1034) | def forward(
method replace_encoder (line 1147) | def replace_encoder(self, new_encoder):
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/faster_transformer/gpt/__init__.py
class FasterTransformerGPT2Wrapper (line 39) | class FasterTransformerGPT2Wrapper(torch.nn.Module):
method __init__ (line 40) | def __init__(self, model: gpt_decoder.Gpt, config):
method generate (line 47) | def generate(
function convert_gpt2_lm_head_model (line 166) | def convert_gpt2_lm_head_model(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/faster_transformer/gpt/utils/gpt_decoder.py
function to_numpy_dtype (line 33) | def to_numpy_dtype(maybe_str_dtype: Union[str, np.dtype]):
function to_torch_dtype (line 52) | def to_torch_dtype(maybe_str_dtype: Union[str, torch.dtype]):
function load_weight_from_bin (line 73) | def load_weight_from_bin(
class GptLayerWeights (line 93) | class GptLayerWeights:
method __init__ (line 94) | def __init__(
method from_config (line 226) | def from_config(cls, config: GptInitModelParameters):
method dtype (line 240) | def dtype(self):
method device (line 244) | def device(self):
method _map (line 247) | def _map(self, func):
method _map_int8 (line 255) | def _map_int8(self, func):
method float (line 270) | def float(self):
method half (line 275) | def half(self):
method bfloat16 (line 282) | def bfloat16(self):
method cuda (line 289) | def cuda(self, device=None):
method to (line 294) | def to(self, device=None):
method is_valid_pp_group (line 299) | def is_valid_pp_group(self, layer, pp_rank):
method load (line 302) | def load(
class FtModuleBase (line 511) | class FtModuleBase:
method __init__ (line 512) | def __init__(self):
method from_config (line 517) | def from_config(cls, config: GptInitModelParameters, **kwargs):
method _initialize_model (line 521) | def _initialize_model(self, force_init=False):
method forward (line 525) | def forward(self, *args, **kwargs):
method set_weight (line 528) | def set_weight(self, weight: GptLayerWeights):
method dtype (line 537) | def dtype(self):
method device (line 542) | def device(self):
method cuda (line 546) | def cuda(self, device=None):
method to (line 551) | def to(self, device=None):
method float (line 555) | def float(self):
method half (line 560) | def half(self):
method bfloat16 (line 565) | def bfloat16(self):
class GptContextDecoder (line 571) | class GptContextDecoder(FtModuleBase):
method __init__ (line 572) | def __init__(
method __repr__ (line 614) | def __repr__(self):
method from_config (line 636) | def from_config(cls, config: GptInitModelParameters, **kwargs):
method _initialize_model (line 654) | def _initialize_model(self, force_init=False):
method forward (line 693) | def forward(
class GptDecoder (line 752) | class GptDecoder(FtModuleBase):
method __init__ (line 753) | def __init__(
method __repr__ (line 789) | def __repr__(self):
method from_config (line 811) | def from_config(cls, config: GptInitModelParameters, **kwargs):
method _initialize_model (line 828) | def _initialize_model(self, force_init=False):
method forward (line 863) | def forward(
class Gpt (line 923) | class Gpt:
method __init__ (line 924) | def __init__(
method from_config (line 1085) | def from_config(cls, config: GptInitModelParameters, **kwargs):
method load (line 1110) | def load(
method dtype (line 1219) | def dtype(self):
method device (line 1224) | def device(self):
method cuda (line 1228) | def cuda(self, device=None):
method to (line 1234) | def to(self, device=None):
method float (line 1239) | def float(self):
method half (line 1244) | def half(self):
method bfloat16 (line 1249) | def bfloat16(self):
method _mask_padded_vocab_weights (line 1254) | def _mask_padded_vocab_weights(self, weight: torch.Tensor):
method generate_pad_mask (line 1259) | def generate_pad_mask(self, input_lengths, memory_length, init_step=0):
method get_local_batch_size (line 1291) | def get_local_batch_size(self, batch_size):
method generate (line 1303) | def generate(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/faster_transformer/gpt/utils/huggingface_gpt_convert.py
function get_weight_data_type (line 36) | def get_weight_data_type(data_type):
function split_and_convert_process (line 45) | def split_and_convert_process(i, saved_dir, factor, key, args, val):
function split_and_convert (line 115) | def split_and_convert(args):
function main (line 128) | def main(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/intel_neural_compressor.py
class IntelNeuralCompressorCompiler (line 17) | class IntelNeuralCompressorCompiler(Compiler):
method __init__ (line 26) | def __init__(self):
method execute (line 30) | def execute(
method _compile_model (line 77) | def _compile_model(self, model: Union[str, Path]):
method _quantize_model (line 81) | def _quantize_model(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/onnxruntime.py
class ONNXCompiler (line 20) | class ONNXCompiler(Compiler):
method execute (line 33) | def execute(
method _compile_model (line 81) | def _compile_model(self, model: Union[str, Path]):
method _quantize_model (line 84) | def _quantize_model(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/openvino.py
class OpenVINOCompiler (line 25) | class OpenVINOCompiler(Compiler):
method __init__ (line 35) | def __init__(self):
method execute (line 38) | def execute(
method _compile_model (line 117) | def _compile_model(
method _quantize_model (line 134) | def _quantize_model(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/quantizations/intel_neural_compressor.py
function _prepare_quantization_config (line 20) | def _prepare_quantization_config(model: Any, tmp_dir: str, approach: str):
function _prepare_mixed_precision_config (line 40) | def _prepare_mixed_precision_config(model: Any, tmp_dir: str):
function _get_dataloader (line 60) | def _get_dataloader(input_data: DataManager):
function _quantize_static (line 67) | def _quantize_static(model: Module, input_data: DataManager) -> GraphMod...
function _quantize_dynamic (line 81) | def _quantize_dynamic(model: Module) -> GraphModule:
function _mixed_precision (line 93) | def _mixed_precision(
function quantize_neural_compressor (line 107) | def quantize_neural_compressor(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/quantizations/onnx.py
class _IterableCalibrationDataReader (line 26) | class _IterableCalibrationDataReader(CalibrationDataReader):
method __init__ (line 27) | def __init__(
method get_next (line 42) | def get_next(self) -> dict:
method from_dataloader (line 46) | def from_dataloader(
function _quantize_dynamic (line 55) | def _quantize_dynamic(model_path: str) -> str:
function _get_quantization_type_for_static (line 69) | def _get_quantization_type_for_static(use_gpu) -> Tuple[QuantType, Quant...
function _quantize_static (line 89) | def _quantize_static(
function _convert_to_half_precision (line 113) | def _convert_to_half_precision(
function quantize_onnx (line 136) | def quantize_onnx(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/quantizations/openvino.py
class _CalibrationDataLoader (line 15) | class _CalibrationDataLoader(DataLoader):
method __init__ (line 16) | def __init__(
method __len__ (line 22) | def __len__(self):
method __getitem__ (line 25) | def __getitem__(self, item):
function quantize_openvino (line 35) | def quantize_openvino(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/quantizations/pytorch.py
class _QuantWrapper (line 26) | class _QuantWrapper(Module):
method __init__ (line 27) | def __init__(self, model: Module):
method forward (line 34) | def forward(self, *inputs: torch.Tensor):
function _quantize_dynamic_torch (line 40) | def _quantize_dynamic_torch(model: Module):
function _quantize_dynamic_torch_fx (line 51) | def _quantize_dynamic_torch_fx(
function _quantize_static_torch (line 65) | def _quantize_static_torch(
function _quantize_static_torch_fx (line 81) | def _quantize_static_torch_fx(
function _quantize_static (line 98) | def _quantize_static(
function _quantize_dynamic (line 121) | def _quantize_dynamic(
function _half_precision (line 144) | def _half_precision(model: Module):
function quantize_pytorch (line 148) | def quantize_pytorch(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/quantizations/tensor_rt.py
function quantize_tensorrt (line 16) | def quantize_tensorrt(
class TensorRTCalibrator (line 42) | class TensorRTCalibrator(IInt8EntropyCalibrator2):
method __init__ (line 43) | def __init__(
method get_batch (line 50) | def get_batch(self, names):
method get_batch_size (line 69) | def get_batch_size(self):
method read_calibration_cache (line 72) | def read_calibration_cache(self):
method write_calibration_cache (line 75) | def write_calibration_cache(self, cache):
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/quantizations/tensorflow.py
function _quantize_dynamic (line 7) | def _quantize_dynamic(model: tf.Module):
function _quantize_static (line 14) | def _quantize_static(model: tf.Module, dataset: List[Tuple[tf.Tensor, .....
function _half_precision (line 26) | def _half_precision(model: tf.Module):
function quantize_tensorflow (line 34) | def quantize_tensorflow(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/quantizations/tvm.py
class TVMCalibrator (line 16) | class TVMCalibrator(DataManager):
method __init__ (line 17) | def __init__(self, data_reader: Sequence, input_names: List[str]):
method __getitem__ (line 21) | def __getitem__(self, item: int):
function quantize_apache_tvm (line 26) | def quantize_apache_tvm(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/quantizations/utils.py
function check_quantization (line 6) | def check_quantization(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/tensor_rt.py
class TensorRTCompiler (line 36) | class TensorRTCompiler(Compiler, abc.ABC):
method __init__ (line 46) | def __init__(self):
method _extract_dynamic_shape_ranges (line 51) | def _extract_dynamic_shape_ranges(model_params: ModelParams):
method execute (line 96) | def execute(self, *args, **kwargs):
class PyTorchTensorRTCompiler (line 100) | class PyTorchTensorRTCompiler(TensorRTCompiler):
method execute (line 101) | def execute(
method _compile_model (line 191) | def _compile_model(
method _quantize_model (line 255) | def _quantize_model(**kwargs) -> Any:
class ONNXTensorRTCompiler (line 259) | class ONNXTensorRTCompiler(TensorRTCompiler):
method __init__ (line 260) | def __init__(self):
method execute (line 266) | def execute(
method _compile_model (line 416) | def _compile_model(
method _quantize_model (line 450) | def _quantize_model(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/tensorflow.py
class TensorflowBackendCompiler (line 18) | class TensorflowBackendCompiler(Compiler):
method execute (line 24) | def execute(
method _compile_model (line 64) | def _compile_model(self):
method _quantize_model (line 68) | def _quantize_model(**kwargs):
class TFLiteBackendCompiler (line 72) | class TFLiteBackendCompiler(Compiler):
method execute (line 83) | def execute(
method _compile_model (line 133) | def _compile_model(
method _quantize_model (line 142) | def _quantize_model(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/torch_dynamo.py
class TorchDynamoCompiler (line 15) | class TorchDynamoCompiler(Compiler):
method execute (line 21) | def execute(
method _compile_model (line 59) | def _compile_model(
method _quantize_model (line 69) | def _quantize_model(self, **kwargs) -> Any:
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/torch_neuron.py
class TorchNeuronCompiler (line 17) | class TorchNeuronCompiler(Compiler):
method _check_dynamic_shape (line 25) | def _check_dynamic_shape(network_parameters: ModelParams) -> bool:
method execute (line 54) | def execute(
method _compile_model (line 102) | def _compile_model(
method _quantize_model (line 151) | def _quantize_model(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/torch_xla.py
class TorchXLACompiler (line 11) | class TorchXLACompiler(TorchScriptCompiler):
method _compile_model (line 19) | def _compile_model(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/torchscript.py
class TorchScriptCompiler (line 24) | class TorchScriptCompiler(Compiler):
method execute (line 33) | def execute(
method _compile_model (line 84) | def _compile_model(
method _quantize_model (line 123) | def _quantize_model(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/tvm.py
class ApacheTVMCompiler (line 42) | class ApacheTVMCompiler(Compiler, ABC):
method __init__ (line 58) | def __init__(self):
method execute (line 62) | def execute(
method _build_tvm_model (line 113) | def _build_tvm_model(self, model: Any, model_params: ModelParams):
method _build_tvm_model_from_torch (line 117) | def _build_tvm_model_from_torch(
method _build_tvm_model_from_onnx (line 143) | def _build_tvm_model_from_onnx(
method _quantize (line 157) | def _quantize(
method _get_target (line 175) | def _get_target(device) -> str:
method _tune_tvm_model (line 182) | def _tune_tvm_model(
method _compile_model (line 220) | def _compile_model(self, model: Any, params: Any) -> ExecutorFactoryMo...
method _quantize_model (line 233) | def _quantize_model(
class PyTorchApacheTVMCompiler (line 245) | class PyTorchApacheTVMCompiler(ApacheTVMCompiler):
method _build_tvm_model (line 246) | def _build_tvm_model(self, model: Any, model_params: ModelParams):
class ONNXApacheTVMCompiler (line 252) | class ONNXApacheTVMCompiler(ApacheTVMCompiler):
method _build_tvm_model (line 253) | def _build_tvm_model(self, model: Any, model_params: ModelParams):
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compilers/utils.py
function onnxruntime_is_available (line 7) | def onnxruntime_is_available() -> bool:
function tvm_is_available (line 16) | def tvm_is_available() -> bool:
function bladedisc_is_available (line 26) | def bladedisc_is_available() -> bool:
function tensorrt_is_available (line 35) | def tensorrt_is_available() -> bool:
function torch_tensorrt_is_available (line 45) | def torch_tensorrt_is_available() -> bool:
function openvino_is_available (line 54) | def openvino_is_available() -> bool:
function deepsparse_is_available (line 63) | def deepsparse_is_available() -> bool:
function intel_neural_compressor_is_available (line 72) | def intel_neural_compressor_is_available() -> bool:
function torch_xla_is_available (line 81) | def torch_xla_is_available():
function torch_neuron_is_available (line 90) | def torch_neuron_is_available():
function get_faster_transformer_repo_path (line 99) | def get_faster_transformer_repo_path() -> Path:
function faster_transformer_is_available (line 103) | def faster_transformer_is_available() -> bool:
function select_compilers_from_hardware_onnx (line 111) | def select_compilers_from_hardware_onnx(device: Device):
function select_compilers_from_hardware_torch (line 127) | def select_compilers_from_hardware_torch(device: Device):
function select_compilers_from_hardware_tensorflow (line 151) | def select_compilers_from_hardware_tensorflow():
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compressors/base.py
class Compressor (line 10) | class Compressor(Operation, ABC):
method __init__ (line 11) | def __init__(self, config_file: str = None):
method execute (line 18) | def execute(
method _read_config (line 28) | def _read_config(self, config_file: Optional[str]) -> Dict:
method _get_default_config (line 38) | def _get_default_config() -> Dict:
method config_key (line 43) | def config_key(self) -> str:
method get_result (line 46) | def get_result(self) -> Tuple[Any, Optional[float]]:
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compressors/intel.py
function _get_model_framework (line 18) | def _get_model_framework(model: Any) -> str:
class IntelPruningCompressor (line 27) | class IntelPruningCompressor(Compressor, ABC):
method __init__ (line 28) | def __init__(self, config_file: str = None):
method config_key (line 33) | def config_key(self) -> str:
method _get_default_config (line 37) | def _get_default_config() -> Dict:
method _prepare_pruning_config (line 76) | def _prepare_pruning_config(self, model: Any):
method execute (line 107) | def execute(
method _compute_error (line 132) | def _compute_error(
method _get_dataloader (line 143) | def _get_dataloader(input_data: DataManager):
class INCDataset (line 147) | class INCDataset(Dataset):
method __init__ (line 148) | def __init__(self, input_data: DataManager):
method __len__ (line 152) | def __len__(self):
method __getitem__ (line 155) | def __getitem__(self, idx):
class TorchIntelPruningCompressor (line 162) | class TorchIntelPruningCompressor(IntelPruningCompressor):
method _get_dataloader (line 164) | def _get_dataloader(input_data: DataManager):
method _compute_error (line 170) | def _compute_error(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compressors/scripts/__init__.py
function _export_model_onnx (line 41) | def _export_model_onnx(
class RecipeBuilder (line 62) | class RecipeBuilder:
method __init__ (line 63) | def __init__(self, model_path):
method _make_analysis (line 66) | def _make_analysis(self):
method _compute_loss_sensitivity (line 70) | def _compute_loss_sensitivity(self):
method build_recipe (line 123) | def build_recipe(self, epochs_pruning_window=None, training_epochs=10):
class PruningTrainer (line 181) | class PruningTrainer:
method __init__ (line 182) | def __init__(self, model, bs):
method _setup_training (line 188) | def _setup_training(self, loss_fn=None, lr=1e-3, momentum=0.9):
method _run_model_one_epoch (line 198) | def _run_model_one_epoch(self, train=False):
method train (line 236) | def train(
function _load_config (line 277) | def _load_config(config_file: str):
function _load_data (line 283) | def _load_data(data_dir: str):
function _load_model (line 288) | def _load_model(model_file: str):
function _train_model (line 302) | def _train_model(
function _save_model (line 332) | def _save_model(model: torch.nn.Module, path: str):
function main (line 339) | def main(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compressors/scripts/neural_magic_training.py
function _export_model_onnx (line 41) | def _export_model_onnx(
class RecipeBuilder (line 62) | class RecipeBuilder:
method __init__ (line 63) | def __init__(self, model_path):
method _make_analysis (line 66) | def _make_analysis(self):
method _compute_loss_sensitivity (line 70) | def _compute_loss_sensitivity(self):
method build_recipe (line 123) | def build_recipe(self, epochs_pruning_window=None, training_epochs=10):
class PruningTrainer (line 181) | class PruningTrainer:
method __init__ (line 182) | def __init__(self, model, bs):
method _setup_training (line 188) | def _setup_training(self, loss_fn=None, lr=1e-3, momentum=0.9):
method _run_model_one_epoch (line 198) | def _run_model_one_epoch(self, train=False):
method train (line 236) | def train(
function _load_config (line 277) | def _load_config(config_file: str):
function _load_data (line 283) | def _load_data(data_dir: str):
function _load_model (line 288) | def _load_model(model_file: str):
function _train_model (line 302) | def _train_model(
function _save_model (line 332) | def _save_model(model: torch.nn.Module, path: str):
function main (line 339) | def main(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/compressors/sparseml.py
function _save_model (line 16) | def _save_model(model: Module, path: Path):
function _load_model (line 31) | def _load_model(path: Path):
function _save_dataset (line 38) | def _save_dataset(input_data: DataManager, path: Path):
function _save_json (line 44) | def _save_json(dictionary: Dict, path: Path):
function _write_requirements_file (line 49) | def _write_requirements_file(path: Path):
class SparseMLCompressor (line 55) | class SparseMLCompressor(Compressor):
method execute (line 56) | def execute(
method _compute_error (line 114) | def _compute_error(
method _get_default_config (line 136) | def _get_default_config() -> Dict:
method config_key (line 146) | def config_key(self) -> str:
FILE: optimization/nebullvm/nebullvm/operations/optimizations/optimize_inference.py
class OptimizeInferenceOp (line 59) | class OptimizeInferenceOp(Operation):
method __init__ (line 60) | def __init__(self):
method _as_data_manager (line 67) | def _as_data_manager(data) -> DataManager:
method _check_inputs (line 88) | def _check_inputs(model: Any, input_data: types.InputData):
method execute (line 94) | def execute(
method _optimize (line 276) | def _optimize(
method _extract_lowest_latency_model (line 332) | def _extract_lowest_latency_model(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/optimizers/base.py
class Optimizer (line 89) | class Optimizer(Operation, abc.ABC):
method __init__ (line 90) | def __init__(self):
method execute (line 99) | def execute(
method _select_compilers_from_hardware (line 153) | def _select_compilers_from_hardware(self):
method _load_compilers (line 157) | def _load_compilers(
method free_model_gpu (line 176) | def free_model_gpu(self, model: Any):
method _optimize (line 189) | def _optimize(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/optimizers/optimizers.py
class PytorchOptimizer (line 31) | class PytorchOptimizer(Optimizer):
method __init__ (line 32) | def __init__(self):
method _select_compilers_from_hardware (line 36) | def _select_compilers_from_hardware(self):
class TensorflowOptimizer (line 86) | class TensorflowOptimizer(Optimizer):
method __init__ (line 87) | def __init__(self):
method _select_compilers_from_hardware (line 91) | def _select_compilers_from_hardware(self):
class ONNXOptimizer (line 99) | class ONNXOptimizer(Optimizer):
method __init__ (line 100) | def __init__(self):
method _select_compilers_from_hardware (line 104) | def _select_compilers_from_hardware(self):
FILE: optimization/nebullvm/nebullvm/operations/optimizations/tests/test_deepsparse.py
function test_deepsparse (line 44) | def test_deepsparse(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/tests/test_intel_neural_compressor.py
function test_neural_compressor (line 47) | def test_neural_compressor(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/tests/test_onnxruntime.py
function test_onnxruntime (line 51) | def test_onnxruntime(
function test_onnxruntime_quantization (line 210) | def test_onnxruntime_quantization(
function test_onnxruntime_half (line 373) | def test_onnxruntime_half(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/tests/test_openvino.py
function test_openvino (line 70) | def test_openvino(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/tests/test_tensor_rt.py
function test_tensorrt_onnx (line 67) | def test_tensorrt_onnx(
function test_tensorrt_torch (line 219) | def test_tensorrt_torch(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/tests/test_tensorflow.py
function test_tensorflow_backend (line 44) | def test_tensorflow_backend(
function test_tf_lite (line 171) | def test_tf_lite(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/tests/test_torch_dynamo.py
function run_test_torch_dynamo (line 34) | def run_test_torch_dynamo(
function test_torch_dynamo_fp32 (line 154) | def test_torch_dynamo_fp32(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/tests/test_torchscript.py
function run_test_torchscript (line 34) | def run_test_torchscript(
function test_torchscript_no_quantization (line 145) | def test_torchscript_no_quantization(
function test_torchscript_half_quantization (line 183) | def test_torchscript_half_quantization(
function test_torchscript_int8_quantization (line 228) | def test_torchscript_int8_quantization(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/tests/test_tvm.py
function test_tvm_onnx (line 75) | def test_tvm_onnx(
function test_tvm_torch (line 209) | def test_tvm_torch(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/tests/utils.py
class TestModel (line 37) | class TestModel(torch.nn.Module):
method __init__ (line 38) | def __init__(self):
method forward (line 50) | def forward(self, input_tensor_0, input_tensor_1):
function tensorflow_model (line 58) | def tensorflow_model():
function _build_static_model (line 70) | def _build_static_model(
function _build_dynamic_model (line 94) | def _build_dynamic_model(
function get_torch_model (line 138) | def get_torch_model(dynamic: bool = False):
function get_tensorflow_model (line 150) | def get_tensorflow_model(dynamic: bool = False):
function get_huggingface_model (line 162) | def get_huggingface_model(temp_dir: str, dl_framework: DeepLearningFrame...
function initialize_model (line 217) | def initialize_model(
function check_model_validity (line 287) | def check_model_validity(
FILE: optimization/nebullvm/nebullvm/operations/optimizations/utils.py
function map_compilers_and_compressors (line 4) | def map_compilers_and_compressors(ignore_list: List, enum_class: Callable):
FILE: optimization/nebullvm/nebullvm/optional_modules/dummy.py
class DummyClass (line 1) | class DummyClass:
FILE: optimization/nebullvm/nebullvm/optional_modules/tensorflow.py
class Keras (line 11) | class Keras:
class data (line 15) | class data:
class dtypes (line 19) | class dtypes:
class Tensorflow (line 23) | class Tensorflow:
method function (line 32) | def function(**kwargs):
FILE: optimization/nebullvm/nebullvm/optional_modules/torch.py
class nn (line 20) | class nn:
class jit (line 23) | class jit:
class fx (line 26) | class fx:
class torch (line 29) | class torch:
method no_grad (line 41) | def no_grad():
method inference_mode (line 45) | def inference_mode():
FILE: optimization/nebullvm/nebullvm/optional_modules/utils.py
function torch_is_available (line 21) | def torch_is_available() -> bool:
function tensorflow_is_available (line 45) | def tensorflow_is_available() -> bool:
function onnx_is_available (line 61) | def onnx_is_available() -> bool:
function _onnxmltools_is_available (line 76) | def _onnxmltools_is_available():
function _onnxsim_is_available (line 93) | def _onnxsim_is_available():
function _polygraphy_is_available (line 102) | def _polygraphy_is_available():
function tf2onnx_is_available (line 111) | def tf2onnx_is_available():
function check_dependencies (line 120) | def check_dependencies(device: Device):
FILE: optimization/nebullvm/nebullvm/tools/adapters.py
class ModelAdapter (line 37) | class ModelAdapter(abc.ABC):
method adapted_model (line 40) | def adapted_model(self):
method adapted_data (line 45) | def adapted_data(self):
method adapt_inference_learner (line 49) | def adapt_inference_learner(
method adapt_original_model (line 55) | def adapt_original_model(
class DiffusionAdapter (line 61) | class DiffusionAdapter(ModelAdapter):
method __init__ (line 62) | def __init__(
method __benchmark_pipeline (line 76) | def __benchmark_pipeline(
method __adapt (line 96) | def __adapt(self):
method adapted_model (line 125) | def adapted_model(self):
method adapted_data (line 131) | def adapted_data(self):
method adapt_inference_learner (line 136) | def adapt_inference_learner(
method adapt_original_model (line 170) | def adapt_original_model(
class HuggingFaceAdapter (line 193) | class HuggingFaceAdapter(ModelAdapter):
method __init__ (line 194) | def __init__(self, model: Any, data: List, device: Device, **kwargs):
method __adapt_model (line 206) | def __adapt_model(self):
method adapted_model (line 229) | def adapted_model(self):
method adapted_data (line 235) | def adapted_data(self):
method adapt_inference_learner (line 240) | def adapt_inference_learner(
method adapt_original_model (line 256) | def adapt_original_model(
FILE: optimization/nebullvm/nebullvm/tools/benchmark.py
function _get_dl_framework (line 25) | def _get_dl_framework(model: Any):
function _create_model_inputs (line 42) | def _create_model_inputs(
class BaseBenchmark (line 57) | class BaseBenchmark(ABC):
method __init__ (line 58) | def __init__(self, model, input_tensors, device, n_warmup=50, n_runs=1...
method benchmark (line 66) | def benchmark(self):
class PytorchBenchmark (line 70) | class PytorchBenchmark(BaseBenchmark):
method benchmark (line 71) | def benchmark(self):
class TensorflowBenchmark (line 117) | class TensorflowBenchmark(BaseBenchmark):
method benchmark (line 118) | def benchmark(self):
class NumpyBenchmark (line 159) | class NumpyBenchmark(BaseBenchmark):
method benchmark (line 160) | def benchmark(self):
function benchmark (line 204) | def benchmark(
FILE: optimization/nebullvm/nebullvm/tools/data.py
class DataManager (line 12) | class DataManager:
method __init__ (line 21) | def __init__(self, data_reader: Sequence):
method __getitem__ (line 27) | def __getitem__(self, item):
method __len__ (line 30) | def __len__(self):
method __iter__ (line 33) | def __iter__(self):
method __next__ (line 37) | def __next__(self):
method get_numpy_list (line 45) | def get_numpy_list(
method get_list (line 63) | def get_list(
method from_iterable (line 93) | def from_iterable(cls, iterable: Iterable, max_length: int = 500):
method from_dataloader (line 97) | def from_dataloader(
method get_split (line 158) | def get_split(self, split_type="train"):
method split (line 165) | def split(self, split_pct: float, shuffle: bool = False):
class PytorchDataset (line 188) | class PytorchDataset(Dataset):
method __init__ (line 189) | def __init__(self, input_data: DataManager, has_labels: bool = False):
method __len__ (line 194) | def __len__(self):
method __getitem__ (line 197) | def __getitem__(self, idx):
FILE: optimization/nebullvm/nebullvm/tools/diffusers.py
function get_unet_inputs (line 38) | def get_unet_inputs(
class DiffusionUNetWrapper (line 122) | class DiffusionUNetWrapper(torch.nn.Module):
method __init__ (line 123) | def __init__(self, model):
method forward (line 127) | def forward(self, *x, **kwargs):
class OptimizedDiffusionWrapper (line 133) | class OptimizedDiffusionWrapper(torch.nn.Module):
method __init__ (line 134) | def __init__(self, model):
method forward (line 138) | def forward(self, *x, **kwargs):
function is_diffusion_model_pipe (line 148) | def is_diffusion_model_pipe(model):
function get_default_dynamic_info (line 152) | def get_default_dynamic_info(input_shape: List[Tuple[int, ...]]):
function preprocess_diffusers (line 189) | def preprocess_diffusers(pipe: DiffusionPipeline) -> torch.nn.Module:
function postprocess_diffusers (line 196) | def postprocess_diffusers(
class Optimizer (line 213) | class Optimizer:
method __init__ (line 214) | def __init__(self, onnx_graph, verbose=False):
method info (line 218) | def info(self, prefix):
method cleanup (line 224) | def cleanup(self, return_onnx=False):
method select_outputs (line 229) | def select_outputs(self, keep, names=None):
method fold_constants (line 235) | def fold_constants(self, return_onnx=False):
method infer_shapes (line 244) | def infer_shapes(self, return_onnx=False):
function get_path (line 256) | def get_path(version, inpaint=False):
function get_embedding_dim (line 285) | def get_embedding_dim(version):
class BaseModel (line 294) | class BaseModel:
method __init__ (line 295) | def __init__(
method get_model (line 323) | def get_model(self):
method get_input_names (line 326) | def get_input_names(self):
method get_output_names (line 329) | def get_output_names(self):
method get_dynamic_axes (line 332) | def get_dynamic_axes(self):
method get_sample_input (line 335) | def get_sample_input(self, batch_size, image_height, image_width):
method get_input_profile (line 338) | def get_input_profile(
method get_shape_dict (line 343) | def get_shape_dict(self, batch_size, image_height, image_width):
method optimize (line 346) | def optimize(self, onnx_graph):
method check_dims (line 359) | def check_dims(self, batch_size, image_height, image_width):
method get_minmax_dims (line 374) | def get_minmax_dims(
class CLIP (line 415) | class CLIP(BaseModel):
method __init__ (line 416) | def __init__(
method get_model (line 429) | def get_model(self):
method get_input_names (line 434) | def get_input_names(self):
method get_output_names (line 437) | def get_output_names(self):
method get_dynamic_axes (line 440) | def get_dynamic_axes(self):
method get_input_profile (line 443) | def get_input_profile(
method get_shape_dict (line 458) | def get_shape_dict(self, batch_size, image_height, image_width):
method get_sample_input (line 469) | def get_sample_input(self, batch_size, image_height, image_width):
method optimize (line 475) | def optimize(self, onnx_graph):
function make_CLIP (line 494) | def make_CLIP(
class UNet (line 507) | class UNet(BaseModel):
method __init__ (line 508) | def __init__(
method get_model (line 533) | def get_model(self):
method get_input_names (line 546) | def get_input_names(self):
method get_output_names (line 549) | def get_output_names(self):
method get_dynamic_axes (line 552) | def get_dynamic_axes(self):
method get_input_profile (line 559) | def get_input_profile(
method get_shape_dict (line 602) | def get_shape_dict(self, batch_size, image_height, image_width):
method get_sample_input (line 621) | def get_sample_input(self, batch_size, image_height, image_width):
function make_UNet (line 646) | def make_UNet(
class VAE (line 661) | class VAE(BaseModel):
method __init__ (line 662) | def __init__(
method get_model (line 675) | def get_model(self):
method get_input_names (line 682) | def get_input_names(self):
method get_output_names (line 685) | def get_output_names(self):
method get_dynamic_axes (line 688) | def get_dynamic_axes(self):
method get_input_profile (line 694) | def get_input_profile(
method get_shape_dict (line 722) | def get_shape_dict(self, batch_size, image_height, image_width):
method get_sample_input (line 731) | def get_sample_input(self, batch_size, image_height, image_width):
function make_VAE (line 745) | def make_VAE(
class TorchVAEEncoder (line 758) | class TorchVAEEncoder(torch.nn.Module):
method __init__ (line 759) | def __init__(self, token, device, path):
method forward (line 766) | def forward(self, x):
class VAEEncoder (line 770) | class VAEEncoder(BaseModel):
method __init__ (line 771) | def __init__(
method get_model (line 784) | def get_model(self):
method get_input_names (line 788) | def get_input_names(self):
method get_output_names (line 791) | def get_output_names(self):
method get_dynamic_axes (line 794) | def get_dynamic_axes(self):
method get_input_profile (line 800) | def get_input_profile(
method get_shape_dict (line 830) | def get_shape_dict(self, batch_size, image_height, image_width):
method get_sample_input (line 839) | def get_sample_input(self, batch_size, image_height, image_width):
function make_VAEEncoder (line 851) | def make_VAEEncoder(
function make_tokenizer (line 864) | def make_tokenizer(version, hf_token):
function is_diffusion_model (line 870) | def is_diffusion_model(model) -> bool:
FILE: optimization/nebullvm/nebullvm/tools/feedback_collector.py
class FeedbackCollector (line 13) | class FeedbackCollector:
method __init__ (line 14) | def __init__(
method _store_ip_address (line 27) | def _store_ip_address(self):
method is_active (line 36) | def is_active(self):
method _inform_user (line 39) | def _inform_user(self):
method store_info (line 47) | def store_info(self, key: str, value: Any):
method send_feedback (line 53) | def send_feedback(self, timeout: int = 30):
method get (line 70) | def get(self, key: str, default: Any = None):
method reset (line 73) | def reset(self, key: str):
FILE: optimization/nebullvm/nebullvm/tools/hardware_utils.py
function get_hw_setup (line 22) | def get_hw_setup(device: Device = None) -> HardwareSetup:
function _get_gpu_name (line 44) | def _get_gpu_name() -> str:
function _get_neuron_device_name (line 54) | def _get_neuron_device_name() -> str:
function _get_tpu_device_name (line 66) | def _get_tpu_device_name() -> str:
FILE: optimization/nebullvm/nebullvm/tools/huggingface.py
class PyTorchTransformerWrapper (line 29) | class PyTorchTransformerWrapper(Module):
method __init__ (line 36) | def __init__(
method forward (line 47) | def forward(self, *args: torch.Tensor):
class TensorFlowTransformerWrapper (line 56) | class TensorFlowTransformerWrapper(tf.keras.Model):
method __init__ (line 57) | def __init__(
method call (line 68) | def call(self, *args: tf.Tensor):
function flatten_outputs (line 77) | def flatten_outputs(
function get_size_recursively (line 90) | def get_size_recursively(
function get_output_structure_from_text (line 100) | def get_output_structure_from_text(
function get_output_structure_from_dict (line 132) | def get_output_structure_from_dict(
function restructure_output (line 167) | def restructure_output(
FILE: optimization/nebullvm/nebullvm/tools/logger.py
function debug_mode_enabled (line 18) | def debug_mode_enabled():
function setup_logger (line 22) | def setup_logger():
class LoggingContext (line 41) | class LoggingContext(object):
method __init__ (line 42) | def __init__(
method __enter__ (line 54) | def __enter__(self):
method __exit__ (line 59) | def __exit__(self, et: Any, ev: Any, tb: Any):
FILE: optimization/nebullvm/nebullvm/tools/onnx.py
function convert_to_numpy (line 20) | def convert_to_numpy(tensor: Any):
function convert_to_target_framework (line 33) | def convert_to_target_framework(
function get_input_names (line 44) | def get_input_names(onnx_model: str):
function get_output_names (line 50) | def get_output_names(onnx_model: str):
function run_onnx_model (line 56) | def run_onnx_model(
function _extract_dynamic_axis (line 85) | def _extract_dynamic_axis(
function extract_info_from_np_data (line 117) | def extract_info_from_np_data(
function get_output_info_onnx (line 149) | def get_output_info_onnx(
function create_model_inputs_onnx (line 160) | def create_model_inputs_onnx(input_infos: List[InputInfo]) -> List[np.nd...
function onnx_is_gpu_available (line 174) | def onnx_is_gpu_available():
FILE: optimization/nebullvm/nebullvm/tools/pytorch.py
function save_with_torch_fx (line 14) | def save_with_torch_fx(model: torch.nn.Module, path: Path):
function load_with_torch_fx (line 19) | def load_with_torch_fx(
function get_output_info_torch (line 31) | def get_output_info_torch(
function create_model_inputs_torch (line 58) | def create_model_inputs_torch(
function run_torch_model (line 74) | def run_torch_model(
function _extract_dynamic_axis (line 103) | def _extract_dynamic_axis(
function extract_info_from_torch_data (line 133) | def extract_info_from_torch_data(
function torch_is_gpu_available (line 187) | def torch_is_gpu_available():
function torch_get_device_name (line 191) | def torch_get_device_name():
function get_torch_model_size (line 195) | def get_torch_model_size(
FILE: optimization/nebullvm/nebullvm/tools/tests/test_data.py
function test_custom_input_data (line 7) | def test_custom_input_data():
function test_torch_dataloader_single_input_with_label (line 24) | def test_torch_dataloader_single_input_with_label():
function test_torch_dataloader_two_inputs_with_label (line 38) | def test_torch_dataloader_two_inputs_with_label():
function test_torch_dataloader_three_inputs_with_label (line 54) | def test_torch_dataloader_three_inputs_with_label():
function test_torch_dataloader_single_input_without_label (line 73) | def test_torch_dataloader_single_input_without_label():
function test_tensorflow_dataloader_single_input_with_label (line 84) | def test_tensorflow_dataloader_single_input_with_label():
function test_tensorflow_dataloader_two_inputs_with_label (line 97) | def test_tensorflow_dataloader_two_inputs_with_label():
function test_tensorflow_dataloader_three_inputs_with_label (line 115) | def test_tensorflow_dataloader_three_inputs_with_label():
function test_tensorflow_dataloader_single_input_without_label (line 135) | def test_tensorflow_dataloader_single_input_without_label():
FILE: optimization/nebullvm/nebullvm/tools/tests/test_hardware_utils.py
class TestGetHwSetup (line 7) | class TestGetHwSetup(unittest.TestCase):
method test_hw_setup__gpu_not_available (line 17) | def test_hw_setup__gpu_not_available(self, *_):
method test_hw_setup__gpu_is_available (line 28) | def test_hw_setup__gpu_is_available(self, *_):
FILE: optimization/nebullvm/nebullvm/tools/tests/test_utils.py
class TestGetThroughput (line 8) | class TestGetThroughput(unittest.TestCase):
method test_latency_is_zero (line 9) | def test_latency_is_zero(self):
class TestCheckDevice (line 13) | class TestCheckDevice(unittest.TestCase):
method test_device_is_none_no_device_available (line 17) | def test_device_is_none_no_device_available(self, *_):
method test_device_is_none_gpu_is_available (line 25) | def test_device_is_none_gpu_is_available(self, *_):
method test_device_is_none_tpu_is_available (line 33) | def test_device_is_none_tpu_is_available(self, *_):
method test_device_is_none_neuron_is_available (line 41) | def test_device_is_none_neuron_is_available(self, *_):
method test_device_is_cpu (line 46) | def test_device_is_cpu(self):
method test_device_is_gpu_no_gpu_available (line 52) | def test_device_is_gpu_no_gpu_available(self, _):
method test_device_is_gpu_gpu_is_available (line 70) | def test_device_is_gpu_gpu_is_available(self, _):
method test_device_is_tpu_no_tpu_available (line 88) | def test_device_is_tpu_no_tpu_available(self, _):
method test_device_is_tpu_tpu_is_available (line 98) | def test_device_is_tpu_tpu_is_available(self, _):
method test_device_is_neuron_no_neuron_available (line 108) | def test_device_is_neuron_no_neuron_available(self, _):
method test_device_is_neuron_neuron_is_available (line 118) | def test_device_is_neuron_neuron_is_available(self, _):
FILE: optimization/nebullvm/nebullvm/tools/tf.py
function get_output_info_tf (line 10) | def get_output_info_tf(
function create_model_inputs_tf (line 30) | def create_model_inputs_tf(input_infos: List[InputInfo]) -> List[tf.Tens...
function run_tf_model (line 54) | def run_tf_model(
function _extract_dynamic_axis (line 66) | def _extract_dynamic_axis(
function extract_info_from_tf_data (line 96) | def extract_info_from_tf_data(
function tensorflow_is_gpu_available (line 129) | def tensorflow_is_gpu_available():
function tensorflow_get_gpu_name (line 133) | def tensorflow_get_gpu_name():
FILE: optimization/nebullvm/nebullvm/tools/transformations.py
class BaseTransformation (line 11) | class BaseTransformation(ABC):
method _transform (line 13) | def _transform(self, _input: Any, **kwargs) -> Any:
method __call__ (line 16) | def __call__(self, _input: Any, **kwargs):
method to_dict (line 19) | def to_dict(self):
method from_dict (line 26) | def from_dict(cls, tfm_dict: Dict):
class MultiStageTransformation (line 30) | class MultiStageTransformation(BaseTransformation):
method __init__ (line 31) | def __init__(self, transformations: List[BaseTransformation]):
method _transform (line 34) | def _transform(self, _input: Any, **kwargs) -> Any:
method append (line 39) | def append(self, __tfm: BaseTransformation):
method extend (line 42) | def extend(self, tfms: List[BaseTransformation]):
method to_dict (line 45) | def to_dict(self) -> Dict:
method to_list (line 48) | def to_list(self):
method from_dict (line 52) | def from_dict(cls, tfms_dict: Dict):
method copy (line 60) | def copy(self):
method __len__ (line 64) | def __len__(self):
class HalfPrecisionTransformation (line 68) | class HalfPrecisionTransformation(BaseTransformation):
method _transform_numpy (line 70) | def _transform_numpy(_input: np.ndarray) -> np.ndarray:
method _transform_tf (line 74) | def _transform_tf(_input: tf.Tensor) -> tf.Tensor:
method _transform_torch (line 78) | def _transform_torch(_input: torch.Tensor) -> torch.Tensor:
method _transform (line 81) | def _transform(self, _input: Any, **kwargs) -> Any:
class NoOp (line 108) | class NoOp(BaseTransformation):
method _transform (line 109) | def _transform(self, _input: Any, **kwargs):
class VerifyContiguity (line 113) | class VerifyContiguity(BaseTransformation):
method _transform (line 114) | def _transform(self, _input: Any, **kwargs) -> Any:
FILE: optimization/nebullvm/nebullvm/tools/utils.py
function get_model_size_mb (line 46) | def get_model_size_mb(model: Any) -> float:
function get_model_name (line 60) | def get_model_name(model: Any) -> str:
function generate_model_id (line 68) | def generate_model_id(model: Any) -> str:
function get_throughput (line 73) | def get_throughput(latency: float, batch_size: int = 1) -> float:
function ifnone (line 79) | def ifnone(target, new_value):
function inspect_dynamic_size (line 86) | def inspect_dynamic_size(
function gpu_is_available (line 101) | def gpu_is_available():
function neuron_is_available (line 109) | def neuron_is_available():
function tpu_is_available (line 117) | def tpu_is_available():
function check_module_version (line 128) | def check_module_version(
function is_python_version_3_10 (line 144) | def is_python_version_3_10():
function get_dl_framework (line 151) | def get_dl_framework(model: Any):
function check_input_data (line 168) | def check_input_data(input_data: Union[Iterable, Sequence]):
function is_data_subscriptable (line 187) | def is_data_subscriptable(input_data: Union[Iterable, Sequence]):
function check_dynamic_info_inputs (line 196) | def check_dynamic_info_inputs(
function extract_info_from_data (line 220) | def extract_info_from_data(
function is_huggingface_data (line 255) | def is_huggingface_data(data_sample: Any) -> bool:
function is_dict_type (line 265) | def is_dict_type(data_sample: Any):
function _get_idx (line 274) | def _get_idx(device: str) -> int:
function _set_device (line 283) | def _set_device(
function check_device (line 301) | def check_device(device: Optional[str] = None) -> Device:
function get_gpu_compute_capability (line 336) | def get_gpu_compute_capability(gpu_idx: int) -> float:
FILE: optimization/nebullvm/nebullvm/tools/venv.py
class EnvBuilder (line 8) | class EnvBuilder(venv.EnvBuilder):
method __init__ (line 9) | def __init__(self, *args, **kwargs):
method post_setup (line 13) | def post_setup(self, context):
function run_in_different_venv (line 17) | def run_in_different_venv(
FILE: optimization/open_alpha_tensor/main.py
function _compute_largest_divisor (line 9) | def _compute_largest_divisor(n: int) -> int:
function main (line 17) | def main():
FILE: optimization/open_alpha_tensor/open_alpha_tensor/api/functions.py
function train_alpha_tensor (line 6) | def train_alpha_tensor(
FILE: optimization/open_alpha_tensor/open_alpha_tensor/core/actors/stage.py
function game_is_finished (line 12) | def game_is_finished(state):
function remove_duplicates (line 22) | def remove_duplicates(reducing_tensor: torch.Tensor):
function extract_children_states_from_actions (line 60) | def extract_children_states_from_actions(
function _reduce_memory_consumption_before_storing (line 117) | def _reduce_memory_consumption_before_storing(
function _recompose_possible_states (line 134) | def _recompose_possible_states(reduced_memory_states_dict: Dict):
function extract_present_state (line 155) | def extract_present_state(state: torch.Tensor) -> torch.Tensor:
function to_hash (line 159) | def to_hash(tensor: torch.Tensor) -> str:
function from_hash (line 171) | def from_hash(hashable_tensor: str, shape: tuple) -> torch.Tensor:
function record_action (line 183) | def record_action(tree_dict: Dict, state: str, action: str):
function select_future_state (line 197) | def select_future_state(
function simulate_game (line 228) | def simulate_game(
function backward_pass (line 313) | def backward_pass(trajectory, states_dict, leaf_q_value: torch.Tensor):
function monte_carlo_tree_search (line 342) | def monte_carlo_tree_search(
function compute_improved_policy (line 389) | def compute_improved_policy(
function actor_prediction (line 420) | def actor_prediction(
FILE: optimization/open_alpha_tensor/open_alpha_tensor/core/data/basis_change.py
function get_change_basis_matrix (line 8) | def get_change_basis_matrix(
function cob_entry_prob_distribution (line 36) | def cob_entry_prob_distribution(size):
class ChangeOfBasis (line 47) | class ChangeOfBasis:
method __init__ (line 52) | def __init__(
method __call__ (line 83) | def __call__(self, tensor: torch.Tensor, return_basis: bool = False):
FILE: optimization/open_alpha_tensor/open_alpha_tensor/core/data/dataset.py
function compute_move (line 21) | def compute_move(triplets: Tuple[torch.Tensor, torch.Tensor, torch.Tenso...
class SyntheticDataBuffer (line 33) | class SyntheticDataBuffer(Dataset):
method __init__ (line 36) | def __init__(
method __len__ (line 95) | def __len__(self):
method __getitem__ (line 99) | def __getitem__(self, idx):
method _apply_moves (line 146) | def _apply_moves(
class GameDataBuffer (line 165) | class GameDataBuffer(Dataset):
method __init__ (line 168) | def __init__(self, device: str, max_buffer_size: int):
method __del__ (line 181) | def __del__(self):
method add_game (line 184) | def add_game(
method __len__ (line 215) | def __len__(self):
method __getitem__ (line 219) | def __getitem__(self, idx):
method save_game_data (line 234) | def save_game_data(self, path: str):
method load_game_data (line 242) | def load_game_data(self, path: str):
class TensorGameDataset (line 252) | class TensorGameDataset(Dataset):
method __init__ (line 259) | def __init__(
method change_training_split (line 303) | def change_training_split(self, pct_synth, pct_best_game):
method recompute_synthetic_indexes (line 307) | def recompute_synthetic_indexes(self):
method __getitem__ (line 354) | def __getitem__(self, idx):
method __len__ (line 378) | def __len__(self):
method add_game (line 381) | def add_game(
method add_best_game (line 389) | def add_best_game(
method save_game_data (line 397) | def save_game_data(self, path):
method load_game_data (line 403) | def load_game_data(self, path):
method input_tensor (line 410) | def input_tensor(self) -> torch.Tensor:
method _build_tensor_game_input (line 440) | def _build_tensor_game_input(
method games_are_good (line 459) | def games_are_good(self):
FILE: optimization/open_alpha_tensor/open_alpha_tensor/core/data/generation.py
function generate_synthetic_data (line 6) | def generate_synthetic_data(
function f_prob_distribution (line 48) | def f_prob_distribution(size):
function z2_prob_distribution (line 64) | def z2_prob_distribution(size):
FILE: optimization/open_alpha_tensor/open_alpha_tensor/core/data/utils.py
function get_scalars (line 6) | def get_scalars(input_tensor: torch.Tensor, t_step: int, with_bs: bool =...
function map_triplet_to_action (line 25) | def map_triplet_to_action(
function _single_action_to_triplet (line 60) | def _single_action_to_triplet(
function map_action_to_triplet (line 92) | def map_action_to_triplet(
FILE: optimization/open_alpha_tensor/open_alpha_tensor/core/modules/alpha_tensor.py
class AlphaTensorModel (line 11) | class AlphaTensorModel(torch.nn.Module):
method __init__ (line 12) | def __init__(
method device (line 47) | def device(self):
method _train_forward (line 50) | def _train_forward(
method _eval_forward (line 71) | def _eval_forward(self, x: torch.Tensor, s: torch.Tensor):
method forward (line 78) | def forward(
method n_logits (line 92) | def n_logits(self):
method n_steps (line 96) | def n_steps(self):
method n_samples (line 100) | def n_samples(self):
FILE: optimization/open_alpha_tensor/open_alpha_tensor/core/modules/attention.py
class AttentionHead (line 5) | class AttentionHead(torch.nn.Module):
method __init__ (line 6) | def __init__(self, x_size: int, y_size: int, proj_dim: int):
method forward (line 16) | def forward(self, x: torch.Tensor, y: torch.Tensor, mask: bool = False):
class AttentionDenseBlock (line 31) | class AttentionDenseBlock(torch.nn.Module):
method __init__ (line 32) | def __init__(self, inner_size: int, multiplier: int = 4):
method forward (line 41) | def forward(self, x: torch.Tensor):
class AlphaMultiHeadAttention (line 46) | class AlphaMultiHeadAttention(torch.nn.Module):
method __init__ (line 47) | def __init__(
method forward (line 67) | def forward(
FILE: optimization/open_alpha_tensor/open_alpha_tensor/core/modules/extras.py
class QuantileLoss (line 4) | class QuantileLoss(torch.nn.Module):
method __init__ (line 5) | def __init__(self, delta: float = 1.0):
method forward (line 9) | def forward(self, q: torch.Tensor, g: torch.Tensor):
class ValueRiskManagement (line 17) | class ValueRiskManagement(torch.nn.Module):
method __init__ (line 18) | def __init__(self, u_q: float = 0.75):
method forward (line 22) | def forward(self, q: torch.Tensor):
FILE: optimization/open_alpha_tensor/open_alpha_tensor/core/modules/heads.py
class PositionEncoding (line 9) | class PositionEncoding(torch.nn.Module):
method __init__ (line 10) | def __init__(self, d_model: int, max_len: int = 5000):
method forward (line 22) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class PolicyHeadDoubleAttention (line 31) | class PolicyHeadDoubleAttention(torch.nn.Module):
method __init__ (line 32) | def __init__(
method forward (line 49) | def forward(self, x: torch.Tensor, e: torch.Tensor):
class PolicyHeadCore (line 61) | class PolicyHeadCore(torch.nn.Module):
method __init__ (line 62) | def __init__(
method forward (line 86) | def forward(self, a: torch.Tensor, e: torch.Tensor):
function sample_from_logits (line 94) | def sample_from_logits(a):
class PolicyHead (line 108) | class PolicyHead(torch.nn.Module):
method __init__ (line 109) | def __init__(
method _train_forward (line 123) | def _train_forward(self, e: torch.Tensor, g: torch.Tensor):
method _eval_forward (line 135) | def _eval_forward(self, e: torch.Tensor):
method forward (line 158) | def forward(self, e: torch.Tensor, g: torch.Tensor = None):
class ValueHeadCore (line 164) | class ValueHeadCore(torch.nn.Module):
method __init__ (line 165) | def __init__(self, input_size: int, output_size: int):
method forward (line 170) | def forward(self, x: torch.Tensor):
class ValueHead (line 174) | class ValueHead(torch.nn.Module):
method __init__ (line 175) | def __init__(
method forward (line 187) | def forward(self, x: torch.Tensor):
FILE: optimization/open_alpha_tensor/open_alpha_tensor/core/modules/torso.py
class TorsoAttentiveModes (line 6) | class TorsoAttentiveModes(torch.nn.Module):
method __init__ (line 7) | def __init__(self, input_dim: int):
method forward (line 15) | def forward(self, x1, x2, x3):
class TorsoModel (line 29) | class TorsoModel(torch.nn.Module):
method __init__ (line 40) | def __init__(
method forward (line 68) | def forward(self, x: torch.Tensor, scalars: torch.Tensor):
FILE: optimization/open_alpha_tensor/open_alpha_tensor/core/training.py
function _single_act (line 21) | def _single_act(
function swap_data (line 62) | def swap_data(
class Trainer (line 100) | class Trainer:
method __init__ (line 107) | def __init__(
method train_step (line 198) | def train_step(self):
method act_step (line 218) | def act_step(
method train (line 310) | def train(
FILE: optimization/open_alpha_tensor/open_alpha_tensor/operations/checkpoint_op.py
function optimizer_to (line 15) | def optimizer_to(optim: torch.optim.Optimizer, device: str):
class LoadCheckPointOp (line 30) | class LoadCheckPointOp(Operation):
method __init__ (line 34) | def __init__(self):
method execute (line 40) | def execute(
method get_last_epoch (line 81) | def get_last_epoch(self) -> int:
method get_model (line 85) | def get_model(self) -> AlphaTensorModel:
method get_optimizer (line 89) | def get_optimizer(self) -> torch.optim.Optimizer:
method get_result (line 93) | def get_result(self) -> Any:
class LoadCheckpointDataOp (line 97) | class LoadCheckpointDataOp(Operation):
method __init__ (line 101) | def __init__(self):
method execute (line 105) | def execute(self, games_store_dir: Path, trainer: Trainer):
method get_result (line 121) | def get_result(self) -> bool:
FILE: optimization/open_alpha_tensor/open_alpha_tensor/operations/model_op.py
class BuildModelOp (line 11) | class BuildModelOp(Operation):
method __init__ (line 14) | def __init__(self):
method execute (line 18) | def execute(
method get_model (line 52) | def get_model(self) -> AlphaTensorModel:
method get_result (line 56) | def get_result(self) -> Any:
class BuildOptimizerOp (line 60) | class BuildOptimizerOp(Operation):
method __init__ (line 63) | def __init__(self):
method execute (line 67) | def execute(
method get_optimizer (line 94) | def get_optimizer(self) -> torch.optim.Optimizer:
method get_result (line 98) | def get_result(self) -> Any:
class SaveModelOp (line 102) | class SaveModelOp(Operation):
method get_result (line 107) | def get_result(self) -> Any:
method execute (line 110) | def execute(
FILE: optimization/open_alpha_tensor/open_alpha_tensor/operations/training_op.py
class TrainingOperation (line 12) | class TrainingOperation(Operation):
method __init__ (line 16) | def __init__(self):
method execute (line 22) | def execute(
method get_trained_model (line 137) | def get_trained_model(self):
method get_result (line 141) | def get_result(self) -> Any:
FILE: optimization/open_alpha_tensor/open_alpha_tensor/root_op.py
class TrainAlphaTensorRootOp (line 15) | class TrainAlphaTensorRootOp(Operation):
method __init__ (line 19) | def __init__(self):
method execute (line 30) | def execute(
method get_result (line 181) | def get_result(self) -> AlphaTensorModel:
FILE: optimization/speedster/notebooks/huggingface/faster_transformer_bert.py
function prepare_examples (line 39) | def prepare_examples(tokenizer, len_dataset=1000):
function optimize_no_trace (line 91) | def optimize_no_trace(model, data_type="fp16"):
function optimize_with_trace (line 104) | def optimize_with_trace(
function benchmark (line 126) | def benchmark(model, model_desc="original BERT"):
FILE: optimization/speedster/speedster/api/functions.py
function optimize_model (line 22) | def optimize_model(
FILE: optimization/speedster/speedster/api/tests/test_huggingface.py
function test_torch_huggingface_ort_input_text (line 14) | def test_torch_huggingface_ort_input_text():
function test_torch_huggingface_ort_input_tensors (line 88) | def test_torch_huggingface_ort_input_tensors():
function test_torch_huggingface_torchscript_input_tensors (line 152) | def test_torch_huggingface_torchscript_input_tensors():
function test_tensorflow_huggingface_ort_input_text_np (line 196) | def test_tensorflow_huggingface_ort_input_text_np():
function test_tensorflow_huggingface_ort_input_tensors_np (line 266) | def test_tensorflow_huggingface_ort_input_tensors_np():
function test_tensorflow_huggingface_ort_input_text_tf (line 324) | def test_tensorflow_huggingface_ort_input_text_tf():
function test_tensorflow_huggingface_ort_input_tensors_tf (line 394) | def test_tensorflow_huggingface_ort_input_tensors_tf():
FILE: optimization/speedster/speedster/api/tests/test_onnx.py
function test_onnx_ort (line 27) | def test_onnx_ort():
function test_onnx_ort_quant (line 75) | def test_onnx_ort_quant():
function test_onnx_tensorrt (line 118) | def test_onnx_tensorrt():
function test_onnx_openvino (line 160) | def test_onnx_openvino():
function test_onnx_tvm (line 202) | def test_onnx_tvm():
FILE: optimization/speedster/speedster/api/tests/test_pytorch.py
function test_torch_ort (line 40) | def test_torch_ort():
function test_torch_ort_quant (line 72) | def test_torch_ort_quant():
function test_torch_torchscript (line 98) | def test_torch_torchscript():
function test_torch_torch_dynamo (line 127) | def test_torch_torch_dynamo():
function test_torch_tensorrt (line 162) | def test_torch_tensorrt():
function test_torch_openvino (line 192) | def test_torch_openvino():
function test_torch_tvm (line 220) | def test_torch_tvm():
function test_torch_bladedisc (line 249) | def test_torch_bladedisc():
FILE: optimization/speedster/speedster/api/tests/test_tensorflow.py
function test_tensorflow_ort (line 46) | def test_tensorflow_ort():
function test_tensorflow_tf_backend (line 78) | def test_tensorflow_tf_backend():
function test_tensorflow_tflite (line 107) | def test_tensorflow_tflite():
function test_tensorflow_tensorrt (line 137) | def test_tensorflow_tensorrt():
function test_tensorflow_openvino (line 166) | def test_tensorflow_openvino():
function test_tensorflow_tvm (line 195) | def test_tensorflow_tvm():
FILE: optimization/speedster/speedster/api/tests/utils.py
function torch_to_onnx (line 10) | def torch_to_onnx(model, input_data, output_path):
FILE: optimization/speedster/speedster/root_op.py
function _convert_technique (line 40) | def _convert_technique(technique: str):
function _get_model_len (line 52) | def _get_model_len(model: Any):
class SpeedsterRootOp (line 64) | class SpeedsterRootOp(Operation):
method __init__ (line 65) | def __init__(self):
method _send_feedback (line 70) | def _send_feedback(
method execute (line 115) | def execute(
FILE: optimization/speedster/speedster/speedster.py
class SpeedsterApp (line 6) | class SpeedsterApp(App):
method __init__ (line 7) | def __init__(self):
method execute (line 11) | def execute(self, *args, **kwargs):
FILE: optimization/speedster/speedster/tests/test_root_op.py
function test_root_op_no_optim_model (line 6) | def test_root_op_no_optim_model(mocker):
function test_root_op_optim_model (line 35) | def test_root_op_optim_model(mocker):
Condensed preview — 306 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (1,787K chars).
[
{
"path": ".gitignore",
"chars": 1995,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
},
{
"path": "CITATION.cff",
"chars": 435,
"preview": "cff-version: 1.2.0\nmessage: \"If you use this software, please cite it as below.\"\nauthors:\n- family-names: \"Nebuly\"\n giv"
},
{
"path": "CODE_OF_CONDUCT.md",
"chars": 5218,
"preview": "# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nWe as members, contributors, and leaders pledge to make participa"
},
{
"path": "README.md",
"chars": 1635,
"preview": "# OptiMate\n\n**[Legacy]**\n\nThis repository is now in a legacy phase and is no longer actively maintained. Although the so"
},
{
"path": "monitoring/nebuly/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/.github/workflows/tests.yml",
"chars": 3684,
"preview": "name: Run tests\n\non:\n push:\n branches:\n - \"main\"\n paths-ignore:\n - \".github/**\"\n - \"*.md\"\n - "
},
{
"path": "optimization/chatllama/LICENSE",
"chars": 35146,
"preview": " GNU GENERAL PUBLIC LICENSE\n Version 3, 29 June 2007\n\n Copyright (C) 2007 Free "
},
{
"path": "optimization/chatllama/README.md",
"chars": 19273,
"preview": "# **🦙 ChatLLaMA**\r\n\r\n> :warning: Please note this library does NOT contain LLaMA’s weights; to access the weights, you n"
},
{
"path": "optimization/chatllama/artifacts/config/config.yaml",
"chars": 3922,
"preview": "---\ntrainer_config:\n # learning rates\n actor_lr: 0.000005\n critic_lr: 0.000009\n # PPO Hyperparameters\n actor_eps_cl"
},
{
"path": "optimization/chatllama/artifacts/config/ds_config.json",
"chars": 1321,
"preview": "{\n \"train_batch_size\": 8,\n \"gradient_accumulation_steps\": 1,\n \"optimizer\": {\n \"type\": \"Adam\",\n \"param"
},
{
"path": "optimization/chatllama/artifacts/config/peft_config.yaml",
"chars": 64,
"preview": "---\ninference_mode: False\nr: 8\nlora_alpha: 32\nlora_dropout: 0.1\n"
},
{
"path": "optimization/chatllama/artifacts/datasets/actor_dataset.json",
"chars": 117,
"preview": "[\n {\n \"user_input\": \"here the input of the user\",\n \"completion\": \"here the model completion\"\n }\n]"
},
{
"path": "optimization/chatllama/artifacts/datasets/reward_dataset.json",
"chars": 328,
"preview": "[\n {\n \"user_input\": \"here type the user input\",\n \"completion\": \"here type the completion\",\n \"sco"
},
{
"path": "optimization/chatllama/artifacts/datasets/rlhf_dataset.json",
"chars": 70,
"preview": "[\n {\n \"user_input\": \"here the example of user input\"\n }\n]"
},
{
"path": "optimization/chatllama/artifacts/download_dataset.py",
"chars": 1306,
"preview": "import argparse\nimport os\n\nfrom chatllama.rlhf.dataset import AnthropicRLHF, StanfordNLPSHPDataset\n\n\nif __name__ == \"__m"
},
{
"path": "optimization/chatllama/artifacts/extend_rlhf_dataset.py",
"chars": 3590,
"preview": "import os.path\n\nimport numpy as np\nfrom langchain import OpenAI, LLMChain, PromptTemplate\nfrom transformers import AutoT"
},
{
"path": "optimization/chatllama/artifacts/generate_actor_dataset.py",
"chars": 3295,
"preview": "from langchain import OpenAI, LLMChain, PromptTemplate\nfrom langchain.chains.conversation.memory import (\n Conversati"
},
{
"path": "optimization/chatllama/artifacts/generate_rewards.py",
"chars": 5191,
"preview": "import argparse\nimport json\n\nfrom langchain import OpenAI, LLMChain, PromptTemplate\n\n\nclass ScoreGenerator:\n def __in"
},
{
"path": "optimization/chatllama/artifacts/main.py",
"chars": 2268,
"preview": "import argparse\n\nfrom chatllama.rlhf.actor import ActorTrainer\nfrom chatllama.rlhf.config import Config\nfrom chatllama.r"
},
{
"path": "optimization/chatllama/artifacts/templates.json",
"chars": 139,
"preview": "{\n \"rlhf\": \"You are an AI assistant used to generate possible prompts instructions for a chatbot, here is an example "
},
{
"path": "optimization/chatllama/chatllama/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/chatllama/chatllama/langchain_modules/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/chatllama/chatllama/langchain_modules/prompt_templates.py",
"chars": 2780,
"preview": "REWARD_TEMPLATE = dict(\n template=(\n \"You have to evaluate the following chat with a score between 0 and 5\"\n "
},
{
"path": "optimization/chatllama/chatllama/llama_model.py",
"chars": 22913,
"preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms\n"
},
{
"path": "optimization/chatllama/chatllama/rlhf/__init__.py",
"chars": 66,
"preview": "\"\"\"RLHF implementation inspired to Lucidrains' implementation.\"\"\"\n"
},
{
"path": "optimization/chatllama/chatllama/rlhf/actor.py",
"chars": 26106,
"preview": "import json\nimport yaml\nimport os\nimport shutil\n\nimport deepspeed\nimport torch\nfrom accelerate import Accelerator\nfrom b"
},
{
"path": "optimization/chatllama/chatllama/rlhf/config.py",
"chars": 11660,
"preview": "import yaml\nimport os\nfrom dataclasses import dataclass\n\nimport torch\nfrom beartype import beartype\nfrom beartype.typing"
},
{
"path": "optimization/chatllama/chatllama/rlhf/dataset.py",
"chars": 15058,
"preview": "import json\nimport os\n\nimport numpy as np\n\nfrom beartype.typing import Dict, List, Union\nfrom datasets import load_datas"
},
{
"path": "optimization/chatllama/chatllama/rlhf/model_list.py",
"chars": 1333,
"preview": "# llama models\nllama_models = [\"llama-7B\", \"llama-13B\", \"llama-33B\", \"llama-65B\"]\n\n# HF Models\n# encoder-decoder models "
},
{
"path": "optimization/chatllama/chatllama/rlhf/model_loader.py",
"chars": 14038,
"preview": "import os\nimport shutil\n\nfrom beartype.typing import Union, Optional, Tuple\n\nfrom chatllama.rlhf.config import (\n Con"
},
{
"path": "optimization/chatllama/chatllama/rlhf/reward.py",
"chars": 22087,
"preview": "import json\nimport shutil\nimport os\n\nimport deepspeed\nimport torch\nfrom accelerate import Accelerator\nfrom beartype impo"
},
{
"path": "optimization/chatllama/chatllama/rlhf/trainer.py",
"chars": 47185,
"preview": "import json\nimport os\nimport random\nfrom collections import deque, namedtuple\n\nimport deepspeed\nimport torch\nimport torc"
},
{
"path": "optimization/chatllama/chatllama/rlhf/utils.py",
"chars": 6844,
"preview": "import json\nimport os\nfrom beartype import beartype\nfrom plotly import graph_objects as go\n\n\nclass TrainingStats:\n \"\""
},
{
"path": "optimization/chatllama/setup.py",
"chars": 657,
"preview": "from pathlib import Path\nfrom setuptools import setup, find_packages\n\n\nREQUIREMENTS = [\n \"accelerate\",\n \"beartype\""
},
{
"path": "optimization/cloud_surfer/README.md",
"chars": 1465,
"preview": "# 🏄 CloudSurfer (WIP)\nAutomatically discover the optimal cloud configuration and hardware on AWS, GCP and Azure to run y"
},
{
"path": "optimization/forward_forward/README.md",
"chars": 5736,
"preview": "# Forward-Forward Algorithm\n\nThis module implements a complete open-source version of [Geoffrey Hinton's Forward Forward"
},
{
"path": "optimization/forward_forward/forward_forward/__init__.py",
"chars": 101,
"preview": "from forward_forward.api.functions import ( # noqa F401\n train_with_forward_forward_algorithm,\n)\n"
},
{
"path": "optimization/forward_forward/forward_forward/api/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/forward_forward/forward_forward/api/functions.py",
"chars": 1511,
"preview": "from torchvision import datasets\n\nfrom forward_forward.root_op import (\n ForwardForwardRootOp,\n ForwardForwardMode"
},
{
"path": "optimization/forward_forward/forward_forward/app.py",
"chars": 316,
"preview": "from nebullvm.apps.base import App\n\nfrom forward_forward.root_op import ForwardForwardRootOp\n\n\nclass ForwardForwardApp(A"
},
{
"path": "optimization/forward_forward/forward_forward/operations/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/forward_forward/forward_forward/operations/build_models.py",
"chars": 2906,
"preview": "from abc import ABC, abstractmethod\n\nimport torch\n\nfrom nebullvm.operations.base import Operation\n\nfrom forward_forward."
},
{
"path": "optimization/forward_forward/forward_forward/operations/data.py",
"chars": 4122,
"preview": "import urllib.request\nfrom typing import Any\n\nimport torch\nimport torch.utils.data\nfrom nebullvm.operations.base import "
},
{
"path": "optimization/forward_forward/forward_forward/operations/fetch_operations.py",
"chars": 541,
"preview": "from typing import Any\n\nfrom nebullvm.operations.base import Operation\nfrom torch.utils.data import DataLoader\n\n\nclass F"
},
{
"path": "optimization/forward_forward/forward_forward/operations/trainers.py",
"chars": 6901,
"preview": "from abc import ABC, abstractmethod\n\nimport torch\nfrom nebullvm.operations.base import Operation\nfrom nebullvm.operation"
},
{
"path": "optimization/forward_forward/forward_forward/root_op.py",
"chars": 3126,
"preview": "from enum import Enum\n\nfrom nebullvm.operations.base import Operation\n\nfrom forward_forward.operations.build_models impo"
},
{
"path": "optimization/forward_forward/forward_forward/utils/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/forward_forward/forward_forward/utils/labels.py",
"chars": 2354,
"preview": "from typing import List\n\nimport torch\n\n\nclass LabelsInjector:\n def __init__(self, labels: List):\n # save label"
},
{
"path": "optimization/forward_forward/forward_forward/utils/modules.py",
"chars": 28510,
"preview": "from abc import ABC, abstractmethod\nfrom typing import List\n\nimport torch\nimport torch.utils.data\n\nfrom forward_forward."
},
{
"path": "optimization/forward_forward/forward_forward/utils/utils.py",
"chars": 873,
"preview": "from collections import Generator\n\nimport torch.utils.data\n\n\nclass ProgressiveTrainingDataset(torch.utils.data.Dataset):"
},
{
"path": "optimization/forward_forward/requirements.txt",
"chars": 43,
"preview": "torch>=1.9\ntorchvision>=0.10\nnebullvm>=0.6\n"
},
{
"path": "optimization/forward_forward/setup.py",
"chars": 516,
"preview": "from pathlib import Path\nfrom setuptools import setup, find_packages\n\n\nREQUIREMENTS = [\n \"torch>=1.9\",\n \"torchvisi"
},
{
"path": "optimization/large_speedster/README.md",
"chars": 1653,
"preview": "# ⚡ LargeSpeedster App (WIP)\nAutomatically apply SOTA optimization techniques on large AI models to achieve the maximum "
},
{
"path": "optimization/nebullvm/.pre-commit-config.yaml",
"chars": 267,
"preview": "repos:\n - repo: https://github.com/ambv/black\n rev: 22.3.0\n hooks:\n - id: black\n args: [--line-length"
},
{
"path": "optimization/nebullvm/CONTRIBUTING.md",
"chars": 3684,
"preview": "# Guidelines for Contributing to Nebullvm 🚀\n\nHello coder 👋\n\nWe are very happy that you have decided to contribute to the"
},
{
"path": "optimization/nebullvm/Dockerfile",
"chars": 2411,
"preview": "ARG STARTING_IMAGE=nvcr.io/nvidia/tensorrt:23.03-py3\nFROM ${STARTING_IMAGE}\n\nWORKDIR /\n\n# Set frontend as non-interactiv"
},
{
"path": "optimization/nebullvm/LICENSE",
"chars": 11357,
"preview": " Apache License\n Version 2.0, January 2004\n "
},
{
"path": "optimization/nebullvm/MANIFEST.in",
"chars": 103,
"preview": "recursive-include nebullvm/installers/tvm_installers *.cmake\nrecursive-include nebullvm/installers *.sh"
},
{
"path": "optimization/nebullvm/README.md",
"chars": 3407,
"preview": "<p align=\"center\">\n<br><br><br>\n<a https://docs.nebuly.com/welcome/quick-start\"><img src=\"https://user-images.githubuser"
},
{
"path": "optimization/nebullvm/azure-pipelines.yml",
"chars": 2306,
"preview": "trigger:\n branches:\n include:\n - main\n paths:\n exclude:\n - .github/*\n - docs/**\n - README.md"
},
{
"path": "optimization/nebullvm/docker_build.sh",
"chars": 589,
"preview": "# Create image with all compilers installed\ndocker build -t nebulydocker/nebullvm:cuda11.2.0-nebullvm0.3.1-allcompilers "
},
{
"path": "optimization/nebullvm/docs/Makefile",
"chars": 632,
"preview": "# Minimal makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line, and also\n# from the "
},
{
"path": "optimization/nebullvm/docs/README.md",
"chars": 374,
"preview": "# Documentation\nNebullvm documentation is built using Sphynx and furo! You can follow the guide below for\n## Build the d"
},
{
"path": "optimization/nebullvm/docs/conf.py",
"chars": 3817,
"preview": "# Configuration file for the Sphinx documentation builder.\n#\n# This file only contains a selection of the most common op"
},
{
"path": "optimization/nebullvm/docs/index.rst",
"chars": 125,
"preview": "Welcome to nebullvm's documentation!\n======================================\n\n.. toctree::\n :maxdepth: 2\n\n modules/in"
},
{
"path": "optimization/nebullvm/docs/modules/api.rst",
"chars": 141,
"preview": "nebullvm.api\n=============\n\n.. automodule:: nebullvm\n :members:\n \n.. automodule:: nebullvm.api.frontend.huggingfac"
},
{
"path": "optimization/nebullvm/docs/modules/converters.rst",
"chars": 91,
"preview": "nebullvm.converters\n===================\n\n.. automodule:: nebullvm.converters\n :members:\n"
},
{
"path": "optimization/nebullvm/docs/modules/index.rst",
"chars": 132,
"preview": "API Documentation\n==================\n\n.. toctree::\n \n api\n converters\n inference_learners\n installers\n "
},
{
"path": "optimization/nebullvm/docs/modules/inference_learners.rst",
"chars": 115,
"preview": "nebullvm.inference_learners\n===========================\n\n.. automodule:: nebullvm.inference_learners\n :members:\n"
},
{
"path": "optimization/nebullvm/docs/modules/installers.rst",
"chars": 91,
"preview": "nebullvm.installers\n===================\n\n.. automodule:: nebullvm.installers\n :members:\n"
},
{
"path": "optimization/nebullvm/docs/modules/optimizers.rst",
"chars": 91,
"preview": "nebullvm.optimizers\n===================\n\n.. automodule:: nebullvm.optimizers\n :members:\n"
},
{
"path": "optimization/nebullvm/docs/requirements-docs.txt",
"chars": 36,
"preview": "Sphinx==4.5.0\ncoloredlogs\nsympy\nfuro"
},
{
"path": "optimization/nebullvm/nebullvm/__init__.py",
"chars": 341,
"preview": "# The torch import is necessary for a strange issue when\n# using cuda 11.8, if torch is imported after\n# tensorflow it g"
},
{
"path": "optimization/nebullvm/nebullvm/api/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/nebullvm/nebullvm/apps/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/nebullvm/nebullvm/apps/base.py",
"chars": 178,
"preview": "import abc\n\n\nclass App(abc.ABC):\n def __init__(self):\n super().__init__()\n\n @abc.abstractmethod\n def exe"
},
{
"path": "optimization/nebullvm/nebullvm/config.py",
"chars": 2153,
"preview": "from nebullvm.optional_modules.torch import torch\n\n\nVERSION = \"0.10.0\"\nLEARNER_METADATA_FILENAME = \"metadata.json\"\nONNX_"
},
{
"path": "optimization/nebullvm/nebullvm/core/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/nebullvm/nebullvm/core/models.py",
"chars": 10522,
"preview": "import subprocess\nfrom dataclasses import dataclass\nfrom enum import Enum\nfrom functools import cached_property\nfrom typ"
},
{
"path": "optimization/nebullvm/nebullvm/core/tests/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/nebullvm/nebullvm/core/tests/test_models.py",
"chars": 6366,
"preview": "import unittest\nfrom unittest.mock import MagicMock\n\nfrom nebullvm.core.models import OptimizeInferenceResult\n\n\nclass Te"
},
{
"path": "optimization/nebullvm/nebullvm/core/types.py",
"chars": 142,
"preview": "from typing import Union, Iterable, Sequence\n\nfrom nebullvm.tools.data import DataManager\n\nInputData = Union[Iterable, S"
},
{
"path": "optimization/nebullvm/nebullvm/installers/__init__.py",
"chars": 81,
"preview": "# flake8: noqa\n\n__all__ = [k for k in globals().keys() if not k.startswith(\"_\")]\n"
},
{
"path": "optimization/nebullvm/nebullvm/installers/auto_installer.py",
"chars": 6556,
"preview": "import argparse\nfrom typing import List, Union\n\nfrom loguru import logger\n\nfrom nebullvm.config import (\n ONNX_MODULE"
},
{
"path": "optimization/nebullvm/nebullvm/installers/install_bladedisc.sh",
"chars": 1196,
"preview": "#!/bin/bash\n\n# Set non interactive mode for apt-get\nexport DEBIAN_FRONTEND=noninteractive\n\nif [ ! -d \"BladeDISC\" ]\nthen\n"
},
{
"path": "optimization/nebullvm/nebullvm/installers/install_fastertransformer.sh",
"chars": 1006,
"preview": "#!/bin/bash\n\n# TODO: check requirements\n# https://github.com/NVIDIA/FasterTransformer/blob/main/docs/bert_guide.md\n# Req"
},
{
"path": "optimization/nebullvm/nebullvm/installers/install_tensor_rt.sh",
"chars": 2114,
"preview": "#!/bin/bash\n\nif [[ \"$(grep '^ID_LIKE' /etc/os-release)\" == *\"centos\"* ]]\nthen\n # Installation for centos type linux dis"
},
{
"path": "optimization/nebullvm/nebullvm/installers/install_tvm.sh",
"chars": 600,
"preview": "#!/bin/bash\n\n# Set non interactive mode for apt-get\nexport DEBIAN_FRONTEND=noninteractive\n\nif [ ! -d \"tvm\" ]\nthen\n git "
},
{
"path": "optimization/nebullvm/nebullvm/installers/install_tvm_prerequisites.sh",
"chars": 1174,
"preview": "#!/bin/bash\n\n# Set non interactive mode for apt-get\nexport DEBIAN_FRONTEND=noninteractive\n\nif [[ $OSTYPE == \"darwin\"* ]]"
},
{
"path": "optimization/nebullvm/nebullvm/installers/installers.py",
"chars": 17391,
"preview": "import os\nimport platform\nimport subprocess\nimport sys\nfrom abc import ABC\nfrom pathlib import Path\nfrom typing import L"
},
{
"path": "optimization/nebullvm/nebullvm/installers/tests/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/nebullvm/nebullvm/installers/tests/test_install_frameworks.py",
"chars": 5857,
"preview": "from nebullvm.installers.auto_installer import (\n select_frameworks_to_install,\n select_compilers_to_install,\n)\n\n\n"
},
{
"path": "optimization/nebullvm/nebullvm/installers/tvm_installers/arm/config.cmake",
"chars": 11553,
"preview": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements. See the NOTICE f"
},
{
"path": "optimization/nebullvm/nebullvm/installers/tvm_installers/arm_cuda/config.cmake",
"chars": 11552,
"preview": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements. See the NOTICE f"
},
{
"path": "optimization/nebullvm/nebullvm/installers/tvm_installers/x86/config.cmake",
"chars": 11553,
"preview": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements. See the NOTICE f"
},
{
"path": "optimization/nebullvm/nebullvm/installers/tvm_installers/x86_cuda/config.cmake",
"chars": 11552,
"preview": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements. See the NOTICE f"
},
{
"path": "optimization/nebullvm/nebullvm/operations/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/nebullvm/nebullvm/operations/base.py",
"chars": 1111,
"preview": "import abc\nfrom typing import Dict, Union\n\nfrom loguru import logger\n\nfrom nebullvm.core.models import Device, DeviceTyp"
},
{
"path": "optimization/nebullvm/nebullvm/operations/conversions/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/nebullvm/nebullvm/operations/conversions/converters.py",
"chars": 4537,
"preview": "import abc\nfrom pathlib import Path\nfrom typing import Optional, List, Union\n\nfrom nebullvm.core.models import DeviceTyp"
},
{
"path": "optimization/nebullvm/nebullvm/operations/conversions/huggingface.py",
"chars": 5257,
"preview": "from typing import (\n List,\n Dict,\n Sequence,\n Optional,\n)\n\nimport numpy as np\n\nfrom nebullvm.core.models im"
},
{
"path": "optimization/nebullvm/nebullvm/operations/conversions/pytorch.py",
"chars": 5688,
"preview": "from contextlib import nullcontext\nfrom pathlib import Path\n\nfrom loguru import logger\n\nfrom nebullvm.config import ONNX"
},
{
"path": "optimization/nebullvm/nebullvm/operations/conversions/tensorflow.py",
"chars": 3917,
"preview": "import subprocess\nfrom pathlib import Path\nfrom tempfile import TemporaryDirectory\nfrom typing import Union\n\nfrom loguru"
},
{
"path": "optimization/nebullvm/nebullvm/operations/conversions/utils.py",
"chars": 539,
"preview": "from nebullvm.core.models import DeepLearningFramework\nfrom nebullvm.operations.conversions.converters import (\n Pyto"
},
{
"path": "optimization/nebullvm/nebullvm/operations/fetch_operations/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/nebullvm/nebullvm/operations/fetch_operations/local.py",
"chars": 574,
"preview": "from typing import Any, Union, Iterable, Sequence\n\nfrom nebullvm.operations.base import Operation\n\n\nclass FetchModelFrom"
},
{
"path": "optimization/nebullvm/nebullvm/operations/inference_learners/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/nebullvm/nebullvm/operations/inference_learners/base.py",
"chars": 21216,
"preview": "import json\nimport os\nimport shutil\nfrom abc import ABC, abstractmethod\nfrom dataclasses import dataclass, InitVar\nfrom "
},
{
"path": "optimization/nebullvm/nebullvm/operations/inference_learners/blade_disc.py",
"chars": 908,
"preview": "from typing import Optional\n\nfrom nebullvm.core.models import ModelParams, Device\nfrom nebullvm.operations.inference_lea"
},
{
"path": "optimization/nebullvm/nebullvm/operations/inference_learners/builders.py",
"chars": 11248,
"preview": "from pathlib import Path\nfrom typing import Any, Union\n\nfrom nebullvm.core.models import (\n ModelParams,\n DeepLear"
},
{
"path": "optimization/nebullvm/nebullvm/operations/inference_learners/deepsparse.py",
"chars": 6151,
"preview": "import os\nimport shutil\nfrom abc import ABC\nfrom pathlib import Path\nfrom typing import Union, List, Generator, Tuple, D"
},
{
"path": "optimization/nebullvm/nebullvm/operations/inference_learners/faster_transformer.py",
"chars": 259,
"preview": "from nebullvm.operations.inference_learners.torchscript import (\n TorchScriptInferenceLearner,\n)\n\n\nclass FasterTransf"
},
{
"path": "optimization/nebullvm/nebullvm/operations/inference_learners/huggingface.py",
"chars": 6384,
"preview": "from abc import ABC\nfrom collections import OrderedDict\nfrom pathlib import Path\nfrom typing import List, Any, Dict, Uni"
},
{
"path": "optimization/nebullvm/nebullvm/operations/inference_learners/neural_compressor.py",
"chars": 6183,
"preview": "from abc import ABC\nfrom pathlib import Path\nfrom typing import Union, Tuple, Dict, Type\n\nfrom loguru import logger\n\nfro"
},
{
"path": "optimization/nebullvm/nebullvm/operations/inference_learners/onnx.py",
"chars": 14937,
"preview": "import multiprocessing\nimport os\nimport shutil\nfrom abc import ABC\nfrom pathlib import Path\nfrom typing import Union, Li"
},
{
"path": "optimization/nebullvm/nebullvm/operations/inference_learners/openvino.py",
"chars": 15033,
"preview": "import json\nimport shutil\nfrom abc import ABC\nfrom pathlib import Path\nfrom typing import Dict, Union, Type, Generator, "
},
{
"path": "optimization/nebullvm/nebullvm/operations/inference_learners/tensor_rt.py",
"chars": 23323,
"preview": "import json\nimport os\nfrom abc import ABC\nfrom pathlib import Path\nfrom tempfile import TemporaryDirectory\nfrom typing i"
},
{
"path": "optimization/nebullvm/nebullvm/operations/inference_learners/tensorflow.py",
"chars": 4741,
"preview": "import pickle\nfrom pathlib import Path\nfrom typing import Tuple, Union, Dict, Type\n\nfrom nebullvm.config import TENSORFL"
},
{
"path": "optimization/nebullvm/nebullvm/operations/inference_learners/torch_dynamo.py",
"chars": 641,
"preview": "from pathlib import Path\nfrom typing import Union\n\nfrom nebullvm.operations.inference_learners.torchscript import (\n "
},
{
"path": "optimization/nebullvm/nebullvm/operations/inference_learners/torch_neuron.py",
"chars": 562,
"preview": "import os\nfrom pathlib import Path\nfrom tempfile import TemporaryDirectory\n\nfrom nebullvm.operations.inference_learners."
},
{
"path": "optimization/nebullvm/nebullvm/operations/inference_learners/torch_xla.py",
"chars": 2918,
"preview": "import os\nimport pickle\nfrom pathlib import Path\nfrom tempfile import TemporaryDirectory\nfrom typing import Tuple, Union"
},
{
"path": "optimization/nebullvm/nebullvm/operations/inference_learners/torchscript.py",
"chars": 3513,
"preview": "from pathlib import Path\nfrom typing import Tuple, Union, Optional, List\n\nfrom nebullvm.core.models import Device, Devic"
},
{
"path": "optimization/nebullvm/nebullvm/operations/inference_learners/tvm.py",
"chars": 16668,
"preview": "import os\nimport shutil\nfrom abc import ABC\nfrom pathlib import Path\nfrom tempfile import TemporaryDirectory\nfrom typing"
},
{
"path": "optimization/nebullvm/nebullvm/operations/inference_learners/utils.py",
"chars": 1482,
"preview": "from pathlib import Path\nfrom typing import Union, Any\n\nfrom nebullvm.operations.inference_learners.base import LearnerM"
},
{
"path": "optimization/nebullvm/nebullvm/operations/measures/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/nebullvm/nebullvm/operations/measures/base.py",
"chars": 276,
"preview": "import abc\n\nfrom nebullvm.operations.base import Operation\n\n\nclass Measure(Operation, abc.ABC):\n def __init__(self):\n"
},
{
"path": "optimization/nebullvm/nebullvm/operations/measures/measures.py",
"chars": 3696,
"preview": "from typing import List, Tuple, Any, Callable, Dict\n\nimport numpy as np\n\nfrom nebullvm.config import QUANTIZATION_DATA_N"
},
{
"path": "optimization/nebullvm/nebullvm/operations/measures/utils.py",
"chars": 8102,
"preview": "import time\nfrom typing import Tuple, List, Union, Any\n\nimport numpy as np\nfrom loguru import logger\n\nfrom nebullvm.conf"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/base.py",
"chars": 722,
"preview": "import abc\nfrom typing import Any, Dict, List, Optional\n\nfrom nebullvm.core.models import QuantizationType\nfrom nebullvm"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/deepsparse.py",
"chars": 2549,
"preview": "from pathlib import Path\nfrom typing import Union\n\nfrom nebullvm.core.models import (\n ModelParams,\n QuantizationT"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/faster_transformer/__init__.py",
"chars": 2322,
"preview": "from copy import deepcopy\nfrom typing import Union\n\nfrom nebullvm.core.models import QuantizationType, DeviceType\nfrom n"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/faster_transformer/bert/__init__.py",
"chars": 2298,
"preview": "import os\n\nfrom nebullvm.operations.optimizations.compilers.faster_transformer.bert.modeling_bert import ( # noqa: E501"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/faster_transformer/bert/checkpoint_quantization.py",
"chars": 10523,
"preview": "# Based on: https://github.com/NVIDIA/FasterTransformer/blob/4402759e48f2340220638675f464b6ba1f79ac3c/examples/pytorch/b"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/faster_transformer/bert/modeling_bert.py",
"chars": 42935,
"preview": "# Based on: https://github.com/NVIDIA/FasterTransformer/blob/4402759e48f2340220638675f464b6ba1f79ac3c/examples/pytorch/b"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/faster_transformer/gpt/__init__.py",
"chars": 9481,
"preview": "# Based on: https://github.com/NVIDIA/FasterTransformer/blob/4402759e48f2340220638675f464b6ba1f79ac3c/examples/pytorch/g"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/faster_transformer/gpt/utils/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/faster_transformer/gpt/utils/gpt_decoder.py",
"chars": 66294,
"preview": "# Based on: https://github.com/NVIDIA/FasterTransformer/blob/4402759e48f2340220638675f464b6ba1f79ac3c/examples/pytorch/g"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/faster_transformer/gpt/utils/huggingface_gpt_convert.py",
"chars": 10381,
"preview": "# Based on https://github.com/NVIDIA/FasterTransformer/blob/4402759e48f2340220638675f464b6ba1f79ac3c/examples/pytorch/gp"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/intel_neural_compressor.py",
"chars": 3135,
"preview": "from pathlib import Path\nfrom typing import Union\n\nfrom nebullvm.core.models import QuantizationType\nfrom nebullvm.opera"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/onnxruntime.py",
"chars": 3105,
"preview": "from pathlib import Path\nfrom typing import Union, List, Tuple\n\nimport numpy as np\n\nfrom nebullvm.config import QUANTIZA"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/openvino.py",
"chars": 4817,
"preview": "import subprocess\nfrom pathlib import Path\nfrom typing import Tuple, List, Union\n\nimport numpy as np\n\nfrom nebullvm.conf"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/quantizations/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/quantizations/intel_neural_compressor.py",
"chars": 3781,
"preview": "from pathlib import Path\nfrom tempfile import TemporaryDirectory\nfrom typing import Any\n\nimport yaml\n\nfrom nebullvm.core"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/quantizations/onnx.py",
"chars": 4941,
"preview": "from pathlib import Path\nfrom typing import Union, Iterable, Tuple, List\n\nimport cpuinfo\nimport numpy as np\n\nfrom nebull"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/quantizations/openvino.py",
"chars": 1960,
"preview": "from typing import List, Tuple, Any\n\nimport numpy as np\n\nfrom nebullvm.optional_modules.openvino import (\n DataLoader"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/quantizations/pytorch.py",
"chars": 5090,
"preview": "import copy\nfrom typing import List, Tuple, Union\n\nfrom loguru import logger\n\nfrom nebullvm.core.models import DeviceTyp"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/quantizations/tensor_rt.py",
"chars": 2264,
"preview": "from typing import List, Tuple\n\nimport numpy as np\n\nfrom nebullvm.core.models import QuantizationType, ModelParams\nfrom "
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/quantizations/tensorflow.py",
"chars": 1762,
"preview": "from typing import List, Tuple\n\nfrom nebullvm.core.models import QuantizationType\nfrom nebullvm.optional_modules.tensorf"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/quantizations/tvm.py",
"chars": 2237,
"preview": "from typing import List, Sequence, Any\n\nfrom nebullvm.config import QUANTIZATION_DATA_NUM\nfrom nebullvm.core.models impo"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/quantizations/utils.py",
"chars": 406,
"preview": "from loguru import logger\n\nfrom nebullvm.core.models import QuantizationType\n\n\ndef check_quantization(\n quantization_"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/tensor_rt.py",
"chars": 16807,
"preview": "import abc\nimport copy\nimport os\nimport subprocess\nfrom pathlib import Path\nfrom typing import List, Any, Tuple\n\nimport "
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/tensorflow.py",
"chars": 5206,
"preview": "from typing import List, Tuple\n\nfrom nebullvm.config import QUANTIZATION_DATA_NUM\nfrom nebullvm.core.models import Quant"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/torch_dynamo.py",
"chars": 2364,
"preview": "from typing import Union, Any\n\nfrom nebullvm.core.models import ModelParams, QuantizationType\nfrom nebullvm.operations.o"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/torch_neuron.py",
"chars": 5663,
"preview": "from typing import List, Tuple\n\nfrom nebullvm.core.models import QuantizationType, ModelParams, DeviceType\nfrom nebullvm"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/torch_xla.py",
"chars": 700,
"preview": "from nebullvm.core.models import QuantizationType\nfrom nebullvm.operations.optimizations.compilers.torchscript import (\n"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/torchscript.py",
"chars": 4674,
"preview": "from typing import Union, List, Tuple\n\nfrom nebullvm.config import QUANTIZATION_DATA_NUM\nfrom nebullvm.core.models impor"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/tvm.py",
"chars": 8751,
"preview": "import abc\nimport os\nimport uuid\nfrom abc import ABC\nfrom typing import Any, Tuple, Dict, Union\n\nfrom nebullvm.config im"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compilers/utils.py",
"chars": 3952,
"preview": "from pathlib import Path\n\nimport nebullvm\nfrom nebullvm.core.models import Device, ModelCompiler, DeviceType\n\n\ndef onnxr"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compressors/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compressors/base.py",
"chars": 1379,
"preview": "from abc import ABC, abstractmethod\nfrom typing import Any, Optional, Dict, Callable, Tuple\n\nimport yaml\n\nfrom nebullvm."
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compressors/intel.py",
"chars": 5966,
"preview": "import copy\nimport re\nfrom abc import ABC, abstractmethod\nfrom pathlib import Path\nfrom tempfile import mkdtemp\nfrom typ"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compressors/scripts/__init__.py",
"chars": 11416,
"preview": "import json\nimport logging\nimport os.path\nfrom pathlib import Path\nfrom tempfile import TemporaryDirectory\nfrom typing i"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compressors/scripts/neural_magic_training.py",
"chars": 11416,
"preview": "import json\nimport logging\nimport os.path\nfrom pathlib import Path\nfrom tempfile import TemporaryDirectory\nfrom typing i"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/compressors/sparseml.py",
"chars": 4597,
"preview": "import json\nfrom pathlib import Path\nfrom tempfile import TemporaryDirectory\nfrom typing import Callable, Dict\n\nimport n"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/optimize_inference.py",
"chars": 12721,
"preview": "from pathlib import Path\nfrom tempfile import TemporaryDirectory\nfrom typing import Any, Iterable, Callable, List, Union"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/optimizers/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/optimizers/base.py",
"chars": 17404,
"preview": "import abc\nfrom tempfile import TemporaryDirectory\nfrom typing import Any, Callable, Dict, List, Tuple, Type, Union\n\nfro"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/optimizers/optimizers.py",
"chars": 4555,
"preview": "import platform\n\nfrom nebullvm.core.models import (\n DeepLearningFramework,\n DeviceType,\n ModelCompiler,\n)\nfrom"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/tests/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/tests/test_deepsparse.py",
"chars": 4151,
"preview": "from tempfile import TemporaryDirectory\n\nimport pytest\nimport torch\n\nfrom nebullvm.config import CONSTRAINED_METRIC_DROP"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/tests/test_intel_neural_compressor.py",
"chars": 4323,
"preview": "from tempfile import TemporaryDirectory\n\nimport pytest\nimport torch\n\nfrom nebullvm.core.models import (\n DeviceType,\n"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/tests/test_onnxruntime.py",
"chars": 14825,
"preview": "import sys\nfrom pathlib import Path\nfrom tempfile import TemporaryDirectory\n\nimport onnx\nimport pytest\nimport torch\n\nfro"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/tests/test_openvino.py",
"chars": 5224,
"preview": "from pathlib import Path\nfrom tempfile import TemporaryDirectory\n\nimport cpuinfo\nimport pytest\nimport torch\n\nfrom nebull"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/tests/test_tensor_rt.py",
"chars": 9311,
"preview": "from pathlib import Path\nfrom tempfile import TemporaryDirectory\n\nimport pytest\nimport torch\n\nfrom nebullvm.core.models "
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/tests/test_tensorflow.py",
"chars": 6909,
"preview": "from tempfile import TemporaryDirectory\n\nimport pytest\n\nfrom nebullvm.core.models import (\n DeepLearningFramework,\n "
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/tests/test_torch_dynamo.py",
"chars": 4786,
"preview": "import platform\nfrom tempfile import TemporaryDirectory\n\nimport pytest\nimport torch\n\nfrom nebullvm.core.models import (\n"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/tests/test_torchscript.py",
"chars": 6228,
"preview": "from tempfile import TemporaryDirectory\n\nimport pytest\nimport torch\n\nfrom nebullvm.core.models import (\n DeviceType,\n"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/tests/test_tvm.py",
"chars": 8843,
"preview": "from pathlib import Path\nfrom tempfile import TemporaryDirectory\n\nimport pytest\nimport torch\n\nfrom nebullvm.core.models "
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/tests/utils.py",
"chars": 9672,
"preview": "import os\nfrom pathlib import Path\nfrom typing import Any, Callable, Optional, Tuple\n\nimport tensorflow as tf\nimport ten"
},
{
"path": "optimization/nebullvm/nebullvm/operations/optimizations/utils.py",
"chars": 269,
"preview": "from typing import Callable, List\n\n\ndef map_compilers_and_compressors(ignore_list: List, enum_class: Callable):\n if i"
},
{
"path": "optimization/nebullvm/nebullvm/optional_modules/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "optimization/nebullvm/nebullvm/optional_modules/blade_disc.py",
"chars": 133,
"preview": "from nebullvm.optional_modules.dummy import DummyClass\n\ntry:\n import torch_blade\nexcept ImportError:\n torch_blade "
},
{
"path": "optimization/nebullvm/nebullvm/optional_modules/deepsparse.py",
"chars": 164,
"preview": "from nebullvm.optional_modules.dummy import DummyClass\n\ntry:\n from deepsparse import compile_model, cpu\nexcept Import"
},
{
"path": "optimization/nebullvm/nebullvm/optional_modules/diffusers.py",
"chars": 701,
"preview": "from nebullvm.optional_modules.dummy import DummyClass\n\ntry:\n import diffusers # noqa F401\n from diffusers import"
},
{
"path": "optimization/nebullvm/nebullvm/optional_modules/dummy.py",
"chars": 27,
"preview": "class DummyClass:\n pass\n"
},
{
"path": "optimization/nebullvm/nebullvm/optional_modules/huggingface.py",
"chars": 896,
"preview": "from nebullvm.optional_modules.dummy import DummyClass\n\ntry:\n from transformers import PreTrainedModel, CLIPTextModel"
},
{
"path": "optimization/nebullvm/nebullvm/optional_modules/neural_compressor.py",
"chars": 620,
"preview": "from nebullvm.optional_modules.dummy import DummyClass\n\ntry:\n import neural_compressor # noqa F401\n from neural_c"
},
{
"path": "optimization/nebullvm/nebullvm/optional_modules/onnx.py",
"chars": 366,
"preview": "from nebullvm.optional_modules.dummy import DummyClass\n\ntry:\n import onnx # noqa F401\nexcept ImportError:\n onnx ="
},
{
"path": "optimization/nebullvm/nebullvm/optional_modules/onnxruntime.py",
"chars": 813,
"preview": "from nebullvm.optional_modules.dummy import DummyClass\n\ntry:\n import onnxruntime # noqa F401\n from onnxruntime.qu"
},
{
"path": "optimization/nebullvm/nebullvm/optional_modules/onnxsim.py",
"chars": 125,
"preview": "from nebullvm.optional_modules.dummy import DummyClass\n\ntry:\n import onnxsim\nexcept ImportError:\n onnxsim = DummyC"
},
{
"path": "optimization/nebullvm/nebullvm/optional_modules/openvino.py",
"chars": 819,
"preview": "import logging\n\nfrom nebullvm.optional_modules.dummy import DummyClass\n\ntry:\n from openvino.runtime import Core, Mode"
},
{
"path": "optimization/nebullvm/nebullvm/optional_modules/tensor_rt.py",
"chars": 484,
"preview": "from nebullvm.optional_modules.dummy import DummyClass\n\ntry:\n import tensorrt\n from tensorrt import IInt8EntropyCa"
},
{
"path": "optimization/nebullvm/nebullvm/optional_modules/tensorflow.py",
"chars": 1187,
"preview": "from nebullvm.optional_modules.dummy import DummyClass\n\ntry:\n import absl.logging\n\n absl.logging.set_verbosity(abs"
},
{
"path": "optimization/nebullvm/nebullvm/optional_modules/torch.py",
"chars": 1577,
"preview": "from nebullvm.optional_modules.dummy import DummyClass\n\ntry:\n import torch # noqa F401\n from torch.nn import Modu"
},
{
"path": "optimization/nebullvm/nebullvm/optional_modules/torch_neuron.py",
"chars": 365,
"preview": "import logging\n\nfrom nebullvm.optional_modules.dummy import DummyClass\n\ntry:\n import torch_neuron # noqa F401\n\n l"
},
{
"path": "optimization/nebullvm/nebullvm/optional_modules/torch_tensorrt.py",
"chars": 240,
"preview": "from nebullvm.optional_modules.dummy import DummyClass\n\ntry:\n import torch_tensorrt\n from torch_tensorrt.ptq impor"
},
{
"path": "optimization/nebullvm/nebullvm/optional_modules/torch_xla.py",
"chars": 191,
"preview": "from nebullvm.optional_modules.dummy import DummyClass\n\ntry:\n import torch_xla\n import torch_xla.core.xla_model as"
},
{
"path": "optimization/nebullvm/nebullvm/optional_modules/tvm.py",
"chars": 687,
"preview": "from nebullvm.optional_modules.dummy import DummyClass\n\ntry:\n import tvm\n from tvm import IRModule\n from tvm.ru"
}
]
// ... and 106 more files (download for full content)
About this extraction
This page contains the full source code of the nebuly-ai/optimate GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 306 files (1.6 MB), approximately 395.7k tokens, and a symbol index with 1553 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.