Repository: openai/jukebox
Branch: master
Commit: 08efbbc1d4ed
Files: 319
Total size: 1.7 MB

Directory structure:
gitextract_kyecer1w/

├── .gitignore
├── LICENSE
├── MANIFEST.in
├── README.md
├── apex/
│   ├── .gitignore
│   ├── .nojekyll
│   ├── LICENSE
│   ├── README.md
│   ├── apex/
│   │   ├── RNN/
│   │   │   ├── README.md
│   │   │   ├── RNNBackend.py
│   │   │   ├── __init__.py
│   │   │   ├── cells.py
│   │   │   └── models.py
│   │   ├── __init__.py
│   │   ├── amp/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── __version__.py
│   │   │   ├── _amp_state.py
│   │   │   ├── _initialize.py
│   │   │   ├── _process_optimizer.py
│   │   │   ├── amp.py
│   │   │   ├── compat.py
│   │   │   ├── frontend.py
│   │   │   ├── handle.py
│   │   │   ├── lists/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── functional_overrides.py
│   │   │   │   ├── tensor_overrides.py
│   │   │   │   └── torch_overrides.py
│   │   │   ├── opt.py
│   │   │   ├── rnn_compat.py
│   │   │   ├── scaler.py
│   │   │   ├── utils.py
│   │   │   └── wrap.py
│   │   ├── fp16_utils/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── fp16_optimizer.py
│   │   │   ├── fp16util.py
│   │   │   └── loss_scaler.py
│   │   ├── multi_tensor_apply/
│   │   │   ├── __init__.py
│   │   │   └── multi_tensor_apply.py
│   │   ├── normalization/
│   │   │   ├── __init__.py
│   │   │   └── fused_layer_norm.py
│   │   ├── optimizers/
│   │   │   ├── __init__.py
│   │   │   ├── fp16_optimizer.py
│   │   │   └── fused_adam.py
│   │   ├── parallel/
│   │   │   ├── LARC.py
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── distributed.py
│   │   │   ├── multiproc.py
│   │   │   ├── optimized_sync_batchnorm.py
│   │   │   ├── optimized_sync_batchnorm_kernel.py
│   │   │   ├── sync_batchnorm.py
│   │   │   └── sync_batchnorm_kernel.py
│   │   └── reparameterization/
│   │       ├── README.md
│   │       ├── __init__.py
│   │       ├── reparameterization.py
│   │       └── weight_norm.py
│   ├── apex.patch
│   ├── csrc/
│   │   ├── amp_C_frontend.cpp
│   │   ├── flatten_unflatten.cpp
│   │   ├── fused_adam_cuda.cpp
│   │   ├── fused_adam_cuda_kernel.cu
│   │   ├── layer_norm_cuda.cpp
│   │   ├── layer_norm_cuda_kernel.cu
│   │   ├── multi_tensor_apply.cuh
│   │   ├── multi_tensor_axpby_kernel.cu
│   │   ├── multi_tensor_l2norm_kernel.cu
│   │   ├── multi_tensor_lamb_stage_1.cu
│   │   ├── multi_tensor_lamb_stage_2.cu
│   │   ├── multi_tensor_scale_kernel.cu
│   │   ├── syncbn.cpp
│   │   ├── type_shim.h
│   │   └── welford.cu
│   ├── docs/
│   │   ├── Makefile
│   │   └── source/
│   │       ├── _static/
│   │       │   └── css/
│   │       │       └── pytorch_theme.css
│   │       ├── _templates/
│   │       │   └── layout.html
│   │       ├── advanced.rst
│   │       ├── amp.rst
│   │       ├── conf.py
│   │       ├── fp16_utils.rst
│   │       ├── index.rst
│   │       ├── layernorm.rst
│   │       ├── optimizers.rst
│   │       └── parallel.rst
│   ├── examples/
│   │   ├── README.md
│   │   ├── dcgan/
│   │   │   └── README.md
│   │   ├── docker/
│   │   │   ├── Dockerfile
│   │   │   └── README.md
│   │   ├── imagenet/
│   │   │   ├── README.md
│   │   │   └── main_amp.py
│   │   └── simple/
│   │       └── distributed/
│   │           ├── README.md
│   │           ├── distributed_data_parallel.py
│   │           └── run.sh
│   ├── setup.py
│   └── tests/
│       ├── L0/
│       │   ├── run_amp/
│       │   │   ├── __init__.py
│       │   │   ├── test_add_param_group.py
│       │   │   ├── test_basic_casts.py
│       │   │   ├── test_cache.py
│       │   │   ├── test_multi_tensor_axpby.py
│       │   │   ├── test_multi_tensor_l2norm.py
│       │   │   ├── test_multi_tensor_scale.py
│       │   │   ├── test_multiple_models_optimizers_losses.py
│       │   │   ├── test_promotion.py
│       │   │   ├── test_rnn.py
│       │   │   └── utils.py
│       │   ├── run_fp16util/
│       │   │   ├── __init__.py
│       │   │   └── test_fp16util.py
│       │   ├── run_fused_layer_norm/
│       │   │   └── test_fused_layer_norm.py
│       │   ├── run_mixed_adam/
│       │   │   ├── __init__.py
│       │   │   ├── test_fp16_optimizer.py
│       │   │   └── test_mixed_adam.py
│       │   └── run_test.py
│       ├── L1/
│       │   ├── common/
│       │   │   ├── compare.py
│       │   │   ├── main_amp.py
│       │   │   └── run_test.sh
│       │   ├── cross_product/
│       │   │   └── run.sh
│       │   └── cross_product_distributed/
│       │       └── run.sh
│       ├── distributed/
│       │   ├── DDP/
│       │   │   ├── ddp_race_condition_test.py
│       │   │   └── run_race_test.sh
│       │   ├── amp_master_params/
│       │   │   ├── amp_master_params.py
│       │   │   ├── compare.py
│       │   │   └── run.sh
│       │   └── synced_batchnorm/
│       │       ├── single_gpu_unit_test.py
│       │       ├── test_groups.py
│       │       ├── two_gpu_unit_test.py
│       │       └── unit_test.sh
│       └── docker_extension_builds/
│           └── run.sh
├── jukebox/
│   ├── Interacting_with_Jukebox.ipynb
│   ├── __init__.py
│   ├── align.py
│   ├── data/
│   │   ├── __init__.py
│   │   ├── artist_genre_processor.py
│   │   ├── data_processor.py
│   │   ├── files_dataset.py
│   │   ├── ids/
│   │   │   ├── v2_artist_ids.txt
│   │   │   ├── v2_genre_ids.txt
│   │   │   ├── v3_artist_ids.txt
│   │   │   └── v3_genre_ids.txt
│   │   ├── labels.py
│   │   └── text_processor.py
│   ├── hparams.py
│   ├── lyricdict.py
│   ├── make_models.py
│   ├── prior/
│   │   ├── __init__.py
│   │   ├── autoregressive.py
│   │   ├── conditioners.py
│   │   └── prior.py
│   ├── sample.py
│   ├── save_html.py
│   ├── tests/
│   │   └── test_sample.py
│   ├── train.py
│   ├── transformer/
│   │   ├── __init__.py
│   │   ├── factored_attention.py
│   │   ├── ops.py
│   │   └── transformer.py
│   ├── utils/
│   │   ├── __init__.py
│   │   ├── audio_utils.py
│   │   ├── checkpoint.py
│   │   ├── dist_adapter.py
│   │   ├── dist_utils.py
│   │   ├── ema.py
│   │   ├── fp16.py
│   │   ├── io.py
│   │   ├── logger.py
│   │   ├── remote_utils.py
│   │   ├── sample_utils.py
│   │   └── torch_utils.py
│   └── vqvae/
│       ├── __init__.py
│       ├── bottleneck.py
│       ├── encdec.py
│       ├── resnet.py
│       └── vqvae.py
├── requirements.txt
├── setup.py
└── tensorboardX/
    ├── .codecov.yml
    ├── .flake8
    ├── .github/
    │   └── ISSUE_TEMPLATE/
    │       ├── bug_report.md
    │       └── feature-requests-or-general-questions.md
    ├── .gitignore
    ├── .travis.yml
    ├── HISTORY.rst
    ├── LICENSE
    ├── MANIFEST.in
    ├── README.md
    ├── compile.sh
    ├── docs/
    │   ├── Makefile
    │   ├── conf.py
    │   ├── index.rst
    │   ├── tensorboard.rst
    │   ├── tutorial.rst
    │   ├── tutorial_zh.rst
    │   └── utils.rst
    ├── examples/
    │   ├── RUN_AFTER_PIP_INSTALL
    │   ├── __init__.py
    │   ├── chainer/
    │   │   ├── extension_logger/
    │   │   │   ├── net.py
    │   │   │   ├── train_dcgan.py
    │   │   │   ├── updater.py
    │   │   │   ├── visualize.py
    │   │   │   └── writetensorboard.py
    │   │   └── plain_logger/
    │   │       ├── data.py
    │   │       ├── net.py
    │   │       └── train_vae.py
    │   ├── demo.py
    │   ├── demo_beholder.py
    │   ├── demo_caffe2.py
    │   ├── demo_custom_scalars.py
    │   ├── demo_embedding.py
    │   ├── demo_graph.py
    │   ├── demo_hparams.py
    │   ├── demo_matplotlib.py
    │   ├── demo_multiple_embedding.py
    │   ├── demo_nvidia_smi.py
    │   ├── demo_onnx.py
    │   └── demo_purge.py
    ├── setup.cfg
    ├── setup.py
    ├── tensorboardX/
    │   ├── __init__.py
    │   ├── beholder/
    │   │   ├── __init__.py
    │   │   ├── beholder.py
    │   │   ├── file_system_tools.py
    │   │   ├── shared_config.py
    │   │   └── video_writing.py
    │   ├── caffe2_graph.py
    │   ├── crc32c.py
    │   ├── embedding.py
    │   ├── event_file_writer.py
    │   ├── onnx_graph.py
    │   ├── proto/
    │   │   ├── __init__.py
    │   │   ├── api.proto
    │   │   ├── api_pb2.py
    │   │   ├── attr_value.proto
    │   │   ├── attr_value_pb2.py
    │   │   ├── event.proto
    │   │   ├── event_pb2.py
    │   │   ├── graph.proto
    │   │   ├── graph_pb2.py
    │   │   ├── layout.proto
    │   │   ├── layout_pb2.py
    │   │   ├── node_def.proto
    │   │   ├── node_def_pb2.py
    │   │   ├── plugin_hparams.proto
    │   │   ├── plugin_hparams_pb2.py
    │   │   ├── plugin_mesh.proto
    │   │   ├── plugin_mesh_pb2.py
    │   │   ├── plugin_pr_curve.proto
    │   │   ├── plugin_pr_curve_pb2.py
    │   │   ├── plugin_text.proto
    │   │   ├── plugin_text_pb2.py
    │   │   ├── resource_handle.proto
    │   │   ├── resource_handle_pb2.py
    │   │   ├── step_stats.proto
    │   │   ├── step_stats_pb2.py
    │   │   ├── summary.proto
    │   │   ├── summary_pb2.py
    │   │   ├── tensor.proto
    │   │   ├── tensor_pb2.py
    │   │   ├── tensor_shape.proto
    │   │   ├── tensor_shape_pb2.py
    │   │   ├── types.proto
    │   │   ├── types_pb2.py
    │   │   ├── versions.proto
    │   │   └── versions_pb2.py
    │   ├── proto_graph.py
    │   ├── pytorch_graph.py
    │   ├── record_writer.py
    │   ├── summary.py
    │   ├── torchvis.py
    │   ├── utils.py
    │   ├── visdom_writer.py
    │   ├── writer.py
    │   └── x2num.py
    ├── tensorboardX.patch
    └── tests/
        ├── __init__.py
        ├── event_file_writer_test.py
        ├── expect/
        │   ├── caffe_mnist.expect
        │   ├── caffe_overfeat.expect
        │   ├── test_caffe2.test_simple_cnnmodel.expect
        │   ├── test_caffe2.test_simple_model.expect
        │   ├── test_pr_curve.test_pr_purve.expect
        │   ├── test_pr_curve.test_pr_purve_raw.expect
        │   ├── test_summary.test_audio.expect
        │   ├── test_summary.test_custom_scalars.expect
        │   ├── test_summary.test_float32_image.expect
        │   ├── test_summary.test_histogram_auto.expect
        │   ├── test_summary.test_histogram_doane.expect
        │   ├── test_summary.test_histogram_fd.expect
        │   ├── test_summary.test_hparams.expect
        │   ├── test_summary.test_image_with_3_channel_batched.expect
        │   ├── test_summary.test_image_with_boxes.expect
        │   ├── test_summary.test_image_with_four_channel.expect
        │   ├── test_summary.test_image_with_four_channel_batched.expect
        │   ├── test_summary.test_image_with_one_channel.expect
        │   ├── test_summary.test_image_with_one_channel_batched.expect
        │   ├── test_summary.test_image_without_channel.expect
        │   ├── test_summary.test_mesh.expect
        │   ├── test_summary.test_text.expect
        │   ├── test_summary.test_uint8_image.expect
        │   └── test_summary.test_video.expect
        ├── expect_reader.py
        ├── record_writer_test.py
        ├── test_beholder.py
        ├── test_caffe2.py
        ├── test_chainer_np.py
        ├── test_crc32c.py
        ├── test_embedding.py
        ├── test_figure.py
        ├── test_numpy.py
        ├── test_onnx_graph.py
        ├── test_pr_curve.py
        ├── test_pytorch_graph.py
        ├── test_pytorch_np.py
        ├── test_record_writer.py
        ├── test_summary.py
        ├── test_summary_writer.py
        ├── test_test.py
        ├── test_utils.py
        ├── test_visdom.py
        └── test_writer.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
# Global
.DS_Store
.idea

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

================================================
FILE: LICENSE
================================================
Noncommercial Use License

Software Copyright (c) 2020 OpenAI

We don’t claim ownership of the content you create with Jukebox.
We only ask that you use Jukebox responsibly and clearly indicate your content was created using OpenAI’s Jukebox.

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software, including without limitation the rights to use, copy,
modify, merge, publish, distribute, and/or sublicense copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

No portion of the Software, nor any content created with the Software, may be used for commercial purposes.

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

The above copyright notice and this permission notice need not be included with content created by the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY,FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

================================================
FILE: MANIFEST.in
================================================
recursive-include jukebox *.py
recursive-include jukebox *.txt


================================================
FILE: README.md
================================================
**Status:** Archive (code is provided as-is, no updates expected)

# Jukebox
Code for "Jukebox: A Generative Model for Music"

[Paper](https://arxiv.org/abs/2005.00341) 
[Blog](https://openai.com/blog/jukebox) 
[Explorer](http://jukebox.openai.com/) 
[Colab](https://colab.research.google.com/github/openai/jukebox/blob/master/jukebox/Interacting_with_Jukebox.ipynb) 

# Install
Install the conda package manager from https://docs.conda.io/en/latest/miniconda.html    
    
``` 
# Required: Sampling
conda create --name jukebox python=3.7.5
conda activate jukebox
conda install mpi4py=3.0.3 # if this fails, try: pip install mpi4py==3.0.3
conda install pytorch=1.4 torchvision=0.5 cudatoolkit=10.0 -c pytorch
git clone https://github.com/openai/jukebox.git
cd jukebox
pip install -r requirements.txt
pip install -e .

# Required: Training
conda install av=7.0.01 -c conda-forge 
pip install ./tensorboardX
 
# Optional: Apex for faster training with fused_adam
conda install pytorch=1.1 torchvision=0.3 cudatoolkit=10.0 -c pytorch
pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./apex
```

# Sampling
## Sampling from scratch
To sample normally, run the following command. Model can be `5b`, `5b_lyrics`, `1b_lyrics`
``` 
python jukebox/sample.py --model=5b_lyrics --name=sample_5b --levels=3 --sample_length_in_seconds=20 \
--total_sample_length_in_seconds=180 --sr=44100 --n_samples=6 --hop_fraction=0.5,0.5,0.125
```
``` 
python jukebox/sample.py --model=1b_lyrics --name=sample_1b --levels=3 --sample_length_in_seconds=20 \
--total_sample_length_in_seconds=180 --sr=44100 --n_samples=16 --hop_fraction=0.5,0.5,0.125
```
The above generates the first `sample_length_in_seconds` seconds of audio from a song of total length `total_sample_length_in_seconds`.
To use multiple GPUs, launch the above scripts as `mpiexec -n {ngpus} python jukebox/sample.py ...` so they use `{ngpus}` GPUs.

The samples decoded from each level are stored in `{name}/level_{level}`. 
You can also view the samples as an html with the aligned lyrics under `{name}/level_{level}/index.html`. 
Run `python -m http.server` and open the html through the server to see the lyrics animate as the song plays.  
A summary of all sampling data including zs, x, labels and sampling_kwargs is stored in `{name}/level_{level}/data.pth.tar`.

The hps are for a V100 GPU with 16 GB GPU memory. The `1b_lyrics`, `5b`, and `5b_lyrics` top-level priors take up 
3.8 GB, 10.3 GB, and 11.5 GB, respectively. The peak memory usage to store transformer key, value cache is about 400 MB 
for `1b_lyrics` and 1 GB for `5b_lyrics` per sample. If you are having trouble with CUDA OOM issues, try `1b_lyrics` or 
decrease `max_batch_size` in sample.py, and `--n_samples` in the script call.

On a V100, it takes about 3 hrs to fully sample 20 seconds of music. Since this is a long time, it is recommended to use `n_samples > 1` so you can generate as many samples as possible in parallel. The 1B lyrics and upsamplers can process 16 samples at a time, while 5B can fit only up to 3. Since the vast majority of time is spent on upsampling, we recommend using a multiple of 3 less than 16 like `--n_samples 15` for `5b_lyrics`. This will make the top-level generate samples in groups of three while upsampling is done in one pass.

To continue sampling from already generated codes for a longer duration, you can run
```
python jukebox/sample.py --model=5b_lyrics --name=sample_5b --levels=3 --mode=continue \
--codes_file=sample_5b/level_0/data.pth.tar --sample_length_in_seconds=40 --total_sample_length_in_seconds=180 \
--sr=44100 --n_samples=6 --hop_fraction=0.5,0.5,0.125
```
Here, we take the 20 seconds samples saved from the first sampling run at `sample_5b/level_0/data.pth.tar` and continue by adding 20 more seconds. 

You could also continue directly from the level 2 saved outputs, just pass `--codes_file=sample_5b/level_2/data.pth.tar`.
 Note this will upsample the full 40 seconds song at the end.

If you stopped sampling at only the first level and want to upsample the saved codes, you can run
```
python jukebox/sample.py --model=5b_lyrics --name=sample_5b --levels=3 --mode=upsample \
--codes_file=sample_5b/level_2/data.pth.tar --sample_length_in_seconds=20 --total_sample_length_in_seconds=180 \
--sr=44100 --n_samples=6 --hop_fraction=0.5,0.5,0.125
```
Here, we take the 20 seconds samples saved from the first sampling run at `sample_5b/level_2/data.pth.tar` and upsample the lower two levels.

## Prompt with your own music
If you want to prompt the model with your own creative piece or any other music, first save them as wave files and run
```
python jukebox/sample.py --model=5b_lyrics --name=sample_5b_prompted --levels=3 --mode=primed \
--audio_file=path/to/recording.wav,awesome-mix.wav,fav-song.wav,etc.wav --prompt_length_in_seconds=12 \
--sample_length_in_seconds=20 --total_sample_length_in_seconds=180 --sr=44100 --n_samples=6 --hop_fraction=0.5,0.5,0.125
```
This will load the four files, tile them to fill up to `n_samples` batch size, and prime the model with the first `prompt_length_in_seconds` seconds.

# Training
## VQVAE
To train a small vqvae, run
```
mpiexec -n {ngpus} python jukebox/train.py --hps=small_vqvae --name=small_vqvae --sample_length=262144 --bs=4 \
--audio_files_dir={audio_files_dir} --labels=False --train --aug_shift --aug_blend
```
Here, `{audio_files_dir}` is the directory in which you can put the audio files for your dataset, and `{ngpus}` is number of GPU's you want to use to train. 
The above trains a two-level VQ-VAE with `downs_t = (5,3)`, and `strides_t = (2, 2)` meaning we downsample the audio by `2**5 = 32` to get the first level of codes, and `2**8 = 256` to get the second level codes.  
Checkpoints are stored in the `logs` folder. You can monitor the training by running Tensorboard
```
tensorboard --logdir logs
```
    
## Prior
### Train prior or upsamplers
Once the VQ-VAE is trained, we can restore it from its saved checkpoint and train priors on the learnt codes. 
To train the top-level prior, we can run

```
mpiexec -n {ngpus} python jukebox/train.py --hps=small_vqvae,small_prior,all_fp16,cpu_ema --name=small_prior \
--sample_length=2097152 --bs=4 --audio_files_dir={audio_files_dir} --labels=False --train --test --aug_shift --aug_blend \
--restore_vqvae=logs/small_vqvae/checkpoint_latest.pth.tar --prior --levels=2 --level=1 --weight_decay=0.01 --save_iters=1000
```

To train the upsampler, we can run
```
mpiexec -n {ngpus} python jukebox/train.py --hps=small_vqvae,small_upsampler,all_fp16,cpu_ema --name=small_upsampler \
--sample_length=262144 --bs=4 --audio_files_dir={audio_files_dir} --labels=False --train --test --aug_shift --aug_blend \
--restore_vqvae=logs/small_vqvae/checkpoint_latest.pth.tar --prior --levels=2 --level=0 --weight_decay=0.01 --save_iters=1000
```
We pass `sample_length = n_ctx * downsample_of_level` so that after downsampling the tokens match the n_ctx of the prior hps. 
Here, `n_ctx = 8192` and `downsamples = (32, 256)`, giving `sample_lengths = (8192 * 32, 8192 * 256) = (262144, 2097152)` respectively for the bottom and top level. 

### Learning rate annealing
To get the best sample quality anneal the learning rate to 0 near the end of training. To do so, continue training from the latest 
checkpoint and run with
```
--restore_prior="path/to/checkpoint" --lr_use_linear_decay --lr_start_linear_decay={already_trained_steps} --lr_decay={decay_steps_as_needed}
```

### Reuse pre-trained VQ-VAE and train top-level prior on new dataset from scratch.
#### Train without labels
Our pre-trained VQ-VAE can produce compressed codes for a wide variety of genres of music, and the pre-trained upsamplers 
can upsample them back to audio that sound very similar to the original audio.
To re-use these for a new dataset of your choice, you can retrain just the top-level prior.

To train top-level on a new dataset, run
```
mpiexec -n {ngpus} python jukebox/train.py --hps=vqvae,small_prior,all_fp16,cpu_ema --name=pretrained_vqvae_small_prior \
--sample_length=1048576 --bs=4 --aug_shift --aug_blend --audio_files_dir={audio_files_dir} \
--labels=False --train --test --prior --levels=3 --level=2 --weight_decay=0.01 --save_iters=1000
```
Training the `small_prior` with a batch size of 2, 4, and 8 requires 6.7 GB, 9.3 GB, and 15.8 GB of GPU memory, respectively. A few days to a week of training typically yields reasonable samples when the dataset is homogeneous (e.g. all piano pieces, songs of the same style, etc).

Near the end of training, follow [this](#learning-rate-annealing) to anneal the learning rate to 0.

#### Sample from new model
You can then run sample.py with the top-level of our models replaced by your new model. To do so,
- Add an entry `my_model=("vqvae", "upsampler_level_0", "upsampler_level_1", "small_prior")` in `MODELS` in `make_models.py`. 
- Update the `small_prior` dictionary in `hparams.py` to include `restore_prior='path/to/checkpoint'`. If you
changed any hps directly in the command line script (e.g. `heads`), make sure to update them in the dictionary too so 
that `make_models` restores our checkpoint correctly.
- Run sample.py as outlined in the sampling section, but now with `--model=my_model` 

For example, let's say we trained `small_vqvae`, `small_prior`, and `small_upsampler` under `/path/to/jukebox/logs`. In `make_models.py`, we are going to declare a tuple of the new models as `my_model`.
```
MODELS = {
    '5b': ("vqvae", "upsampler_level_0", "upsampler_level_1", "prior_5b"),
    '5b_lyrics': ("vqvae", "upsampler_level_0", "upsampler_level_1", "prior_5b_lyrics"),
    '1b_lyrics': ("vqvae", "upsampler_level_0", "upsampler_level_1", "prior_1b_lyrics"),
    'my_model': ("my_small_vqvae", "my_small_upsampler", "my_small_prior"),
}
```

Next, in `hparams.py`, we add them to the registry with the corresponding `restore_`paths and any other command line options used during training. Another important note is that for top-level priors with lyric conditioning, we have to locate a self-attention layer that shows alignment between the lyric and music tokens. Look for layers where `prior.prior.transformer._attn_mods[layer].attn_func` is either 6 or 7. If your model is starting to sing along lyrics, it means some layer, head pair has learned alignment. Congrats!
```
my_small_vqvae = Hyperparams(
    restore_vqvae='/path/to/jukebox/logs/small_vqvae/checkpoint_some_step.pth.tar',
)
my_small_vqvae.update(small_vqvae)
HPARAMS_REGISTRY["my_small_vqvae"] = my_small_vqvae

my_small_prior = Hyperparams(
    restore_prior='/path/to/jukebox/logs/small_prior/checkpoint_latest.pth.tar',
    level=1,
    labels=False,
    # TODO For the two lines below, if `--labels` was used and the model is
    # trained with lyrics, find and enter the layer, head pair that has learned
    # alignment.
    alignment_layer=47,
    alignment_head=0,
)
my_small_prior.update(small_prior)
HPARAMS_REGISTRY["my_small_prior"] = my_small_prior

my_small_upsampler = Hyperparams(
    restore_prior='/path/to/jukebox/logs/small_upsampler/checkpoint_latest.pth.tar',
    level=0,
    labels=False,
)
my_small_upsampler.update(small_upsampler)
HPARAMS_REGISTRY["my_small_upsampler"] = my_small_upsampler
```

#### Train with labels 
To train with your own metadata for your audio files, implement `get_metadata` in `data/files_dataset.py` to return the 
`artist`, `genre` and `lyrics` for a given audio file. For now, you can pass `''` for lyrics to not use any lyrics.

For training with labels, we'll use `small_labelled_prior` in `hparams.py`, and we set `labels=True,labels_v3=True`. 
We use 2 kinds of labels information:
- Artist/Genre: 
  - For each file, we return an artist_id and a list of genre_ids. The reason we have a list and not a single genre_id 
  is that in v2, we split genres like `blues_rock` into a bag of words `[blues, rock]`, and we pass at most 
  `max_bow_genre_size` of those, in `v3` we consider it as a single word and just set `max_bow_genre_size=1`.
  - Update the `v3_artist_ids` and `v3_genre_ids` to use ids from your new dataset. 
  - In `small_labelled_prior`, set the hps `y_bins = (number_of_genres, number_of_artists)` and `max_bow_genre_size=1`. 
- Timing: 
  - For each chunk of audio, we return the `total_length` of the song, the `offset` the current audio chunk is at and 
  the `sample_length` of the audio chunk. We have three timing embeddings: total_length, our current position, and our 
  current position as a fraction of the total length, and we divide the range of these values into `t_bins` discrete bins. 
  - In `small_labelled_prior`, set the hps `min_duration` and `max_duration` to be the shortest/longest duration of audio 
  files you want for your dataset, and `t_bins` for how many bins you want to discretize timing information into. Note 
  `min_duration * sr` needs to be at least `sample_length` to have an audio chunk in it.

After these modifications, to train a top-level with labels, run
```
mpiexec -n {ngpus} python jukebox/train.py --hps=vqvae,small_labelled_prior,all_fp16,cpu_ema --name=pretrained_vqvae_small_prior_labels \
--sample_length=1048576 --bs=4 --aug_shift --aug_blend --audio_files_dir={audio_files_dir} \
--labels=True --train --test --prior --levels=3 --level=2 --weight_decay=0.01 --save_iters=1000
```

For sampling, follow same instructions as [above](#sample-from-new-model) but use `small_labelled_prior` instead of `small_prior`.  

#### Train with lyrics
To train in addition with lyrics, update `get_metadata` in `data/files_dataset.py` to return `lyrics` too.
For training with lyrics, we'll use `small_single_enc_dec_prior` in `hparams.py`. 
- Lyrics: 
  - For each file, we linearly align the lyric characters to the audio, find the position in lyric that corresponds to 
  the midpoint of our audio chunk, and pass a window of `n_tokens` lyric characters centred around that. 
  - In `small_single_enc_dec_prior`, set the hps `use_tokens=True` and `n_tokens` to be the number of lyric characters 
  to use for an audio chunk. Set it according to the `sample_length` you're training on so that it's large enough that 
  the lyrics for an audio chunk are almost always found inside a window of that size.
  - If you use a non-English vocabulary, update `text_processor.py` with your new vocab and set
  `n_vocab = number of characters in vocabulary` accordingly in `small_single_enc_dec_prior`. In v2, we had a vocabulary of 
  `n_vocab=80` characters; in v3 we missed `+`, so the vocabulary has `n_vocab=79` characters. 

After these modifications, to train a top-level with labels and lyrics, run
```
mpiexec -n {ngpus} python jukebox/train.py --hps=vqvae,small_single_enc_dec_prior,all_fp16,cpu_ema --name=pretrained_vqvae_small_single_enc_dec_prior_labels \
--sample_length=786432 --bs=4 --aug_shift --aug_blend --audio_files_dir={audio_files_dir} \
--labels=True --train --test --prior --levels=3 --level=2 --weight_decay=0.01 --save_iters=1000
```
To simplify hps choices, here we used a `single_enc_dec` model like the `1b_lyrics` model that combines both encoder and 
decoder of the transformer into a single model. We do so by merging the lyric vocab and vq-vae vocab into a single 
larger vocab, and flattening the lyric tokens and the vq-vae codes into a single sequence of length `n_ctx + n_tokens`. 
This uses `attn_order=12` which includes `prime_attention` layers with keys/values from lyrics and queries from audio. 
If you instead want to use a model with the usual encoder-decoder style transformer, use `small_sep_enc_dec_prior`.

For sampling, follow same instructions as [above](#sample-from-new-model) but use `small_single_enc_dec_prior` instead of 
`small_prior`. To also get the alignment between lyrics and samples in the saved html, you'll need to set `alignment_layer` 
and `alignment_head` in `small_single_enc_dec_prior`. To find which layer/head is best to use, run a forward pass on a training example,
save the attention weight tensors for all prime_attention layers, and pick the (layer, head) which has the best linear alignment 
pattern between the lyrics keys and music queries. 

### Fine-tune pre-trained top-level prior to new style(s)
Previously, we showed how to train a small top-level prior from scratch. Assuming you have a GPU with at least 15 GB of memory and support for fp16, you could fine-tune from our pre-trained 1B top-level prior. Here are the steps:

- Support `--labels=True` by implementing `get_metadata` in `jukebox/data/files_dataset.py` for your dataset.
- Add new entries in `jukebox/data/ids`. We recommend replacing existing mappings (e.g. rename `"unknown"`, etc with styles of your choice). This uses the pre-trained style vectors as initialization and could potentially save some compute.

After these modifications, run 
```
mpiexec -n {ngpus} python jukebox/train.py --hps=vqvae,prior_1b_lyrics,all_fp16,cpu_ema --name=finetuned \
--sample_length=1048576 --bs=1 --aug_shift --aug_blend --audio_files_dir={audio_files_dir} \
--labels=True --train --test --prior --levels=3 --level=2 --weight_decay=0.01 --save_iters=1000
```
To get the best sample quality, it is recommended to anneal the learning rate at the end of training. Training the 5B top-level requires GPipe which is not supported in this release.

# Citation

Please cite using the following bibtex entry:

```
@article{dhariwal2020jukebox,
  title={Jukebox: A Generative Model for Music},
  author={Dhariwal, Prafulla and Jun, Heewoo and Payne, Christine and Kim, Jong Wook and Radford, Alec and Sutskever, Ilya},
  journal={arXiv preprint arXiv:2005.00341},
  year={2020}
}
```

# License 
[Noncommercial Use License](./LICENSE) 

It covers both released code and weights. 


================================================
FILE: apex/.gitignore
================================================
apex.egg-info
dist
build
docs/build
*~

================================================
FILE: apex/.nojekyll
================================================


================================================
FILE: apex/LICENSE
================================================
All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

================================================
FILE: apex/README.md
================================================
# Introduction

This repository holds NVIDIA-maintained utilities to streamline 
mixed precision and distributed training in Pytorch. 
Some of the code here will be included in upstream Pytorch eventually.
The intention of Apex is to make up-to-date utilities available to 
users as quickly as possible.

## Full API Documentation: [https://nvidia.github.io/apex](https://nvidia.github.io/apex)

# Contents

## 1. Amp:  Automatic Mixed Precision

`apex.amp` is a tool to enable mixed precision training by changing only 3 lines of your script.
Users can easily experiment with different pure and mixed precision training modes by supplying
different flags to `amp.initialize`.

[Webinar introducing Amp](https://info.nvidia.com/webinar-mixed-precision-with-pytorch-reg-page.html)
(The flag `cast_batchnorm` has been renamed to `keep_batchnorm_fp32`).

[API Documentation](https://nvidia.github.io/apex/amp.html)

[Comprehensive Imagenet example](https://github.com/NVIDIA/apex/tree/master/examples/imagenet)

[DCGAN example coming soon...](https://github.com/NVIDIA/apex/tree/master/examples/dcgan)

[Moving to the new Amp API](https://nvidia.github.io/apex/amp.html#transition-guide-for-old-api-users) (for users of the deprecated "Amp" and "FP16_Optimizer" APIs)

## 2. Distributed Training

`apex.parallel.DistributedDataParallel` is a module wrapper, similar to 
`torch.nn.parallel.DistributedDataParallel`.  It enables convenient multiprocess distributed training,
optimized for NVIDIA's NCCL communication library.

[API Documentation](https://nvidia.github.io/apex/parallel.html)

[Python Source](https://github.com/NVIDIA/apex/tree/master/apex/parallel)

[Example/Walkthrough](https://github.com/NVIDIA/apex/tree/master/examples/simple/distributed)

The [Imagenet example](https://github.com/NVIDIA/apex/tree/master/examples/imagenet)
shows use of `apex.parallel.DistributedDataParallel` along with `apex.amp`.

### Synchronized Batch Normalization

`apex.parallel.SyncBatchNorm` extends `torch.nn.modules.batchnorm._BatchNorm` to
support synchronized BN.
It allreduces stats across processes during multiprocess (DistributedDataParallel) training.
Synchronous BN has been used in cases where only a small
local minibatch can fit on each GPU.
Allreduced stats increase the effective batch size for the BN layer to the
global batch size across all processes (which, technically, is the correct
formulation).
Synchronous BN has been observed to improve converged accuracy in some of our research models.

# Requirements

Python 3

CUDA 9 or newer

PyTorch 0.4 or newer.  The CUDA and C++ extensions require pytorch 1.0 or newer.

We recommend the latest stable release, obtainable from
[https://pytorch.org/](https://pytorch.org/).  We also test against the latest master branch, obtainable from [https://github.com/pytorch/pytorch](https://github.com/pytorch/pytorch).

It's often convenient to use Apex in Docker containers.  Compatible options include:
* [NVIDIA Pytorch containers from NGC](https://ngc.nvidia.com/catalog/containers/nvidia%2Fpytorch), which come with Apex preinstalled.  To use the latest Amp API, you may need to `pip uninstall apex` then reinstall Apex using the **Quick Start** commands below.
* [official Pytorch -devel Dockerfiles](https://hub.docker.com/r/pytorch/pytorch/tags), e.g. `docker pull pytorch/pytorch:nightly-devel-cuda10.0-cudnn7`, in which you can install Apex using the **Quick Start** commands.

See the [Docker example folder](https://github.com/NVIDIA/apex/tree/master/examples/docker) for details.

# Quick Start

### Linux

For performance and full functionality, we recommend installing Apex with
CUDA and C++ extensions via
```
$ git clone https://github.com/NVIDIA/apex
$ cd apex
$ pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
```

Apex also supports a Python-only build (required with Pytorch 0.4) via
```
$ pip install -v --no-cache-dir .
```
A Python-only build omits:
- Fused kernels required to use `apex.optimizers.FusedAdam`.
- Fused kernels required to use `apex.normalization.FusedLayerNorm`.
- Fused kernels that improve the performance and numerical stability of `apex.parallel.SyncBatchNorm`.
- Fused kernels that improve the performance of `apex.parallel.DistributedDataParallel` and `apex.amp`.
`DistributedDataParallel`, `amp`, and `SyncBatchNorm` will still be usable, but they may be slower.

### Windows support
Windows support is experimental, and Linux is recommended.  `pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .` may work if you were able to build Pytorch from source
on your system.  `pip install -v --no-cache-dir .` (without CUDA/C++ extensions) is more likely to work.  If you installed Pytorch in a Conda environment, make sure to install Apex in that same environment.


================================================
FILE: apex/apex/RNN/README.md
================================================
Under construction...


================================================
FILE: apex/apex/RNN/RNNBackend.py
================================================
import torch
import torch.nn as nn
from torch.autograd import Variable

import torch.nn.functional as F

import math


def is_iterable(maybe_iterable):
    """
    Return True only for ``list`` and ``tuple`` instances (or subclasses).

    Other iterables such as strings, dicts or generators are deliberately
    reported as not iterable.
    """
    return isinstance(maybe_iterable, (list, tuple))


def flatten_list(tens_list):
    """
    Turn a list/tuple of tensors into one tensor with a new leading dimension.

    Anything that is not a list or tuple is returned unchanged. Otherwise the
    entries are concatenated along dim 0 and the result is viewed as
    ``(len(tens_list), *tens_list[0].size())``.
    """
    if is_iterable(tens_list):
        count = len(tens_list)
        item_shape = tens_list[0].size()
        joined = torch.cat(tens_list, dim=0)
        return joined.view(count, *item_shape)

    return tens_list

    
#These modules always assume input is NOT batch_first, i.e. [sequence steps][batch size][features]
class bidirectionalRNN(nn.Module):
    """
    Two stackedRNN modules run over the same input, one in sequence order and
    one in reverse, with their results concatenated on the last dimension.

    The backward stack is built from ``inputRNN.new_like()`` so it gets its
    own parameters.
    """
    def __init__(self, inputRNN, num_layers=1, dropout = 0):
        super(bidirectionalRNN, self).__init__()
        self.dropout = dropout
        self.fwd = stackedRNN(inputRNN, num_layers=num_layers, dropout = dropout)
        self.bckwrd = stackedRNN(inputRNN.new_like(), num_layers=num_layers, dropout = dropout)
        self.rnns = nn.ModuleList([self.fwd, self.bckwrd])

    @staticmethod
    def _cat_features(fwd_item, bckwrd_item):
        """
        Concatenate matching forward/backward entries on the feature (last) dim.

        Tensors are concatenated directly; lists/tuples are walked in lockstep
        and rebuilt as lists, so any nesting depth of hidden states is handled.
        """
        if torch.is_tensor(fwd_item):
            return torch.cat([fwd_item, bckwrd_item], -1)
        return [bidirectionalRNN._cat_features(f, b)
                for f, b in zip(fwd_item, bckwrd_item)]

    #collect hidden option will return all hidden/cell states from entire RNN
    def forward(self, input, collect_hidden=False):
        """
        Run both directions over ``input`` and merge the results.

        Returns ``(output, hiddens)``: ``output`` is the two stacks' outputs
        concatenated on the last dimension, and ``hiddens`` is a tuple with one
        entry per hidden state, each the forward/backward states concatenated
        on the last dimension (nested lists keep their structure).
        """
        fwd_out, fwd_hiddens = self.fwd(input, collect_hidden = collect_hidden)
        bckwrd_out, bckwrd_hiddens = self.bckwrd(input, reverse=True, collect_hidden = collect_hidden)

        output = torch.cat( [fwd_out, bckwrd_out], -1 )
        # A plain torch.cat on each pair fails when the stacks hand back
        # lists of tensors, so merge recursively instead.
        hiddens = tuple( self._cat_features(f_hid, b_hid)
                         for f_hid, b_hid in zip(fwd_hiddens, bckwrd_hiddens) )

        return output, hiddens

    def reset_parameters(self):
        """
        Re-initialise the parameters of both directions.
        """
        for rnn in self.rnns:
            rnn.reset_parameters()
        
    def init_hidden(self, bsz):
        """
        Make sure both directions hold hidden states for batch size ``bsz``.
        """
        for rnn in self.rnns:
            rnn.init_hidden(bsz)

    def detach_hidden(self):
        """
        Detach the stored hidden states of both directions.
        """
        for rnn in self.rnns:
            # stackedRNN names this method detach_hidden (not detachHidden).
            rnn.detach_hidden()
        
    def reset_hidden(self, bsz):
        """
        Discard and re-create the hidden states of both directions.
        """
        for rnn in self.rnns:
            rnn.reset_hidden(bsz)

    def init_inference(self, bsz):    
        """
        Forward ``init_inference(bsz)`` to both directions.
        """
        for rnn in self.rnns:
            rnn.init_inference(bsz)

   
#assumes hidden_state[0] of inputRNN is output hidden state
#constructor either takes an RNNCell or list of RNN layers
class stackedRNN(nn.Module):        
    """
    Stack of recurrent layers applied one after another at every time step.

    ``inputRNN`` is either a single RNNCell, which is cloned with ``new_like``
    until there are ``num_layers`` layers, or a list of exactly ``num_layers``
    ready-made layers. Element 0 of each layer's returned hidden states is
    used as that layer's output and fed to the next layer.
    """
    def __init__(self, inputRNN, num_layers=1, dropout=0):
        super(stackedRNN, self).__init__()
        
        # Only stored; nothing in this class applies dropout.
        self.dropout = dropout
        
        if isinstance(inputRNN, RNNCell):
            layers = [inputRNN]
            # Layers after the first consume the previous layer's output.
            for _ in range(num_layers-1):
                layers.append(inputRNN.new_like(inputRNN.output_size))
        elif isinstance(inputRNN, list):
            assert len(inputRNN) == num_layers, "RNN list length must be equal to num_layers"
            layers = inputRNN
        else:
            raise RuntimeError("inputRNN must be an RNNCell or a list of RNN layers")
        
        self.nLayers = len(layers)
        
        self.rnns = nn.ModuleList(layers)

    def forward(self, input, collect_hidden=False, reverse=False):
        """
        Run the stack over ``input``, indexed as [sequence steps][batch size][features].

        Args:
            input: tensor whose dim 0 is iterated as the sequence.
            collect_hidden: if True, return the hidden states of every step;
                otherwise only those of the last step processed.
            reverse: if True, steps are processed from last to first; the
                returned sequences are still ordered by input position.

        Returns:
            ``(output, hiddens)`` where ``output`` is
            Tensor([sequence steps][batch size][features]) and ``hiddens`` is
              - collect_hidden: list([n_hidden_states][sequence steps]) of
                Tensor([layer][batch size][features])
              - otherwise: list([n_hidden_states]) of
                Tensor([layer][batch size][features])
        """
        seq_len = input.size(0)
        if reverse:
            inp_iter = reversed(range(seq_len))
            final_step = 0
        else:
            inp_iter = range(seq_len)
            final_step = seq_len - 1

        # hidden_states[layer] holds one tuple of hidden states per recorded step.
        hidden_states = [[] for _ in range(self.nLayers)]
        outputs = []

        for seq in inp_iter:
            prev_out = input[seq]
            for layer, rnn in enumerate(self.rnns):
                outs = rnn(prev_out)

                # Without collect_hidden only the step processed last is kept,
                # which is step 0 when running in reverse.
                if collect_hidden or seq == final_step:
                    hidden_states[layer].append(outs)

                prev_out = outs[0]

            outputs.append(prev_out)

        if reverse:
            outputs.reverse()
        # list([seq_length] x Tensor([bsz][features])) -> Tensor([seq_length][bsz][features])
        output = flatten_list(outputs)

        # hidden_states is [layer][step][hidden_state] x Tensor([bsz][features]);
        # transpose it to [hidden_state][step][layer].
        n_steps = seq_len if collect_hidden else 1
        n_hid = self.rnns[0].n_hidden_states
        per_state = [
            [[hidden_states[k][j][i] for k in range(self.nLayers)]
             for j in range(n_steps)]
            for i in range(n_hid)
        ]

        # Steps were recorded in processing order; restore input order.
        if reverse:
            per_state = [steps[::-1] for steps in per_state]

        # Flatten the layer dimension into a tensor:
        # [hidden_state][step] x Tensor([layer][bsz][features])
        hiddens = [[flatten_list(layers) for layers in steps] for steps in per_state]

        # Remove the step dimension if not collect_hidden
        if not collect_hidden:
            hiddens = [steps[0] for steps in hiddens]
        return output, hiddens
    
    def reset_parameters(self):
        """
        Re-initialise the parameters of every layer.
        """
        for rnn in self.rnns:
            rnn.reset_parameters()
        
    def init_hidden(self, bsz):
        """
        Make sure every layer holds hidden states for batch size ``bsz``.
        """
        for rnn in self.rnns:
            rnn.init_hidden(bsz)

    def detach_hidden(self):
        """
        Detach the stored hidden states of every layer.
        """
        for rnn in self.rnns:
            rnn.detach_hidden()
        
    def reset_hidden(self, bsz):
        """
        Discard and re-create the hidden states of every layer.
        """
        for rnn in self.rnns:
            rnn.reset_hidden(bsz)

    def init_inference(self, bsz):    
        """ 
        Forward ``init_inference(bsz)`` to every layer.

        NOTE(review): RNNCell in this file defines no ``init_inference``, so
        this only works for layers that provide that method themselves.
        """
        for rnn in self.rnns:
            rnn.init_inference(bsz)

class RNNCell(nn.Module):
    """ 
    Stateful wrapper that turns a functional ``cell`` into a recurrent layer.

    gate_multiplier depends on the architecture: 4 for LSTM-like cells and
    3 for GRU-like cells.
    Always assumes input is NOT batch_first.
    When output_size differs from hidden_size, hidden state 0 is passed
    through an output projection (``w_ho``).
    n_hidden_states is the number of hidden states the cell needs: a single
    state is handed to ``cell`` as a tensor, several as a list.
    """
    def __init__(self, gate_multiplier, input_size, hidden_size, cell, n_hidden_states = 2, bias = False, output_size = None):
        super(RNNCell, self).__init__()

        self.gate_multiplier = gate_multiplier
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.cell = cell
        self.bias = bias
        # Without an explicit output size the cell emits hidden_size features.
        self.output_size = hidden_size if output_size is None else output_size

        self.gate_size = gate_multiplier * self.hidden_size
        self.n_hidden_states = n_hidden_states

        self.w_ih = nn.Parameter(torch.Tensor(self.gate_size, self.input_size))
        self.w_hh = nn.Parameter(torch.Tensor(self.gate_size, self.output_size))

        # A recurrent projection is only needed when the sizes differ.
        if self.output_size != self.hidden_size:
            self.w_ho = nn.Parameter(torch.Tensor(self.output_size, self.hidden_size))

        if self.bias:
            self.b_ih = nn.Parameter(torch.Tensor(self.gate_size))
            self.b_hh = nn.Parameter(torch.Tensor(self.gate_size))
        else:
            self.b_ih = None
            self.b_hh = None

        # Hidden states carried between forward calls; created lazily.
        self.hidden = [None] * self.n_hidden_states

        self.reset_parameters()

    def new_like(self, new_input_size=None):
        """
        Build a fresh cell of the same type and configuration.

        ``new_input_size`` replaces the input size when given; every other
        setting is copied from this cell. Parameters are newly initialised.
        """
        input_size = self.input_size if new_input_size is None else new_input_size

        cell_type = type(self)
        return cell_type(self.gate_multiplier,
                         input_size,
                         self.hidden_size,
                         self.cell,
                         self.n_hidden_states,
                         self.bias,
                         self.output_size)

    #Use xavier where we can (weights), otherwise use uniform (bias)
    def reset_parameters(self, gain=1):
        """
        Fill every parameter uniformly in [-1/sqrt(hidden_size), 1/sqrt(hidden_size)].

        ``gain`` is accepted but not used by this implementation.
        """
        bound = 1.0 / math.sqrt(self.hidden_size)
        for weight in self.parameters():
            weight.data.uniform_(-bound, bound)

    # Xavier reset (alternative, not active):
    # def reset_parameters(self, gain=1):
    #     stdv = 1.0 / math.sqrt(self.gate_size)
    #
    #     for param in self.parameters():
    #         if (param.dim() > 1):
    #             torch.nn.init.xavier_normal(param, gain)
    #         else:
    #             param.data.uniform_(-stdv, stdv)

    def init_hidden(self, bsz):
        """
        Create zeroed hidden states for batch size ``bsz`` where needed.

        A state is (re)created when it is missing or its first dimension does
        not equal ``bsz``; matching states are left untouched.
        """
        # Any parameter works as a template for creating the new tensors.
        a_param = next(p for p in self.parameters() if p is not None)

        for idx, state in enumerate(self.hidden):
            if state is not None and state.data.size()[0] == bsz:
                continue

            # State 0 is the (possibly projected) output, the rest are internal.
            width = self.output_size if idx == 0 else self.hidden_size

            zeros = a_param.data.new(bsz, width).zero_()
            self.hidden[idx] = Variable(zeros, requires_grad=False)

    def reset_hidden(self, bsz):
        """
        Throw away all hidden states and re-create them for batch size ``bsz``.
        """
        self.hidden = [None for _ in self.hidden]
        self.init_hidden(bsz)

    def detach_hidden(self):
        """
        Replace every hidden state with a detached copy.

        Raises RuntimeError if any hidden state has not been initialised yet.
        """
        if any(state is None for state in self.hidden):
            raise RuntimeError("Must initialize hidden state before you can detach it")
        self.hidden = [state.detach() for state in self.hidden]

    def forward(self, input):
        """
        Advance the cell by one step and return the new hidden states as a tuple.

        Hidden states are created first if they are missing or the batch size
        (``input.size()[0]``) has changed.
        """
        self.init_hidden(input.size()[0])

        single_state = self.n_hidden_states == 1
        state_arg = self.hidden[0] if single_state else self.hidden
        result = self.cell(input, state_arg, self.w_ih, self.w_hh, b_ih=self.b_ih, b_hh=self.b_hh)
        # Normalise the cell's return value back into a list of states.
        self.hidden = list(result) if self.n_hidden_states > 1 else [result]

        if self.output_size != self.hidden_size:
            self.hidden[0] = F.linear(self.hidden[0], self.w_ho)

        return tuple(self.hidden)


================================================
FILE: apex/apex/RNN/__init__.py
================================================
from .models import LSTM, GRU, ReLU, Tanh, mLSTM

__all__ = ['models']


================================================
FILE: apex/apex/RNN/cells.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F

from .RNNBackend import RNNCell

from torch.nn._functions.thnn import rnnFusedPointwise as fusedBackend

import math 


class mLSTMRNNCell(RNNCell):
    """
    RNNCell specialisation for the multiplicative LSTM (``mLSTMCell``).

    Uses a gate multiplier of 4 and two hidden states, and owns the two extra
    weight matrices ``w_mih`` and ``w_mhh`` that ``mLSTMCell`` expects.
    """

    def __init__(self, input_size, hidden_size, bias = False, output_size = None):
        super(mLSTMRNNCell, self).__init__(
            4, input_size, hidden_size, mLSTMCell,
            n_hidden_states=2, bias=bias, output_size=output_size)

        self.w_mih = nn.Parameter(torch.Tensor(self.output_size, self.input_size))
        self.w_mhh = nn.Parameter(torch.Tensor(self.output_size, self.output_size))

        # The base constructor ran reset_parameters before the two matrices
        # above existed, so run it again to initialise them as well.
        self.reset_parameters()

    def forward(self, input):
        """
        Advance the cell by one step and return the new hidden states as a tuple.
        """
        # Creates the hidden states if missing or if the batch size changed.
        batch_size = input.size()[0]
        self.init_hidden(batch_size)

        if self.n_hidden_states == 1:
            state_arg = self.hidden[0]
        else:
            state_arg = self.hidden

        new_state = self.cell(input, state_arg, self.w_ih, self.w_hh, self.w_mih, self.w_mhh,
                              b_ih=self.b_ih, b_hh=self.b_hh)
        self.hidden = list(new_state)

        # Project hidden state 0 when the output size differs.
        if self.output_size != self.hidden_size:
            self.hidden[0] = F.linear(self.hidden[0], self.w_ho)
        return tuple(self.hidden)

    def new_like(self, new_input_size=None):
        """
        Build a fresh cell with this cell's configuration, optionally with a
        different input size.
        """
        input_size = self.input_size if new_input_size is None else new_input_size

        cell_type = type(self)
        return cell_type(input_size, self.hidden_size, self.bias, self.output_size)

def mLSTMCell(input, hidden, w_ih, w_hh, w_mih, w_mhh, b_ih=None, b_hh=None):
    """
    Compute one step of a multiplicative LSTM.

    Args:
        input: input for this step.
        hidden: pair ``(hx, cx)`` of previous hidden and cell state.
        w_ih, w_hh: gate weights applied to ``input`` and to the
            multiplicative state ``m`` respectively.
        w_mih, w_mhh: weights producing ``m`` from ``input`` and ``hx``.
        b_ih, b_hh: optional gate biases.

    Returns:
        ``(hy, cy)`` on the non-CUDA path; on CUDA, whatever the fused LSTM
        backend returns for the same gates.
    """
    hx, cx = hidden[0], hidden[1]

    # Multiplicative intermediate state; it takes the place of the hidden
    # state in the hidden-to-gate product on both code paths.
    m = F.linear(input, w_mih) * F.linear(hx, w_mhh)

    if input.is_cuda:
        # The fused kernel adds the biases and applies the non-linearities.
        igates = F.linear(input, w_ih)
        hgates = F.linear(m, w_hh)

        state = fusedBackend.LSTMFused.apply
        return state(igates, hgates, cx, b_ih, b_hh)

    gates = F.linear(input, w_ih, b_ih) + F.linear(m, w_hh, b_hh)

    ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)

    # torch.sigmoid / torch.tanh replace the deprecated F.sigmoid / F.tanh.
    ingate = torch.sigmoid(ingate)
    forgetgate = torch.sigmoid(forgetgate)
    cellgate = torch.tanh(cellgate)
    outgate = torch.sigmoid(outgate)

    cy = (forgetgate * cx) + (ingate * cellgate)
    hy = outgate * torch.tanh(cy)

    return hy, cy
                                                                            

================================================
FILE: apex/apex/RNN/models.py
================================================
import torch

from torch.nn._functions.rnn import LSTMCell, RNNReLUCell, RNNTanhCell, GRUCell

from .RNNBackend import bidirectionalRNN, stackedRNN, RNNCell
from .cells import mLSTMRNNCell, mLSTMCell

def toRNNBackend(inputRNN, num_layers, bidirectional=False, dropout = 0):
    """
    Wrap ``inputRNN`` in a multi-layer backend.

    Returns a bidirectionalRNN when ``bidirectional`` is truthy and a
    stackedRNN otherwise; both receive ``num_layers`` and ``dropout``.
    """
    backend = bidirectionalRNN if bidirectional else stackedRNN
    return backend(inputRNN, num_layers, dropout = dropout)


def LSTM(input_size, hidden_size, num_layers, bias=True, batch_first=False, dropout=0, bidirectional=False, output_size = None):
    """
    Build an LSTM stack on the RNN backend.

    Creates an RNNCell around ``LSTMCell`` with gate multiplier 4 and two
    hidden states, then wraps it with toRNNBackend. ``batch_first`` is
    accepted but not used by this function.
    """
    lstm_cell = RNNCell(4, input_size, hidden_size, LSTMCell,
                        n_hidden_states=2, bias=bias, output_size=output_size)
    return toRNNBackend(lstm_cell, num_layers, bidirectional=bidirectional, dropout=dropout)

def GRU(input_size, hidden_size, num_layers, bias=True, batch_first=False, dropout=0, bidirectional=False, output_size = None):
    """
    Build a GRU stack on the RNN backend.

    Creates an RNNCell around ``GRUCell`` with gate multiplier 3 and a single
    hidden state, then wraps it with toRNNBackend. ``batch_first`` is
    accepted but not used by this function.
    """
    gru_cell = RNNCell(3, input_size, hidden_size, GRUCell,
                       n_hidden_states=1, bias=bias, output_size=output_size)
    return toRNNBackend(gru_cell, num_layers, bidirectional=bidirectional, dropout=dropout)

def ReLU(input_size, hidden_size, num_layers, bias=True, batch_first=False, dropout=0, bidirectional=False, output_size = None):
    """
    Build a ReLU RNN stack on the RNN backend.

    Creates an RNNCell around ``RNNReLUCell`` with gate multiplier 1 and a
    single hidden state, then wraps it with toRNNBackend. ``batch_first`` is
    accepted but not used by this function.
    """
    relu_cell = RNNCell(1, input_size, hidden_size, RNNReLUCell,
                        n_hidden_states=1, bias=bias, output_size=output_size)
    return toRNNBackend(relu_cell, num_layers, bidirectional=bidirectional, dropout=dropout)

def Tanh(input_size, hidden_size, num_layers, bias=True, batch_first=False, dropout=0, bidirectional=False, output_size = None):
    """
    Build a tanh RNN stack on the RNN backend.

    Creates an RNNCell around ``RNNTanhCell`` with gate multiplier 1 and a
    single hidden state, then wraps it with toRNNBackend. ``batch_first`` is
    accepted but not used by this function.
    """
    tanh_cell = RNNCell(1, input_size, hidden_size, RNNTanhCell,
                        n_hidden_states=1, bias=bias, output_size=output_size)
    return toRNNBackend(tanh_cell, num_layers, bidirectional=bidirectional, dropout=dropout)
        
def mLSTM(input_size, hidden_size, num_layers, bias=True, batch_first=False, dropout=0, bidirectional=False, output_size = None):
    """
    Build a multiplicative-LSTM stack on the RNN backend.

    Creates an mLSTMRNNCell and wraps it with toRNNBackend. ``batch_first``
    is accepted but not used by this function.
    """
    mlstm_cell = mLSTMRNNCell(input_size, hidden_size, bias, output_size)
    return toRNNBackend(mlstm_cell, num_layers, bidirectional=bidirectional, dropout=dropout)


================================================
FILE: apex/apex/__init__.py
================================================
from . import parallel
from . import amp
from . import fp16_utils

# For optimizers and normalization there is no Python fallback.
# Absence of cuda backend is a hard error.
# I would like the errors from importing fused_adam_cuda or fused_layer_norm_cuda
# to be triggered lazily, because if someone has installed with --cpp_ext and --cuda_ext
# so they expect those backends to be available, but for some reason they actually aren't
# available (for example because they built improperly in a way that isn't revealed until
# load time) the error message is timely and visible.
from . import optimizers
from . import normalization


================================================
FILE: apex/apex/amp/README.md
================================================
# amp: Automatic Mixed Precision

## Annotating User Functions

Nearly all PyTorch user code needs nothing more than the two steps
above to use amp. After all, custom layers are built out of simpler
PyTorch components, and amp already can see those.

However, any custom C++ or CUDA code is outside of amp's (default)
view of things. For example, suppose I implemented a new recurrent
cell called a "forgetful recurrent unit" that calls directly into a
CUDA backend:

```python
from backend import FRUBackend

def fru(input, hidden, weight, bias):
    # call to CUDA code
    FRUBackend(input, hidden, weight, bias)
```

In this case, it is possible to get a runtime type mismatch. For
example, you might have `input` in fp16, and `weight` in fp32, and amp
doesn't have the visibility to insert an appropriate cast.

amp exposes two ways to handle "invisible" backend code: function
annotations and explicit registration.

#### Function annotation

The first way to handle backend code is a set of function annotations:

- `@amp.half_function`
- `@amp.float_function`
- `@amp.promote_function`

These correspond to:

- Cast all arguments to fp16
- Cast all arguments to fp32
- If there are any type mismatches, cast everything to the widest type

In our example, we believe that the FRU unit is fp16-safe and will get
performance gains from casting its arguments to fp16, so we write:

```python
@amp.half_function
def fru(input, hidden, weight, bias):
    #...
```

#### Explicit registration

The other way to handle backend code is with explicit function
registration:

- `amp.register_half_function(module, function_name)`
- `amp.register_float_function(module, function_name)`
- `amp.register_promote_function(module, function_name)`

When using this API, `module` is the containing class or module for
the function, and `function_name` is the _string_ name of the
function. Note that the function must be registered before the call to
`amp.initialize()`.

For our FRU unit, we can register the backend function directly:

```python
import backend

amp.register_half_function(backend, 'FRUBackend')
```


================================================
FILE: apex/apex/amp/__init__.py
================================================
from .amp import init, half_function, float_function, promote_function,\
    register_half_function, register_float_function, register_promote_function
from .handle import scale_loss, disable_casts
from .frontend import initialize
from ._amp_state import master_params, _amp_state


================================================
FILE: apex/apex/amp/__version__.py
================================================
# Version components as integers; __version__ is the same value dot-joined.
VERSION = (0, 1, 0)
__version__ = '.'.join(str(part) for part in VERSION)


================================================
FILE: apex/apex/amp/_amp_state.py
================================================
# This is a "header object" that allows different amp modules to communicate.
# I'm a C++ guy, not a python guy.  I decided this approach because it seemed most C++-like.  
# But apparently it's ok:
# http://effbot.org/pyfaq/how-do-i-share-global-variables-across-modules.htm
import os
import torch

# First two components of the installed torch version string.
TORCH_MAJOR = int(torch.__version__.split('.')[0])
TORCH_MINOR = int(torch.__version__.split('.')[1])

# container_abcs must expose the abstract container classes (Mapping,
# Iterable, ...). The standard library's collections.abc provides them on
# Python 3. torch._six is not available in every PyTorch release, so it is
# only used as a fallback when collections.abc cannot be imported.
try:
    import collections.abc as container_abcs
except ImportError:
    from torch._six import container_abcs


class AmpState(object):
    """Plain attribute holder for settings shared between the amp modules."""

    def __init__(self):
        # Read by warn_or_err: when True, problems are printed instead of raised.
        self.hard_override = False
        self.allow_incoming_model_not_fp32 = False
        # Read by maybe_print: messages are printed only when this is > 0.
        self.verbosity = 1


# Attribute stash.  Could also just stash things as global module attributes.
_amp_state = AmpState()


def warn_or_err(msg):
    """
    Report ``msg`` as an error, or as a printed warning under hard_override.

    Raises RuntimeError(msg) unless ``_amp_state.hard_override`` is set, in
    which case the message is printed with a "Warning:  " prefix instead.
    """
    if not _amp_state.hard_override:
        raise RuntimeError(msg)
        # I'm not sure if allowing hard_override is a good idea.
        # + "  If you're sure you know what you're doing, supply " +
        #                    "hard_override=True to amp.initialize.")
    print("Warning:  " + msg)


# True only when the environment defines WORLD_SIZE with a value above 1.
distributed = int(os.environ['WORLD_SIZE']) > 1 if 'WORLD_SIZE' in os.environ else False


def maybe_print(msg, rank0=False):
    """
    Print ``msg`` if amp verbosity is above 0.

    With ``rank0=True`` in a distributed run, only the process whose
    ``torch.distributed`` rank is 0 prints; otherwise every process prints.
    """
    if not _amp_state.verbosity > 0:
        return
    # The rank is only queried when it can actually suppress the message.
    if rank0 and distributed and torch.distributed.get_rank() != 0:
        return
    print(msg)


# def iter_params(param_groups):
#     for group in param_groups:
#         for p in group['params']:
#             yield p


def master_params(optimizer):
    """
    Generator that yields every param owned by ``optimizer``, walking its
    param groups in order.

    Args:
        optimizer: An optimizer previously returned from ``amp.initialize``.
    """
    for param_group in optimizer.param_groups:
        yield from param_group['params']


================================================
FILE: apex/apex/amp/_initialize.py
================================================
import torch
from torch._six import string_classes
import functools
import numpy as np
import warnings
from ._amp_state import _amp_state, warn_or_err, container_abcs
from .handle import disable_casts
from .scaler import LossScaler
from ._process_optimizer import _process_optimizer
from apex.fp16_utils import convert_network
from ..fp16_utils import FP16_Optimizer as FP16_Optimizer_general
from ..optimizers import FP16_Optimizer as FP16_Optimizer_for_fused
from ..optimizers import FusedAdam
from ..parallel import DistributedDataParallel as apex_DDP
from ..parallel.LARC import LARC


def to_type(dtype, t):
    """
    Cast ``t`` to ``dtype`` where that makes sense.

    Floating-point tensors are converted with ``t.to(dtype)``; other tensors
    are returned unchanged. A tensor that is not on CUDA triggers a warning
    but is still processed. Non-tensor objects are expected to provide their
    own ``to(dtype)`` method, which is called directly.
    """
    if not isinstance(t, torch.Tensor):
        # Trust the user's custom batch type, that's all I can do here.
        return t.to(dtype)

    if not t.is_cuda:
        # This should not be a hard error, since it may be legitimate.
        warnings.warn("An input tensor was not cuda.")
    # GANs require this.
    # if t.requires_grad:
    #     warn_or_err("input data requires grad.  Since input data is not a model parameter,\n"
    #         "its gradients will not be properly allreduced by DDP.")
    return t.to(dtype) if t.is_floating_point() else t


# Modified from torch.optim.optimizer.py.  This is a bit more general than casted_args in utils.py.
def applier(value, fn):
    """
    Recursively apply ``fn`` to the castable leaves of ``value``.

    Tensors and objects with a ``to`` attribute are passed to ``fn``. Strings
    and numpy arrays are returned as-is. Mappings are rebuilt as a dict with
    both keys and values processed, other iterables are rebuilt with their
    own type, and anything else is returned unchanged.
    """
    if isinstance(value, torch.Tensor):
        return fn(value)
    # Checked before the generic Iterable case below, so these pass through whole.
    if isinstance(value, string_classes) or isinstance(value, np.ndarray):
        return value
    # Allow handling of custom batch classes
    if hasattr(value, "to"):
        return fn(value)
    if isinstance(value, container_abcs.Mapping):
        return {applier(key, fn) : applier(item, fn) for key, item in value.items()}
    if isinstance(value, container_abcs.Iterable):
        container_type = type(value)
        return container_type(applier(item, fn) for item in value)
    # Do I want this to fire off even if someone chooses to pass something ordinary like
    # an int or float?  May be more annoying than it's worth.
    # print("Warning:  unrecognized type in applier.  If your input data is a custom class, "
    #     "provide it with a .to(dtype) method which converts its floating-point Tensors to dtype. "
    #     "Amp will check for your custom to() and invoke it to cast the batch's "
    #     "floating-point Tensors to the appropriate type. "
    #     "Also, if your data is a custom class, it is your responsibility to ensure that "
    #     "any Tensors you want to be cuda are already cuda."
    return value


def check_models(models):
    """Refuse models that already carry a parallel wrapper.

    Args:
        models: iterable of models to inspect.

    Raises:
        RuntimeError: if a model is an instance of
            torch.nn.parallel.DistributedDataParallel,
            apex.parallel.DistributedDataParallel or
            torch.nn.parallel.DataParallel.
    """
    for model in models:
        # Checked in this order; if several classes match, the last one is reported.
        known_wrappers = (
            (torch.nn.parallel.DistributedDataParallel,
             "torch.nn.parallel.DistributedDataParallel"),
            (apex_DDP, "apex.parallel.DistributedDataParallel"),
            (torch.nn.parallel.DataParallel, "torch.nn.parallel.DataParallel"),
        )
        matched = [label for wrapper_cls, label in known_wrappers
                   if isinstance(model, wrapper_cls)]
        if not matched:
            continue
        raise RuntimeError("Incoming model is an instance of {}. ".format(matched[-1]) +
            "Parallel wrappers should only be applied to the model(s) AFTER \n"
            "the model(s) have been returned from amp.initialize.")


def check_params_fp32(models):
    """Report floating-point parameters and buffers that are Half or not on CUDA.

    For every model, each floating-point parameter and buffer is inspected.
    ``warn_or_err`` is called with an explanatory message when the tensor's type
    string contains 'Half', or otherwise when the tensor is not a CUDA tensor.
    Non-floating-point tensors are ignored.

    Args:
        models: iterable of modules exposing ``named_parameters()`` and either
            ``named_buffers()`` or a ``_buffers`` mapping.
    """
    for model in models:
        for name, param in model.named_parameters():
            if not param.is_floating_point():
                continue
            type_name = param.type()
            if 'Half' in type_name:
                warn_or_err("Found param {} with type {}, expected torch.cuda.FloatTensor.\n"
                    "When using amp.initialize, you do not need to call .half() on your model\n"
                    "before passing it, no matter what optimization level you choose.".format(
                    name, type_name))
            elif not param.is_cuda:
                warn_or_err("Found param {} with type {}, expected torch.cuda.FloatTensor.\n"
                    "When using amp.initialize, you need to provide a model with parameters\n"
                    "located on a CUDA device before passing it no matter what optimization level\n"
                    "you chose. Use model.to('cuda') to use the default device.".format(
                    name, type_name))

        # Backward compatibility for PyTorch 0.4: fall back to the raw
        # _buffers mapping when named_buffers() does not exist.
        has_named_buffers = hasattr(model, 'named_buffers')
        buffer_source = model.named_buffers() if has_named_buffers else model._buffers
        for entry in buffer_source:
            if type(entry) is tuple:
                # named_buffers() style: (name, tensor) pairs.
                name, buf = entry
            else:
                # Mapping style: iteration yields keys only.
                name = entry
                buf = buffer_source[entry]
            if not buf.is_floating_point():
                continue
            type_name = buf.type()
            if 'Half' in type_name:
                warn_or_err("Found buffer {} with type {}, expected torch.cuda.FloatTensor.\n"
                    "When using amp.initialize, you do not need to call .half() on your model\n"
                    "before passing it, no matter what optimization level you choose.".format(
                    name, type_name))
            elif not buf.is_cuda:
                warn_or_err("Found buffer {} with type {}, expected torch.cuda.FloatTensor.\n"
                    "When using amp.initialize, you need to provide a model with buffers\n"
                    "located on a CUDA device before passing it no matter what optimization level\n"
                    "you chose. Use model.to('cuda') to use the default device.".format(
                    name, type_name))


def check_optimizers(optimizers):
    """Refuse optimizers that were already wrapped in an FP16_Optimizer.

    Args:
        optimizers: iterable of optimizer objects to inspect.

    Raises:
        RuntimeError: if an optimizer is an instance of
            apex.fp16_utils.FP16_Optimizer or apex.optimizers.FP16_Optimizer.
    """
    for optim in optimizers:
        # Checked in this order; a later match overrides an earlier one.
        wrapper_labels = (
            (FP16_Optimizer_general, "apex.fp16_utils.FP16_Optimizer"),
            (FP16_Optimizer_for_fused, "apex.optimizers.FP16_Optimizer"),
        )
        bad_optim_type = None
        for wrapper_cls, label in wrapper_labels:
            if isinstance(optim, wrapper_cls):
                bad_optim_type = label
        if bad_optim_type is None:
            continue
        raise RuntimeError("An incoming optimizer is an instance of {}. ".format(bad_optim_type) +
                           "The optimizer(s) passed to amp.initialize() must be bare \n"
                           "instances of either ordinary Pytorch optimizers, or Apex fused \n"
                           "optimizers (currently just FusedAdam, but FusedSGD will be added \n"
                           "soon).  You should not manually wrap your optimizer in either \n"
                           "apex.fp16_utils.FP16_Optimizer or apex.optimizers.FP16_Optimizer. \n"
                           "amp.initialize will take care of that for you (if necessary) based \n"
                           "on the specified opt_level (and optional overridden properties).")


def wrap_fused_adam(optimizer, properties):
    """Wrap a FusedAdam optimizer in ``FP16_Optimizer_for_fused``.

    Args:
        optimizer: the optimizer to wrap.
        properties: object providing ``master_weights``, ``cast_model_type``,
            ``keep_batchnorm_fp32`` and ``loss_scale``.

    Returns:
        An ``FP16_Optimizer_for_fused`` built with ``dynamic_loss_scale=True``
        when ``properties.loss_scale == "dynamic"``, otherwise with
        ``static_loss_scale=properties.loss_scale``.

    Raises:
        AssertionError: if ``master_weights`` is not True, ``cast_model_type``
            is not ``torch.float16``, or ``keep_batchnorm_fp32`` is neither
            False nor None.
    """
    msg = ('Currently, the usage of FusedAdam is restricted to '
           'amp.initialize(..., opt_level="O2", keep_batchnorm_fp32=False, '
           'loss_scale=float or "dynamic").  We are working on enabling more general usage.')

    assert properties.master_weights is True, msg
    assert properties.cast_model_type is torch.float16, msg
    bn_setting = properties.keep_batchnorm_fp32
    assert bn_setting is False or bn_setting is None, msg

    if properties.loss_scale == "dynamic":
        scale_kwargs = {"dynamic_loss_scale": True}
    else:
        scale_kwargs = {"static_loss_scale": properties.loss_scale}
    return FP16_Optimizer_for_fused(optimizer, **scale_kwargs)


def _initialize(models, optimizers, properties, num_losses=1, cast_model_outputs=None):
    """Prepare models and optimizers according to ``properties``.

    Args:
        models: a single ``torch.nn.Module`` or a list of them.
        optimizers: a single ``torch.optim.Optimizer`` / ``LARC`` instance, a
            list of optimizers, or None.
        properties: options object; this function reads ``cast_model_type``,
            ``keep_batchnorm_fp32``, ``loss_scale`` and ``patch_torch_functions``
            and also hands the object to the optimizer wrapping helpers.
        num_losses: number of ``LossScaler`` instances stored in
            ``_amp_state.loss_scalers``.
        cast_model_outputs: optional dtype for the outputs of the patched
            ``forward``; when the model is cast and this is None, outputs are
            cast to ``torch.float32``.

    Returns:
        ``models`` has the same "shape" (single object or list) as the input.
        If ``optimizers`` was passed as a list, ``(models, optimizers)`` is
        returned with that list.  Otherwise ``(models, optimizer)`` is returned
        for a single optimizer, and just ``models`` when no optimizer was given.

    Raises:
        TypeError: if ``models`` or ``optimizers`` has an unsupported type.
        RuntimeError: propagated from ``check_models`` / ``check_optimizers``.
    """
    # NOTE(review): apex_DDP is not referenced again inside this function body.
    from apex.parallel import DistributedDataParallel as apex_DDP
    from .amp import init as amp_init

    # Normalize `optimizers` to a list, remembering whether the caller passed one.
    optimizers_was_list = False
    if isinstance(optimizers, torch.optim.Optimizer) or isinstance(optimizers, LARC):
        optimizers = [optimizers]
    elif optimizers is None:
        optimizers = []
    elif isinstance(optimizers, list):
        optimizers_was_list = True
        check_optimizers(optimizers)
    else:
        # check_optimizers runs first so that it can raise its more specific
        # RuntimeError before the generic TypeError below.
        check_optimizers([optimizers])
        raise TypeError("optimizers must be either a single optimizer or a list of optimizers.")

    # Normalize `models` to a list, remembering whether the caller passed one.
    if isinstance(models, torch.nn.Module):
        models_was_list = False
        models = [models]
    elif isinstance(models, list):
        models_was_list = True
    else:
        raise TypeError("models must be either a single model or a list of models.")

    check_models(models)

    if not _amp_state.allow_incoming_model_not_fp32:
        check_params_fp32(models)


    # In the future, when FP16_Optimizer can be deprecated and master weights can
    # become an attribute, remember to stash master weights before casting the model.

    if properties.cast_model_type:
        # Cast the model(s): convert_network is used when keep_batchnorm_fp32
        # is truthy, a plain .to() otherwise.
        if properties.keep_batchnorm_fp32:
            for model in models:
                convert_network(model, properties.cast_model_type)
        else:
            for model in models:
                model.to(properties.cast_model_type)

        input_caster = functools.partial(to_type, properties.cast_model_type)
        if cast_model_outputs is not None:
            output_caster = functools.partial(to_type, cast_model_outputs)
        else:
            output_caster = functools.partial(to_type, torch.float32)

        for model in models:
            # Patch the forward method to cast incoming data to the correct type, and
            # outgoing data to float32, so "the user never needs to call .half()."
            # I like writing things explicitly more than decorators.
            def patch_forward(old_fwd):
                def new_fwd(*args, **kwargs):
                    output = old_fwd(*applier(args, input_caster),
                                     **applier(kwargs, input_caster))
                    return applier(output, output_caster)
                return new_fwd

            model.forward = patch_forward(model.forward)

        # State dict trick to recast any preexisting per-param state tensors
        for optimizer in optimizers:
            optimizer.load_state_dict(optimizer.state_dict())
    elif cast_model_outputs is not None:
        # The model is not cast, but its outputs are still cast on request.
        output_caster = functools.partial(to_type, cast_model_outputs)

        for model in models:
            def patch_forward(old_fwd):
                def new_fwd(*args, **kwargs):
                    output = old_fwd(*args, **kwargs)
                    return applier(output, output_caster)
                return new_fwd

            model.forward = patch_forward(model.forward)

    # Entries are replaced in place, so a list supplied by the caller is
    # updated as well.
    for i, optimizer in enumerate(optimizers):
        # Still need to special case this for the first pass
        if isinstance(optimizer, FusedAdam):
            optimizers[i] = wrap_fused_adam(optimizer, properties)
        else:
            optimizers[i] = _process_optimizer(optimizer, properties)

    # One loss scaler per loss.
    _amp_state.loss_scalers = []
    for _ in range(num_losses):
        _amp_state.loss_scalers.append(LossScaler(properties.loss_scale,
                                                  min_loss_scale=_amp_state.min_loss_scale,
                                                  max_loss_scale=_amp_state.max_loss_scale))

    if properties.patch_torch_functions:
        # handle is unused here. It's accessible later through a global value anyway.
        handle = amp_init(loss_scale=properties.loss_scale, verbose=(_amp_state.verbosity == 2))
        for optimizer in optimizers:
            # Disable Amp casting for the optimizer step, because it should only be
            # applied to FP32 master params anyway.
            def patch_step(old_step):
                def new_step(*args, **kwargs):
                    with disable_casts():
                        output = old_step(*args, **kwargs)
                    return output
                return new_step

            optimizer.step = patch_step(optimizer.step)

    # Mirror the caller's input shapes in the return value.
    if optimizers_was_list:
        if models_was_list:
            return models, optimizers
        else:
            return models[0], optimizers
    else:
        if models_was_list:
            if len(optimizers) == 0:
                return models
            else:
                return models, optimizers[0]
        else:
            if len(optimizers) == 0:
                return models[0]
            else:
                return models[0], optimizers[0]


================================================
FILE: apex/apex/amp/_process_optimizer.py
================================================
import types
from ..fp16_utils import master_params_to_model_params
from ..multi_tensor_apply import multi_tensor_applier
from ._amp_state import maybe_print
import torch


class AmpOptimizerState(object):
    """Empty attribute container attached to an optimizer as ``_amp_stash``.

    Instances start with no attributes; the Amp optimizer hooks add whatever
    bookkeeping fields they need.
    """


def lazy_init_with_master_weights(self):
    """Build the FP32 master-weight bookkeeping on ``self._amp_stash``.

    For every param group, each parameter with ``requires_grad`` set is sorted
    by its type string:
      * 'torch.cuda.HalfTensor' params get a detached FP32 clone (the master
        param) which replaces them inside the param group; any existing entry
        in ``self.state`` is re-keyed to the master param;
      * 'torch.cuda.FloatTensor' params are kept as they are.

    Per-group and flattened lists are stored on the stash, the ``.grad`` of all
    master and FP32 params is cleared, and the optimizer state is round-tripped
    through ``state_dict()`` / ``load_state_dict()``.

    Raises:
        TypeError: for a trainable parameter of any other type.
    """
    stash = self._amp_stash
    stash.fp16_groups = []
    stash.fp32_from_fp16_groups = []
    stash.fp32_from_fp32_groups = []

    for param_group in self.param_groups:
        group_params = param_group['params']
        model_fp16 = []
        plain_fp32 = []
        master_fp32 = []
        for slot, param in enumerate(group_params):
            if not param.requires_grad:
                continue
            type_name = param.type()
            if type_name == 'torch.cuda.HalfTensor':
                model_fp16.append(param)
                master_param = param.detach().clone().float()
                master_param.requires_grad = True
                group_params[slot] = master_param
                master_fp32.append(master_param)
                # Reset existing state dict key to the new master param.
                # We still need to recast per-param state tensors, if any, to FP32.
                if param in self.state:
                    self.state[master_param] = self.state.pop(param)
            elif type_name == 'torch.cuda.FloatTensor':
                plain_fp32.append(param)
                group_params[slot] = param
            else:
                raise TypeError("Optimizer's parameters must be either "
                                "torch.cuda.FloatTensor or torch.cuda.HalfTensor. "
                                "Received {}".format(type_name))

        stash.fp16_groups.append(model_fp16)
        stash.fp32_from_fp16_groups.append(master_fp32)
        stash.fp32_from_fp32_groups.append(plain_fp32)

    # Flattened views over all groups.
    stash.all_fp16_params = [p for group in stash.fp16_groups for p in group]
    stash.all_fp32_from_fp16_params = [
        p for group in stash.fp32_from_fp16_groups for p in group]
    stash.all_fp32_from_fp32_params = [
        p for group in stash.fp32_from_fp32_groups for p in group]

    # One stash slot per FP32 param; master params from FP16 get none.
    stash.all_fp32_from_fp32_grad_stash = [None] * len(stash.all_fp32_from_fp32_params)

    for master_param in stash.all_fp32_from_fp16_params:
        master_param.grad = None
    for fp32_param in stash.all_fp32_from_fp32_params:
        fp32_param.grad = None

    # Leverage state_dict() and load_state_dict() to recast preexisting per-param state tensors
    self.load_state_dict(self.state_dict())


def prepare_backward_with_master_weights(self):
    """Get gradients ready for a backward pass (master-weights variant).

    Runs the lazy initialization on first use, clears ``.grad`` on every FP16
    model param, and moves the current ``.grad`` of every FP32 param into
    ``all_fp32_from_fp32_grad_stash`` before clearing it.
    """
    stash = self._amp_stash

    if not stash.lazy_init_called:
        self._lazy_init_maybe_master_weights()
        stash.lazy_init_called = True

    # Set up to leverage grad copy elision.
    for model_param in stash.all_fp16_params:
        model_param.grad = None

    grad_stash = stash.all_fp32_from_fp32_grad_stash
    for slot, fp32_param in enumerate(stash.all_fp32_from_fp32_params):
        grad_stash[slot] = fp32_param.grad
        # Set up to leverage grad copy elision.
        fp32_param.grad = None


def post_backward_with_master_weights(self, scaler):
    """Unscale gradients after a backward pass (master-weights variant).

    FP16 model grads are unscaled into the grads of their FP32 master params:
    into a freshly allocated grad when the master has none, or combined with the
    existing master grad via ``scaler.unscale_with_stashed``.  FP32 params are
    unscaled in place, combined with the grad stashed before backward when one
    exists, or simply given the stashed grad back when backward produced none.
    The FP32 grad stash is reset to all ``None`` at the end.

    Args:
        scaler: object providing ``loss_scale()``, ``unscale(...)`` and
            ``unscale_with_stashed(...)``.
    """
    stash = self._amp_stash

    # --- FP16 model params and their FP32 masters ---------------------------
    # This is a lot of python overhead...
    fresh_model_grads = []
    fresh_master_grads = []
    accumulating_model_grads = []
    existing_master_grads = []
    for fp16_param, fp32_param in zip(stash.all_fp16_params,
                                      stash.all_fp32_from_fp16_params):
        model_grad = fp16_param.grad
        if model_grad is None:
            # Nothing new from backward, whether or not a master grad exists.
            continue
        if fp32_param.grad is None:
            fp32_param.grad = torch.empty_like(fp32_param)
            fresh_model_grads.append(model_grad)
            fresh_master_grads.append(fp32_param.grad)
        else:
            accumulating_model_grads.append(model_grad)
            existing_master_grads.append(fp32_param.grad)

    if fresh_model_grads:
        scaler.unscale(
            fresh_model_grads,
            fresh_master_grads,
            scaler.loss_scale(),
            models_are_masters=False)

    if accumulating_model_grads:
        scaler.unscale_with_stashed(
            accumulating_model_grads,
            existing_master_grads,
            existing_master_grads)

    # --- FP32 params --------------------------------------------------------
    # fp32 params can be treated as they would be in the "no_master_weights" case.
    fresh_grads = []
    accumulating_grads = []
    matching_stashed = []
    for param, stashed_grad in zip(stash.all_fp32_from_fp32_params,
                                   stash.all_fp32_from_fp32_grad_stash):
        current = param.grad
        if current is None:
            if stashed_grad is not None:
                param.grad = stashed_grad
        elif stashed_grad is None:
            fresh_grads.append(current)
        else:
            accumulating_grads.append(current)
            matching_stashed.append(stashed_grad)

    if fresh_grads:
        scaler.unscale(
            fresh_grads,
            fresh_grads,
            scaler.loss_scale(),
            models_are_masters=True)

    if accumulating_grads:
        scaler.unscale_with_stashed(
            accumulating_grads,
            matching_stashed,
            accumulating_grads)

    # Clear the stash (in place, keeping the same list object).
    grad_stash = stash.all_fp32_from_fp32_grad_stash
    grad_stash[:] = [None] * len(grad_stash)


def lazy_init_no_master_weights(self):
    """Build the parameter bookkeeping on ``self._amp_stash`` (no master weights).

    Every parameter of every param group is sorted by its type string into
    ``all_fp16_params`` ('torch.cuda.HalfTensor') or ``all_fp32_params``
    ('torch.cuda.FloatTensor'), and a matching grad stash filled with ``None``
    is created for each list.

    Raises:
        TypeError: for a parameter of any other type.
    """
    stash = self._amp_stash
    stash.all_fp16_params = []
    stash.all_fp32_params = []
    destination = {
        'torch.cuda.HalfTensor': stash.all_fp16_params,
        'torch.cuda.FloatTensor': stash.all_fp32_params,
    }

    for param_group in self.param_groups:
        for param in param_group['params']:
            bucket = destination.get(param.type())
            if bucket is None:
                raise TypeError("Optimizer's parameters must be either "
                                "torch.cuda.FloatTensor or torch.cuda.HalfTensor. "
                                "Received {}".format(param.type()))
            bucket.append(param)

    stash.all_fp16_grad_stash = [None] * len(stash.all_fp16_params)
    stash.all_fp32_grad_stash = [None] * len(stash.all_fp32_params)


def prepare_backward_no_master_weights(self):
    """Get gradients ready for a backward pass (no master weights).

    Runs the lazy initialization on first use, then moves the current ``.grad``
    of every FP16 and FP32 param into the corresponding grad stash and clears
    it on the param.
    """
    stash = self._amp_stash

    if not stash.lazy_init_called:
        self._lazy_init_maybe_master_weights()
        stash.lazy_init_called = True

    # FP16 params are handled first, then FP32 params.
    param_sets = ((stash.all_fp16_params, stash.all_fp16_grad_stash),
                  (stash.all_fp32_params, stash.all_fp32_grad_stash))
    for params, grad_stash in param_sets:
        for slot, param in enumerate(params):
            grad_stash[slot] = param.grad
            # Set up to leverage grad copy elision.
            param.grad = None


def post_backward_no_master_weights(self, scaler):
    """Unscale gradients after a backward pass (no master weights).

    FP16 params and FP32 params are processed as two separate batches.  Within
    a batch, a grad produced by backward is unscaled in place, combined with
    the grad stashed before backward when one exists; a param that received no
    grad gets its stashed grad back.  Each batch's stash is reset to all
    ``None`` afterwards.

    Args:
        scaler: object providing ``loss_scale()``, ``unscale(...)`` and
            ``unscale_with_stashed(...)``.
    """
    stash = self._amp_stash

    batches = ((stash.all_fp16_params, stash.all_fp16_grad_stash),
               (stash.all_fp32_params, stash.all_fp32_grad_stash))

    for params, stashed_grads in batches:
        # This is a lot of python overhead...
        fresh_grads = []
        accumulating_grads = []
        matching_stashed = []
        for param, stashed_grad in zip(params, stashed_grads):
            current = param.grad
            if current is None:
                if stashed_grad is not None:
                    param.grad = stashed_grad
            elif stashed_grad is None:
                fresh_grads.append(current)
            else:
                accumulating_grads.append(current)
                matching_stashed.append(stashed_grad)

        if fresh_grads:
            scaler.unscale(
                fresh_grads,
                fresh_grads,
                scaler.loss_scale(),
                models_are_masters=True)

        if accumulating_grads:
            scaler.unscale_with_stashed(
                accumulating_grads,
                matching_stashed,
                accumulating_grads)

        # Clear the stash (in place, keeping the same list object).
        stashed_grads[:] = [None] * len(stashed_grads)


def _master_params_to_model_params(self):
    """Copy the FP32 master params back into their FP16 model params.

    When ``multi_tensor_applier`` is available, a single fused call with
    ``stash.multi_tensor_scale`` and a scale of 1.0 is issued (skipped when
    there are no FP16 params).  Otherwise ``master_params_to_model_params`` is
    called once per param group.
    """
    stash = self._amp_stash

    if not multi_tensor_applier.available:
        group_pairs = zip(stash.fp16_groups, stash.fp32_from_fp16_groups)
        for model_group, master_group in group_pairs:
            master_params_to_model_params(model_group, master_group)
        return

    if len(stash.all_fp16_params) > 0:
        multi_tensor_applier(
            stash.multi_tensor_scale,
            stash.dummy_overflow_buf,
            [stash.all_fp32_from_fp16_params, stash.all_fp16_params],
            1.0)


def _process_optimizer(optimizer, properties):
    """Attach Amp bookkeeping and patched methods to ``optimizer``.

    An ``AmpOptimizerState`` is stored as ``optimizer._amp_stash`` and the
    following attributes are bound on the optimizer instance:
    ``_lazy_init_maybe_master_weights``, ``_prepare_amp_backward``,
    ``_post_amp_backward`` and a replacement ``add_param_group``.  When
    ``properties.master_weights`` is truthy, ``_master_params_to_model_params``
    plus replacement ``step`` and ``zero_grad`` methods are bound as well.

    Args:
        optimizer: the optimizer to patch (modified in place).
        properties: options object; ``master_weights`` is read here and again
            later inside the patched ``add_param_group``.

    Returns:
        The same ``optimizer`` object.

    Raises:
        RuntimeError: if the optimizer already has an ``_amp_stash`` or already
            defines one of the Amp method names.
    """
    if hasattr(optimizer, "_amp_stash"):
        raise RuntimeError("A given optimizer should only be passed through amp.initialize once.")
    else:
        optimizer._amp_stash = AmpOptimizerState()

    optimizer._amp_stash.lazy_init_called = False
    optimizer._amp_stash.already_patched = False
    optimizer._amp_stash.params_have_scaled_gradients = False

    # Refuse to overwrite attributes that the incoming optimizer already defines.
    for name in ("_lazy_init_maybe_master_weights",
                 "_master_params_to_model_params",
                 "_prepare_amp_backward",
                 "_post_amp_backward"):
        if hasattr(optimizer, name):
            raise RuntimeError("Incoming optimizer already has {} defined.".format(name))

    # TODO:  Centralize exposure and import error checking for the C backend.
    if multi_tensor_applier.available:
        import amp_C
        optimizer._amp_stash.multi_tensor_scale = amp_C.multi_tensor_scale
        optimizer._amp_stash.dummy_overflow_buf = torch.cuda.IntTensor([0]);

    if properties.master_weights:
        optimizer._lazy_init_maybe_master_weights = types.MethodType(
            lazy_init_with_master_weights, optimizer)

        optimizer._master_params_to_model_params = types.MethodType(
            _master_params_to_model_params, optimizer)

        # Capture the original step before it is replaced below.
        old_step = optimizer.step
        def new_step(self, closure=None):
            if closure is not None:
                raise RuntimeError("Currently, Amp does not support closure use with optimizers.")
            retval = old_step()
            self._master_params_to_model_params()
            # Clear the master grads that wouldn't be zeroed by model.zero_grad()
            for param in self._amp_stash.all_fp32_from_fp16_params:
                param.grad = None
            return retval
        optimizer.step = types.MethodType(new_step, optimizer)

        # NOTE(review): old_zero_grad is captured but never called by new_zero_grad.
        old_zero_grad = optimizer.zero_grad
        def new_zero_grad(self):
            stash = self._amp_stash
            if not stash.lazy_init_called:
                self._lazy_init_maybe_master_weights()
                stash.lazy_init_called = True
            # Zero the model grads.
            for param in stash.all_fp16_params:
                if param.grad is not None:
                    param.grad.detach_()
                    param.grad.zero_()
            for param in stash.all_fp32_from_fp32_params:
                if param.grad is not None:
                    param.grad.detach_()
                    param.grad.zero_()
            # Clear the master grads that are independent of model grads
            for param in self._amp_stash.all_fp32_from_fp16_params:
                param.grad = None
        optimizer.zero_grad = types.MethodType(new_zero_grad, optimizer)

        optimizer._prepare_amp_backward = types.MethodType(
            prepare_backward_with_master_weights, optimizer)

        optimizer._post_amp_backward = types.MethodType(
            post_backward_with_master_weights, optimizer)
    else:
        optimizer._lazy_init_maybe_master_weights = types.MethodType(
            lazy_init_no_master_weights, optimizer)

        optimizer._prepare_amp_backward = types.MethodType(
            prepare_backward_no_master_weights, optimizer)

        optimizer._post_amp_backward = types.MethodType(
            post_backward_no_master_weights, optimizer)

    # Capture the original add_param_group; the replacement calls it last.
    old_add_param_group = optimizer.add_param_group

    def new_add_param_group(self, new_group):
        stash = self._amp_stash

        # The stash lists extended below only exist after lazy initialization.
        if not stash.lazy_init_called:
            self._lazy_init_maybe_master_weights()
            stash.lazy_init_called = True

        assert isinstance(new_group, dict), "param group must be a dict"

        # Normalize new_group['params'] to a list (sets are rejected).
        new_params = new_group['params']
        if isinstance(new_params, torch.Tensor):
            new_group['params'] = [new_params]
        elif isinstance(new_params, set):
            raise TypeError('optimizer parameters need to be organized in ordered collections, but '
                            'the ordering of tensors in sets will change between runs. Please use a list instead.')
        else:
            new_group['params'] = list(new_params)

        if properties.master_weights:
            # Mutate new_group in-place to use FP32 master params
            fp16_params_this_group = []
            fp32_params_this_group = []
            fp32_from_fp16_params_this_group = []
            for i, param in enumerate(new_group['params']):
                if param.requires_grad:
                    if param.type() == 'torch.cuda.HalfTensor':
                        fp16_params_this_group.append(param)
                        master_param = param.detach().clone().float()
                        master_param.requires_grad = True
                        new_group['params'][i] = master_param
                        fp32_from_fp16_params_this_group.append(master_param)
                    elif param.type() == 'torch.cuda.FloatTensor':
                        fp32_params_this_group.append(param)
                        new_group['params'][i] = param
                    else:
                        raise TypeError("Optimizer's parameters must be either "
                                        "torch.cuda.FloatTensor or torch.cuda.HalfTensor. "
                                        "Received {}".format(param.type()))

            # Extend both the per-group and the flattened bookkeeping lists.
            stash.fp16_groups.append(fp16_params_this_group)
            stash.fp32_from_fp16_groups.append(fp32_from_fp16_params_this_group)
            stash.fp32_from_fp32_groups.append(fp32_params_this_group)

            stash.all_fp16_params += fp16_params_this_group
            stash.all_fp32_from_fp16_params += fp32_from_fp16_params_this_group
            stash.all_fp32_from_fp32_params += fp32_params_this_group

            # stash.all_fp32_from_fp16_grad_stash = [None for _ in stash.all_fp32_from_fp16_params]
            stash.all_fp32_from_fp32_grad_stash += [None for _ in fp32_params_this_group]

            # It should be ok to let params be added with existing .grad attributes.
            # for param in fp16_params_this_group:
            #     param.grad = None

            # for param in fp32_from_fp16_params_this_group:
            #     param.grad = None

            # for param in stash.fp32_params_this_group:
            #     param.grad = None
        else:
            # No master weights: just register each param and a stash slot for it.
            for param in new_group['params']:
                if param.type() == 'torch.cuda.HalfTensor':
                    stash.all_fp16_params.append(param)
                    stash.all_fp16_grad_stash.append(None)
                elif param.type() == 'torch.cuda.FloatTensor':
                    stash.all_fp32_params.append(param)
                    stash.all_fp32_grad_stash.append(None)
                else:
                    raise TypeError("Optimizer's parameters must be either "
                                    "torch.cuda.FloatTensor or torch.cuda.HalfTensor. "
                                    "Received {}".format(param.type()))

        # Hand the (possibly mutated) group to the original implementation.
        old_add_param_group(new_group)

    optimizer.add_param_group = types.MethodType(new_add_param_group, optimizer)

    return optimizer


================================================
FILE: apex/apex/amp/amp.py
================================================
from . import compat, rnn_compat, utils, wrap
from .handle import AmpHandle, NoOpHandle
from .lists import functional_overrides, torch_overrides, tensor_overrides
from ._amp_state import _amp_state
from .frontend import *

import functools
import itertools

import torch


# Handle consulted at call time by the wrappers that _decorator_helper builds;
# init() declares it global and assigns it.
_DECORATOR_HANDLE = None
# (module, name, cast_fn) triples queued by register_half_function and
# register_float_function; init() wraps each entry and then clears the set.
_USER_CAST_REGISTRY = set()
# (module, name) pairs queued by register_promote_function; init() wraps each
# entry and then clears the set.
_USER_PROMOTE_REGISTRY = set()


def _decorator_helper(orig_fn, cast_fn, wrap_fn):
    """Build the wrapper shared by the decorator-style annotations.

    Args:
        orig_fn: the function being decorated.
        cast_fn: cast function handed (via ``utils.verbosify``) to ``wrap_fn``.
        wrap_fn: factory called as ``wrap_fn(orig_fn, cast_fn, handle)`` that
            returns the casting version of ``orig_fn``.

    Returns:
        A function that looks up ``_DECORATOR_HANDLE`` on every call.  If the
        handle is unset or inactive, ``orig_fn`` is called directly; otherwise
        the call goes through the function produced by ``wrap_fn``.
    """
    def wrapper(*args, **kwargs):
        handle = _DECORATOR_HANDLE
        amp_active = handle is not None and handle.is_active()
        if amp_active:
            verbose_cast = utils.verbosify(cast_fn, orig_fn.__name__,
                                           handle.verbose)
            casting_fn = wrap_fn(orig_fn, verbose_cast, handle)
            return casting_fn(*args, **kwargs)
        return orig_fn(*args, **kwargs)
    return wrapper


# Decorator form
def half_function(fn):
    """Decorator: wrap ``fn`` using ``utils.maybe_half`` as the cast function (caching enabled)."""
    caching_cast_wrapper = functools.partial(wrap.make_cast_wrapper, try_caching=True)
    return _decorator_helper(fn, utils.maybe_half, caching_cast_wrapper)


def float_function(fn):
    """Decorator: wrap ``fn`` using ``utils.maybe_float`` as the cast function (caching disabled)."""
    uncached_cast_wrapper = functools.partial(wrap.make_cast_wrapper, try_caching=False)
    return _decorator_helper(fn, utils.maybe_float, uncached_cast_wrapper)


def promote_function(fn):
    """Decorator: wrap ``fn`` with ``wrap.make_promote_wrapper``, passing ``utils.maybe_float`` as the cast function."""
    promote_wrapper = functools.partial(wrap.make_promote_wrapper)
    return _decorator_helper(fn, utils.maybe_float, promote_wrapper)


# Registry form
def register_half_function(module, name):
    """Queue ``module.name`` in ``_USER_CAST_REGISTRY`` with ``utils.maybe_half``.

    Raises:
        ValueError: if ``module`` has no attribute called ``name``.
    """
    if hasattr(module, name):
        _USER_CAST_REGISTRY.add((module, name, utils.maybe_half))
        return
    raise ValueError('No function named {} in module {}.'.format(
        name, module))


def register_float_function(module, name):
    """Queue ``module.name`` in ``_USER_CAST_REGISTRY`` with ``utils.maybe_float``.

    Raises:
        ValueError: if ``module`` has no attribute called ``name``.
    """
    if hasattr(module, name):
        _USER_CAST_REGISTRY.add((module, name, utils.maybe_float))
        return
    raise ValueError('No function named {} in module {}.'.format(
        name, module))


def register_promote_function(module, name):
    """Queue ``module.name`` in ``_USER_PROMOTE_REGISTRY``.

    Raises:
        ValueError: if ``module`` has no attribute called ``name``.
    """
    if hasattr(module, name):
        _USER_PROMOTE_REGISTRY.add((module, name))
        return
    raise ValueError('No function named {} in module {}.'.format(
        name, module))


# Top-level function to insert _all_ the hooks.
def init(enabled=True, loss_scale="dynamic", enable_caching=True, verbose=False, allow_banned=False):
    """Install every Amp wrapper on the torch namespaces and return the handle.

    Args:
        enabled: If False, nothing is patched; a ``NoOpHandle`` is stored as
            the decorator handle and returned.
        loss_scale: Forwarded to the ``AmpHandle`` constructor.
        enable_caching: Forwarded to the ``AmpHandle`` constructor.
        verbose: Forwarded to the ``AmpHandle`` constructor and to most of
            the ``wrap.*`` calls below.
        allow_banned: If True, functions listed in
            ``functional_overrides.BANNED_FUNCS`` are wrapped with a
            ``utils.maybe_float`` cast; otherwise they are wrapped with
            ``wrap.err_if_any_half`` and the list's error message.

    Returns:
        The ``NoOpHandle`` (disabled) or ``AmpHandle`` (enabled) that was
        created.  When enabled, the handle is also stored on
        ``_amp_state.handle``.

    Side effects:
        Rebinds the module-level ``_DECORATOR_HANDLE`` and, when enabled,
        drains ``_USER_CAST_REGISTRY`` and ``_USER_PROMOTE_REGISTRY``.
    """
    global _DECORATOR_HANDLE

    if not enabled:
        handle = NoOpHandle()
        _DECORATOR_HANDLE = handle
        return handle

    handle = AmpHandle(loss_scale, enable_caching, verbose)

    # 0) Force-{fp16, fp32} for user-annotated functions
    for mod, fn, cast_fn in _USER_CAST_REGISTRY:
        # Caching is only requested for the half-cast entries.
        try_caching = (cast_fn == utils.maybe_half)
        wrap.cached_cast(mod, fn, cast_fn, handle,
                         try_caching, verbose)
    # Registrations are one-shot: once applied they are dropped.
    _USER_CAST_REGISTRY.clear()

    # 0.5) Force-promote for user-annotated functions
    for mod, fn in _USER_PROMOTE_REGISTRY:
        wrap.promote(mod, fn, handle, verbose)
    _USER_PROMOTE_REGISTRY.clear()

    # 1) Force-{fp16, fp32} on white- / black-list functions
    override_modules = [functional_overrides,
                        torch_overrides,
                        tensor_overrides]
    cast_table = [('FP16_FUNCS', utils.maybe_half),
                  ('FP32_FUNCS', utils.maybe_float)]
    # Every override module is paired with every (list name, cast) entry.
    for module, (list_name, cast_fn) in itertools.product(override_modules,
                                                          cast_table):
        for fn in getattr(module, list_name):
            try_caching = (cast_fn == utils.maybe_half)
            wrap.cached_cast(module.MODULE, fn, cast_fn, handle,
                             try_caching, verbose)

    # 1.5) Pre-0.4, put the blacklist methods on HalfTensor and whitelist
    #      methods on FloatTensor, since they're distinct types.
    if compat.tensor_is_float_tensor():
        for fn in tensor_overrides.FP16_FUNCS:
            wrap.cached_cast(torch.cuda.FloatTensor, fn, utils.maybe_half,
                             handle, try_caching=True, verbose=verbose)
        for fn in tensor_overrides.FP32_FUNCS:
            wrap.cached_cast(torch.cuda.HalfTensor, fn, utils.maybe_float,
                             handle, try_caching=False, verbose=verbose)

    # 2) Enable type-promotion on multi-arg functions and methods.
    #    NB: special handling for sequence fns (e.g. `torch.cat`).
    promote_modules = [torch_overrides, tensor_overrides]
    promote_table = [('CASTS', wrap.promote),
                     ('SEQUENCE_CASTS', wrap.sequence_promote)]
    for promote_mod, (list_name, promote_fn) in itertools.product(promote_modules,
                                                                  promote_table):
        for fn in getattr(promote_mod, list_name):
            promote_fn(promote_mod.MODULE, fn, handle, verbose)

    # 2.5) Pre-0.4, add blacklist methods directly to HalfTensor and FloatTensor types
    if compat.tensor_is_float_tensor():
        for cls, (list_name, promote_fn) in itertools.product([torch.cuda.FloatTensor,
                                                               torch.cuda.HalfTensor],
                                                              promote_table):
            for fn in getattr(tensor_overrides, list_name):
                promote_fn(cls, fn, handle, verbose)

    # 3) For any in-place version of a blacklist function, error if any input is fp16.
    #    NB: this is overly conservative.
    for fn in utils.as_inplace(torch_overrides.FP32_FUNCS):
        wrap.err_if_any_half(torch_overrides.MODULE, fn, handle)

    # 3.5) For any in-place blacklist method, error if called on fp16 tensor
    for fn in utils.as_inplace(tensor_overrides.FP32_FUNCS):
        wrap.err_if_arg0_half(tensor_overrides.MODULE, fn, handle, verbose)
        if compat.tensor_is_float_tensor():
            wrap.err_if_arg0_half(torch.cuda.HalfTensor, fn, handle, verbose)

    # 4) For other in-place methods, match the type of self tensor
    for fn in utils.as_inplace(itertools.chain(
            tensor_overrides.FP16_FUNCS,
            tensor_overrides.CASTS)):
        wrap.promote_match_arg0(tensor_overrides.MODULE, fn, handle, verbose)
        if compat.tensor_is_float_tensor():
            wrap.promote_match_arg0(torch.cuda.HalfTensor, fn, handle, verbose)
            wrap.promote_match_arg0(torch.cuda.FloatTensor, fn, handle, verbose)

    # 5) RNNs + RNN cells are whitelisted specially
    # The two branches below are mutually exclusive (old vs. new RNN backend).
    if rnn_compat.has_old_rnns():
        wrap.rnn_cast(torch.nn.backends.thnn.backend, 'RNN', handle, verbose)
    if not rnn_compat.has_old_rnns():
        # Patch in our own indirection of `_VF` in modules/rnn s.t. it is mutable.
        torch.nn.modules.rnn._VF = rnn_compat.VariableFunctionsShim()
        # Wrap all the rnns
        for x in rnn_compat.RNN_NAMES:
            wrap.new_rnn_cast(x.upper(), handle, verbose)

    # Wrap all the RNN cells
    rnn_compat.whitelist_rnn_cells(handle, verbose)

    # 6) Place error+print message on banned functions.
    #    Or, if allow_banned, then cast to FP32.
    for fn, err_msg in functional_overrides.BANNED_FUNCS:
        if allow_banned:
            # NOTE(review): try_caching=True here differs from the other
            # maybe_float casts above, which pass False -- confirm intent.
            wrap.cached_cast(functional_overrides.MODULE, fn, utils.maybe_float,
                             handle, try_caching=True, verbose=verbose)
        else:
            wrap.err_if_any_half(functional_overrides.MODULE, fn, handle, err_msg)

    # Publish the handle only after all patching has succeeded.
    _DECORATOR_HANDLE = handle

    _amp_state.handle = handle

    return handle


================================================
FILE: apex/apex/amp/compat.py
================================================
import torch

def variable_is_tensor():
    """Return whether a fresh ``torch.autograd.Variable`` is a ``torch.Tensor``.

    True for post-0.4 PyTorch, when Variables and Tensors were merged.
    """
    return isinstance(torch.autograd.Variable(), torch.Tensor)

def tensor_is_variable():
    """Return whether a default ``torch.Tensor()`` has exactly the Variable type."""
    default_type = type(torch.Tensor())
    return default_type == torch.autograd.Variable

def tensor_is_float_tensor():
    """Return whether a default ``torch.Tensor()`` has exactly the FloatTensor type.

    False for post-0.4 PyTorch.
    """
    default_type = type(torch.Tensor())
    return default_type == torch.FloatTensor

def is_tensor_like(x):
    """Akin to ``torch.is_tensor``, but also True for ``Variable`` objects.

    The extra ``Variable`` check matters in pre-0.4 PyTorch.
    """
    if torch.is_tensor(x):
        return True
    return isinstance(x, torch.autograd.Variable)

def is_floating_point(x):
    """Return whether ``x`` is a floating-point tensor.

    Delegates to ``torch.is_floating_point`` when this torch build provides
    it.  Otherwise the suffix of ``x.type()`` is inspected, and objects that
    raise ``AttributeError`` along the way are reported as not floating point.
    """
    if not hasattr(torch, 'is_floating_point'):
        float_suffixes = ('FloatTensor', 'HalfTensor', 'DoubleTensor')
        try:
            return x.type().endswith(float_suffixes)
        except AttributeError:
            return False
    return torch.is_floating_point(x)

def scalar_python_val(x):
    """Extract a Python scalar from ``x``.

    Uses ``x.item()`` when that method exists.  Otherwise the first element
    is returned: of ``x.data`` for a ``Variable``, of ``x`` itself for
    anything else.
    """
    if hasattr(x, 'item'):
        return x.item()
    container = x.data if isinstance(x, torch.autograd.Variable) else x
    return container[0]


================================================
FILE: apex/apex/amp/frontend.py
================================================
import torch
from ._initialize import _initialize
from ._amp_state import _amp_state, warn_or_err, maybe_print


class Properties(object):
    """
    This class has two purposes: to establish a set of default properties,
    and to route setting of these attributes through __setattr__ so that (in theory)
    they can be checked for consistency with other existing args.
    """
    def __init__(self):
        # Stored via the normal object.__setattr__ path, because "options" is
        # not yet in __dict__ when this assignment runs.
        self.options = {
            "enabled" : False,
            "opt_level" : None,
            "cast_model_type" : None,
            "patch_torch_functions" : False,
            "keep_batchnorm_fp32" : None,
            "master_weights" : None,
            "loss_scale" : 1.0,
            # Reserved for future functionality
            # "fused_optimizer" : False,
            # "enable_ddp_interop" : False,
            }

    def _update_options_dict(self, new_options):
        """
        Update several options at a time without routing through the
        __setattr__ checks, to avoid "you can't get there from here" scenarios.
        Currently not intended to be exposed; users are expected to select an
        opt_level and apply consistent modifications.

        Args:
            new_options: A mapping of option name to value, or an iterable of
                ``(name, value)`` pairs.

        Raises:
            ValueError: if a name is not one of the known options.  Options
                that precede the offending entry have already been applied.
        """
        # Accept both a dict and an iterable of pairs.
        pairs = new_options.items() if hasattr(new_options, "items") else new_options
        for k, v in pairs:
            if k in self.options:
                self.options[k] = v
            else:
                raise ValueError("Tried to set unexpected option {}".format(k))

    def __getattr__(self, name):
        """
        The members of "options" are not direct attributes of self, so access
        attempts will roll down to __getattr__.  This borrows from the logic in
        torch.nn.Module.
        """
        if "options" in self.__dict__:
            options =  self.__dict__["options"]
            if name in options:
                return options[name]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, name))

    def __setattr__(self, name, value):
        """
        Validate and store ``value`` under the option ``name``.

        Several options are checked against the currently selected opt_level;
        inconsistencies are reported through ``warn_or_err``.  Once the
        options dict exists, names that are not known options are ignored.
        """
        if "options" in self.__dict__:
            if name in self.options:
                # print("setting {} {}".format(name, value))
                if name == "cast_model_type":
                    if self.opt_level == "O1" and value is not None:
                        if value is not False:
                            if value is not torch.float32:
                                warn_or_err("O1 inserts casts around Torch functions rather than "
                                            "model weights, so with O1, the model weights themselves "
                                            "should remain FP32. If you wish to cast the model to a "
                                            "different type, use opt_level='O2' or 'O3'. " +
                                            "cast_model_type was {}".format(value))
                    self.options[name] = value
                elif name == "patch_torch_functions":
                    if self.opt_level != "O1" and value:
                        warn_or_err("Currently, patch_torch_functions=True should only be set by "
                                    "selecting opt_level='O1'.")
                    self.options[name] = value
                elif name == "keep_batchnorm_fp32":
                    if self.opt_level == "O1" and value is not None:
                        warn_or_err("With opt_level O1, batchnorm functions are automatically patched "
                                    "to run in FP32, so keep_batchnorm_fp32 should be None." +
                                    " keep_batchnorm_fp32 was {}".format(value))
                    # The strings "True"/"False" are normalized to real booleans.
                    if value == "False":
                        self.options[name] = False
                    elif value == "True":
                        self.options[name] = True
                    else:
                        assert (value is True or value is False or value is None),\
                            "keep_batchnorm_fp32 must be a boolean, the string 'True' or 'False', "\
                            "or None, found keep_batchnorm_fp32={}".format(value)
                        self.options[name] = value
                elif name == "master_weights":
                    if self.opt_level == "O1" and value is not None:
                        warn_or_err("It doesn't make sense to use master_weights with O1. "
                                    "With O1, your model weights themselves should be FP32.")
                    self.options[name] = value
                elif name == "loss_scale":
                    # Anything other than the literal "dynamic" is coerced to float.
                    if value == "dynamic":
                        self.options[name] = value
                    else:
                        self.options[name] = float(value)
                else:
                    self.options[name] = value
        else:
            super(Properties, self).__setattr__(name, value)


""" O0-O3 are convenience wrappers to establish defaults for typically used mixed precision options. """

class O3:
    """Callable preset that writes the "O3" defaults onto a properties object."""
    brief = "O3:  Pure FP16 training."
    more = (
        "Calls .half() on your model, converting the entire model to FP16.\n"
        "A casting operation is also inserted to cast incoming Tensors to FP16,\n"
        "so you don't need to change your data pipeline.\n"
        "This mode is useful for establishing a performance ceiling.\n"
        "It's also possible training may 'just work' in this mode.\n"
        "If not, try other optimization levels."
    )

    def __call__(self, properties):
        """Assign the O3 defaults to ``properties`` in order and return it."""
        # opt_level is set before the options that follow it.
        defaults = (
            ("enabled", True),
            ("opt_level", "O3"),
            ("cast_model_type", torch.float16),
            ("patch_torch_functions", False),
            ("keep_batchnorm_fp32", False),
            ("master_weights", False),
            ("loss_scale", 1.0),
            # Reserved: ("fused_optimizer", False), ("enable_ddp_interop", False)
        )
        for option, default in defaults:
            setattr(properties, option, default)
        # Modified in place, so returning it is only a convenience.
        return properties


class O2:
    """Callable preset that writes the "O2" defaults onto a properties object."""
    brief = "O2:  FP16 training with FP32 batchnorm and FP32 master weights.\n"
    more = (
        "Calls .half() on your model, converting the entire model (except for batchnorms)\n"
        "to FP16.  Batchnorms are retained in FP32 for additional stability.\n"
        "The forward pass is patched to cast incoming Tensors to FP16, so you don't need to change\n"
        "your data pipeline.\n"
        "O2 creates FP32 master weights outside the model and patches any optimizers to update\n"
        "these master weights, then copy the master weights into the FP16 model weights.\n"
        "Master weights can also improve convergence and stability."
    )

    def __call__(self, properties):
        """Assign the O2 defaults to ``properties`` in order and return it."""
        # opt_level is set before the options that follow it.
        defaults = (
            ("enabled", True),
            ("opt_level", "O2"),
            ("cast_model_type", torch.float16),
            ("patch_torch_functions", False),
            ("keep_batchnorm_fp32", True),
            ("master_weights", True),
            ("loss_scale", "dynamic"),
            # Reserved: ("fused_optimizer", False), ("enable_ddp_interop", False)
        )
        for option, default in defaults:
            setattr(properties, option, default)
        # Modified in place, so returning it is only a convenience.
        return properties


class O1:
    """Callable preset that writes the "O1" defaults onto a properties object."""
    brief = "O1:  Insert automatic casts around Pytorch functions and Tensor methods.\n"
    more = (
        "The type of your model's weights is not altered.  However, internally,\n"
        "Pytorch functions are patched to cast any Tensor Core-friendly ops to FP16 for speed,\n"
        "while operations that might benefit from the additional stability of FP32 are patched\n"
        "to cast their inputs to fp32.\n"
        "O1 is the safest way to try mixed precision training, and is recommended when\n"
        "trying mixed precision training for the first time."
    )

    def __call__(self, properties):
        """Assign the O1 defaults to ``properties`` in order and return it."""
        # opt_level is set before the options that follow it.
        defaults = (
            ("enabled", True),
            ("opt_level", "O1"),
            ("cast_model_type", None),
            ("patch_torch_functions", True),
            ("keep_batchnorm_fp32", None),
            ("master_weights", None),
            ("loss_scale", "dynamic"),
            # Reserved: ("fused_optimizer", False), ("enable_ddp_interop", False)
        )
        for option, default in defaults:
            setattr(properties, option, default)
        # Modified in place, so returning it is only a convenience.
        return properties


class O0:
    """Callable preset that writes the "O0" defaults onto a properties object."""
    brief = "O0:  Pure FP32 training.\n"
    more = (
        "Your models are checked to make sure parameters are FP32, but otherwise the\n"
        "types of weights and internal Pytorch operations are not altered.  This mode disables any\n"
        "FP16 arithmetic, although other optimizations like DDP interop may still be requested.\n"
    )

    def __call__(self, properties):
        """Assign the O0 defaults to ``properties`` in order and return it."""
        # opt_level is set before the options that follow it.
        defaults = (
            ("enabled", True),
            ("opt_level", "O0"),
            ("cast_model_type", torch.float32),
            ("patch_torch_functions", False),
            ("keep_batchnorm_fp32", None),
            ("master_weights", False),
            ("loss_scale", 1.0),
            # Reserved: ("fused_optimizer", False), ("enable_ddp_interop", False)
        )
        for option, default in defaults:
            setattr(properties, option, default)
        # Modified in place, so returning it is only a convenience.
        return properties


# Maps each opt_level string to an instance of its preset; calling the
# instance with a properties object applies that level's defaults.
opt_levels = {
    level_name: preset_cls()
    for level_name, preset_cls in (("O3", O3), ("O2", O2), ("O1", O1), ("O0", O0))
}


# allow user to directly pass Properties struct as well?
def initialize(
    models,
    optimizers=None,
    enabled=True,
    opt_level="O1",
    cast_model_type=None,
    patch_torch_functions=None,
    keep_batchnorm_fp32=None,
    master_weights=None,
    loss_scale=None,
    cast_model_outputs=None,
    num_losses=1,
    verbosity=1,
    min_loss_scale=None,
    max_loss_scale=2.**24
    ):
    """
    Initialize your models, optimizers, and the Torch tensor and functional namespace according to the
    chosen ``opt_level`` and overridden properties, if any.

    ``amp.initialize`` should be called **after** you have finished
    constructing your model(s) and
    optimizer(s), but **before** you send your model through any DistributedDataParallel wrapper.
    See `Distributed training`_ in the Imagenet example.

    Currently, ``amp.initialize`` should only be called **once**,
    although it can process an arbitrary number of
    models and optimizers (see the corresponding `Advanced Amp Usage topic`_).
    If you think your use case requires ``amp.initialize`` to be called more than once,
    `let us know`_.

    Any property keyword argument that is not ``None`` will be interpreted as a manual override.

    To prevent having to rewrite anything else in your script, name the returned models/optimizers
    to replace the passed models/optimizers, as in the code sample below.

    Args:
        models (torch.nn.Module or list of torch.nn.Modules):  Models to modify/cast.
        optimizers (optional, torch.optim.Optimizer or list of torch.optim.Optimizers):  Optimizers to modify/cast.
            REQUIRED for training, optional for inference.
        enabled (bool, optional, default=True):  If False, renders all Amp calls no-ops, so your script
            should run as if Amp were not present.
        opt_level (str, optional, default="O1"):  Pure or mixed precision optimization level.  Accepted values are
            "O0", "O1", "O2", and "O3", explained in detail above.
        cast_model_type (``torch.dtype``, optional, default=None):  Optional property override, see
            above.
        patch_torch_functions (bool, optional, default=None):  Optional property override.
        keep_batchnorm_fp32 (bool or str, optional, default=None):  Optional property override.  If
            passed as a string, must be the string "True" or "False".
        master_weights (bool, optional, default=None):  Optional property override.
        loss_scale (float or str, optional, default=None):  Optional property override.  If passed as a string,
            must be a string representing a number, e.g., "128.0", or the string "dynamic".
        cast_model_outputs (torch.dtype, optional, default=None):  Option to ensure that the outputs
            of your model(s) are always cast to a particular type regardless of ``opt_level``.
        num_losses (int, optional, default=1):  Option to tell Amp in advance how many losses/backward
            passes you plan to use.  When used in conjunction with the ``loss_id`` argument to
            ``amp.scale_loss``, enables Amp to use a different loss scale per loss/backward pass,
            which can improve stability.  See "Multiple models/optimizers/losses"
            under `Advanced Amp Usage`_ for examples.  If ``num_losses`` is left to 1, Amp will still
            support multiple losses/backward passes, but use a single global loss scale
            for all of them.
        verbosity (int, default=1):  Set to 0 to suppress Amp-related output.
        min_loss_scale (float, default=None):  Sets a floor for the loss scale values that can be chosen by dynamic
            loss scaling.  The default value of None means that no floor is imposed.
            If dynamic loss scaling is not used, `min_loss_scale` is ignored.
        max_loss_scale (float, default=2.**24):  Sets a ceiling for the loss scale values that can be chosen by
            dynamic loss scaling.  If dynamic loss scaling is not used, `max_loss_scale` is ignored.

    Returns:
        Model(s) and optimizer(s) modified according to the ``opt_level``.
        If either the ``models`` or ``optimizers`` args were lists, the corresponding return value will
        also be a list.

    Raises:
        RuntimeError: if ``torch.backends.cudnn.enabled`` is False, or if ``opt_level`` is not one of
            the known optimization levels.

    Permissible invocations::

        model, optim = amp.initialize(model, optim,...)
        model, [optim1, optim2] = amp.initialize(model, [optim1, optim2],...)
        [model1, model2], optim = amp.initialize([model1, model2], optim,...)
        [model1, model2], [optim1, optim2] = amp.initialize([model1, model2], [optim1, optim2],...)

        # This is not an exhaustive list of the cross product of options that are possible,
        # just a set of examples.
        model, optim = amp.initialize(model, optim, opt_level="O0")
        model, optim = amp.initialize(model, optim, opt_level="O0", loss_scale="dynamic"|128.0|"128.0")

        model, optim = amp.initialize(model, optim, opt_level="O1") # uses loss_scale="dynamic" default
        model, optim = amp.initialize(model, optim, opt_level="O1", loss_scale=128.0|"128.0")

        model, optim = amp.initialize(model, optim, opt_level="O2") # uses loss_scale="dynamic" default
        model, optim = amp.initialize(model, optim, opt_level="O2", loss_scale=128.0|"128.0")
        model, optim = amp.initialize(model, optim, opt_level="O2", keep_batchnorm_fp32=True|False|"True"|"False")

        model, optim = amp.initialize(model, optim, opt_level="O3") # uses loss_scale=1.0 default
        model, optim = amp.initialize(model, optim, opt_level="O3", loss_scale="dynamic"|128.0|"128.0")
        model, optim = amp.initialize(model, optim, opt_level="O3", keep_batchnorm_fp32=True|False|"True"|"False")

    The `Imagenet example`_ demonstrates live use of various opt_levels and overrides.

    .. _`Distributed training`:
        https://github.com/NVIDIA/apex/tree/master/examples/imagenet#distributed-training

    .. _`Imagenet example`:
        https://github.com/NVIDIA/apex/tree/master/examples/imagenet

    .. _`Advanced Amp Usage`:
        https://nvidia.github.io/apex/advanced.html

    .. _`Advanced Amp Usage topic`:
        https://nvidia.github.io/apex/advanced.html#multiple-models-optimizers-losses

    .. _`let us know`:
        https://github.com/NVIDIA/apex/issues
    """
    # Global state is reset even when Amp is disabled, so later amp.* calls
    # find a Properties object whose "enabled" option is False.
    _amp_state.opt_properties = Properties()
    _amp_state.verbosity = verbosity

    if not enabled:
        # Hand the inputs back untouched, mirroring the shape of the call.
        return models if optimizers is None else (models, optimizers)

    if not torch.backends.cudnn.enabled:
        raise RuntimeError(
            "Amp requires torch.backends.cudnn.enabled = True")

    if opt_level not in opt_levels:
        raise RuntimeError(
            "Unexpected optimization level {}. ".format(opt_level) +
            "Options are 'O0', 'O1', 'O2', 'O3'.  Note that in `O0`, `O1`, etc., the prefix O is the letter O, " +
            "not the number zero.")

    # Apply the defaults of the selected level, then report them.
    preset = opt_levels[opt_level]
    _amp_state.opt_properties = preset(_amp_state.opt_properties)
    maybe_print("Selected optimization level {}".format(opt_levels[opt_level].brief), True)
    maybe_print("Defaults for this optimization level are:", True)
    for option, current in _amp_state.opt_properties.options.items():
        maybe_print("{:22} : {}".format(option, current), True)

    _amp_state.min_loss_scale = min_loss_scale
    _amp_state.max_loss_scale = max_loss_scale

    maybe_print("Processing user overrides (additional kwargs that are not None)...", True)
    # The keyword arguments are listed explicitly in the signature rather
    # than collected via **kwargs, so the override table is spelled out here.
    # Order is significant: opt_level is applied before the options after it.
    user_overrides = (
        ("enabled", enabled),
        ("opt_level", opt_level),
        ("cast_model_type", cast_model_type),
        ("patch_torch_functions", patch_torch_functions),
        ("keep_batchnorm_fp32", keep_batchnorm_fp32),
        ("master_weights", master_weights),
        ("loss_scale", loss_scale),
    )
    for option, override in user_overrides:
        if override is not None:
            setattr(_amp_state.opt_properties, option, override)

    maybe_print("After processing overrides, optimization options are:", True)
    for option, current in _amp_state.opt_properties.options.items():
        maybe_print("{:22} : {}".format(option, current), True)

    return _initialize(models, optimizers, _amp_state.opt_properties, num_losses, cast_model_outputs)


# TODO:  is this necessary/useful?
# def check_option_consistency(enabled=True,
#                              opt_level=None,
#                              cast_model_type=None,
#                              patch_torch_functions=None,
#                              keep_batchnorm_fp32=None,
#                              master_weights=None,
#                              loss_scale=None,
#                              enable_ddp_interop=None,
#                              hard_override=False):
#     """
#     Utility function that enables users to quickly check if the option combination they intend
#     to use is permitted.  ``check_option_consistency`` does not require models or optimizers
#     to be constructed, and can be called at any point in the script.  ``check_option_consistency``
#     is totally self-contained; it does not set any amp global state or affect anything outside
#     of itself.
#     """
#
#     if not enabled:
#         return
#
#     if opt_level not in opt_levels:
#         raise RuntimeError("Unexpected optimization level.  Options are 'O0', 'O1', 'O2', 'O3'.")
#     else:
#         opt_properties = opt_levels[opt_level](Properties())
#         print("Selected optimization level {}", opt_levels[opt_level].brief)
#         print("Defaults for this optimization level are:")
#         for k, v in opt_properties.options:
#             print("{:22} : {}".format(k, v))
#
#     print("Processing user overrides (additional kwargs that are not None)...")
#     for k, v in kwargs:
#         if k not in _amp_state.opt_properties.options:
#             raise RuntimeError("Unexpected kwarg {}".format(k))
#         if v is not None:
#             setattr(opt_properties, k, v)
#
#     print("After processing overrides, optimization options are:")
#     for k, v in opt_properties.options:
#         print("{:22} : {}".format(k, v))


================================================
FILE: apex/apex/amp/handle.py
================================================
import contextlib
import warnings
import torch

from . import utils
from .opt import OptimWrapper
from .scaler import LossScaler
from ._amp_state import _amp_state, master_params, maybe_print
from ..fp16_utils import FP16_Optimizer as FP16_Optimizer_general
from ..optimizers import FP16_Optimizer as FP16_Optimizer_for_fused
from ..parallel.LARC import LARC


# There's no reason to expose the notion of a "handle". Everything can happen through amp.* calls.
@contextlib.contextmanager
def scale_loss(loss,
               optimizers,
               loss_id=0,
               model=None,
               delay_unscale=False,
               delay_overflow_check=False):
    """
    On context manager entrance, creates ``scaled_loss = (loss.float())*current loss scale``.
    ``scaled_loss`` is yielded so that the user can call ``scaled_loss.backward()``::

        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()

    On context manager exit (if ``delay_unscale=False``), the gradients are checked for infs/NaNs
    and unscaled, so that ``optimizer.step()`` can be called.

    .. note::
        If Amp is using explicit FP32 master params (which is the default for ``opt_level=O2``, and
        can also be manually enabled by supplying ``master_weights=True`` to ``amp.initialize``)
        any FP16 gradients are copied to FP32 master gradients before being unscaled.
        ``optimizer.step()`` will then apply the unscaled master gradients to the master params.

    .. warning::
        If Amp is using explicit FP32 master params, only the FP32 master gradients will be
        unscaled.  The direct ``.grad`` attributes of any FP16
        model params will remain scaled after context manager exit.
        This subtlety affects gradient clipping.  See "Gradient clipping" under
        `Advanced Amp Usage`_ for best practices.

    Args:
        loss(Tensor):  Typically a scalar Tensor. The ``scaled_loss`` that the context
            manager yields is simply ``loss.float()*loss_scale``, so in principle
            ``loss`` could have more than one element, as long as you call
            ``backward()`` on ``scaled_loss`` appropriately within the context manager body.
        optimizers:  All optimizer(s) for which the current backward pass is creating gradients.
            Must be an optimizer or list of optimizers returned from an earlier call
            to ``amp.initialize``.  For example use with multiple optimizers, see
            "Multiple models/optimizers/losses" under `Advanced Amp Usage`_.
        loss_id(int, optional, default=0):  When used in conjunction with the ``num_losses`` argument
            to ``amp.initialize``, enables Amp to use a different loss scale per loss.  ``loss_id``
            must be an integer between 0 and ``num_losses`` that tells Amp which loss is
            being used for the current backward pass.  See "Multiple models/optimizers/losses"
            under `Advanced Amp Usage`_ for examples.  If ``loss_id`` is left unspecified, Amp
            will use the default global loss scaler for this backward pass.
        model(torch.nn.Module, optional, default=None):  Currently unused, reserved to enable future
            optimizations.
        delay_unscale(bool, optional, default=False):  ``delay_unscale`` is never necessary, and
            the default value of ``False`` is strongly recommended.
            If ``True``, Amp will not unscale the gradients or perform model->master
            gradient copies on context manager exit.
            ``delay_unscale=True`` is a minor ninja performance optimization and can result
            in weird gotchas (especially with multiple models/optimizers/losses),
            so only use it if you know what you're doing.
            "Gradient accumulation across iterations" under `Advanced Amp Usage`_
            illustrates a situation where this CAN (but does not need to) be used.
        delay_overflow_check(bool, optional, default=False):  If ``True``, the loss scaler's
            ``update_scale()`` is not called on context manager exit and the optimizer step is
            never patched to be skipped by this invocation.

    Raises:
        RuntimeError: if ``amp.initialize`` has not been called yet, i.e. ``_amp_state`` has no
            ``opt_properties`` attribute.

    .. warning::
        If ``delay_unscale`` is ``True`` for a given backward pass, ``optimizer.step()`` cannot be
        called yet after context manager exit, and must wait for another, later backward context
        manager invocation with ``delay_unscale`` left to False.

    .. _`Advanced Amp Usage`:
        https://nvidia.github.io/apex/advanced.html
    """
    if not hasattr(_amp_state, "opt_properties"):
        raise RuntimeError("Invoked 'with amp.scale_loss`, but internal Amp state has not been initialized.  "
                           "model, optimizer = amp.initialize(model, optimizer, opt_level=...) must be called "
                           "before `with amp.scale_loss`.")

    # Amp disabled: behave as a transparent pass-through.
    if not _amp_state.opt_properties.enabled:
        yield loss
        return

    # Normalize a single optimizer into a one-element list.
    if isinstance(optimizers, torch.optim.Optimizer) or isinstance(optimizers, LARC):
        optimizers = [optimizers]

    # this is what happens when i have to support tools from different sources under the same API...
    # TODO:  Rewrite FusedAdam to use multi-tensor apply and the same loss scaler.
    if isinstance(optimizers, FP16_Optimizer_for_fused):
        loss_scale = optimizers.cur_scale
    else:
        loss_scaler = _amp_state.loss_scalers[loss_id]
        loss_scale = loss_scaler.loss_scale()

    # NOTE(review): `loss_scaler` is only bound in the `else` branch above.
    # In the FP16_Optimizer_for_fused branch the check below is safe only
    # when `master_weights` is truthy (short-circuit); otherwise reading
    # `loss_scaler.dynamic` would fail on an unbound local -- confirm.
    if ((not _amp_state.opt_properties.master_weights)
        and (not loss_scaler.dynamic)
        and loss_scale == 1.0):
        # Fast path: nothing to scale or unscale, so just yield the fp32 loss.
        yield loss.float()
        # Needing to drop the cache here as well is an ugly gotcha.
        # But for now I think it's necessary to short-circuit.
        # Probably ok to skip this if not delay_unscale
        if _amp_state.opt_properties.patch_torch_functions:
            _amp_state.handle._clear_cache()
        return

    if not delay_unscale:
        if isinstance(optimizers, list):
            for optimizer in optimizers:
                # Skip preparation if an earlier delayed pass left scaled grads.
                if not optimizer._amp_stash.params_have_scaled_gradients:
                    optimizer._prepare_amp_backward()

    yield (loss.float())*loss_scale

    # Everything below runs on context manager exit, after the user's backward().
    if delay_unscale:
        # NOTE(review): this iterates `optimizers` directly, which presumably
        # assumes the list form rather than FP16_Optimizer_for_fused -- confirm.
        for optimizer in optimizers:
            optimizer._amp_stash.params_have_scaled_gradients = True
    else:
        # FusedAdam and FusedSGD will take care of unscaling as part of their step() methods.
        if not isinstance(optimizers, FP16_Optimizer_for_fused):
            loss_scaler.clear_overflow_state()
            for optimizer in optimizers:
                optimizer._post_amp_backward(loss_scaler)
                optimizer._amp_stash.params_have_scaled_gradients = False
            # For future fused optimizers that enable sync-free dynamic loss scaling,
            # should_skip will always be False.
            should_skip = False if delay_overflow_check else loss_scaler.update_scale()
            if should_skip:
                for optimizer in optimizers:
                    if not optimizer._amp_stash.already_patched:
                        # Close on loss_scaler and loss_id as well, to be safe.  Probably not
                        # necessary because amp.scale_loss is already creating a temporary scope.
                        def patch_step(opt, loss_scaler, loss_id):
                            opt_step = opt.step
                            # One-shot replacement for opt.step(): it reports the
                            # overflow, drops master grads, then restores the real step.
                            def skip_step(closure=None):
                                if closure is not None:
                                    raise RuntimeError("Currently, Amp does not support closure use with optimizers.")
                                maybe_print(("Gradient overflow.  Skipping step, loss scaler " +
                                             "{} reducing loss scale to {}").format(loss_id,
                                             loss_scaler.loss_scale()))
                                if hasattr(opt._amp_stash, "all_fp32_from_fp16_params"):
                                    # Clear the master grads that wouldn't be zeroed by model.zero_grad()
                                    for param in opt._amp_stash.all_fp32_from_fp16_params:
                                        param.grad = None
                                opt.step = opt_step
                                opt._amp_stash.already_patched = False
                            return skip_step
                        optimizer.step = patch_step(optimizer, loss_scaler, loss_id)
                        optimizer._amp_stash.already_patched = True

    # Probably ok to skip this if not delay_unscale
    if _amp_state.opt_properties.patch_torch_functions:
        _amp_state.handle._clear_cache()


# Free function version of AmpHandle.disable_casts, another step on the
# path to removing the concept of "AmpHandle"
@contextlib.contextmanager
def disable_casts():
    """Temporarily switch off the global Amp handle for the enclosed block.

    Sets ``_amp_state.handle._is_active`` to False on entry and to True on
    exit.  The flag is restored in a ``finally`` clause so that an exception
    raised inside the ``with`` body cannot leave the handle deactivated.
    """
    _amp_state.handle._is_active = False
    try:
        yield
    finally:
        _amp_state.handle._is_active = True


class AmpHandle(object):
    """State holder for the (legacy) Amp handle API.

    Keeps the active flag consulted by the casting wrappers, an optional cast
    cache, and the list of original functions saved by ``_save_func`` so they
    can be put back by ``_deactivate``.
    """

    def __init__(self, loss_scale="dynamic", enable_caching=True, verbose=False):
        """Create a handle.

        Args:
            loss_scale: forwarded to ``LossScaler`` for the default scaler.
            enable_caching: value reported by the ``has_cache`` property.
            verbose: value reported by the ``verbose`` property.
        """
        self._enable_caching = enable_caching
        self._verbose = verbose
        self._cache = dict()
        self._default_scaler = LossScaler(loss_scale)
        self._is_active = True
        # (mod, fn, func) triples recorded by _save_func.
        self._all_wrappers = []

    def is_active(self):
        """Return the current value of the active flag."""
        return self._is_active

    @contextlib.contextmanager
    def _disable_casts(self):
        """Deactivate the handle for the duration of the ``with`` block.

        The flag is set back to True in a ``finally`` clause so an exception
        in the body does not leave the handle deactivated.
        """
        self._is_active = False
        try:
            yield
        finally:
            self._is_active = True

    def wrap_optimizer(self, optimizer, num_loss=1):
        """Return an ``OptimWrapper`` around ``optimizer``.

        Also drops this handle's default scaler (sets it to None).
        """
        self._default_scaler = None
        return OptimWrapper(optimizer, self, num_loss)

    @contextlib.contextmanager
    def scale_loss(self, loss, optimizer):
        """Legacy entry point; always raises.

        Raises:
            RuntimeError: on entering the context, pointing the caller at the
                new Amp API.
        """
        raise RuntimeError("The old Amp API is no longer supported.  Please move to the new API, "
            "documented here:  https://nvidia.github.io/apex/amp.html.  Transition guide:  "
            "https://nvidia.github.io/apex/amp.html#transition-guide-for-old-api-users")
        # Never reached.  The `yield` only keeps this function a generator, so
        # the error above is raised when the `with` block is entered rather
        # than when scale_loss() is called.
        yield loss

    def _clear_cache(self):
        """Empty the cast cache."""
        self._cache.clear()

    # Experimental support for saving / restoring uncasted versions of functions
    def _save_func(self, mod, fn, func):
        """Remember that attribute ``fn`` of ``mod`` was originally ``func``."""
        self._all_wrappers.append((mod, fn, func))

    def _deactivate(self):
        """Reinstall every function recorded by ``_save_func`` and forget them."""
        for mod, fn, func in self._all_wrappers:
            utils.set_func(mod, fn, func)
        self._all_wrappers = []

    @property
    def has_cache(self):
        """Whether caching was enabled at construction."""
        return self._enable_caching

    @property
    def cache(self):
        """The cast cache dictionary."""
        return self._cache

    def remove_cache(self, param):
        """Drop ``param`` from the cache if caching is enabled and it is present."""
        if self.has_cache and param in self.cache:
            del self.cache[param]

    @property
    def verbose(self):
        """The verbosity flag given at construction."""
        return self._verbose

class NoOpHandle(object):
    """Inert handle: never active, has no cache, and passes losses through untouched."""

    # -- state queries -----------------------------------------------------

    def is_active(self):
        """This handle is never active."""
        return False

    @property
    def has_cache(self):
        """There is no cache on this handle."""
        return False

    @property
    def verbose(self):
        """This handle is never verbose."""
        return False

    # -- context managers --------------------------------------------------

    @contextlib.contextmanager
    def _disable_casts(self):
        """Nothing to disable; just run the enclosed block."""
        yield

    @contextlib.contextmanager
    def scale_loss(self, loss, optimizer):
        """Hand back ``loss`` unchanged; ``optimizer`` is not used."""
        yield loss

    # -- operations --------------------------------------------------------

    def wrap_optimizer(self, optimizer, num_loss=1):
        """Wrap ``optimizer`` in an ``OptimWrapper`` bound to this handle."""
        return OptimWrapper(optimizer, self, num_loss)

    def _clear_cache(self):
        """No cache, so nothing to clear."""
        return None

    def _deactivate(self):
        """Nothing was patched, so nothing to restore."""
        return None


================================================
FILE: apex/apex/amp/lists/__init__.py
================================================


================================================
FILE: apex/apex/amp/lists/functional_overrides.py
================================================

# TODO: think about the following two. They do weird things.
# - torch.nn.utils.clip_grad (but it should always be fp32 anyway)
# - torch.nn.utils.weight_norm

# Notes:
# F.instance_norm uses batch_norm internally. Which correctly handles
#   fp16 in/out with fp32 weights. So we shouldn't do anything for
#   either of these.
# F.normalize calls `input.norm()` internally, so it's redundant, but
#   kept here in case impl. changes.
# F.cosine_similarity is same: calls `x.norm()` internally.

import torch.nn.functional

# The module whose attributes are named by the lists below.
MODULE = torch.nn.functional

# Names of functions on MODULE filed under "fp16".
# NOTE(review): presumably these are wrapped so their floating-point inputs
# are cast to half precision; the consumer of this list is not in this file
# -- confirm against the wrapping code.
FP16_FUNCS = [
    'conv1d',
    'conv2d',
    'conv3d',
    'conv_transpose1d',
    'conv_transpose2d',
    'conv_transpose3d',
    'conv_tbc', # Undocumented / maybe new?
    'linear',
]

# Names of functions on MODULE filed under "fp32", grouped by kind.
# NOTE(review): presumably the counterpart of FP16_FUNCS (inputs cast to
# single precision) -- confirm against the wrapping code.
FP32_FUNCS = [

    # Interpolation/Upsampling
    'interpolate',

    # Pointwise
    'softplus',
    'softmin',
    'log_softmax',
    'softmax',

    # Normalization
    'layer_norm',
    'group_norm',
    'local_response_norm',
    'normalize',
    'cosine_similarity',

    # Loss functions
    # TODO: which of these can be fp16?
    'poisson_nll_loss',
    'cosine_embedding_loss',
    'cross_entropy',
    'hinge_embedding_loss',
    'kl_div',
    'l1_loss',
    'mse_loss',
    'margin_ranking_loss',
    'multilabel_margin_loss',
    'multilabel_soft_margin_loss',
    'multi_margin_loss',
    'nll_loss',
    'binary_cross_entropy_with_logits',
    'smooth_l1_loss',
    'soft_margin_loss',
    'triplet_margin_loss'
]

# Each entry is a (function name, message) pair.  The message text explains
# why the function is problematic under amp and what to use instead.
BANNED_FUNCS = [
    ('binary_cross_entropy',
     ("\namp does not work out-of-the-box with `F.binary_cross_entropy` or `torch.nn.BCELoss.` "
      "It requires that the output of the previous function be already a FloatTensor. \n\n"
      "Most models have a Sigmoid right before BCELoss. In that case, you can use\n"
      "    torch.nn.BCEWithLogitsLoss\nto combine Sigmoid+BCELoss into a single layer "
      "that is compatible with amp.\nAnother option is to add\n"
      "    amp.register_float_function(torch, 'sigmoid')\nbefore calling `amp.init()`.\n"
      "If you _really_ know what you are doing, you can disable this warning by passing "
      "allow_banned=True to `amp.init()`."))
]


================================================
FILE: apex/apex/amp/lists/tensor_overrides.py
================================================
from .. import compat
from . import torch_overrides

import importlib

import torch

# Pick the class whose methods are named by the lists below.  The choice
# depends on two probes from `compat`.
# NOTE(review): presumably this distinguishes PyTorch versions where Tensor
# and Variable are merged from older ones -- confirm in compat.py.
if compat.variable_is_tensor() and not compat.tensor_is_variable():
    MODULE = torch.Tensor
else:
    MODULE = torch.autograd.Variable


# Method names on MODULE filed under "fp16".
FP16_FUNCS = [
    '__matmul__',
]

# Method names on MODULE filed under "fp32".
FP32_FUNCS = [
    '__ipow__',
    '__pow__',
    '__rpow__',

    # Cast to fp32 before transfer to CPU
    'cpu',
]

# Binary arithmetic / comparison dunder methods (including in-place and
# reflected variants).
# NOTE(review): by analogy with torch_overrides.CASTS these presumably need
# type promotion across their operands -- confirm against the wrapping code.
CASTS = [
    '__add__',
    '__div__',
    '__eq__',
    '__ge__',
    '__gt__',
    '__iadd__',
    '__idiv__',
    '__imul__',
    '__isub__',
    '__itruediv__',
    '__le__',
    '__lt__',
    '__mul__',
    '__ne__',
    '__radd__',
    '__rdiv__',
    '__rmul__',
    '__rsub__',
    '__rtruediv__',
    '__sub__',
    '__truediv__',
]

# None of these, but here to make code cleaner.
SEQUENCE_CASTS = []

# We need to grab all the methods from torch_overrides and add them to
# the Tensor lists as well, as almost all methods are duplicated
# between `torch` and `torch.Tensor` (and check with `hasattr`,
# because a few random ones aren't defined on Tensor)
# Look this module up by name so its list attributes can be fetched with getattr.
_self_mod = importlib.import_module(__name__)
for attrname in ['FP16_FUNCS', 'FP32_FUNCS', 'CASTS', 'SEQUENCE_CASTS']:
    # `lst` is this module's own list object, so append() below extends it in place.
    lst = getattr(_self_mod, attrname)
    for fn in getattr(torch_overrides, attrname):
        if hasattr(MODULE, fn):
            lst.append(fn)


================================================
FILE: apex/apex/amp/lists/torch_overrides.py
================================================
import torch

from .. import utils

# The module whose attributes are named by the lists below.
MODULE = torch

# Names of `torch` functions filed under "fp16".
# NOTE(review): presumably wrapped so floating-point inputs are cast to half
# precision; the consumer of this list is not in this file -- confirm.
FP16_FUNCS = [
    # Low level functions wrapped by torch.nn layers.
    # The wrapper layers contain the weights which are then passed in as a parameter
    # to these functions.
    'conv1d',
    'conv2d',
    'conv3d',
    'conv_transpose1d',
    'conv_transpose2d',
    'conv_transpose3d',
    'conv_tbc',
    'prelu',

    # BLAS
    'addmm',
    'addmv',
    'addr',
    'matmul',
    'mm',
    'mv',
]

# Names of `torch` functions filed under "fp32", grouped by kind.
FP32_FUNCS = [
    # Pointwise
    'acos',
    'asin',
    'cosh',
    'erfinv',
    'exp',
    'expm1',
    'log',
    'log10',
    'log2',
    'reciprocal',
    'rsqrt',
    'sinh',
    'tan',

    # Other math
    'pow',

    # Reduction
    'cumprod',
    'cumsum',
    'dist',
    'mean',
    'norm',
    'prod',
    'std',
    'sum',
    'var',

    # Misc
    'renorm'
]

# Before CUDA 9.1, batched matmul was missing fast FP16 kernels. We
# check the CUDA version -- if at least 9.1, then put the bmm
# functions on the fp16 list. Otherwise, put them on the fp32 list.
_bmms = ['addbmm',
         'baddbmm',
         'bmm']
# Tuple comparison against the parsed CUDA version; runs once at import time.
if utils.get_cuda_version() >= (9, 1, 0):
    FP16_FUNCS.extend(_bmms)
else:
    FP32_FUNCS.extend(_bmms)

# Multi-tensor fns that may need type promotion
CASTS = [
    # Multi-tensor math
    'addcdiv',
    'addcmul',
    'atan2',
    'cross',
    'bilinear',

    # Element-wise _or_ tensor-wise math
    'add',
    'div',
    'mul',

    # Comparison
    'eq',
    'equal',
    'ge',
    'gt',
    'le',
    'lt',
    'ne'
]

# Functions that take sequence arguments. We need to inspect the whole
# sequence and cast to the widest type.
SEQUENCE_CASTS = [
    'cat',
    'stack'
]


================================================
FILE: apex/apex/amp/opt.py
================================================
import contextlib
import warnings

from .scaler import LossScaler, master_params
from ._amp_state import maybe_print

import numpy as np

class OptimWrapper(object):
    """Optimizer wrapper that scales losses and skips steps after overflow.

    Holds one dynamic ``LossScaler`` per loss.  ``scale_loss`` is expected to
    be entered once per loss between calls to ``step``; ``step`` then either
    forwards to the wrapped optimizer or skips the update if any of the
    scalers reported an overflow.  Attribute lookups that are not defined here
    are forwarded to the wrapped optimizer.
    """

    def __init__(self, optimizer, amp_handle, num_loss):
        """
        Args:
            optimizer: the optimizer being wrapped.
            amp_handle: handle queried for ``is_active()`` and used to evict
                cached casts in ``step``.
            num_loss: number of losses (and therefore loss scalers) per step.
        """
        self._optimizer = optimizer
        self._amp_handle = amp_handle
        self._num_loss = num_loss
        # Index of the loss the next scale_loss() call refers to.
        self._loss_idx = 0
        self._skip_next = [False] * num_loss
        self._loss_scaler = [LossScaler('dynamic') for _ in range(num_loss)]

    @contextlib.contextmanager
    def scale_loss(self, loss):
        """Yield ``loss`` multiplied by the current loss scale.

        After the ``with`` body finishes, the gradients of the optimizer's
        master params are unscaled, the scaler is updated, and the overflow
        verdict for this loss is recorded for ``step``.  If the handle is
        inactive, ``loss`` is yielded unchanged and nothing else happens.
        """
        if not self._amp_handle.is_active():
            yield loss
            return

        # When there are multiple losses per-optimizer, we need
        # to save out current grad accumulation, since we won't be
        # able to unscale this particular loss once the grads are
        # all mixed together.
        cached_grads = []
        if self._loss_idx > 0:
            for p in master_params(self._optimizer):
                if p.grad is not None:
                    cached_grads.append(p.grad.data.detach().clone())
                else:
                    cached_grads.append(None)
            self._optimizer.zero_grad()

        loss_scale = self._cur_loss_scaler().loss_scale()
        yield loss * loss_scale

        self._cur_loss_scaler().clear_overflow_state()
        self._cur_loss_scaler().unscale(
            master_params(self._optimizer),
            master_params(self._optimizer),
            loss_scale)
        self._skip_next[self._loss_idx] = self._cur_loss_scaler().update_scale()
        self._loss_idx += 1

        # Fold the gradients saved from the earlier losses back in.
        if cached_grads:
            for p, cached_grad in zip(master_params(self._optimizer),
                                      cached_grads):
                if cached_grad is None:
                    continue
                if p.grad is None:
                    # This loss produced no gradient for p (or zero_grad()
                    # reset it to None): the saved gradient is the total.
                    p.grad = cached_grad
                else:
                    p.grad.data.add_(cached_grad)

    def _cur_loss_scaler(self):
        """Return the scaler for the loss currently being processed."""
        assert 0 <= self._loss_idx < self._num_loss
        return self._loss_scaler[self._loss_idx]

    def step(self, closure=None):
        """Apply or skip the optimizer update.

        Returns whatever the wrapped optimizer's ``step`` returns, or None
        when the update is skipped because of a gradient overflow.

        Raises:
            NotImplementedError: if ``closure`` is given while the handle is
                active.
        """
        if not self._amp_handle.is_active():
            return self._optimizer.step(closure=closure)

        self._loss_idx = 0

        for group in self._optimizer.param_groups:
            for p in group['params']:
                self._amp_handle.remove_cache(p)

        if closure is not None:
            raise NotImplementedError(
                'The `closure` argument is unsupported by the amp ' +
                'optimizer wrapper.')
        if any(self._skip_next):
            maybe_print('Gradient overflow, skipping update')
            self._skip_next = [False] * self._num_loss
        else:
            return self._optimizer.step(closure=closure)

    # Forward any attribute lookups
    def __getattr__(self, attr):
        # __getattr__ only runs when normal lookup fails.  If `_optimizer`
        # itself is missing (e.g. on an instance created without __init__
        # during unpickling/copying), forwarding would recurse without bound.
        if attr == '_optimizer':
            raise AttributeError(attr)
        return getattr(self._optimizer, attr)

    # Forward all torch.optim.Optimizer methods
    def __getstate__(self):
        return self._optimizer.__getstate__()

    def __setstate__(self, state):
        # The pickle protocol passes the saved state as an argument.
        return self._optimizer.__setstate__(state)

    def __repr__(self):
        return self._optimizer.__repr__()

    def state_dict(self):
        return self._optimizer.state_dict()

    def load_state_dict(self, state_dict):
        return self._optimizer.load_state_dict(state_dict)

    def zero_grad(self):
        return self._optimizer.zero_grad()

    def add_param_group(self, param_group):
        return self._optimizer.add_param_group(param_group)


================================================
FILE: apex/apex/amp/rnn_compat.py
================================================
from . import utils, wrap

import torch
# Alias for torch's C-level function namespace; the wrappers below look
# functions up on it by name at call time.
_VF = torch._C._VariableFunctions
# Base names of the RNN functions; a '_cell' suffix is appended elsewhere in
# this file to form the per-cell variants.
RNN_NAMES = ['rnn_relu', 'rnn_tanh', 'gru', 'lstm']

def _gen_VF_wrapper(name):
    def wrapper(*args, **kwargs):
        return getattr(_VF, name)(*args, **kwargs)
    return wrapper

# Some python magic to generate an object that has the rnn cell functions
# defined on it, all of which call into corresponding _VF version.
# Intended to patch torch.nn.modules.rnn._VF (aka, the ref named "_VF"
# imported at module scope within torch.nn.modules.rnn).  This should
# not affect third-party importers of _VF.py.
class VariableFunctionsShim(object):
    """Object exposing one forwarding wrapper per RNN function name.

    For every entry of ``RNN_NAMES`` both the plain name and the ``_cell``
    variant are set as instance attributes.
    """

    def __init__(self):
        exposed = [base + suffix
                   for base in RNN_NAMES
                   for suffix in ('', '_cell')]
        for fn_name in exposed:
            setattr(self, fn_name, _gen_VF_wrapper(fn_name))

def has_old_rnns():
    """Report whether this PyTorch build exposes the legacy THNN RNN cells.

    Probes ``torch.nn.backends.thnn.backend.LSTMCell``.  Any ordinary failure
    of that lookup means the legacy backend is absent.

    Returns:
        bool: True if the attribute lookup succeeds, False otherwise.
    """
    try:
        torch.nn.backends.thnn.backend.LSTMCell
    except Exception:
        # Depending on the PyTorch version the probe may fail at different
        # points of the attribute chain and with different exception types.
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit still propagate.
        return False
    return True

def whitelist_rnn_cells(handle, verbose):
    """Install half-precision cached casts on the RNN cell functions.

    Which module and function names are patched depends on whether the
    legacy THNN RNN backend is present (see ``has_old_rnns``).
    """
    legacy = has_old_rnns()

    # Different module + function names in old/new RNN cases
    if legacy:
        mod = torch.nn.backends.thnn.backend
        fn_names = ['RNNReLUCell', 'RNNTanhCell', 'LSTMCell', 'GRUCell']
    else:
        mod = torch.nn.modules.rnn._VF
        assert isinstance(mod, VariableFunctionsShim)
        fn_names = [base + '_cell' for base in RNN_NAMES]

    # Insert casts on cell functions
    for cell_fn in fn_names:
        wrap.cached_cast(mod, cell_fn, utils.maybe_half, handle,
                         try_caching=True, verbose=verbose)

    if not legacy:
        return

    # Special handling of `backward` for fused gru / lstm:
    # The `backward` method calls Tensor.sum() (blacklist) internally,
    # and then the resulting grad_input has the wrong type.
    # TODO: where else is this a problem?
    fused_pointwise = torch.nn._functions.thnn.rnnFusedPointwise
    for rnn_type in ('GRUFused', 'LSTMFused'):
        wrap.disable_casts(getattr(fused_pointwise, rnn_type), 'backward', handle)


================================================
FILE: apex/apex/amp/scaler.py
================================================
import torch
from ..multi_tensor_apply import multi_tensor_applier
from ._amp_state import _amp_state, master_params, maybe_print
from itertools import product

def scale_check_overflow_python(model_grad, master_grad, scale, check_overflow=False):
    """Copy ``model_grad`` into ``master_grad`` and multiply it by ``scale``.

    When ``check_overflow`` is set, the sum of ``model_grad`` is inspected
    first; if it is infinite or NaN the function returns True and leaves
    ``master_grad`` untouched.  Otherwise returns False.
    """
    if check_overflow:
        total = float(model_grad.float().sum())
        is_inf = total in (float('inf'), -float('inf'))
        is_nan = total != total  # NaN is the only value unequal to itself
        if is_inf or is_nan:
            return True

    # Nothing to copy when both names refer to the same tensor.
    if master_grad is not model_grad:
        master_grad.copy_(model_grad)
    if scale != 1.0:
        master_grad.mul_(scale)
    return False

def axpby_check_overflow_python(model_grad, stashed_grad, master_grad, scale, check_overflow=False):
    """Accumulate ``scale * model_grad`` into ``stashed_grad`` and expose it via ``master_grad``.

    Args:
        model_grad: incoming gradient; converted to ``master_grad``'s dtype.
        stashed_grad: previously accumulated gradient, updated in place.
        master_grad: tensor whose ``.data`` is pointed at the updated stash.
        scale: multiplier applied to ``model_grad`` before accumulation.
        check_overflow: when True, inspect the sum of ``model_grad`` first.

    Returns:
        True if ``check_overflow`` is set and the sum of ``model_grad`` is
        infinite or NaN (no tensor is modified in that case), else False.
    """
    if check_overflow:
        cpu_sum = float(model_grad.float().sum())
        if cpu_sum == float('inf') or cpu_sum == -float('inf') or cpu_sum != cpu_sum:
            return True

    assert stashed_grad.dtype == master_grad.dtype
    converted_model_grad = model_grad.to(master_grad.dtype)
    # Keyword `alpha` replaces the deprecated positional form add_(scale, tensor).
    stashed_grad.add_(converted_model_grad, alpha=scale)
    master_grad.data = stashed_grad.data
    return False

class LossScaler(object):
    """Holds a loss scale, unscales gradients, and optionally adapts the scale.

    With ``loss_scale == "dynamic"`` the scale starts at ``init_scale``, is
    divided by ``scale_factor`` whenever an overflow is detected, and is
    multiplied by ``scale_factor`` after ``scale_window`` consecutive
    overflow-free calls to ``update_scale``.  Any other ``loss_scale`` value
    is used as a fixed scale.
    """

    # Class-level flags shared by all scaler instances.
    warned_no_fused_kernel = False
    warned_unscaling_non_fp32_grad = False
    has_fused_kernel = False

    def __init__(self,
                 loss_scale,
                 init_scale=2.**16,
                 scale_factor=2.,
                 scale_window=2000,
                 min_loss_scale=None,
                 max_loss_scale=2.**24):
        """
        Args:
            loss_scale: ``"dynamic"`` or a fixed numeric scale.
            init_scale: starting scale in dynamic mode.
            scale_factor: factor by which the scale is divided on overflow
                and multiplied after ``scale_window`` clean iterations.
            scale_window: number of consecutive overflow-free updates before
                the scale is increased.
            min_loss_scale: lower bound applied when shrinking (ignored when
                falsy).
            max_loss_scale: upper bound applied when growing.
        """
        if loss_scale == "dynamic":
            self.dynamic = True
            self._loss_scale = init_scale
        else:
            self.dynamic = False
            self._loss_scale = loss_scale
        self._scale_factor = scale_factor
        self._max_loss_scale = max_loss_scale
        self._min_loss_scale = min_loss_scale
        self._scale_seq_len = scale_window
        self._unskipped = 0
        self._has_overflow = False
        # Integer flag buffer handed to the fused kernels; read back in update_scale.
        self._overflow_buf = torch.cuda.IntTensor([0])
        if multi_tensor_applier.available:
            import amp_C
            LossScaler.has_fused_kernel = multi_tensor_applier.available
            LossScaler.multi_tensor_scale_cuda = amp_C.multi_tensor_scale
            LossScaler.multi_tensor_axpby_cuda = amp_C.multi_tensor_axpby
        else:
            if not LossScaler.warned_no_fused_kernel:
                maybe_print(
                    "Warning:  multi_tensor_applier fused unscale kernel is unavailable, "
                    "possibly because apex was installed without --cuda_ext --cpp_ext. "
                    "Using Python fallback.  Original ImportError was: " +
                    repr(multi_tensor_applier.import_err),
                    True)
            LossScaler.has_fused_kernel = False
            LossScaler.warned_no_fused_kernel = True

    def loss_scale(self):
        """Return the current loss scale."""
        return self._loss_scale

    def unscale_python(self, model_grads, master_grads, scale):
        """Python fallback: write ``model / scale`` into each master grad.

        In dynamic mode the loop stops at the first gradient whose sum is
        infinite or NaN and records the overflow in ``_has_overflow``.
        """
        for model, master in zip(model_grads, master_grads):
            if model is not None:
                if not LossScaler.warned_unscaling_non_fp32_grad:
                    if master.dtype != torch.float32:
                        maybe_print(
                            "Attempting to unscale a grad with type {} ".format(master.type()) +
                            "Unscaling non-fp32 grads may indicate an error. "
                            "When using Amp, you don't need to call .half() on your model.")
                        LossScaler.warned_unscaling_non_fp32_grad = True
                self._has_overflow = scale_check_overflow_python(model,
                                                                 master,
                                                                 1./scale,
                                                                 self.dynamic)
                if self._has_overflow and self.dynamic:
                    break

    # unused_scale keeps some of the old API alive for hopefully a short time.
    def unscale(self, model_grads, master_grads, unused_scale, models_are_masters=False):
        """Unscale ``model_grads`` into ``master_grads`` using the current scale.

        Does nothing if an overflow has already been recorded, or if the scale
        is 1.0, the model grads are the master grads, and the scaler is static.
        ``unused_scale`` is ignored.
        """
        if self._has_overflow:
            return

        scale = self._loss_scale

        if scale == 1.0 and models_are_masters and not self.dynamic:
            return

        if LossScaler.has_fused_kernel:
            multi_tensor_applier(LossScaler.multi_tensor_scale_cuda,
                                 self._overflow_buf,
                                 [model_grads, master_grads],
                                 1./scale)
        else:
            self.unscale_python(model_grads, master_grads, scale)

        # Reading the fused kernel's overflow flag is deferred to update_scale,
        # so that only one device-to-host copy and sync is needed.

    def unscale_with_stashed_python(self,
                                    model_grads,
                                    stashed_master_grads,
                                    master_grads,
                                    scale):
        """Python fallback: accumulate ``model / scale`` onto the stashed grads.

        Triples where both the model grad and the stashed grad are None are
        skipped.  In dynamic mode the loop stops at the first overflow.
        """
        for model, stashed, master in zip(model_grads, stashed_master_grads, master_grads):
            if model is None and stashed is None:
                continue
            else:
                if not LossScaler.warned_unscaling_non_fp32_grad:
                    if master.dtype != torch.float32:
                        maybe_print(
                            "Attempting to unscale a grad with type {} ".format(master.type()) +
                            "Unscaling non-fp32 grads may indicate an error. "
                            "When using Amp, you don't need to call .half() on your model.")
                        LossScaler.warned_unscaling_non_fp32_grad = True
                self._has_overflow = axpby_check_overflow_python(model,
                                                                 stashed,
                                                                 master,
                                                                 1./scale,
                                                                 self.dynamic)
                if self._has_overflow and self.dynamic:
                    break

    def unscale_with_stashed(self,
                             model_grads,
                             stashed_master_grads,
                             master_grads):
        """Unscale ``model_grads`` and add them to ``stashed_master_grads``.

        Does nothing if an overflow has already been recorded.  Uses the fused
        kernel when available, otherwise the Python fallback.
        """
        if self._has_overflow:
            return

        scale = self._loss_scale

        if LossScaler.has_fused_kernel:
            if (not LossScaler.warned_unscaling_non_fp32_grad
                and master_grads[0].dtype == torch.float16):
                print("Warning:  unscaling grads that are not FP32. "
                      "Unscaling non-fp32 grads may indicate an error. "
                      "When using Amp, you don't need to call .half() on your model.")
                # Setting this to True unconditionally allows the possibility of an escape
                # if never-before-seen non-fp32 grads are created in some later iteration.
                LossScaler.warned_unscaling_non_fp32_grad = True
            multi_tensor_applier(LossScaler.multi_tensor_axpby_cuda,
                                 self._overflow_buf,
                                 [model_grads, stashed_master_grads, master_grads],
                                 1./scale,
                                 1.0,
                                 0) # check only arg 0, aka the incoming model grads, for infs
        else:
            self.unscale_with_stashed_python(model_grads,
                                             stashed_master_grads,
                                             master_grads,
                                             scale)

        # Reading the fused kernel's overflow flag is deferred to update_scale,
        # so that only one device-to-host copy and sync is needed.

    def clear_overflow_state(self):
        """Forget any recorded overflow and zero the fused kernel's flag buffer."""
        self._has_overflow = False
        if self.has_fused_kernel:
            self._overflow_buf.zero_()

    # Separate so unscale() can be called more that once before updating.
    def update_scale(self):
        """Adjust the loss scale and report whether the step should be skipped.

        Returns:
            True if the scaler is dynamic and an overflow was recorded (the
            scale is then shrunk by ``scale_factor``); False otherwise.
        """
        # If the fused kernel is available, we only need one D2H memcopy and sync.
        if LossScaler.has_fused_kernel and self.dynamic and not self._has_overflow:
            self._has_overflow = self._overflow_buf.item()

        if self._has_overflow and self.dynamic:
            should_skip = True
            shrunk = self._loss_scale / self._scale_factor
            if self._min_loss_scale:
                self._loss_scale = max(self._min_loss_scale, shrunk)
            else:
                self._loss_scale = shrunk
            self._unskipped = 0
        else:
            should_skip = False
            self._unskipped += 1

        if self._unskipped == self._scale_seq_len and self.dynamic:
            self._loss_scale = min(self._max_loss_scale,
                                   self._loss_scale * self._scale_factor)
            self._unskipped = 0

        return should_skip


================================================
FILE: apex/apex/amp/utils.py
================================================
from . import compat

import functools
import itertools

import torch

def get_cuda_version():
    """Return ``torch.version.cuda`` as a tuple of ints, split on '.'."""
    components = torch.version.cuda.split('.')
    return tuple(map(int, components))

def is_fp_tensor(x):
    """True if ``x`` is a floating-point tensor-like, or a (nested) list/tuple of them."""
    if not is_nested(x):
        return compat.is_tensor_like(x) and compat.is_floating_point(x)
    # all() stops at the first element that fails, like the explicit loop would.
    return all(is_fp_tensor(item) for item in x)

def is_nested(x):
    """True when ``x`` is a tuple or a list (including subclasses)."""
    return isinstance(x, (tuple, list))

def should_cache(x):
    """True if ``x`` is a FloatTensor Parameter, or a (nested) list/tuple of them."""
    if not is_nested(x):
        is_param = isinstance(x, torch.nn.parameter.Parameter)
        return is_param and type_string(x) == 'FloatTensor'
    # all() stops at the first element that fails, like the explicit loop would.
    return all(should_cache(item) for item in x)

def collect_fp_tensor_types(args, kwargs):
    """Return the set of type strings of all floating-point tensor arguments.

    Both positional arguments and keyword values are examined; nested
    lists/tuples are flattened.
    """
    found = set()

    def _gather(item):
        if is_nested(item):
            for child in item:
                _gather(child)
        else:
            found.add(type_string(item))

    for candidate in itertools.chain(args, kwargs.values()):
        if is_fp_tensor(candidate):
            _gather(candidate)
    return found

def type_string(x):
    """Return the last dot-separated component of ``x.type()``."""
    full_name = x.type()
    return full_name.rsplit('.', 1)[-1]

def maybe_half(x, name='', verbose=False):
    """Cast ``x`` to half precision unless it is not on CUDA or already a HalfTensor.

    Lists and tuples are converted element-wise and rebuilt with the same
    container type.
    """
    if is_nested(x):
        converted = [maybe_half(item) for item in x]
        return type(x)(converted)

    leave_alone = (not x.is_cuda) or type_string(x) == 'HalfTensor'
    if leave_alone:
        return x
    if verbose:
        print('Float->Half ({})'.format(name))
    return x.half()

def maybe_float(x, name='', verbose=False):
    """Cast ``x`` to single precision unless it is not on CUDA or already a FloatTensor.

    Lists and tuples are converted element-wise and rebuilt with the same
    container type.
    """
    if is_nested(x):
        converted = [maybe_float(item) for item in x]
        return type(x)(converted)

    leave_alone = (not x.is_cuda) or type_string(x) == 'FloatTensor'
    if leave_alone:
        return x
    if verbose:
        print('Half->Float ({})'.format(name))
    return x.float()

# NB: returns the casted `args` as a new list, mutates `kwargs` in-place
def casted_args(cast_fn, args, kwargs):
    """Apply ``cast_fn`` to every floating-point tensor argument.

    Positional arguments are returned in a new list; keyword arguments are
    overwritten inside the ``kwargs`` dict that was passed in.
    """
    positional = [cast_fn(a) if is_fp_tensor(a) else a for a in args]
    # Only existing keys are reassigned, so the dict's size never changes
    # while it is being iterated.
    for key, value in kwargs.items():
        if is_fp_tensor(value):
            kwargs[key] = cast_fn(value)
    return positional

def cached_cast(cast_fn, x, cache):
    """Return ``cast_fn(x)``, reusing a previous result stored in ``cache``.

    Args:
        cast_fn: function applied to ``x`` on a cache miss.
        x: a tensor, or a list/tuple of tensors (handled element-wise and
            rebuilt with the same container type).
        cache: dict mapping original tensors to their casted versions.

    Raises:
        RuntimeError: if both ``x`` and its cached cast require grad but
            ``x`` is not the cached tensor's autograd parent.
    """
    if isinstance(x, (tuple, list)):
        # Recurse with the same cast function and cache for every element.
        return type(x)([cached_cast(cast_fn, y, cache) for y in x])
    if x in cache:
        cached_x = cache[x]
        if x.requires_grad and cached_x.requires_grad:
            # Make sure x is actually cached_x's autograd parent.
            if cached_x.grad_fn.next_functions[1][0].variable is not x:
                raise RuntimeError("x and cache[x] both require grad, but x is not "
                                   "cache[x]'s parent.  This is likely an error.")
        # During eval, it's possible to end up caching casted weights with
        # requires_grad=False.  On the next training iter, if cached_x is found
        # and reused from the cache, it will not actually have x as its parent.
        # Therefore, we choose to invalidate the cache (and force refreshing the cast)
        # if x.requires_grad and cached_x.requires_grad do not match.
        #
        # Under torch.no_grad() that check would drop the cached value every
        # time (cached_x is created with requires_grad=False while x keeps
        # requires_grad=True), making the cache useless during eval.  So when
        # grad mode is disabled we skip the invalidation and reuse the cached
        # value; there, the missing autograd link between x and cached_x does
        # not matter.
        if torch.is_grad_enabled() and x.requires_grad != cached_x.requires_grad:
            del cache[x]
        else:
            return cached_x

    casted_x = cast_fn(x)
    cache[x] = casted_x
    return casted_x

def verbosify(cast_fn, fn_name, verbose):
    """Bind ``name`` and ``verbose`` onto ``cast_fn`` when ``verbose`` is truthy.

    Returns ``cast_fn`` itself when ``verbose`` is falsy.
    """
    if not verbose:
        return cast_fn
    return functools.partial(cast_fn, name=fn_name, verbose=verbose)

def as_inplace(fns):
    """Lazily yield each name in ``fns`` with a trailing underscore appended."""
    for base_name in fns:
        inplace_name = base_name + '_'
        yield inplace_name

def _legacy_function_backend():
    """Return torch's legacy ``FunctionBackend`` class, or None if this build lacks it."""
    backends = getattr(torch.nn, 'backends', None)
    backend_module = getattr(backends, 'backend', None)
    return getattr(backend_module, 'FunctionBackend', None)

def _is_function_backend(mod):
    """True if ``mod`` is an instance of the legacy ``FunctionBackend`` class."""
    backend_cls = _legacy_function_backend()
    return backend_cls is not None and isinstance(mod, backend_cls)

def has_func(mod, fn):
    """Report whether ``mod`` provides something named ``fn``.

    ``mod`` may be a legacy function backend (looked up in its
    ``function_classes``), a dict (key lookup), or any other object
    (attribute lookup).
    """
    if _is_function_backend(mod):
        return fn in mod.function_classes
    elif isinstance(mod, dict):
        return fn in mod
    else:
        return hasattr(mod, fn)

def get_func(mod, fn):
    """Fetch ``fn`` from ``mod``; see ``has_func`` for the supported kinds of ``mod``."""
    if _is_function_backend(mod):
        return mod.function_classes[fn]
    elif isinstance(mod, dict):
        return mod[fn]
    else:
        return getattr(mod, fn)

def set_func(mod, fn, new_fn):
    """Store ``new_fn`` under ``fn`` on ``mod``; see ``has_func`` for the supported kinds of ``mod``."""
    if _is_function_backend(mod):
        mod.function_classes[fn] = new_fn
    elif isinstance(mod, dict):
        mod[fn] = new_fn
    else:
        setattr(mod, fn, new_fn)

def set_func_save(handle, mod, fn, new_fn):
    """Replace ``fn`` on ``mod`` with ``new_fn``, first recording the current one on ``handle``."""
    original = get_func(mod, fn)
    # Record before overwriting so the handle can restore it later.
    handle._save_func(mod, fn, original)
    set_func(mod, fn, new_fn)

# A couple problems get solved here:
# - The flat_weight buffer is disconnected from autograd graph,
#   so the fp16 weights need to be derived from the input weights
#   to this forward call, not the flat buffer.
# - The ordering of weights in the flat buffer is...idiosyncratic.
# First problem is solved with a combination of set_ (to set up
# correct storage) and copy_ (so the fp16 weight derives from the
# fp32 one in autograd).
# Second is solved by doing ptr arithmetic on the fp32 weights
# to derive the correct offset.
#
# TODO: maybe this should actually use
# `torch._cudnn_rnn_flatten_weight`? But then I need to call
# on first iter and cache the right offsets. Ugh.
def synthesize_flattened_rnn_weights(fp32_weights,
                                     fp16_flat_tensor,
                                     rnn_fn='',
                                     verbose=False):
    """Build fp16 copies of nested fp32 weights inside ``fp16_flat_tensor``'s storage.

    ``fp32_weights`` is a list of per-layer lists.  Each fp16 weight is placed
    at the same element offset that its fp32 source has relative to the first
    fp32 weight, then filled with ``copy_``.  The result mirrors the nesting
    of ``fp32_weights``.
    """
    base_addr = fp32_weights[0][0].data_ptr()

    def _half_alias(src):
        dst = src.new().half()
        # Byte distance from the first weight, converted to an element count.
        elem_offset = (src.data_ptr() - base_addr) // src.element_size()
        dst.set_(fp16_flat_tensor.storage(),
                 elem_offset,
                 src.shape)
        dst.copy_(src)
        if verbose:
            print('Float->Half ({})'.format(rnn_fn))
        return dst

    return [[_half_alias(w) for w in layer] for layer in fp32_weights]

# Roughly same as above, just the `fp32_weights` aren't nested.
# Code kept separate for readability.
def new_synthesize_flattened_rnn_weights(fp32_weights,
                                         fp16_flat_tensor,
                                         rnn_fn='',
                                         verbose=False):
    """Create fp16 copies of a flat list of fp32 RNN weights in one buffer.

    Same procedure as ``synthesize_flattened_rnn_weights``, except that
    ``fp32_weights`` is a single (non-nested) list.

    Args:
        fp32_weights: list of fp32 weight tensors; the first one supplies the
            base address that all offsets are measured from.
        fp16_flat_tensor: half tensor whose storage backs every returned
            weight.
        rnn_fn: name printed in the verbose message.
        verbose: if True, print one ``Float->Half`` line per weight.

    Returns:
        A list of half tensors, one per input weight and in the same order.
    """
    base_addr = fp32_weights[0].data_ptr()
    half_weights = []
    for src in fp32_weights:
        # Element offset of this weight relative to the first weight.
        elem_offset = (src.data_ptr() - base_addr) // src.element_size()
        dst = src.new().half()
        dst.set_(fp16_flat_tensor.storage(), elem_offset, src.shape)
        # copy_ fills the values; per the file's notes this also ties the
        # fp16 weight to the fp32 one in autograd.
        dst.copy_(src)
        if verbose:
            print('Float->Half ({})'.format(rnn_fn))
        half_weights.append(dst)
    return half_weights


================================================
FILE: apex/apex/amp/wrap.py
================================================
from . import compat
from . import utils
from ._amp_state import _amp_state
from . import rnn_compat

import functools

import torch

def make_cast_wrapper(orig_fn, cast_fn, handle,
                      try_caching=False):
    """Return a wrapper around ``orig_fn`` that casts its arguments first.

    While ``handle.is_active()`` is false the wrapper is a plain
    pass-through. Otherwise the arguments are run through
    ``utils.casted_args`` with ``cast_fn`` before ``orig_fn`` is called.
    When ``try_caching`` is set and the handle has a cache, every argument
    accepted by ``utils.should_cache`` is first replaced by
    ``utils.cached_cast(cast_fn, arg, handle.cache)``.
    """
    def _through_cache(value):
        # Only values that `should_cache` accepts go through the cache.
        if utils.should_cache(value):
            return utils.cached_cast(cast_fn, value, handle.cache)
        return value

    @functools.wraps(orig_fn)
    def wrapper(*args, **kwargs):
        if not handle.is_active():
            return orig_fn(*args, **kwargs)

        if try_caching and handle.has_cache:
            args = [_through_cache(positional) for positional in args]
            for key in kwargs:
                kwargs[key] = _through_cache(kwargs[key])

        # NOTE(review): kwargs is passed on after this call, so casted_args
        # presumably casts keyword values in place -- confirm in utils.
        casted_positionals = utils.casted_args(cast_fn, args, kwargs)
        return orig_fn(*casted_positionals, **kwargs)
    return wrapper

def cached_cast(mod, fn, cast_fn, handle,
                try_caching=False, verbose=False):
    """Patch ``mod.fn`` with a casting wrapper built by ``make_cast_wrapper``.

    Does nothing when ``mod`` has no function named ``fn``. Otherwise the
    cast function is passed through ``utils.verbosify`` and the resulting
    wrapper is installed via ``utils.set_func_save`` so that ``handle``
    records the original.
    """
    if utils.has_func(mod, fn):
        target = utils.get_func(mod, fn)
        reporting_cast = utils.verbosify(cast_fn, fn, verbose)
        utils.set_func_save(
            handle, mod, fn,
            make_cast_wrapper(target, reporting_cast, handle, try_caching))

# `handle` arg is unused, but simplifies API to match `make_cast_wrapper`.
# Annoyingly, make_promote_wrapper still uses the global handle.  Once everyone
# is on the new API and I am free to get rid of handle, I can clean this up.
def make_promote_wrapper(orig_fn, cast_fn, handle=None):
    """Return a wrapper that promotes mixed half/float arguments.

    The ``handle`` parameter is accepted but not used; activity is read from
    the global ``_amp_state.handle``. When amp is inactive, or the arguments
    contain at most one floating-point tensor type, ``orig_fn`` is called
    unchanged. When exactly ``HalfTensor`` and ``FloatTensor`` are present
    the arguments are passed through ``utils.casted_args`` with ``cast_fn``.

    Raises:
        NotImplementedError: for any other combination of tensor types.
    """
    @functools.wraps(orig_fn)
    def wrapper(*args, **kwargs):
        if not _amp_state.handle.is_active():
            return orig_fn(*args, **kwargs)

        fp_types = utils.collect_fp_tensor_types(args, kwargs)
        if len(fp_types) <= 1:
            # Nothing to reconcile.
            return orig_fn(*args, **kwargs)

        if len(fp_types) == 2 and fp_types == {'HalfTensor', 'FloatTensor'}:
            promoted = utils.casted_args(cast_fn, args, kwargs)
            return orig_fn(*promoted, **kwargs)

        raise NotImplementedError(
            'Do not know how to handle these types to promote: {}'
            .format(fp_types))
    return wrapper

def promote(mod, fn, handle, verbose=False):
    """Patch ``mod.fn`` with a promote wrapper that casts via ``utils.maybe_float``.

    The wrapper comes from ``make_promote_wrapper`` and is installed through
    ``utils.set_func_save`` so that ``handle`` records the original.
    """
    target = utils.get_func(mod, fn)
    to_float = utils.verbosify(utils.maybe_float, fn, verbose)
    utils.set_func_save(handle, mod, fn,
                        make_promote_wrapper(target, to_float))

def sequence_promote(mod, fn, handle, verbose=False):
    """Patch ``mod.fn`` so a mixed half/float first-argument sequence is promoted.

    The installed wrapper inspects the type strings of the items in its
    first argument. Only when amp is active and the set of types is exactly
    ``{'HalfTensor', 'FloatTensor'}`` is the sequence passed through
    ``utils.casted_args`` with ``utils.maybe_float``; every other case calls
    the original function with the sequence untouched.
    """
    orig_fn = utils.get_func(mod, fn)
    maybe_float = utils.verbosify(utils.maybe_float, fn, verbose)

    @functools.wraps(orig_fn)
    def wrapper(seq, *args, **kwargs):
        if _amp_state.handle.is_active():
            seen = set([utils.type_string(item) for item in seq])
            if len(seen) > 1 and seen == set(['HalfTensor', 'FloatTensor']):
                seq = utils.casted_args(maybe_float, seq, {})
            # TODO: other mixed-type cases aren't due to amp.
            #       Just pass through?
        return orig_fn(seq, *args, **kwargs)

    utils.set_func_save(handle, mod, fn, wrapper)

def promote_match_arg0(mod, fn, handle, verbose=False):
    """Patch ``mod.fn`` so the remaining arguments are cast to match ``arg0``.

    Does nothing when ``mod`` has no function named ``fn``. In the installed
    wrapper, a ``HalfTensor`` first argument selects ``utils.maybe_half`` and
    a ``FloatTensor`` selects ``utils.maybe_float``; any other type, or amp
    being inactive, passes the call through unchanged. ``arg0`` itself is
    never cast.
    """
    if not utils.has_func(mod, fn):
        return

    orig_fn = utils.get_func(mod, fn)

    @functools.wraps(orig_fn)
    def wrapper(arg0, *args, **kwargs):
        assert compat.is_tensor_like(arg0)
        if not _amp_state.handle.is_active():
            return orig_fn(arg0, *args, **kwargs)

        arg0_type = utils.type_string(arg0)
        if arg0_type == 'HalfTensor':
            chosen_cast = utils.maybe_half
        elif arg0_type == 'FloatTensor':
            chosen_cast = utils.maybe_float
        else:
            # arg0 is neither half nor float: leave everything as given.
            return orig_fn(arg0, *args, **kwargs)

        chosen_cast = utils.verbosify(chosen_cast, fn, verbose)
        remaining = utils.casted_args(chosen_cast, args, kwargs)
        return orig_fn(arg0, *remaining, **kwargs)

    utils.set_func_save(handle, mod, fn, wrapper)

def err_if_any_half(mod, fn, handle, custom_err_msg=None):
    """Patch ``mod.fn`` so it rejects calls that contain any ``HalfTensor``.

    Does nothing when ``mod`` has no function named ``fn``. The installed
    wrapper raises ``NotImplementedError`` (with ``custom_err_msg`` when one
    is given) if ``utils.collect_fp_tensor_types`` reports a ``HalfTensor``
    among the arguments; otherwise it calls the original function.
    """
    if not utils.has_func(mod, fn):
        return

    orig_fn = utils.get_func(mod, fn)

    @functools.wraps(orig_fn)
    def wrapper(*args, **kwargs):
        found_types = utils.collect_fp_tensor_types(args, kwargs)
        if 'HalfTensor' not in found_types:
            return orig_fn(*args, **kwargs)

        if custom_err_msg:
            raise NotImplementedError(custom_err_msg)
        raise NotImplementedError(
            'Cannot call in-place function {} with fp16 arguments.'.format(fn))

    utils.set_func_save(handle, mod, fn, wrapper)

def err_if_arg0_half(mod, fn, handle, verbose=False):
    """Patch ``mod.fn`` so it rejects a ``HalfTensor`` first argument.

    Does nothing when ``mod`` has no function named ``fn``. The installed
    wrapper raises ``NotImplementedError`` when ``arg0`` is a ``HalfTensor``;
    otherwise the remaining arguments are passed through
    ``utils.casted_args`` with ``utils.maybe_float`` before the original
    function is called.
    """
    if not utils.has_func(mod, fn):
        return

    orig_fn = utils.get_func(mod, fn)

    @functools.wraps(orig_fn)
    def wrapper(arg0, *args, **kwargs):
        assert compat.is_tensor_like(arg0)
        if utils.type_string(arg0) == 'HalfTensor':
            raise NotImplementedError(
                'Cannot call in-place method {} on fp16 Tensors.'.format(fn))

        to_float = utils.verbosify(utils.maybe_float, fn, verbose)
        remaining = utils.casted_args(to_float, args, kwargs)
        return orig_fn(arg0, *remaining, **kwargs)

    utils.set_func_save(handle, mod, fn, wrapper)

# Current RNN approach:
# - Wrap top-level `RNN` function in thnn backend
# - Will call into either CudnnRNN or AutogradRNN
#  - Each of these are factory functions that return a per-iter
#    `forward` function
# - We interpose on the factory function to:
#   1) Interpose on the actual forward function and put in casts
#   2) Insert an fp16 `flat_weight` if necessary
def rnn_cast(backend, fn, handle, verbose=False):
    """Patch the RNN factory ``fn`` on ``backend`` so its forward runs in fp16.

    The replacement factory (``rnn_wrapper``) swaps any ``flat_weight`` kwarg
    for a same-shaped fp16 buffer, calls the original factory, and returns a
    wrapped ``forward`` (``fwd_wrapper``) that casts inputs, weights and
    hiddens with ``utils.maybe_half`` before delegating.

    Args:
        backend: object holding the RNN factory function.
        fn: name of the factory function on ``backend``.
        handle: amp handle; only used to record the original function.
        verbose: forwarded to ``utils.verbosify`` and the weight synthesis.

    NOTE(review): unlike most wrappers in this file, neither wrapper here
    checks ``is_active()`` -- confirm that this is intended.
    """
    orig_rnn = utils.get_func(backend, fn)
    @functools.wraps(orig_rnn)
    def rnn_wrapper(*args, **kwargs):
        flat_weight = kwargs.get('flat_weight')
        if flat_weight is not None:
            # We replace `flat_weight` with an uninitialized fp16
            # Tensor. The "actual" weight tensors (provided in `forward`),
            # will then be set up as ptrs into the buffer and have the
            # corresponding fp32 values copied in.
            # We need to call `copy` on the "actual" weights so that the
            # autograd graph correctly backprops from the wgrads computed
            # inside cuDNN (on fp16 weights) into the fp32 weights.
            assert utils.type_string(flat_weight) == 'FloatTensor'
            if compat.tensor_is_float_tensor() or compat.tensor_is_variable():
                # Pre-0.4. A little slower, since it zeros out memory.
                flat_weight_fp16 = flat_weight.new().half().resize_(flat_weight.shape)
            else:
                flat_weight_fp16 = torch.empty_like(flat_weight,
                                                    dtype=torch.float16)
            kwargs['flat_weight'] = flat_weight_fp16
        else:
            flat_weight_fp16 = None

        # Build the real per-iteration forward, now holding the fp16 buffer
        # (if any), then interpose on it.
        forward = orig_rnn(*args, **kwargs)
        @functools.wraps(forward)
        def fwd_wrapper(*fargs, **fkwargs):
            # Positional layout: (inputs, weights, hiddens[, batch_sizes]).
            assert len(fargs) == 3 or len(fargs) == 4
            inputs, weights, hiddens = fargs[:3]
            assert utils.is_fp_tensor(inputs)
            assert isinstance(weights, list)
            cast_fn = utils.verbosify(utils.maybe_half,
                                      fn,
                                      verbose)
            new_args = []

            # 0) Inputs
            new_args.append(cast_fn(inputs))

            # 1) Weights: alias them into the fp16 flat buffer when one was
            #    created above; otherwise cast each weight individually.
            if flat_weight_fp16 is not None:
                fp16_weights = utils.synthesize_flattened_rnn_weights(
                    weights, flat_weight_fp16, fn, verbose)
            else:
                fp16_weights = [[cast_fn(w) for w in layer]
                                for layer in weights]
            new_args.append(fp16_weights)

            # 2) Hiddens: either a tuple (for LSTM) or single tensor
            if isinstance(hiddens, tuple):
                new_args.append(tuple(cast_fn(x) for x in hiddens))
            elif utils.is_fp_tensor(hiddens):
                new_args.append(cast_fn(hiddens))
            else:
                # Hiddens can, in principle, be `None` -- pass through
                new_args.append(hiddens)

            # 3) Batch sizes (0.4 or later only)
            if len(fargs) == 4:
                new_args.append(fargs[3])

            return forward(*new_args, **fkwargs)
        return fwd_wrapper
    utils.set_func_save(handle, backend, fn, rnn_wrapper)

def new_rnn_cast(fn, handle, verbose=False):
    """Patch the RNN backend function named ``fn`` so it runs on fp16 arguments.

    The patch target is ``torch.nn.modules.rnn._rnn_impls`` when it has
    ``fn``; otherwise it is ``torch.nn.modules.rnn._VF`` (asserted to be a
    ``rnn_compat.VariableFunctionsShim``) under the lower-cased name.

    Args:
        fn: backend function name; lower-cased when patching ``_VF``.
        handle: amp handle; only used to record the original function.
            Activity is read from the global ``_amp_state.handle``.
        verbose: forwarded to ``utils.verbosify`` and the weight synthesis.
    """
    # Forward+backward compatibility around https://github.com/pytorch/pytorch/pull/15744
    # For rnn backend calls that route through _rnn_impls, we must patch the ref
    # that _rnn_impls stashed.  For rnn backend calls that directly invoke
    # _VF.<backend>, e.g. _VF.lstm, we can patch onto VariableFunctionsShim,
    # which in turn has patched the ref named "_VF" in torch.nn.modules.rnn.
    if utils.has_func(torch.nn.modules.rnn._rnn_impls, fn):
        mod = torch.nn.modules.rnn._rnn_impls
    else:
        mod = torch.nn.modules.rnn._VF
        assert isinstance(mod, rnn_compat.VariableFunctionsShim)
        # From here on `fn` is the lower-cased name, including in the
        # verbose output and the final set_func_save call.
        fn = fn.lower()
    orig_fn = utils.get_func(mod, fn)
    cast_fn = utils.verbosify(utils.maybe_half, fn, verbose)
    @functools.wraps(orig_fn)
    def wrapper(*args, **kwargs):
        # Exact call signature from modules/rnn.py
        assert len(args) == 9
        assert len(kwargs) == 0

        if not _amp_state.handle.is_active():
            return orig_fn(*args, **kwargs)

        # The two call layouts are told apart by whether args[6] is a bool;
        # the weight list sits at a different position in each.
        if isinstance(args[6], bool):
            params_idx = 2 # Not PackedSequence case
        else:
            params_idx = 3 # PackedSequence case

        new_args = []
        for i, arg in enumerate(args):
            if i == params_idx:
                # Allocate one half buffer sized to hold every weight, then
                # alias fp16 copies of the weights into it.
                num_params = sum([x.numel() for x in arg])
                fp16_weight_buf = args[0].new_empty((num_params,),
                                                    dtype=torch.half)
                casted_weights = utils.new_synthesize_flattened_rnn_weights(
                    arg, fp16_weight_buf, fn, verbose)
                new_args.append(casted_weights)
            elif utils.is_fp_tensor(arg):
                new_args.append(cast_fn(arg))
            else:
                # Anything that is not a floating-point tensor passes through.
                new_args.append(arg)

        return orig_fn(*new_args)
    utils.set_func_save(handle, mod, fn, wrapper)

def disable_casts(mod, fn, handle):
    """Patch ``mod.fn`` so it always runs inside ``handle._disable_casts()``.

    Does nothing when ``mod`` has no function named ``fn``.
    """
    if utils.has_func(mod, fn):
        target = utils.get_func(mod, fn)

        @functools.wraps(target)
        def wrapper(*args, **kwargs):
            with handle._disable_casts():
                return target(*args, **kwargs)

        utils.set_func_save(handle, mod, fn, wrapper)


================================================
FILE: apex/apex/fp16_utils/README.md
================================================
fp16_optimizer.py contains `FP16_Optimizer`, a Python class designed to wrap an existing PyTorch optimizer and automatically enable master parameters and loss scaling in a manner transparent to the user.  To use `FP16_Optimizer`, only two lines of one's Python model need to change.

#### [FP16_Optimizer API documentation](https://nvidia.github.io/apex/fp16_utils.html#automatic-management-of-master-params-loss-scaling)

#### [Simple examples with FP16_Optimizer](https://github.com/NVIDIA/apex/tree/master/examples/FP16_Optimizer_simple)

#### [Imagenet with FP16_Optimizer](https://github.com/NVIDIA/apex/tree/master/examples/imagenet)

#### [word_language_model with FP16_Optimizer](https://github.com/NVIDIA/apex/tree/master/examples/word_language_model)


fp16util.py contains a number of utilities to manually manage master parameters and loss scaling, if the user chooses to do so.

#### [Manual management documentation](https://nvidia.github.io/apex/fp16_utils.html#manual-master-parameter-management)

The [Imagenet with FP16_Optimizer](https://github.com/NVIDIA/apex/tree/master/examples/imagenet) and [word_language_model with FP16_Optimizer](https://github.com/NVIDIA/apex/tree/master/examples/word_language_model) directories also contain `main.py` files that demonstrate manual management of master parameters and static loss scaling.  These examples illustrate what sort of operations `FP16_Optimizer` is performing automatically.


================================================
FILE: apex/apex/fp16_utils/__init__.py
================================================
from .fp16util import (
    BN_convert_float,
    network_to_half,
    prep_param_lists,
    model_grads_to_master_grads,
    master_params_to_model_params,
    tofp16,
    to_python_float,
    clip_grad_norm,
    convert_module,
    convert_network,
    FP16Model,
)

from .fp16_optimizer import FP16_Optimizer
from .loss_scaler import LossScaler, DynamicLossScaler


================================================
FILE: apex/apex/fp16_utils/fp16_optimizer.py
================================================
import torch
from torch import nn
from torch.autograd import Variable
from torch.nn.parameter import Parameter
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors

from ..amp._amp_state import _amp_state, maybe_print
from ..amp.scaler import LossScaler
from ..multi_tensor_apply import multi_tensor_applier
from .fp16util import model_grads_to_master_grads, master_params_to_model_params, clip_grad_norm

# TODO:  Update overflow check + downscale to use Carl's fused kernel.
class FP16_Optimizer(object):
    """
    :class:`FP16_Optimizer` is designed to wrap an existing PyTorch optimizer, 
    and manage static or dynamic loss scaling and master weights in a manner transparent to the user.
    For standard use, only two lines must be changed:  creating the :class:`FP16_Optimizer` instance,
    and changing the call to ``backward``.

    Example::

        model = torch.nn.Linear(D_in, D_out).cuda().half()
        optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
        # Name the FP16_Optimizer instance to replace the existing optimizer
        # (recommended but not required):
        optimizer = FP16_Optimizer(optimizer, static_loss_scale = 128.0)
        ...
        # loss.backward() becomes:
        optimizer.backward(loss)
        ...

    Example with dynamic loss scaling::

        ...
        optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
                                   # optional arg to control dynamic loss scaling behavior
                                   # dynamic_loss_args={'scale_window' : 500})
                                   # Usually, dynamic_loss_args is not necessary. 

    Args:
        init_optimizer (torch.optim.optimizer):  Existing optimizer created with the parameters to optimize.  Internally, :class:`FP16_Optimizer` replaces the passed optimizer's fp16 parameters, if any, with fp32 master parameters copied from the original ones.  :class:`FP16_Optimizer` also stores references to the original fp16 parameters, and updates these fp16 parameters from the master fp32 copy at the end of each :attr:`step`.  
        static_loss_scale (float, optional, default=1.0):  Loss scale used internally to scale gradients computed by the model.  Any fp16 gradients will be copied to fp32, then downscaled before being applied to the fp32 master params, so ``static_loss_scale`` should not affect learning rate.
        dynamic_loss_scale (bool, optional, default=False):  Use dynamic loss scaling.  If True, this will override any ``static_loss_scale`` option.
        dynamic_loss_args (dict, optional, default=None):  Dict of kwargs that will be forwarded to the internal :class:`LossScaler` instance's constructor.  Keys of this dict must match kwargs accepted by :class:`LossScaler`'s constructor.  If ``dynamic_loss_args`` is unspecified, :class:`LossScaler`'s defaults will be used.
        verbose (bool, optional, default=True):  By default, FP16_Optimizer's constructor prints out the parameters and parameter groups it is ingesting, as a sanity check.  If this becomes annoying (e.g. for large models), it can be disabled by passing ``verbose=False``.  ``verbose=False`` will not disable printing when the loss scale is readjusted during dynamic loss scaling.

    ``init_optimizer`` is expected to have been constructed in the ordinary way.  
    It is recommended (although not required) that the newly constructed :class:`FP16_Optimizer` instance be 
    named to replace ``init_optimizer``, for two reasons:  
    First, it means that references to the same name
    later in the file will not have to change.  
    Second, :class:`FP16_Optimizer` reserves the right (as an implementation detail) to 
    modify ``init_optimizer``.  If you do choose a unique name for the new
    :class:`FP16_Optimizer` instance, you should only work with this new instance,
    because the preexisting optimizer might no longer behave as expected.

    ``init_optimizer`` may be any Pytorch optimizer. 
    It may contain a mixture of fp16 and fp32 parameters organized into any number of 
    ``param_groups`` with different hyperparameters.  The :class:`FP16_Optimizer` constructor will 
    ingest these ``param_groups`` and remember them. 

    Calls to ::

        loss.backward() 

    must be replaced with ::

        optimizer.backward(loss)  

    because :class:`FP16_Optimizer` requires ownership of the backward pass to implement 
    loss scaling and copies to master gradients.

    .. note::
        Loss scaling, either static or dynamic, is orthogonal to learning rate, because gradients
        are downscaled before being applied.  This means that adjusting the loss scale, or using
        dynamic loss scaling, should not require retuning the learning rate or any other 
        hyperparameters.


    **Advanced options**

    **Closures**:  :class:`FP16_Optimizer` can wrap a Pytorch optimizer that receives a closure.
    See docstring for :attr:`step`.

    **Gradient clipping**:  Use :attr:`clip_master_grads`.
    
    **Multiple losses**:  If your model accumulates gradients from multiple losses,
    this can be made more efficient by supplying ``update_master_grads=False``
    to :attr:`backward`.  See docstring for :attr:`backward`.

    **Manually adjusting loss scale**:  The current loss scale can be retrieved or set via ::

        print(optimizer.loss_scale)
        optimizer.loss_scale = new_loss_scale

    For static loss scaling, manually adjusting the loss scale over time is a reasonable
    thing to do.  During later epochs, gradients may become smaller, and a 
    higher loss scale may be required, analogous to scheduling the learning rate.  Dynamic loss
    scaling is more subtle (see :class:`DynamicLossScaler`) and in this case, manually adjusting 
    the loss scale is not recommended.

    **Multi_GPU training**:  If the wrapped ``init_optimizer`` was created from a model wrapped in
    Pytorch DistributedDataParallel or Apex DistributedDataParallel, :class:`FP16_Optimizer` 
    should still work as intended.
    """

    def __init__(self, 
                 init_optimizer, 
                 static_loss_scale=1.0, 
                 dynamic_loss_scale=False,
                 dynamic_loss_args=None,
                 verbose=True):
        if not torch.cuda.is_available:
            raise SystemError("Cannot use fp16 without CUDA.")

        self.verbose = verbose

        self.optimizer = init_optimizer
        # init_state_dict sets up an alternative way to cast per-param state tensors.
        # Stashing here in case https://github.com/pytorch/pytorch/issues/7733 makes it necessary.
        # init_state_dict = init_optimizer.state_dict()

        self.fp16_groups = []
        self.fp32_from_fp16_groups = []
        self.fp32_from_fp32_groups = []
        for i, param_group in enumerate(self.optimizer.param_groups):
            self.maybe_print("FP16_Optimizer processing param group {}:".format(i))
            fp16_params_this_group = []
            fp32_params_this_group = []
            fp32_from_fp16_params_this_group = []
            for i, param in enumerate(param_group['params']):
                if param.requires_grad:
                    if param.type() == 'torch.cuda.HalfTensor':
                        self.maybe_print("FP16_Optimizer received torch.cuda.HalfTensor with {}"
                                         .format(param.size()))
                        fp16_params_this_group.append(param)
                        master_param = param.detach().clone().float()
                        master_param.requires_grad = True
                        param_group['params'][i] = master_param
                        fp32_from_fp16_params_this_group.append(master_param)
                        # Reset existing state dict key to the new master param.
                        # We still need to recast per-param state tensors, if any, to FP32.
                        if param in self.optimizer.state:
                           self.optimizer.state[master_param] = self.optimizer.state.pop(param) 
                    elif param.type() == 'torch.cuda.FloatTensor':
                        self.maybe_print("FP16_Optimizer received torch.cuda.FloatTensor with {}"
                                         .format(param.size()))
                        fp32_params_this_group.append(param)
                        param_group['params'][i] = param
                    else:
                        raise TypeError("Wrapped parameters must be either "
                                        "torch.cuda.FloatTensor or torch.cuda.HalfTensor. "  
                                        "Received {}".format(param.type()))
            
            self.fp16_groups.append(fp16_params_this_group)
            self.fp32_from_fp16_groups.append(fp32_from_fp16_params_this_group)
            self.fp32_from_fp32_groups.append(fp32_params_this_group)

        self.all_fp16_params = []
        for group in self.fp16_groups:
            self.all_fp16_params += group

        self.all_fp32_from_fp16_params = []
        for group in self.fp32_from_fp16_groups:
            self.all_fp32_from_fp16_params += group

        self.all_fp32_from_fp32_params = []
        for group in self.fp32_from_fp32_groups:
            self.all_fp32_from_fp32_params += group

        # Leverage state_dict() and load_state_dict() to recast preexisting per-param state tensors
        self.optimizer.load_state_dict(self.optimizer.state_dict())
        # alternative way to cast per-param state tensors:
        # self.optimizer.load_state_dict(init_state_dict)

        if dynamic_loss_scale:
            self.dynamic_loss_scale = True
            if dynamic_loss_args is not None:
                self.loss_scaler = LossScaler("dynamic", **dynamic_loss_args)
            else:
                self.loss_scaler = LossScaler("dynamic")
        else:
            self.dynamic_loss_scale = False
            self.loss_scaler = LossScaler(static_loss_scale)

        self.overflow = False
        self.first_closure_call_this_step = True

        self.clip_grad_norm = clip_grad_norm

        # TODO:  Centralize exposure and import error checking for the C backend.
        if multi_tensor_applier.available:
            import amp_C
            self.multi_tensor_scale = amp_C.multi_tensor_scale
            self._dummy_overflow_buf = torch.cuda.IntTensor([0]);

    # Having self.maybe_print distinct from _amp_state.maybe_print is another artifact
    # of having to support FP16_Optimizer separately, for the time being.
    def maybe_print(self, msg):
        if self.verbose:
            print(msg)
            
    def __getstate__(self):
        raise RuntimeError("FP16_Optimizer should be serialized using state_dict().")

    def __setstate__(self, state):
        raise RuntimeError("FP16_Optimizer should be deserialized using load_state_dict().")

    def zero_grad(self, set_grads_to_None=False):
        """
        Zero fp32 and fp16 parameter grads.
        """
        # In principle, only the .grad attributes of the model params need to be zeroed,
        # because gradients are copied into the FP32 master params.  However, we zero
        # all gradients owned by the optimizer, just to be safe:
        for group in self.optimizer.param_groups:
             for p in group['params']:
                 if set_grads_to_None:
                     p.grad = None
                 else:
                     if p.grad is not None:
                         p.grad.detach_()
                         p.grad.zero_()

        # Zero fp16 gradients owned by the model:
        for fp16_group in self.fp16_groups:
            for param in fp16_group:
                if set_grads_to_None:
                    param.grad = None
                else:
                    if param.grad is not None:
                        param.grad.detach_() # as in torch.optim.optimizer.zero_grad()
                        param.grad.zero_()

    # Should not be used anymore.
    # def _check_overflow(self):
    #     params = []
    #     for group in self.fp16_groups:
    #         for param in group:
    #             params.append(param)
    #     for group in self.fp32_from_fp32_groups:
    #         for param in group:
    #             params.append(param)
    #     self.overflow = self.loss_scaler.has_overflow(params)

    # def _update_scale(self, has_overflow=False):
    #     self.loss_scaler.update_scale(has_overflow)

    def _master_params_to_model_params(self):
        """Copy the fp32 master params into the fp16 model params.

        Uses ``self.multi_tensor_scale`` (with scale 1.0) through
        ``multi_tensor_applier`` when that backend is available; otherwise
        falls back to ``master_params_to_model_params`` one group at a time.
        """
        if not multi_tensor_applier.available:
            group_pairs = zip(self.fp16_groups, self.fp32_from_fp16_groups)
            for model_group, master_group in group_pairs:
                master_params_to_model_params(model_group, master_group)
            return

        # Fused path: nothing is launched when there are no fp16 params.
        if len(self.all_fp16_params) > 0:
            sources_and_targets = [self.all_fp32_from_fp16_params,
                                   self.all_fp16_params]
            multi_tensor_applier(self.multi_tensor_scale,
                                 self._dummy_overflow_buf,
                                 sources_and_targets,
                                 1.0)

    # To consider:  Integrate distributed with this wrapper by registering a hook on each variable
    # that does the overflow check, gradient copy + downscale, and fp32 allreduce in a different stream.
    # def _model_grads_to_master_grads(self):
    #     for fp16_group, fp32_from_fp16_group in zip(self.fp16_groups, self.fp32_from_fp16_groups):
    #         model_grads_to_master_grads(fp16_group, fp32_from_fp16_group)

    # def _downscale_master(self):
    #     if self.loss_scale != 1.0:
    #         for group in self.optimizer.param_groups:
    #             for param in group['params']:
    #                 if param.grad is not None:
    #                     param.grad.data.mul_(1./self.loss_scale)

    def clip_master_grads(self, max_norm, norm_type=2):
        """
        Clips fp32 master gradients via ``torch.nn.utils.clip_grad_norm``.

        Args:
            max_norm (float or int): max norm of the gradients
            norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for
                infinity norm.

        Returns:
            Total norm of the current fp32 gradients (viewed as a single vector).

        .. warning::
            Returns -1 if the most recently computed fp16 gradients overflowed (that is, if ``self.overflow`` is ``True``).
        """
        if not self.overflow:
            fp32_params = []
            for param_group in self.optimizer.param_groups:
                for param in param_group['params']:
                    fp32_params.append(param)
            return self.clip_grad_norm(fp32_params, max_norm, norm_type)
        else:
            return -1

    def state_dict(self):
        """
        Returns a dict containing the current state of this :class:`FP16_Optimizer` instance.
        This dict contains attributes of :class:`FP16_Optimizer`, as well as the state_dict
        of the contained Pytorch optimizer.
        Example::

            checkpoint = {}
            checkpoint['model'] = model.state_dict()
            checkpoint['optimizer'] = optimizer.state_dict()
            torch.save(checkpoint, "saved.pth")
        """
        state_dict = {}
        state_dict['loss_scaler'] = self.loss_scaler
        state_dict['dynamic_loss_scale'] = self.dynamic_loss_scale
        state_dict['overflow'] = self.overflow
        state_dict['first_closure_call_this_step'] = self.first_closure_call_this_step
        state_dict['optimizer_state_dict'] = self.optimizer.state_dict()
        state_dict['fp32_from_fp16'] = self.fp32_from_fp16_groups
        return state_dict

    def load_state_dict(self, state_dict):
        """
        Restore this object from a dictionary produced by an earlier :attr:`state_dict` call.

        The loss scaler, the ``dynamic_loss_scale`` and ``overflow`` flags and the
        ``first_closure_call_this_step`` flag are taken from ``state_dict`` as-is, the wrapped
        optimizer reloads ``state_dict['optimizer_state_dict']``, and every saved fp32 master
        param is copied in place over the corresponding current master param.

        When the wrapped optimizer's parameters come from ``model``, the expected order is
        ``model.load_state_dict()`` first and this method second.

        Example::

            model = torch.nn.Linear(D_in, D_out).cuda().half()
            optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
            optimizer = FP16_Optimizer(optimizer, static_loss_scale = 128.0)
            ...
            checkpoint = torch.load("saved.pth")
            model.load_state_dict(checkpoint['model'])
            optimizer.load_state_dict(checkpoint['optimizer'])

        Args:
            state_dict (dict): Must contain the keys ``'loss_scaler'``, ``'dynamic_loss_scale'``,
                ``'overflow'``, ``'first_closure_call_this_step'``, ``'optimizer_state_dict'``
                and ``'fp32_from_fp16'``.
        """
        # (The original author suspected reloading the optimizer before the model is fine too.)
        for field in ('loss_scaler',
                      'dynamic_loss_scale',
                      'overflow',
                      'first_closure_call_this_step'):
            setattr(self, field, state_dict[field])

        self.optimizer.load_state_dict(state_dict['optimizer_state_dict'])

        # After the call above the wrapped optimizer's hyperparameters and internal buffers are
        # current, but the fp32 master copies of the fp16 model params are not.  They could be
        # rebuilt from the fp16 params (cheaper to store, loses precision) or saved and restored
        # separately; this class does the latter.
        #
        # Pytorch's Optimizer.load_state_dict casts saved buffers to the type/device of their
        # params because the buffers may not exist yet.  Here the master params already exist
        # provided this FP16_Optimizer was built the same way as the one that was saved, so an
        # in-place copy_() from the saved tensors is enough.
        stored_groups = state_dict['fp32_from_fp16']
        for live_group, stored_group in zip(self.fp32_from_fp16_groups, stored_groups):
            for live_master, stored_master in zip(live_group, stored_group):
                live_master.data.copy_(stored_master.data)

    def step(self, closure=None): # could add clip option.
        """
        Run one update of the wrapped optimizer and refresh the fp16 model params.

        Without a closure, call :attr:`step` after ``fp16_optimizer_obj.backward(loss)``.
        The optimizer passed to :class:`FP16_Optimizer`'s constructor updates the fp32 master
        params, after which the updated values are copied into the fp16 params the constructor
        originally referenced, so another forward pass can be run right away.

        With a closure, :attr:`step` may be called without a preceding :attr:`backward(loss)`,
        exactly as in `ordinary Pytorch optimizer use`_ with closures.  Any ``loss.backward()``
        inside the closure must be replaced by ``fp16_optimizer_obj.backward(loss)``.

        If the ``overflow`` flag is set, a message is emitted and the update is skipped
        (the method returns ``None`` without touching any params).

        Args:
           closure (optional):  Closure handed to the underlying optimizer.  It should call :attr:`zero_grad()` on the :class:`FP16_Optimizer` object, compute the loss, call :attr:`backward(loss)`, and return the loss.

        Returns:
            Whatever the wrapped optimizer's ``step`` returns, or ``None`` when the step is skipped.

        Example with closure::

            # optimizer is assumed to be an FP16_Optimizer object, previously constructed from an
            # existing pytorch optimizer.
            for input, target in dataset:
                def closure():
                    optimizer.zero_grad()
                    output = model(input)
                    loss = loss_fn(output, target)
                    # loss.backward() becomes:
                    optimizer.backward(loss)
                    return loss
                optimizer.step(closure)

        .. warning::
            Currently, calling :attr:`step` with a closure is not compatible with dynamic loss scaling.

        .. _`ordinary Pytorch optimizer use`:
            http://pytorch.org/docs/master/optim.html#optimizer-step-closure
        """
        # NOTE(review): the value read here is never used below; the call is retained so the
        # sequence of calls made on the loss scaler is unchanged.
        scale = self.loss_scaler.loss_scale()

        if self.overflow:
            # maybe_print (from _amp_state) is used on purpose instead of self.print.
            skip_message = ("Gradient overflow.  Skipping step, reducing "
                            "loss scale to {}").format(self.loss_scaler.loss_scale())
            maybe_print(skip_message)
            return

        if closure is None:
            retval = self.optimizer.step()
        else:
            retval = self._step_with_closure(closure)

        # The wrapped optimizer only knows the fp32 masters; push the new values to the model.
        self._master_params_to_model_params()
        return retval

    def _step_with_closure(self, closure):
        """
        Run ``self.optimizer.step`` with a wrapper around the user's ``closure``.

        The wrapper refreshes the fp16 model params from the fp32 masters before every
        invocation except the first one of this step, then calls ``closure`` and keeps
        re-calling it for as long as ``self.overflow`` is set.

        Args:
            closure: Callable supplied by the user; it is expected to call
                ``optimizer.backward(loss)`` on this object and return the loss.

        Returns:
            The value returned by ``self.optimizer.step(wrapped_closure)``.
        """
        def wrapped_closure():
            # helpful for debugging
            # print("Calling wrapped_closure, first_closure_call_this_step = {}"
            #       .format(self.first_closure_call_this_step))
            if self.first_closure_call_this_step:
                # We expect that the fp16 params are initially fresh on entering self.step(),
                # so _master_params_to_model_params() is unnecessary the first time wrapped_closure()
                # is called within self.optimizer.step().
                self.first_closure_call_this_step = False
            else:
                # If self.optimizer.step() internally calls wrapped_closure more than once,
                # it may update the fp32 params after each call.  However, self.optimizer 
                # doesn't know about the fp16 params at all.  If the fp32 params get updated,
                # we can't rely on self.optimizer to refresh the fp16 params.  We need
                # to handle that manually:
                self._master_params_to_model_params()
            # Our API expects the user to give us ownership of the backward() call by
            # replacing all calls to loss.backward() with optimizer.backward(loss).
            # This requirement holds whether or not the call to backward() is made within a closure.
            # If the user is properly calling optimizer.backward(loss) within "closure," 
            # calling closure() here will give the fp32 master params fresh gradients
            # for the optimizer to play with, so all wrapped_closure needs to do is call 
            # closure() and return the loss.
            temp_loss = closure() 
            # Retry until a call to closure() leaves self.overflow unset.
            # NOTE(review): this relies on something inside closure() clearing self.overflow
            # (presumably via backward()/update_master_grads) -- confirm it always terminates.
            while(self.overflow):
                # NOTE(review): `scale` is assigned but never read.
                scale = self.loss_scaler.loss_scale()
                # self._update_scale(self.overflow) # now done at the end of backward
                print("OVERFLOW within closure! Skipping step, reducing loss scale to {}".format(
                      self.loss_scaler.loss_scale()))
                temp_loss = closure()
            return temp_loss

        retval = self.optimizer.step(wrapped_closure)

        # Re-arm the flag so the next step() again skips the first master->model refresh.
        self.first_closure_call_this_step = True

        return retval

    def backward(self, loss, update_master_grads=True, retain_graph=False):
        """
        Scale ``loss``, backpropagate it, and (optionally) refresh the master gradients.

        Conceptually :attr:`backward` does the following:

        1. fp32_loss = loss.float() (see the first Note)
        2. scaled_loss = fp32_loss*loss_scale
        3. scaled_loss.backward(), accumulating scaled gradients into the ``.grad`` attributes of the model's leaves (fp16, fp32, or a mixture, depending on how the model was defined).
        4. fp16 grads are copied to the master params' ``.grad`` attributes, which are guaranteed to be fp32.
        5. Master grads are divided by loss_scale.

        Steps 4 and 5 are carried out by :attr:`update_master_grads` and only run when
        ``update_master_grads`` is true.  Afterwards the master params hold fresh gradients and
        :attr:`step` may be called.

        .. note::
            The loss is converted to fp32 before the loss scale is applied, which gives some
            extra protection against overflow when an fp16 loss is supplied.  For maximum
            overflow safety, compute the loss criterion (MSE, cross entropy, etc) in fp32
            before passing it to :attr:`backward`.

        .. warning::
            Gradients found in the model's leaves after :attr:`backward` should not be regarded
            as valid in general: they may have been scaled, and with dynamic loss scaling the
            scale factor may change over time.  Only the master gradients should be regarded as
            valid; retrieve them with :attr:`inspect_master_grad_data()`.

        Args:
            loss:  The loss output by the user's model.  loss may be either float or half (but see first Note above).
            update_master_grads (bool, optional, default=True):  Copy fp16 grads to fp32 grads on this call.  Passing False delays the copy, which avoids redundant fp16->fp32 grad copies when :attr:`backward` is called on several losses in one iteration.  The caller is then responsible for calling :attr:`update_master_grads` before :attr:`step`.
            retain_graph (bool, optional, default=False):  Forwarded as ``retain_graph`` to the internal ``backward`` call on the scaled loss.  When accumulating gradients from several backward passes before ``optimizer.step``, passing ``update_master_grads=False`` is also recommended (see Example below).

        Example::

            # Ordinary operation:
            optimizer.backward(loss)

            # Naive operation with multiple losses (technically valid, but less efficient):
            # fp32 grads will be correct after the second call,  but
            # the first call incurs an unnecessary fp16->fp32 grad copy.
            optimizer.backward(loss1)
            optimizer.backward(loss2)

            # More efficient way to handle multiple losses:
            # The fp16->fp32 grad copy is delayed until fp16 grads from all
            # losses have been accumulated.
            optimizer.backward(loss1, update_master_grads=False)
            optimizer.backward(loss2, update_master_grads=False)
            optimizer.update_master_grads()
        """
        # Idea left by the original author: retry the backward pass with retain_graph=True
        # until a workable loss scale is found, then do a final dummy pass with
        # retain_graph=False to free the graph.  That would avoid discarding the iteration
        # but probably would not be more efficient overall.
        fp32_loss = loss.float()
        scaled_loss = fp32_loss * self.loss_scaler.loss_scale()
        scaled_loss.backward(retain_graph=retain_graph)
        if not update_master_grads:
            return
        self.update_master_grads()

    def update_master_grads(self):
        """
        Transfer the ``.grad`` of the stored fp16 params to the ``.grad`` of the fp32 master
        params that the optimizer updates directly, going through ``self.loss_scaler.unscale``
        with the current loss scale.  Params that are fp32 in the model are passed to
        ``unscale`` with their own grads as both source and destination.  Finally
        ``self.overflow`` is set from ``self.loss_scaler.update_scale()``.

        :attr:`update_master_grads` only needs to be called explicitly if
        ``fp16_optimizer_obj.backward`` was called with ``update_master_grads=False``.
        """
        scaler = self.loss_scaler
        scaler.clear_overflow_state()

        # fp16 model params -> fp32 master params.
        # The original author flagged this path as an incremental shim kept to make tests
        # pass, and noted that FP16_Optimizer should not be used by anyone.
        half_params = self.all_fp16_params
        if len(half_params) > 0:
            half_grads, master_grads = [], []
            for half_param, master_param in zip(half_params, self.all_fp32_from_fp16_params):
                if half_param.grad is None:
                    continue
                half_grads.append(half_param.grad)
                if master_param.grad is None:
                    # First time this master receives a gradient: give it a buffer to fill.
                    master_param.grad = torch.empty_like(master_param)
                master_grads.append(master_param.grad)
            scaler.unscale(half_grads, master_grads, scaler.loss_scale())

        # Params that are already fp32: source and destination grads are the same tensors.
        fp32_params = self.all_fp32_from_fp32_params
        if len(fp32_params) > 0:
            source_grads, target_grads = [], []
            for param in fp32_params:
                if param.grad is None:
                    continue
                source_grads.append(param.grad)
                target_grads.append(param.grad)
            scaler.unscale(source_grads, target_grads, scaler.loss_scale())

        self.overflow = scaler.update_scale()


    def inspect_master_grad_data(self):
        """
        Return the master gradients, grouped like the optimizer's param groups.

        With :class:`FP16_Optimizer`, the ``.grad`` attributes of a model's fp16 leaves should
        not be regarded as truthful because they might be scaled.  After
        :attr:`fp16_optimizer_obj.backward(loss)`, provided no overflow was encountered, the
        fp32 master params' ``.grad`` attributes hold valid gradients already divided by the
        loss scale.  Because :class:`FP16_Optimizer` flattens some parameters, reaching them
        directly may be nonintuitive; this method exposes them per param group instead.

        Returns:
            ``None`` (after printing a warning) while ``self.overflow`` is set.  Otherwise a
            list of lists, one inner list per parameter group, holding the ``.grad.data`` of
            each fp32 master param in that group, or ``None`` for a param whose ``.grad`` is
            ``None``.
        """
        if self.overflow:
            print("Warning:  calling FP16_Optimizer.inspect_master_grad_data while in an overflow state.  "
                  "Gradients are currently invalid (may be inf, nan, or stale).  Returning None.")
            return None

        # The wrapped optimizer holds references to the master params only.
        return [
            [None if master.grad is None else master.grad.data
             for master in group['params']]
            for group in self.optimizer.param_groups
        ]


    # Promote loss scale so it can be retrieved or set via "fp16_optimizer_instance.loss_scale"
    def _get_loss_scale(self):
        """Return the value reported by ``self.loss_scaler.loss_scale()``."""
        scaler = self.loss_scaler
        return scaler.loss_scale()

    def _set_loss_scale(self, value):
        """Store ``value`` in the ``_loss_scale`` attribute of ``self.loss_scaler``."""
        scaler = self.loss_scaler
        scaler._loss_scale = value

    loss_scale = property(fget=_get_loss_scale, fset=_set_loss_scale)

    # Promote state so it can be retrieved or set via "fp16_optimizer_instance.state"
    def _get_state(self):
        """Return the ``state`` attribute of the wrapped optimizer."""
        wrapped = self.optimizer
        return wrapped.state

    def _set_state(self, value):
        """Replace the ``state`` attribute of the wrapped optimizer with ``value``."""
        wrapped = self.optimizer
        wrapped.state = value

    state = property(fget=_get_state, fset=_set_state)

    # Promote param_groups so it can be retrieved or set via "fp16_optimizer_instance.param_groups"
    # (for example, to adjust the learning rate)
    def _get_param_groups(self):
        """Return the ``param_groups`` attribute of the wrapped optimizer."""
        wrapped = self.optimizer
        return wrapped.param_groups

    def _set_param_groups(self, value):
        """Replace the ``param_groups`` attribute of the wrapped optimizer with ``value``."""
        wrapped = self.optimizer
        wrapped.param_groups = value

    param_groups = property(fget=_get_param_groups, fset=_set_param_groups)


================================================
FILE: apex/apex/fp16_utils/fp16util.py
================================================
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors


class tofp16(nn.Module):
    """
    Parameter-free module whose forward pass casts its input with ``.half()``::

        def forward(self, input):
            return input.half()
    """

    def __init__(self):
        super(tofp16, self).__init__()

    def forward(self, input):
        half_input = input.half()
        return half_input


def BN_convert_float(module):
    """
    Utility function for network_to_half().

    Walks ``module`` and all of its descendants and calls ``.float()`` on every
    batchnorm module whose ``affine`` attribute is ``True``.  Returns ``module``.

    Retained for legacy purposes.
    """
    batchnorm_base = torch.nn.modules.batchnorm._BatchNorm
    # modules() yields ``module`` itself first and then every descendant.
    for candidate in module.modules():
        if not isinstance(candidate, batchnorm_base):
            continue
        if candidate.affine is True:
            candidate.float()
    return module


def network_to_half(network):
    """
    Convert model to half precision in a batchnorm-safe way.

    The result is an ``nn.Sequential`` made of a :class:`tofp16` input cast followed by
    ``network`` after ``.half()`` and :func:`BN_convert_float` have been applied to it.

    Retained for legacy purposes. It is recommended to use FP16Model.
    """
    input_cast = tofp16()
    half_network = network.half()
    batchnorm_safe_network = BN_convert_float(half_network)
    return nn.Sequential(input_cast, batchnorm_safe_network)


def convert_module(module, dtype):
    """
    Cast the floating-point parameters, parameter gradients and buffers that belong
    directly to ``module`` (``recurse=False``) to ``dtype``.  Non-floating-point
    tensors are left alone.
    """
    for param in module.parameters(recurse=False):
        if param is None:
            continue
        if param.data.dtype.is_floating_point:
            param.data = param.data.to(dtype=dtype)
        # The gradient is checked on its own dtype, independently of the parameter's.
        if param._grad is None:
            continue
        if param._grad.data.dtype.is_floating_point:
            param._grad.data = param._grad.data.to(dtype=dtype)

    for buf in module.buffers(recurse=False):
        if buf is None:
            continue
        if not buf.data.dtype.is_floating_point:
            continue
        buf.data = buf.data.to(dtype=dtype)


def convert_network(network, dtype):
    """
    Cast a network's parameters and buffers to ``dtype``, module by module.

    Batchnorm modules with ``affine is True`` are skipped entirely.  RNN modules have
    ``flatten_parameters()`` called on them after conversion.  Returns ``network``.
    """
    batchnorm_base = torch.nn.modules.batchnorm._BatchNorm
    rnn_bases = (torch.nn.RNNBase, torch.nn.modules.rnn.RNNBase)
    for module in network.modules():
        keeps_precision = isinstance(module, batchnorm_base) and module.affine is True
        if keeps_precision:
            continue
        convert_module(module, dtype)
        if isinstance(module, rnn_bases):
            module.flatten_parameters()
    return network


class FP16Model(nn.Module):
    """
    Convert model to half precision in a batchnorm-safe way.

    The wrapped network is converted with :func:`convert_network` at construction time,
    and every positional input is cast with ``.half()`` before being forwarded to it.
    """

    def __init__(self, network):
        super(FP16Model, self).__init__()
        half_network = convert_network(network, dtype=torch.half)
        self.network = half_network

    def forward(self, *inputs):
        half_inputs = [tensor.half() for tensor in inputs]
        return self.network(*half_inputs)


def backwards_debug_hook(grad):
    """
    Debugging gradient hook that always raises.

    Args:
        grad: The gradient passed to the hook; it is ignored.

    Raises:
        RuntimeError: Always.
    """
    raise RuntimeError("master_params received a gradient in the backward pass!")

def prep_param_lists(model, flat_master=False):
    """
    Creates a list of FP32 master parameters for a given model, as in
    `Training Neural Networks with Mixed Precision:  Real Examples`_.

    Only parameters with ``requires_grad=True`` are considered.

    Args:
        model (torch.nn.Module): Existing Pytorch model
        flat_master (bool, optional, default=False):  Flatten the master parameters into a single tensor, as a performance optimization.
    Returns:
        A tuple (``model_params``, ``master_params``). ``model_params`` is a list of the model's parameters for later use with :func:`model_grads_to_master_grads` and :func:`master_params_to_model_params`.  ``master_params`` is a list of FP32 master parameters.  If ``flat_master=True``, ``master_params`` will be a list with one element, whose ``.grad`` is pre-allocated and zero-filled.

    Example::

        model_params, master_params = prep_param_lists(model)

    .. warning::
        Currently, if ``flat_master=True``, all the model's parameters must be the same type.  If the model has parameters of different types, use ``flat_master=False``, or use :class:`FP16_Optimizer`.

    .. _`Training Neural Networks with Mixed Precision:  Real Examples`:
        http://on-demand.gputechconf.com/gtc/2018/video/S81012/
    """
    model_params = [param for param in model.parameters() if param.requires_grad]

    if not flat_master:
        # One independent fp32 leaf tensor per model parameter.
        master_params = [param.clone().float().detach() for param in model_params]
        for param in master_params:
            param.requires_grad = True
        return model_params, master_params

    try:
        # flatten_dense_tensors returns a contiguous flat array.
        # http://pytorch.org/docs/master/_modules/torch/_utils.html
        flat = _flatten_dense_tensors([param.data for param in model_params]).float()
    except Exception:
        # Give the user a hint about the most likely cause, then let the error propagate.
        print("Error in prep_param_lists:  model may contain a mixture of parameters "
              "of different types.  Use flat_master=False, or use FP16_Optimizer.")
        raise

    master_params = torch.nn.Parameter(flat)
    master_params.requires_grad = True
    # master_params.register_hook(backwards_debug_hook)
    if master_params.grad is None:
        # Pre-allocate the gradient buffer that model_grads_to_master_grads copies into.
        # It is zero-filled so that it never exposes uninitialised memory.
        master_params.grad = torch.zeros_like(master_params.data)
    return model_params, [master_params]


def model_grads_to_master_grads(model_params, master_params, flat_master=False):
    """
    Copy model gradients to master gradients.

    With ``flat_master=True`` every model parameter must already have a ``.grad`` and
    ``master_params[0].grad`` must already exist; the flattened model gradients are
    copied into it.  Otherwise each master's ``.grad`` is filled from the matching model
    parameter's ``.grad`` (allocating the master gradient on first use), or set to
    ``None`` when the model parameter has no gradient.

    Args:
        model_params:  List of model parameters created by :func:`prep_param_lists`.
        master_params:  List of FP32 master parameters created by :func:`prep_param_lists`.  If ``master_params`` was created with ``flat_master=True``, ``flat_master=True`` should also be supplied to :func:`model_grads_to_master_grads`.
    """
    if flat_master:
        # The flattening may incur one more deep copy than is necessary.
        master_params[0].grad.data.copy_(
            _flatten_dense_tensors([p.grad.data for p in model_params]))
        return

    for model, master in zip(model_params, master_params):
        if model.grad is None:
            master.grad = None
            continue
        if master.grad is None:
            # empty_like keeps the master's exact shape, including 0-dim (scalar)
            # parameters, for which ``new(*size)`` would build a tensor of shape (0,).
            master.grad = torch.empty_like(master.data)
        master.grad.data.copy_(model.grad.data)


def master_params_to_model_params(model_params, master_params, flat_master=False):
    """
    Copy master parameters to model parameters.

    With ``flat_master=True`` the single flat master tensor is first split back into
    pieces shaped like ``model_params``; otherwise masters and model params are paired
    one to one.  Each model param's ``.data`` is overwritten in place.

    Args:
        model_params:  List of model parameters created by :func:`prep_param_lists`.
        master_params:  List of FP32 master parameters created by :func:`prep_param_lists`.  If ``master_params`` was created with ``flat_master=True``, ``flat_master=True`` should also be supplied to :func:`master_params_to_model_params`.
    """
    if flat_master:
        sources = _unflatten_dense_tensors(master_params[0].data, model_params)
    else:
        # Lazy on purpose: ``.data`` is only read for masters that actually get paired.
        sources = (master.data for master in master_params)

    for target, source in zip(model_params, sources):
        target.data.copy_(source)

# Backward compatibility fixes

def to_python_float(t):
    """
    Return ``t.item()`` when ``t`` has an ``item`` attribute, otherwise ``t[0]``.

    The fallback keeps this usable with objects that have no ``item()`` method.
    """
    return t.item() if hasattr(t, 'item') else t[0]

# The first two dot-separated fields of torch.__version__, as integers.
TORCH_MAJOR, TORCH_MINOR = (int(torch.__version__.split('.')[index]) for index in (0, 1))

# torch 0.4 and older expose clip_grad_norm; later versions use the in-place-named
# clip_grad_norm_.  Either way the chosen function is published as clip_grad_norm.
clip_grad_norm = (
    torch.nn.utils.clip_grad_norm
    if TORCH_MAJOR == 0 and TORCH_MINOR <= 4
    else torch.nn.utils.clip_grad_norm_
)


================================================
FILE: apex/apex/fp16_utils/loss_scaler.py
================================================
import torch

# item() is a recent addition, so this helps with backward compatibility.
def to_python_float(t):
    """
    Extract a Python scalar from ``t``: ``t[0]`` for objects without an ``item``
    attribute, ``t.item()`` for everything else.
    """
    if not hasattr(t, 'item'):
        return t[0]
    return t.item()

class LossScaler:
    """
    Class that manages a static loss scale.  This class is intended to interact with
    :class:`FP16_Optimizer`, and should not be directly manipulated by the user.

    Use of :class:`LossScaler` is enabled via the ``static_loss_scale`` argument to
    :class:`FP16_Optimizer`'s constructor.

    The overflow-related methods exist so this class can be used interchangeably with
    :class:`DynamicLossScaler`; here they never report an overflow and never change
    the scale.

    Args:
        scale (float, optional, default=1):  The loss scale.
    """

    def __init__(self, scale=1):
        self.cur_scale = scale

    # `params` is a list / generator of torch.Variable
    def has_overflow(self, params):
        """Always return ``False``; a static scale does no overflow detection."""
        return False

    # `x` is a torch.Tensor
    @staticmethod
    def _has_inf_or_nan(x):
        """Always return ``False``.  Callable on the class or on an instance."""
        return False

    def update_scale(self, overflow):
        """Do nothing; the scale is fixed."""
        pass

    @property
    def loss_scale(self):
        """The loss scale supplied at construction."""
        return self.cur_scale

    def scale_gradient(self, module, grad_in, grad_out):
        """Return a tuple with every element of ``grad_in`` multiplied by the loss scale."""
        return tuple(self.loss_scale * g for g in grad_in)

    def backward(self, loss, retain_graph=False):
        """Multiply ``loss`` by the loss scale and call ``backward`` on the result."""
        scaled_loss = loss*self.loss_scale
        scaled_loss.backward(retain_graph=retain_graph)

class DynamicLossScaler:
    """
    Class that manages dynamic loss scaling.  It is recommended to use :class:`DynamicLossScaler`
    indirectly, by supplying ``dynamic_loss_scale=True`` to the constructor of
    :class:`FP16_Optimizer`.  However, it's important to understand how :class:`DynamicLossScaler`
    operates, because the default options can be changed using
    the ``dynamic_loss_args`` argument to :class:`FP16_Optimizer`'s constructor.

    Loss scaling is designed to combat the problem of underflowing gradients encountered at long
    times when training fp16 networks.  Dynamic loss scaling begins by attempting a very high loss
    scale.  Ironically, this may result in OVERflowing gradients.  If overflowing gradients are
    encountered, :class:`DynamicLossScaler` informs :class:`FP16_Optimizer` that an overflow has
    occurred.
    :class:`FP16_Optimizer` then skips the update step for this particular iteration/minibatch,
    and :class:`DynamicLossScaler` adjusts the loss scale to a lower value.
    If a certain number of iterations occur without overflowing gradients detected,
    :class:`DynamicLossScaler` increases the loss scale once more.
    In this way :class:`DynamicLossScaler` attempts to "ride the edge" of
    always using the highest loss scale possible without incurring overflow.

    Args:
        init_scale (float, optional, default=2**32):  Initial loss scale attempted by :class:`DynamicLossScaler.`
        scale_factor (float, optional, default=2.0):  Factor used when adjusting the loss scale. If an overflow is encountered, the loss scale is readjusted to loss scale/``scale_factor`` (but never below 1).  If ``scale_window`` consecutive iterations take place without an overflow, the loss scale is readjusted to loss_scale*``scale_factor``.
        scale_window (int, optional, default=1000):  Number of consecutive iterations without an overflow to wait before increasing the loss scale.
    """

    def __init__(self,
                 init_scale=2**32,
                 scale_factor=2.,
                 scale_window=1000):
        self.cur_scale = init_scale
        self.cur_iter = 0
        self.last_overflow_iter = -1
        self.scale_factor = scale_factor
        self.scale_window = scale_window

    # `params` is a list / generator of torch.Variable
    def has_overflow(self, params):
        """Return ``True`` if any param in ``params`` has a ``.grad`` containing inf or NaN."""
        for p in params:
            if p.grad is not None and DynamicLossScaler._has_inf_or_nan(p.grad.data):
                return True

        return False

    # `x` is a torch.Tensor
    @staticmethod
    def _has_inf_or_nan(x):
        """
        Return ``True`` if the fp32 sum of ``x`` is +inf, -inf or NaN, or if converting
        that sum to a Python float fails with a "value cannot be converted" error.
        Any other ``RuntimeError`` is re-raised.  Callable on the class or on an instance.
        """
        try:
            # if x is half, the .float() incurs an additional deep copy, but it's necessary if
            # Pytorch's .sum() creates a one-element tensor of the same type as x
            # (which is true for some recent version of pytorch).
            cpu_sum = float(x.float().sum())
            # More efficient version that can be used if .sum() returns a Python scalar
            # cpu_sum = float(x.sum())
        except RuntimeError as instance:
            # Only an overflow during conversion counts as "has inf/nan"; a RuntimeError
            # from any other cause must propagate.  str() is used instead of args[0] so
            # that an exception carrying no args cannot mask itself with an IndexError.
            if "value cannot be converted" not in str(instance):
                raise
            return True
        # NaN is the only value that compares unequal to itself.
        return cpu_sum in (float('inf'), -float('inf')) or cpu_sum != cpu_sum

    # `overflow` is boolean indicating whether the gradient overflowed
    def update_scale(self, overflow):
        """
        Advance the iteration counter and adjust the scale: divide by ``scale_factor``
        (floored at 1) after an overflow, or multiply by ``scale_factor`` when the number
        of iterations since the last overflow is a multiple of ``scale_window``.
        """
        if overflow:
            # self.cur_scale /= self.scale_factor
            self.cur_scale = max(self.cur_scale/self.scale_factor, 1)
            self.last_overflow_iter = self.cur_iter
        elif (self.cur_iter - self.last_overflow_iter) % self.scale_window == 0:
            self.cur_scale *= self.scale_factor
        self.cur_iter += 1

    @property
    def loss_scale(self):
        """The current loss scale."""
        return self.cur_scale

    def scale_gradient(self, module, grad_in, grad_out):
        """Return a tuple with every element of ``grad_in`` multiplied by the loss scale."""
        return tuple(self.loss_scale * g for g in grad_in)

    def backward(self, loss, retain_graph=False):
        """Multiply ``loss`` by the loss scale and call ``backward`` on the result."""
        scaled_loss = loss*self.loss_scale
        scaled_loss.backward(retain_graph=retain_graph)
        
##############################################################        
# Example usage below here -- assuming it's in a separate file
##############################################################
"""
TO-DO separate out into an example.
if __name__ == "__main__":
    import torch
    from torch.autograd import Variable
    from dynamic_loss_scaler import DynamicLossScaler

    # N is batch size; D_in is input dimension;
    # H is hidden dimension; D_out is output dimension.
    N, D_in, H, D_out = 64, 1000, 100, 10

    # Create random Tensors to hold inputs and outputs, and wrap them in Variables.
    x = Variable(torch.randn(N, D_in), requires_grad=False)
    y = Variable(torch.randn(N, D_out), requires_grad=False)

    w1 = Variable(torch.randn(D_in, H), requires_grad=True)
    w2 = Variable(torch.randn(H, D_out), requires_grad=True)
    parameters = [w1, w2]

    learning_rate = 1e-6
    optimizer = torch.optim.SGD(parameters, lr=learning_rate)
    loss_scaler = DynamicLossScaler()

    for t in range(500):
        y_pred = x.mm(w1).clamp(min=0).mm(w2)
        loss = (y_pred - y).pow(2).sum() * loss_scaler.loss_scale
        print('Iter {} loss scale: {}'.format(t, loss_scaler.loss_scale))
        print('Iter {} scaled loss: {}'.format(t, loss.data[0]))
        print('Iter {} unscaled loss: {}'.format(t, loss.data[0] / loss_scaler.loss_scale))

        # Run backprop
        optimizer.zero_grad()
        loss.backward()
        
        # Check for overflow
        has_overflow = loss_scaler.has_overflow(parameters)
        
        # If no overflow, unscale grad and update as usual
        if not has_overflow:
            for param in parameters:
                param.grad.data.mul_(1. / loss_scaler.loss_scale)
            optimizer.step()
        # Otherwise, don't do anything -- ie, skip iteration
        else:
            print('OVERFLOW!')

        # Update loss scale for next iteration
        loss_scaler.update_scale(has_overflow)

"""


================================================
FILE: apex/apex/multi_tensor_apply/__init__.py
================================================
from .multi_tensor_apply import MultiTensorApply

# Module-level applier instance, constructed with a chunk size of 2048*32.
multi_tensor_applier = MultiTensorApply(chunk_size=2048*32)


================================================
FILE: apex/apex/multi_tensor_apply/multi_tensor_apply.py
================================================
import torch

class MultiTensorApply(object):
    """
    Callable that forwards a fixed chunk size plus the caller's arguments to an op.

    Whether the ``amp_C`` extension can be imported is recorded on the class
    (``available``), along with the import error (``import_err``) when it cannot.
    """
    # Class-wide flags shared by every instance.
    available = False
    warned = False

    def __init__(self, chunk_size):
        try:
            import amp_C
        except ImportError as err:
            MultiTensorApply.available = False
            MultiTensorApply.import_err = err
        else:
            MultiTensorApply.available = True
            # chunk_size is only stored when the extension imported successfully.
            self.chunk_size = chunk_size

    def check_avail(self):
        """Raise ``RuntimeError`` if the ``amp_C`` import failed."""
        if not MultiTensorApply.available:
            raise RuntimeError(
                "Attempted to call MultiTensorApply method, but MultiTensorApply "
                "is not available, possibly because Apex was installed without "
                "--cpp_ext --cuda_ext.  Original import error message:",
                MultiTensorApply.import_err)

    def __call__(self, op, noop_flag_buffer, tensor_lists, *args):
        """Return ``op(chunk_size, noop_flag_buffer, tensor_lists, *args)`` after the availability check."""
        self.check_avail()
        call_args = (self.chunk_size, noop_flag_buffer, tensor_lists) + args
        return op(*call_args)


================================================
FILE: apex/apex/normalization/__init__.py
================================================
from .fused_layer_norm import FusedLayerNorm


================================================
FILE: apex/apex/normalization/fused_layer_norm.py
================================================
import math
import torch
import numbers
from torch.nn.parameter import Parameter
from torch.nn import init
from torch.nn import functional as F
import importlib

class FusedLayerNormAffineFunction(torch.autograd.Function):
  """Autograd function wrapping the affine kernels of ``fused_layer_norm_cuda``.

  Instances carry ``normalized_shape`` and ``eps`` and are called with
  ``(input, weight, bias)``.
  """

  def __init__(self, normalized_shape, eps=1e-6):
    # Publish the extension under a module-level name so that forward and
    # backward can reach it.
    global fused_layer_norm_cuda
    fused_layer_norm_cuda = importlib.import_module("fused_layer_norm_cuda")

    self.eps = eps
    self.normalized_shape = normalized_shape

  def forward(self, input, weight, bias):
    """Run ``forward_affine`` on contiguous copies and stash what backward needs."""
    x = input.contiguous()
    gamma = weight.contiguous()
    beta = bias.contiguous()
    result = fused_layer_norm_cuda.forward_affine(
        x, self.normalized_shape, gamma, beta, self.eps)
    output, mean, invvar = result
    self.save_for_backward(x, gamma, beta, mean, invvar)
    return output

  def backward(self, grad_output):
    """Return ``(grad_input, grad_weight, grad_bias)`` from ``backward_affine``."""
    x, gamma, beta, mean, invvar = self.saved_tensors
    grads = fused_layer_norm_cuda.backward_affine(
        grad_output.contiguous(), mean, invvar,
        x, self.normalized_shape,
        gamma, beta, self.eps)
    grad_input, grad_weight, grad_bias = grads
    return grad_input, grad_weight, grad_bias
    
class FusedLayerNormFunction(torch.autograd.Function):
  """Autograd function wrapping the non-affine kernels of ``fused_layer_norm_cuda``.

  Instances carry ``normalized_shape`` and ``eps`` and are called with a
  single ``input`` tensor.
  """

  def __init__(self, normalized_shape, eps=1e-6):
    # Publish the extension under a module-level name so that forward and
    # backward can reach it.
    global fused_layer_norm_cuda
    fused_layer_norm_cuda = importlib.import_module("fused_layer_norm_cuda")
    self.eps = eps
    self.normalized_shape = normalized_shape

  def forward(self, input):
    """Run the extension's ``forward`` on a contiguous input and save state."""
    x = input.contiguous()
    result = fused_layer_norm_cuda.forward(x, self.normalized_shape, self.eps)
    output, mean, invvar = result
    self.save_for_backward(x, mean, invvar)
    return output

  def backward(self, grad_output):
    """Return the input gradient computed by the extension's ``backward``."""
    x, mean, invvar = self.saved_tensors
    return fused_layer_norm_cuda.backward(
        grad_output.contiguous(), mean, invvar,
        x, self.normalized_shape,
        self.eps)

def fused_layer_norm_affine(input, normalized_shape, weight, bias, eps=1e-6):
    """Build a FusedLayerNormAffineFunction and apply it to (input, weight, bias)."""
    affine_fn = FusedLayerNormAffineFunction(normalized_shape, eps)
    return affine_fn(input, weight, bias)

def fused_layer_norm(input, normalized_shape, eps=1e-6):
    """Build a FusedLayerNormFunction and apply it to ``input``."""
    plain_fn = FusedLayerNormFunction(normalized_shape, eps)
    return plain_fn(input)

class FusedLayerNorm(torch.nn.Module):
    r"""Layer Normalization (see `Layer Normalization`_) as a module.

    CUDA inputs are dispatched to the ``fused_layer_norm_cuda`` extension,
    which is imported when the module is constructed.  Inputs that are not on
    CUDA are handed to :func:`torch.nn.functional.layer_norm`.

    .. math::
        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The statistics are taken over the trailing dimensions of the input, whose
    sizes must match :attr:`normalized_shape`.  When
    :attr:`elementwise_affine` is ``True``, :math:`\gamma` and :math:`\beta`
    are learnable parameters of shape :attr:`normalized_shape`.

    .. note::
        Batch Normalization and Instance Normalization apply one scalar scale
        and bias per channel/plane (their :attr:`affine` option); Layer
        Normalization applies a per-element scale and bias
        (:attr:`elementwise_affine`).

    The same input-derived statistics are used in training and evaluation
    modes.

    Args:
        normalized_shape (int or list or torch.Size): trailing shape of the
            expected input, i.e. inputs of size

            .. math::
                [* \times \text{normalized}\_\text{shape}[0] \times \text{normalized}\_\text{shape}[1]
                    \times \ldots \times \text{normalized}\_\text{shape}[-1]]

            A single integer is treated as a one-element shape, so only the
            last dimension (of that size) is normalized.
        eps: value added to the denominator for numerical stability.
            Default: 1e-5
        elementwise_affine: when ``True``, the module owns learnable
            per-element weights (initialized to ones) and biases (initialized
            to zeros). Default: ``True``.

    Shape:
        - Input: :math:`(N, *)`
        - Output: :math:`(N, *)` (same shape as input)

    Examples::

        >>> input = torch.randn(20, 5, 10, 10)
        >>> # With Learnable Parameters
        >>> m = apex.normalization.FusedLayerNorm(input.size()[1:])
        >>> # Without Learnable Parameters
        >>> m = apex.normalization.FusedLayerNorm(input.size()[1:], elementwise_affine=False)
        >>> # Normalize over last two dimensions
        >>> m = apex.normalization.FusedLayerNorm([10, 10])
        >>> # Normalize over last dimension of size 10
        >>> m = apex.normalization.FusedLayerNorm(10)
        >>> # Activating the module
        >>> output = m(input)

    .. _`Layer Normalization`: https://arxiv.org/abs/1607.06450
    """

    def __init__(self, normalized_shape, eps=1e-5, elementwise_affine=True):
        super(FusedLayerNorm, self).__init__()

        # Publish the extension under a module-level name; the autograd
        # functions used in forward() look it up there.
        global fused_layer_norm_cuda
        fused_layer_norm_cuda = importlib.import_module("fused_layer_norm_cuda")

        # A bare integer means "normalize over one trailing dimension".
        if isinstance(normalized_shape, numbers.Integral):
            shape = (normalized_shape,)
        else:
            shape = normalized_shape

        self.normalized_shape = torch.Size(shape)
        self.eps = eps
        self.elementwise_affine = elementwise_affine

        # Register weight, then bias: real parameters when affine, None otherwise.
        for param_name in ('weight', 'bias'):
            value = Parameter(torch.Tensor(*shape)) if self.elementwise_affine else None
            self.register_parameter(param_name, value)
        self.reset_parameters()

    def reset_parameters(self):
        """Set weight to ones and bias to zeros (no-op without affine params)."""
        if not self.elementwise_affine:
            return
        init.ones_(self.weight)
        init.zeros_(self.bias)

    def forward(self, input):
        """Normalize ``input``, choosing the implementation by device and affine mode."""
        if input.is_cuda:
            if self.elementwise_affine:
                affine_fn = FusedLayerNormAffineFunction(self.normalized_shape, self.eps)
                return affine_fn(input, self.weight, self.bias)
            plain_fn = FusedLayerNormFunction(self.normalized_shape, self.eps)
            return plain_fn(input)
        # Non-CUDA tensors use the stock functional implementation.
        return F.layer_norm(
            input, self.normalized_shape, self.weight, self.bias, self.eps)

    def extra_repr(self):
        """Return the argument summary shown in the module's repr."""
        template = '{normalized_shape}, eps={eps}, elementwise_affine={elementwise_affine}'
        return template.format(**self.__dict__)


================================================
FILE: apex/apex/optimizers/__init__.py
================================================
from .fused_adam import FusedAdam
from .fp16_optimizer import FP16_Optimizer


================================================
FILE: apex/apex/optimizers/fp16_optimizer.py
================================================
import torch
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors

class FP16_Optimizer(object):
    """
    :class:`FP16_Optimizer` A cutdown version of apex.fp16_utils.FP16_Optimizer.
    Designed only to wrap apex.optimizers.FusedAdam.
    Refer to apex.fp16_utils documents for more information.

    Example::

        model = torch.nn.Linear(D_in, D_out).cuda().half()
        optimizer = apex.optimizers.FusedAdam(model.parameters())
        # Name the FP16_Optimizer instance to replace the existing optimizer
        # (recommended but not required):
        optimizer = FP16_Optimizer(optimizer, static_loss_scale = 128.0)
        ...
        # loss.backward() becomes:
        optimizer.backward(loss)
        ...

    Example with dynamic loss scaling::

        ...
        optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
                                   # optional arg to control dynamic loss scaling behavior
                                   # dynamic_loss_args={'scale_window' : 500})
                                   # Usually, dynamic_loss_args is not necessary.
    """

    def __init__(self,
                 init_optimizer,
                 static_loss_scale=1.0,
                 dynamic_loss_scale=False,
                 dynamic_loss_args=None,
                 verbose=True):
        """
        Wrap ``init_optimizer`` and replace each of its param groups with a
        single flattened fp32 master tensor.

        Args:
            init_optimizer: optimizer to wrap; its ``param_groups`` are rewritten in place.
            static_loss_scale: loss scale used when ``dynamic_loss_scale`` is False.
            dynamic_loss_scale: enable dynamic loss scaling (starts at 2**16).
            dynamic_loss_args: must be None; custom dynamic-scaling args are not supported.
            verbose: print a message when a dynamic-scale overflow is detected.

        Raises:
            SystemError: if CUDA is unavailable, or if ``dynamic_loss_args`` is
                given together with ``dynamic_loss_scale``.
        """

        # The fused optimizer does all the work. We need this layer for two reason:
        # 1. maintain same user API from apex.fp16_utils
        # 2. keep common stuff here in case we need to add new fused optimizer later

        # differences from apex.fp16_utils:
        # - assume all model params in fp16
        # - assume all params requires grad
        # - flat by groups, not keeping state. TODO: remove state explicitly?
        # - master grad and unflat master weight never exist. TODO: a way to save out unflat master?

        # is_available is a function: it must be *called*. Testing the bare
        # function object is always truthy, which silently disabled this guard.
        if not torch.cuda.is_available():
            raise SystemError("Cannot use fp16 without CUDA.")
        self.optimizer = init_optimizer

        # param flattened by groups
        self.fp16_groups = []
        self.fp16_groups_flat = []
        self.fp32_groups_flat = []

        # loop to deal with groups
        for i, param_group in enumerate(self.optimizer.param_groups):
            # push this group to list before modify
            self.fp16_groups.append(param_group['params'])
            # init fp16 weight buffer, flattened
            self.fp16_groups_flat.append(_flatten_dense_tensors([p.clone().detach() for p in self.fp16_groups[i]]))
            # set model fp16 weight to slices of flattened buffer
            updated_params = _unflatten_dense_tensors(self.fp16_groups_flat[i], self.fp16_groups[i])
            for p, q in zip(self.fp16_groups[i], updated_params):
                p.data = q.data
            # init master weight, flattened
            self.fp32_groups_flat.append(self.fp16_groups_flat[i].clone().float().detach())
            # modify optimizer to have flat master weight
            self.fp32_groups_flat[i].requires_grad = True # keep this in case internal optimizer uses it
            param_group['params'] = [self.fp32_groups_flat[i]]

        # we may have a way of fusing dynamic scale. Do not support for now
        if dynamic_loss_scale:
            if dynamic_loss_args is not None:
                raise SystemError("Do not support dynamic loss scale args for now.")
            self.dynamic_loss_scale = True
            self.cur_scale = 2**16
            self.cur_iter = 0
            self.last_overflow_iter = -1
            self.scale_factor = 2
            self.scale_window = 1000
        else:
            self.dynamic_loss_scale = False
            self.cur_iter = 0
            self.cur_scale = static_loss_scale
        self.verbose = verbose

    def zero_grad(self, set_grads_to_None=True):
        """
        Zero FP16 parameter grads.

        Args:
            set_grads_to_None: when True (default) grads are set to None;
                otherwise existing grads are detached and zeroed in place.
        """
        # FP32 grad should never exist.
        # For speed, set model fp16 grad to None by default
        for group in self.fp16_groups:
            for p in group:
                if set_grads_to_None:
                    p.grad = None
                else:
                    if p.grad is not None:
                        p.grad.detach_()
                        p.grad.zero_()

    def _compute_grad_norm(self, fp16_grads_flat, norm_type=2):
        """
        Compute fp16 grad norm for later clipping(fused with update).
        Internal accumulated in fp32.
        Also fused in NaN check. Possibly other reduction needed for grad.

        Args:
            fp16_grads_flat (tensor): fp16 grad flattened
            norm_type (float or int): currently ignored; the 2-norm is always used.

        Returns:
            Total norm of the current fp16 gradients (viewed as a single vector).
            Returns -1 if the most recently computed fp16 gradients overflowed
        """
        # TODO: Not most efficient with copy to cpu and sync
        # only support 2-norm now
        # for torch version <= 1.0.1, torch.norm with dtype will fail and fall back to cast
        try:
            norm = float(torch.norm(fp16_grads_flat, 2.0, dtype=torch.float32))
        except TypeError:
            norm = float(torch.norm(fp16_grads_flat.float(), 2.0))
        # ``norm != norm`` is the NaN test.
        if norm == float('inf') or norm == -float('inf') or norm != norm:
            return -1
        else:
            return norm

    def step(self, closure=None):
        """
        Run one optimizer step, skipping it if any group's gradients overflowed.

        Not supporting closure.
        """
        # First compute norm for all group so we know if there is overflow
        grads_groups_flat = []
        norm_groups = []
        skip = False
        for i, group in enumerate(self.fp16_groups):
            grads_groups_flat.append(_flatten_dense_tensors([p.grad for p in group]))
            norm_groups.append(self._compute_grad_norm(grads_groups_flat[i]))
            if norm_groups[i] == -1: #TODO: early break
                skip = True

        if skip:
            self._update_scale(skip)
            return

        # norm is in fact norm*cur_scale
        self.optimizer.step(grads=[[g] for g in grads_groups_flat],
                            output_params=[[p] for p in self.fp16_groups_flat],
                            scale=self.cur_scale,
                            grad_norms=norm_groups)

        # TODO: we probably don't need this? just to be safe
        # Re-point every model param at its slice of the flat fp16 buffer.
        for flat, group in zip(self.fp16_groups_flat, self.fp16_groups):
            updated_params = _unflatten_dense_tensors(flat, group)
            for p, q in zip(group, updated_params):
                p.data = q.data

        self._update_scale(False)
        return

    def backward(self, loss):
        """
        :attr:`backward` performs the following steps:

        1. fp32_loss = loss.float()
        2. scaled_loss = fp32_loss*loss_scale
        3. scaled_loss.backward(), which accumulates scaled gradients into the ``.grad`` attributes of the model's fp16 leaves
        """
        scaled_loss = (loss.float()) * self.cur_scale
        scaled_loss.backward()

    def _update_scale(self, skip):
        """
        Advance the iteration counter and, under dynamic scaling, adjust the scale.

        On overflow (``skip`` True) the dynamic scale is divided by
        ``scale_factor`` (floored at 1); otherwise it is multiplied by
        ``scale_factor`` whenever the number of iterations since the last
        overflow is a multiple of ``scale_window``.
        """
        if self.dynamic_loss_scale:
            if skip:
                if self.verbose:
                    print("\nGrad overflow on iteration", self.cur_iter)
                    print("Using dynamic loss scale of", self.cur_scale)
                self.cur_scale = max(self.cur_scale/self.scale_factor, 1)
                self.last_overflow_iter = self.cur_iter
            else:
                if (self.cur_iter - self.last_overflow_iter) % self.scale_window == 0:
                    self.cur_scale *= self.scale_factor
        else:
            if skip:
                print("\nGrad overflow on iteration", self.cur_iter)
                print("Using static loss scale of", self.cur_scale)
        self.cur_iter +=1
        return

    # Promote state so it can be retrieved or set via "fp16_optimizer_instance.state"
    def _get_state(self):
        return self.optimizer.state

    def _set_state(self, value):
        self.optimizer.state = value

    state = property(_get_state, _set_state)

    # Promote param_groups so it can be retrieved or set via "fp16_optimizer_instance.param_groups"
    # (for example, to adjust the learning rate)
    def _get_param_groups(self):
        return self.optimizer.param_groups

    def _set_param_groups(self, value):
        self.optimizer.param_groups = value

    param_groups = property(_get_param_groups, _set_param_groups)

    def state_dict(self):
        """
        Returns a dict containing the current state of this :class:`FP16_Optimizer` instance.
        This dict contains attributes of :class:`FP16_Optimizer`, as well as the state_dict
        of the contained Pytorch optimizer.
        Example::
            checkpoint = {}
            checkpoint['model'] = model.state_dict()
            checkpoint['optimizer'] = optimizer.state_dict()
            torch.save(checkpoint, "saved.pth")
        """
        state_dict = {}
        state_dict['dynamic_loss_scale'] = self.dynamic_loss_scale
        state_dict['cur_scale'] = self.cur_scale
        state_dict['cur_iter'] = self.cur_iter
        if state_dict['dynamic_loss_scale']:
            state_dict['last_overflow_iter'] = self.last_overflow_iter
            state_dict['scale_factor'] = self.scale_factor
            state_dict['scale_window'] = self.scale_window
        state_dict['optimizer_state_dict'] = self.optimizer.state_dict()
        state_dict['fp32_groups_flat'] = self.fp32_groups_flat
        return state_dict

    def load_state_dict(self, state_dict):
        """
        Loads a state_dict created by an earlier call to state_dict().
        If ``fp16_optimizer_instance`` was constructed from some ``init_optimizer``,
        whose parameters in turn came from ``model``, it is expected that the user
        will call ``model.load_state_dict()`` before
        ``fp16_optimizer_instance.load_state_dict()`` is called.
        Example::
            model = torch.nn.Linear(D_in, D_out).cuda().half()
            optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
            optimizer = FP16_Optimizer(optimizer, static_loss_scale = 128.0)
            ...
            checkpoint = torch.load("saved.pth")
            model.load_state_dict(checkpoint['model'])
            optimizer.load_state_dict(checkpoint['optimizer'])
        """
        # I think it should actually be ok to reload the optimizer before the model.
        self.dynamic_loss_scale = state_dict['dynamic_loss_scale']
        self.cur_scale = state_dict['cur_scale']
        self.cur_iter = state_dict['cur_iter']
        if state_dict['dynamic_loss_scale']:
            self.last_overflow_iter = state_dict['last_overflow_iter']
            self.scale_factor = state_dict['scale_factor']
            self.scale_window = state_dict['scale_window']
        self.optimizer.load_state_dict(state_dict['optimizer_state_dict'])
        # At this point, the optimizer's references to the model's fp32 parameters are up to date.
        # The optimizer's hyperparameters and internal buffers are also up to date.
        # However, the fp32 master copies of the model's fp16 params stored by the optimizer are still
        # out of date.  There are two options.
        # 1:  Refresh the master params from the model's fp16 params.
        # This requires less storage but incurs precision loss.
        # 2:  Save and restore the fp32 master copies separately.
        # We choose option 2.
        #
        # Pytorch Optimizer.load_state_dict casts saved buffers (e.g. momentum) to the type and device
        # of their associated parameters, because it's possible those buffers might not exist yet in
        # the current optimizer instance.  In our case, as long as the current FP16_Optimizer has been
        # constructed in the same way as the one whose state_dict we are loading, the same master params
        # are guaranteed to exist, so we can just copy_() from the saved master params.
        for current, saved in zip(self.fp32_groups_flat, state_dict['fp32_groups_flat']):
            current.data.copy_(saved.data)


================================================
FILE: apex/apex/optimizers/fused_adam.py
================================================
import types
import torch
import importlib

class FusedAdam(torch.optim.Optimizer):

    r"""Adam optimizer whose update is performed by the ``fused_adam_cuda``
    extension. Currently GPU-only.  Requires Apex to be installed via
    ``python setup.py install --cuda_ext --cpp_ext``.

    The algorithm was proposed in `Adam: A Method for Stochastic Optimization`_.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups.
        lr (float, optional): learning rate. (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square. (default: (0.9, 0.999))
        eps (float, optional): term added to the denominator to improve
            numerical stability. (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        amsgrad (boolean, optional): whether to use the AMSGrad variant of this
            algorithm from the paper `On the Convergence of Adam and Beyond`_
            (default: False) NOT SUPPORTED in FusedAdam!
        eps_inside_sqrt (boolean, optional): in the 'update parameters' step,
            adds eps to the bias-corrected second moment estimate before
            evaluating square root instead of adding it to the square root of
            second moment estimate as in the original paper. (default: False)

    .. _Adam\: A Method for Stochastic Optimization:
        https://arxiv.org/abs/1412.6980
    .. _On the Convergence of Adam and Beyond:
        https://openreview.net/forum?id=ryQu7f-RZ
    """

    def __init__(self, params,
                 lr=1e-3, bias_correction = True,
                 betas=(0.9, 0.999), eps=1e-8, eps_inside_sqrt = False,
                 weight_decay=0., max_grad_norm=0., amsgrad=False):
        # Publish the extension under a module-level name for use in step().
        global fused_adam_cuda
        fused_adam_cuda = importlib.import_module("fused_adam_cuda")

        if amsgrad:
            raise RuntimeError('FusedAdam does not support the AMSGrad variant.')
        defaults = {
            'lr': lr,
            'bias_correction': bias_correction,
            'betas': betas,
            'eps': eps,
            'weight_decay': weight_decay,
            'max_grad_norm': max_grad_norm,
        }
        super(FusedAdam, self).__init__(params, defaults)
        # Mode flag handed to the kernel: 0 when eps goes inside the sqrt, else 1.
        if eps_inside_sqrt:
            self.eps_mode = 0
        else:
            self.eps_mode = 1

    @staticmethod
    def _split_by_group(values, num_groups):
        """Normalize an optional per-parameter argument into one entry per group.

        ``None`` becomes ``[None] * num_groups``; a generator or a sequence
        whose first element is not a list is treated as a single group
        (backward compatibility); anything else is returned unchanged.
        """
        if values is None:
            return [None] * num_groups
        if isinstance(values, types.GeneratorType):
            return [values]
        if type(values[0]) != list:
            return [values]
        return values

    def step(self, closure=None, grads=None, output_params=None, scale=1., grad_norms=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
            grads (list of tensors, optional): weight gradient to use for the
                optimizer update. If gradients have type torch.half, parameters
                are expected to be in type torch.float. (default: None)
            output params (list of tensors, optional): A reduced precision copy
                of the updated weights written out in addition to the regular
                updated weights. Have to be of same type as gradients. (default: None)
            scale (float, optional): factor to divide gradient tensor values
                by before applying to weights. (default: 1)
            grad_norms (list, optional): one gradient norm per param group,
                read only for groups whose ``max_grad_norm`` is positive.
                (default: None)
        """
        loss = closure() if closure is not None else None

        group_count = len(self.param_groups)
        grads_group = self._split_by_group(grads, group_count)
        output_params_group = self._split_by_group(output_params, group_count)
        if grad_norms is None:
            grad_norms = [None] * group_count

        per_group = zip(self.param_groups, grads_group, output_params_group, grad_norms)
        for group, group_grads, group_outputs, grad_norm in per_group:
            param_count = len(group['params'])
            if group_grads is None:
                group_grads = [None] * param_count
            if group_outputs is None:
                group_outputs = [None] * param_count

            # Fold gradient clipping into the scale passed to the kernel.
            combined_scale = scale
            if group['max_grad_norm'] > 0:
                # norm is in fact norm*scale
                clip = ((grad_norm / scale) + 1e-6) / group['max_grad_norm']
                if clip > 1:
                    combined_scale = clip * scale

            bias_correction = 1 if group['bias_correction'] else 0

            for p, grad, output_param in zip(group['params'], group_grads, group_outputs):
                # note: p.grad should not ever be set for correct operation of
                # mixed precision optimizer that sometimes sends None gradients
                if grad is None:
                    if p.grad is None:
                        continue
                    grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError('FusedAdam does not support sparse gradients, please consider SparseAdam instead')

                state = self.state[p]

                # Lazily create the step counter and both moment buffers.
                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p.data)
                    state['exp_avg_sq'] = torch.zeros_like(p.data)

                state['step'] += 1
                beta1, beta2 = group['betas']

                if output_param is None:
                    # Empty placeholder when no reduced-precision copy is requested.
                    out_p = torch.tensor([], dtype = torch.float)
                else:
                    out_p = output_param

                fused_adam_cuda.adam(p.data,
                                     out_p,
                                     state['exp_avg'],
                                     state['exp_avg_sq'],
                                     grad,
                                     group['lr'],
                                     beta1,
                                     beta2,
                                     group['eps'],
                                     combined_scale,
                                     state['step'],
                                     self.eps_mode,
                                     bias_correction,
                                     group['weight_decay'])
        return loss


================================================
FILE: apex/apex/parallel/LARC.py
================================================
import torch
from torch import nn
from torch.autograd import Variable
from torch.nn.parameter import Parameter

class LARC(object):
    """
    :class:`LARC` is a pytorch implementation of both the scaling and clipping variants of LARC,
    in which the ratio between gradient and parameter magnitudes is used to calculate an adaptive
    local learning rate for each individual parameter. The algorithm is designed to improve
    convergence of large batch training.

    See https://arxiv.org/abs/1708.03888 for calculation of the local learning rate.

    In practice it modifies the gradients of parameters as a proxy for modifying the learning rate
    of the parameters. This design allows it to be used as a wrapper around any torch.optim Optimizer.

    ```
    model = ...
    optim = torch.optim.Adam(model.parameters(), lr=...)
    optim = LARC(optim)
    ```

    It can even be used in conjunction with apex.fp16_utils.FP16_Optimizer.

    ```
    model = ...
    optim = torch.optim.Adam(model.parameters(), lr=...)
    optim = LARC(optim)
    optim = apex.fp16_utils.FP16_Optimizer(optim)
    ```

    Args:
        optimizer: Pytorch optimizer to wrap and modify learning rate for.
        trust_coefficient: Trust coefficient for calculating the lr. See https://arxiv.org/abs/1708.03888
        clip: Decides between clipping or scaling mode of LARC. If `clip=True` the learning rate is set to `min(optimizer_lr, local_lr)` for each parameter. If `clip=False` the learning rate is set to `local_lr*optimizer_lr`.
        eps: epsilon kludge to help with numerical stability while calculating adaptive_lr
    """

    def __init__(self, optimizer, trust_coefficient=0.02, clip=True, eps=1e-8):
        # Shares (does not copy) the wrapped optimizer's param_groups list.
        self.param_groups = optimizer.param_groups
        self.optim = optimizer
        self.trust_coefficient = trust_coefficient
        self.eps = eps
        self.clip = clip

    # The methods below simply delegate to the wrapped optimizer.
    def __getstate__(self):
        return self.optim.__getstate__()

    def __setstate__(self, state):
        self.optim.__setstate__(state)

    def __repr__(self):
        return self.optim.__repr__()

    def state_dict(self):
        return self.optim.state_dict()

    def load_state_dict(self, state_dict):
        self.optim.load_state_dict(state_dict)

    def zero_grad(self):
        self.optim.zero_grad()

    def add_param_group(self, param_group):
        self.optim.add_param_group( param_group)

    def step(self):
        """Rescale every gradient by its LARC ratio, then step the wrapped optimizer.

        Weight decay is applied here (folded into the gradient), so each
        group's ``weight_decay`` is set to 0 while the wrapped optimizer steps
        and is restored afterwards -- including when an exception is raised,
        so a failed step cannot leave weight decay disabled.
        """
        weight_decays = []
        try:
            with torch.no_grad():
                for group in self.optim.param_groups:
                    # absorb weight decay control from optimizer
                    weight_decay = group.get('weight_decay', 0)
                    weight_decays.append(weight_decay)
                    group['weight_decay'] = 0
                    for p in group['params']:
                        if p.grad is None:
                            continue
                        param_norm = torch.norm(p.data)
                        grad_norm = torch.norm(p.grad.data)

                        if param_norm != 0 and grad_norm != 0:
                            # calculate adaptive lr + weight decay
                            adaptive_lr = self.trust_coefficient * (param_norm) / (grad_norm + param_norm * weight_decay + self.eps)

                            # clip learning rate for LARC
                            if self.clip:
                                # calculation of adaptive_lr so that when multiplied by lr it equals `min(adaptive_lr, lr)`
                                adaptive_lr = min(adaptive_lr/group['lr'], 1)

                            p.grad.data += weight_decay * p.data
                            p.grad.data *= adaptive_lr

            self.optim.step()
        finally:
            # return weight decay control to optimizer; zip stops at the last
            # group whose original value was recorded before any failure.
            for group, weight_decay in zip(self.optim.param_groups, weight_decays):
                group['weight_decay'] = weight_decay


================================================
FILE: apex/apex/parallel/README.md
================================================
## Distributed Data Parallel

distributed.py contains the source code for `apex.parallel.DistributedDataParallel`, a module wrapper that enables multi-process multi-GPU data parallel training optimized for NVIDIA's NCCL communication library.

`apex.parallel.DistributedDataParallel` achieves high performance by overlapping communication with
computation in the backward pass and bucketing smaller transfers to reduce the total number of
transfers required.

multiproc.py contains the source code for `apex.parallel.multiproc`, a launch utility that places one process on each of the node's available GPUs.

#### [API Documentation](https://nvidia.github.io/apex/parallel.html)

#### [Example/Walkthrough](https://github.com/NVIDIA/apex/tree/master/examples/distributed)

#### [Imagenet example with Mixed Precision](https://github.com/NVIDIA/apex/tree/master/examples/imagenet)

#### [Simple example with FP16_Optimizer](https://github.com/NVIDIA/apex/tree/master/examples/FP16_Optimizer_simple/distributed_apex)

### Synchronized Batch Normalization

`apex.parallel.SyncBatchNorm` has an API similar to that of `torch.nn.BatchNorm*N*d`.
It reduces stats on the first (channel) dimension of the Tensor and accepts
arbitrary spatial dimensions.

#### Installation

Apex provides two sync BN implementations:

1. There is the Python-only implementation, which is the default implementation
when installing with `python setup.py install`.
It uses PyTorch primitive operations and distributed communication package from
`torch.distributed`.

   - _Python-only implementation requires input tensor to be of same data type as
layer_

2. We also provide implementation with kernels through CUDA/C++ extension with
improved performance. We are experimenting with Welford and Kahan for reduction
hoping to get better accuracy.
   To use the kernel implementation, users need to install Apex with the CUDA extension
enabled: `python setup.py install --cuda_ext`.

   - _Custom kernel implementation supports fp16 input with fp32 layer as cudnn.
This is required to run imagenet example in fp16._

   - _Currently kernel implementation only supports GPU._

#### HowTo

1. User could use `apex.parallel.SyncBatchNorm` by building their module with
the layer explicitly.

```
import torch
import apex
input_t = torch.randn(3, 5, 20).cuda()
sbn = apex.parallel.SyncBatchNorm(5).cuda()
output_t = sbn(input_t)
```

2. Users can also take a constructed `torch.nn.Module` and replace all of its `torch.nn.BatchNorm*N*d` modules with `apex.parallel.SyncBatchNorm` through the utility function `apex.parallel.convert_syncbn_model`.

```
# model is an instance of torch.nn.Module
import apex
sync_bn_model = apex.parallel.convert_syncbn_model(model)
```


================================================
FILE: apex/apex/parallel/__init__.py
================================================
import torch

# Resolve the reduce-op enum under whichever name this PyTorch build exposes:
# ``ReduceOp`` is tried first, then ``reduce_op``, and finally the
# ``deprecated`` namespace.
if hasattr(torch.distributed, 'ReduceOp'):
    ReduceOp = torch.distributed.ReduceOp
else:
    ReduceOp = (torch.distributed.reduce_op
                if hasattr(torch.distributed, 'reduce_op')
                else torch.distributed.deprecated.reduce_op)

from .distributed import DistributedDataParallel, Reducer
# This is tricky because I'd like SyncBatchNorm to be exposed the same way
# for both the cuda-enabled and python-fallback versions, and I don't want
# to suppress the error information.
try:
    import syncbn
    from .optimized_sync_batchnorm import SyncBatchNorm
except ImportError as err:
    from .sync_batchnorm import SyncBatchNorm
    SyncBatchNorm.syncbn_import_error = err

def convert_syncbn_model(module, process_group=None, channel_last=False):
    '''
    Recursively traverse module and its children to replace all instances of
    ``torch.nn.modules.batchnorm._BatchNorm`` with :class:`apex.parallel.SyncBatchNorm`.

    All ``torch.nn.BatchNorm*N*d`` wrap around
    ``torch.nn.modules.batchnorm._BatchNorm``, so this function lets you easily switch
    to use sync BN.

    Args:
        module (torch.nn.Module): input module
        process_group (optional, default=None): process group handed to every
            created :class:`apex.parallel.SyncBatchNorm`.
        channel_last (bool, default=False): forwarded as ``channel_last`` to every
            created :class:`apex.parallel.SyncBatchNorm`.

    Returns:
        The converted module.  If ``module`` itself is a batch norm layer a new
        :class:`apex.parallel.SyncBatchNorm` is returned; otherwise ``module`` is
        returned with its children replaced in place.

    Example::

        >>> # model is an instance of torch.nn.Module
        >>> import apex
        >>> sync_bn_model = apex.parallel.convert_syncbn_model(model)
    '''
    # Instance norm layers must never be converted.  In PyTorch releases where
    # ``_InstanceNorm`` derives from ``_BatchNorm`` they would otherwise match
    # the isinstance check below.
    if isinstance(module, torch.nn.modules.instancenorm._InstanceNorm):
        return module

    mod = module
    if isinstance(module, torch.nn.modules.batchnorm._BatchNorm):
        mod = SyncBatchNorm(module.num_features, module.eps, module.momentum, module.affine, module.track_running_stats, process_group, channel_last=channel_last)
        mod.running_mean = module.running_mean
        mod.running_var = module.running_var
        # Carry over the batch counter as well (it drives the cumulative moving
        # average when momentum is None).  Guarded because not every PyTorch
        # version defines this buffer.
        if hasattr(module, 'num_batches_tracked'):
            mod.num_batches_tracked = module.num_batches_tracked
        if module.affine:
            mod.weight.data = module.weight.data.clone().detach()
            mod.bias.data = module.bias.data.clone().detach()
    # Re-registering under the same name replaces each child with its converted
    # counterpart without changing the set of child names.
    for name, child in module.named_children():
        mod.add_module(name, convert_syncbn_model(child,
                                                  process_group=process_group,
                                                  channel_last=channel_last))
    return mod

def create_syncbn_process_group(group_size):
    '''
    Partition the world into consecutive rank blocks of ``group_size`` and return
    the process group that contains the calling process.

    ``group_size`` must divide the total number of GPUs (world_size).

    A ``group_size`` of 0 means "use the whole world"; ``None`` is returned in that case.

    A ``group_size`` of 1 behaves like non-sync bn, but still pays the overhead.

    Args:
        group_size (int): number of GPU's to collaborate for sync bn

    Example::

        >>> # model is an instance of torch.nn.Module
        >>> import apex
        >>> group = apex.parallel.create_syncbn_process_group(group_size)
    '''

    if group_size == 0:
        return None

    world_size = torch.distributed.get_world_size()
    assert(world_size >= group_size)
    assert(world_size % group_size == 0)

    own_group = None
    num_groups = world_size // group_size
    for group_num in range(num_groups):
        first_rank = group_num * group_size
        member_ranks = range(first_rank, first_rank + group_size)
        candidate = torch.distributed.new_group(ranks=member_ranks)
        # No early exit here: every process has to take part in the creation
        # of every subgroup, not just its own.
        if torch.distributed.get_rank() // group_size == group_num:
            own_group = candidate

    assert(own_group is not None)
    return own_group


================================================
FILE: apex/apex/parallel/distributed.py
================================================
import torch
import torch.distributed as dist
from torch.nn.modules import Module
from torch.autograd import Variable
from collections import OrderedDict
from itertools import chain
import copy
import importlib
from ..multi_tensor_apply import multi_tensor_applier

# Flipped to True once import_flatten_impl() has selected an implementation.
imported_flatten_impl = False

def import_flatten_impl():
    """
    Bind the module-level ``flatten_impl`` / ``unflatten_impl`` callables.

    The ``apex_C`` extension is used when it can be imported; otherwise a warning
    is printed and the helpers from ``torch._utils`` are used instead.
    """
    global flatten_impl, unflatten_impl, imported_flatten_impl
    try:
        import apex_C
    except ImportError:
        print("Warning:  apex was installed without --cpp_ext.  Falling back to Python flatten and unflatten.")
        flatten_impl, unflatten_impl = (torch._utils._flatten_dense_tensors,
                                        torch._utils._unflatten_dense_tensors)
    else:
        flatten_impl, unflatten_impl = apex_C.flatten, apex_C.unflatten
    imported_flatten_impl = True

def flatten(bucket):
    """Run ``bucket`` through the selected flatten implementation, selecting it on first use."""
    if imported_flatten_impl:
        return flatten_impl(bucket)
    # First call: pick the implementation before dispatching.
    import_flatten_impl()
    return flatten_impl(bucket)

def unflatten(coalesced, bucket):
    """Run ``coalesced``/``bucket`` through the selected unflatten implementation, selecting it on first use."""
    if imported_flatten_impl:
        return unflatten_impl(coalesced, bucket)
    # First call: pick the implementation before dispatching.
    import_flatten_impl()
    return unflatten_impl(coalesced, bucket)

# apply_flat_dist_call requires that tensors in 'bucket' are all the same type.
def apply_flat_dist_call(bucket, call, extra_args=None):
    """
    Flatten ``bucket``, run ``call`` on the flat tensor, and copy the results back.

    ``call`` receives the flattened tensor followed by ``extra_args`` (if given).
    When ``call`` is ``dist.all_reduce`` the result is additionally divided by the
    world size before being copied back into the tensors of ``bucket``.
    """
    coalesced = flatten(bucket)

    if extra_args is None:
        call(coalesced)
    else:
        call(coalesced, *extra_args)

    # An allreduce sums over processes; dividing turns the sum into an average.
    if call is dist.all_reduce:
        coalesced /= dist.get_world_size()

    synced_pieces = unflatten(coalesced, bucket)
    for buf, synced in zip(bucket, synced_pieces):
        buf.copy_(synced)

def split_half_float_double(tensors):
    """
    Group ``tensors`` into lists by CUDA half / float / double type, in that order.

    Tensors whose ``type()`` string is none of the three are left out, and types
    with no matching tensor produce no list at all.
    """
    type_names = ("torch.cuda.HalfTensor", "torch.cuda.FloatTensor", "torch.cuda.DoubleTensor")
    per_type = ([t for t in tensors if t.type() == name] for name in type_names)
    return [group for group in per_type if group]

def split_by_type(tensors):
    """
    Group ``tensors`` by their ``type()`` string.

    Returns an ``OrderedDict`` mapping each type string to the list of tensors of
    that type; keys appear in order of first occurrence.
    """
    buckets = OrderedDict()
    for tensor in tensors:
        buckets.setdefault(tensor.type(), []).append(tensor)
    return buckets

# flat_dist_call organizes 'tensors' by type.
def flat_dist_call(tensors, call, extra_args=None):
    """Split ``tensors`` by type and run ``apply_flat_dist_call`` once per type group."""
    for bucket in split_by_type(tensors).values():
        apply_flat_dist_call(bucket, call, extra_args)

            
def extract_tensors(maybe_tensor, tensor_list):
    """
    Append every tensor found in ``maybe_tensor`` to ``tensor_list``.

    ``maybe_tensor`` may be a tensor or an arbitrarily nested iterable; anything
    that is neither a tensor nor iterable is silently ignored.
    """
    if torch.is_tensor(maybe_tensor):
        tensor_list.append(maybe_tensor)
        return

    try:
        for item in maybe_tensor:
            extract_tensors(item, tensor_list)
    except TypeError:
        # Not iterable: nothing to collect from this element.
        return

        
class Reducer(object):
    """
    :class:`apex.parallel.Reducer` is a simple class that helps allreduce a module's parameters
    across processes.  :class:`Reducer` is intended to give the user additional control:
    Unlike :class:`DistributedDataParallel`, :class:`Reducer` will not automatically allreduce
    parameters during ``backward()``.
    Instead, :class:`Reducer` waits for the user to call ``<reducer_instance>.reduce()`` manually.
    This enables, for example, delaying the allreduce to be carried out every 
    several iterations instead of every single iteration.

    Like :class:`DistributedDataParallel`, :class:`Reducer` averages any tensors it allreduces 
    over the number of participating processes.

    :class:`Reducer` is designed to work with the upstream launch utility script 
    ``torch.distributed.launch`` with ``--nproc_per_node <= number of gpus per node``.
    When used with this launcher, :class:`Reducer` assumes 1:1 mapping of processes to GPUs.
    It also assumes that your script calls ``torch.cuda.set_device(args.rank)`` before creating the model.

    Args:
        module_or_grads_list: Either a network definition (module) being run in multi-gpu/distributed mode, or an iterable of gradients to be reduced.  If a module is passed in, the Reducer constructor will sync the parameters across processes (broadcasting from rank 0) to make sure they're all initialized with the same values.  If a list of gradients (that came from some module) is passed in, the user is responsible for manually syncing that module's parameters at the beginning of training.
    """
    
    def __init__(self, module_or_grads_list):
        if isinstance(module_or_grads_list, Module):
            self.module = module_or_grads_list
            # Start every process from rank 0's parameter values.
            flat_dist_call([param.data for param in self.module.parameters()], dist.broadcast, (0,) )

        else:
            self.module = None
            self.grads = []
            extract_tensors(module_or_grads_list, self.grads)
            
    def reduce(self):
        """
        Allreduce (and average over processes) the tracked gradients.

        In module mode the gradients of all parameters that currently have a
        ``.grad`` are reduced; otherwise the tensors collected at construction
        time are reduced.
        """
        # Compare against None explicitly: container modules define __len__, so
        # an empty one is falsy and would wrongly take the gradient-list branch,
        # where self.grads was never set.
        if self.module is not None:
            grads = [param.grad.data for param in self.module.parameters() if param.grad is not None]
            flat_dist_call(grads, dist.all_reduce)
        else:
            flat_dist_call(self.grads, dist.all_reduce)
            
            
class DistributedDataParallel(Module):
    """
    :class:`apex.parallel.DistributedDataParallel` is a module wrapper that enables
    easy multiprocess distributed data parallel training, similar to ``torch.nn.parallel.DistributedDataParallel``.  Parameters are broadcast across participating processes on initialization, and gradients are
    allreduced and averaged over processes during ``backward()``.

    :class:`DistributedDataParallel` is optimized for use with NCCL.  It achieves high performance by 
    overlapping communication with computation during ``backward()`` and bucketing smaller gradient
    transfers to reduce the total number of transfers required.

    :class:`DistributedDataParallel` is designed to work with the upstream launch utility script 
    ``torch.distributed.launch`` with ``--nproc_per_node <= number of gpus per node``.
    When used with this launcher, :class:`DistributedDataParallel` assumes 1:1 mapping of processes to GPUs.
    It also assumes that your script calls ``torch.cuda.set_device(args.rank)`` before creating the model.

    https://github.com/NVIDIA/apex/tree/master/examples/simple/distributed shows detailed usage.
    https://github.com/NVIDIA/apex/tree/master/examples/imagenet shows another example
    that combines :class:`DistributedDataParallel` with mixed precision training.

    Args:
        module: Network definition to be run in multi-gpu/distributed mode.
        message_size (int, default=1e7): Minimum number of elements in a communication bucket.
        delay_allreduce (bool, default=False):  Delay all communication to the end of the backward pass.  This disables overlapping communication with computation.
        shared_param (default=None):  No longer supported.  Passing anything other than ``None`` raises ``ValueError``.
        allreduce_trigger_params (list, optional, default=None):  If supplied, should contain a list of parameters drawn from the model.  Allreduces will be kicked off whenever one of these parameters receives its gradient (as opposed to when a bucket of size message_size is full).  At the end of backward(), a cleanup allreduce to catch any remaining gradients will also be performed automatically.  If allreduce_trigger_params is supplied, the message_size argument will be ignored.
        retain_allreduce_buffers (bool, default=False):  If True, each flattened allreduced bucket is stored in ``self.allreduce_buffers`` instead of being copied back into the individual gradient tensors.
        allreduce_always_fp32 (bool, default=False):  Convert any FP16 gradients to FP32 before allreducing.  This can improve stability for widely scaled-out runs.
        gradient_average (bool, default=True):  Option to toggle whether or not DDP averages the allreduced gradients over processes.  For proper scaling, the default value of True is recommended.
        gradient_predivide_factor (float, default=1.0):  Allows performing the average of gradients over processes partially before and partially after the allreduce.  Before allreduce:  ``grads.mul_(1.0/gradient_predivide_factor)``.  After allreduce:  ``grads.mul_(gradient_predivide_factor/world size)``.  This can reduce the stress on the dynamic range of FP16 allreduces for widely scaled-out runs.

    .. warning::
        If ``gradient_average=False``, the pre-allreduce division (``grads.mul_(1.0/gradient_predivide_factor)``) will still be applied, but the post-allreduce gradient averaging (``grads.mul_(gradient_predivide_factor/world size)``) will be omitted.

    """

    def __init__(self, 
                 module, 
                 message_size=10000000, 
                 delay_allreduce=False, 
                 shared_param=None,
                 allreduce_trigger_params=None,
                 retain_allreduce_buffers=False,
                 allreduce_always_fp32=False,
                 gradient_average=True,
                 gradient_predivide_factor=1.0):
        super(DistributedDataParallel, self).__init__()

        # Backward/forward compatibility around 
        # https://github.com/pytorch/pytorch/commit/540ef9b1fc5506369a48491af8a285a686689b36 and
        # https://github.com/pytorch/pytorch/commit/044d00516ccd6572c0d6ab6d54587155b02a3b86
        if hasattr(dist, "get_backend"):
            self._backend = dist.get_backend()
            if hasattr(dist, "DistBackend"):
                self.backend_enum_holder = dist.DistBackend
            else:
                self.backend_enum_holder = dist.Backend
        else:
            self._backend = dist._backend 
            self.backend_enum_holder = dist.dist_backend

        self.warn_on_half = True if self._backend == self.backend_enum_holder.GLOO else False

        if shared_param is not None:
            raise ValueError("shared_param is no longer supported as an option.  It was misleadingly named from the start.  It turns out overlapping communication with computation should work fine with shared parameters.  If you still wish to delay communication to the end of the backward pass, use delay_allreduce=True|False instead.") 

        self.world_size = float(dist.get_world_size())

        self.retain_allreduce_buffers = retain_allreduce_buffers
        self.allreduce_always_fp32 = allreduce_always_fp32
        self.gradient_average = gradient_average
        self.gradient_predivide_factor = gradient_predivide_factor

        self.custom_allreduce_triggers = False
        if allreduce_trigger_params is not None:
            if delay_allreduce:
                raise ValueError("Setting allreduce_trigger_params is only valid if delay_allreduce=False.")  
            self.custom_allreduce_triggers = True
            # Trigger parameters are tracked by identity, not by value.
            self.allreduce_trigger_params = {id(param) for param in allreduce_trigger_params}

        self.delay_allreduce = delay_allreduce
        self.message_size = message_size

        self.reduction_stream = torch.cuda.Stream()
        self.reduction_event = torch.cuda.Event(enable_timing=False, blocking=False) 
        
        self.module = module

        self._disable_allreduce = False
        
        if self._backend == self.backend_enum_holder.NCCL:
            for param in self.module.parameters():
                assert param.is_cuda, "NCCL backend only supports model parameters to be on GPU."

        self.active_params = []

        # Index of each supported parameter type into tmp_buckets / tmp_numels.
        self.param_type_to_tmp_i = {"torch.cuda.HalfTensor" : 0, 
                                    "torch.cuda.FloatTensor" : 1,
                                    "torch.cuda.DoubleTensor" : 2}

        if multi_tensor_applier.available:
            # TODO:  I really need to centralize the C++ backed imports
            import amp_C
            self.multi_tensor_scale = amp_C.multi_tensor_scale
            self._overflow_buf = torch.cuda.IntTensor([0])

        self.create_hooks()

        # Start every process from rank 0's parameter values.
        flat_dist_call([param.data for param in self.module.parameters()], dist.broadcast, (0,) )


    def __setstate__(self, state):
        """Restore ``state`` and create a fresh reduction stream and event."""
        super(DistributedDataParallel, self).__setstate__(state)
        self.reduction_stream = torch.cuda.Stream()
        self.reduction_event = torch.cuda.Event(enable_timing=False, blocking=False) 


    def __getstate__(self):
        """
        Return a shallow copy of the instance ``__dict__`` without the reduction
        stream and event.

        Both entries are dropped for every backend because ``__setstate__``
        recreates them unconditionally.
        """
        attrs = copy.copy(self.__dict__)
        # The keys are the bare attribute names (no "self." prefix); pop() keeps
        # this safe even if an entry is already absent.
        attrs.pop('reduction_stream', None)
        attrs.pop('reduction_event', None)
        return attrs

    def enable_allreduce(self):
        """Re-enable the allreduce logic in ``forward`` and in the gradient hooks."""
        self._disable_allreduce = False

    def disable_allreduce(self):
        """Make ``forward`` and the gradient hooks skip all allreduce logic."""
        self._disable_allreduce = True
      
    # Broadcast rank 0's bucket structure across all processes, and have all processes 
    # regenerate their bucket structures to match. 
    def sync_bucket_structure(self):
        """Adopt rank 0's bucket layout and rebuild the local bookkeeping from it."""
        # Append leftover buckets
        for tmp_bucket in self.tmp_buckets:
            if len(tmp_bucket) > 0:
                self.active_i_buckets.append(tmp_bucket)

        self.num_buckets = len(self.active_i_buckets)
        self.bucket_sizes = [len(bucket) for bucket in self.active_i_buckets]

        # Layout: [num_buckets, size of each bucket..., parameter indices of all buckets...]
        info_tensor = torch.cuda.IntTensor([self.num_buckets] + 
                                           self.bucket_sizes + 
                                           list(chain(*self.active_i_buckets)))

        dist.broadcast(info_tensor, 0)

        info = [int(entry) for entry in info_tensor]

        self.num_buckets = info[0]
        self.bucket_sizes = info[1:self.num_buckets + 1] 
        self.buckets = [[None for _ in range(self.bucket_sizes[i])] 
                        for i in range(self.num_buckets)] 
        # Technically, active_i_buckets' work is done.  But the information is still useful to
        # keep around.  Therefore, refresh active_i_buckets based on rank 0 as well.
        self.active_i_buckets = [[None for _ in range(self.bucket_sizes[i])] 
                                 for i in range(self.num_buckets)] 
        
        flattened_buckets = info[self.num_buckets + 1:]
        flat_i = 0
        for bucket_idx in range(self.num_buckets): 
            for bucket_loc in range(self.bucket_sizes[bucket_idx]):
                param_i = flattened_buckets[flat_i]
                self.active_i_buckets[bucket_idx][bucket_loc] = param_i 
                self.param_id_to_bucket[id(self.active_params[param_i])] = (bucket_idx, bucket_loc)
                flat_i += 1 
        
        
    def create_hooks(self):
        """
        Register a hook on the gradient accumulator of every parameter that
        requires grad.

        The hook either records the bucket structure / queues the fallback
        allreduce, or feeds the parameter's gradient into its bucket for
        overlapped communication.
        """
        # Fallback hook that's only called at the end of backward.
        # Used if you deliberately want to delay allreduces to the end, or to refresh the 
        # bucket structure that will be used to overlap communication with computation in later
        # iterations.
        def allreduce_params():
            # Bucket record refresh
            if not self.delay_allreduce:
                if self.needs_refresh:
                    self.sync_bucket_structure()

                    self.needs_refresh = False

            self.allreduce_fallback()


        def overlapping_backward_epilogue():
            # Make the current stream wait for the work queued on the reduction stream.
            self.reduction_stream.record_event(self.reduction_event)
            torch.cuda.current_stream().wait_event(self.reduction_event)
     
            # Sanity checks that all the buckets were kicked off
            if self.next_bucket != self.num_buckets:
                # Build ONE message string; passing two positional strings to
                # RuntimeError would turn the message into a tuple.
                raise RuntimeError("In epilogue, next_bucket ({}) != num_buckets ({}).  "
                                   "This probably indicates some buckets were not allreduced.".format(
                                       self.next_bucket, self.num_buckets))

            for actual, expected in zip(self.buckets_ready_size, self.bucket_sizes):
                if actual != expected:
                    raise RuntimeError("Some param buckets were not allreduced.")
           

        self.grad_accs = []
        for param in self.module.parameters():
            if param.requires_grad:
                # wrapper() gives each hook its own binding of ``param``.
                def wrapper(param):
                    param_tmp = param.expand_as(param)
                    grad_acc = param_tmp.grad_fn.next_functions[0][0]

                    def allreduce_hook(*unused):
                        if not self._disable_allreduce:
                            if self.delay_allreduce or self.needs_refresh:
                                # TODO:  How do we want to handle multiple backward passes between
                                # each forward, e.g., backward passes with retain_graph=True?
                                # needs_refresh and callback_queued are both vulnerable states.
                                if not self.delay_allreduce and self.needs_refresh:
                                    # Use the backward pass to build the bucket structure on the fly.
                                    active_i = self.param_id_to_active_i[id(param)]

                                    # Float, half, and double tensors are grouped into buckets separately.
                                    current_type = self.param_type_to_tmp_i[param.type()]
  
                                    self.tmp_buckets[current_type].append(active_i)                          

                                    ship_tmp_bucket = False
                                    if self.custom_allreduce_triggers:
                                        if id(param) in self.allreduce_trigger_params:
                                            ship_tmp_bucket = True
                                    else:
                                        self.tmp_numels[current_type] += param.numel()
                                        if self.tmp_numels[current_type] >= self.message_size:
                                            ship_tmp_bucket = True

                                    # To consider:  If custom_allreduce_triggers are in use, ship all
                                    # tmp_buckets, not just tmp_buckets[current_type].
                                    if ship_tmp_bucket:
                                        self.active_i_buckets.append(self.tmp_buckets[current_type])
                                        self.tmp_buckets[current_type] = []
                                        self.tmp_numels[current_type] = 0
                                
                                if not self.callback_queued:
                                    Variable._execution_engine.queue_callback(allreduce_params)
                                    self.callback_queued = True
                            else:
                                if not self.callback_queued:
                                    Variable._execution_engine.queue_callback(overlapping_backward_epilogue)
                                    self.callback_queued = True 

                                self.comm_ready_buckets(param)
                        
                    grad_acc.register_hook(allreduce_hook)
                    # Keep a reference to the accumulator alongside its hook.
                    self.grad_accs.append(grad_acc)

                wrapper(param)

    def allreduce_bucket(self, bucket):
        """
        Flatten ``bucket``, allreduce it, apply the configured scaling, and
        return the flattened tensor.
        """
        tensor = flatten(bucket)

        tensor_to_allreduce = tensor 

        if self.allreduce_always_fp32:
            tensor_to_allreduce = tensor.float() 

        if self.gradient_predivide_factor != 1.0:
            tensor_to_allreduce.mul_(1./self.gradient_predivide_factor)

        dist.all_reduce(tensor_to_allreduce)

        if self.gradient_average:
            if self.gradient_predivide_factor != self.world_size:
                tensor_to_allreduce.mul_(self.gradient_predivide_factor/self.world_size)

        # If the allreduce ran on a separate fp32 tensor, write the result back
        # into the original flat tensor.
        if self.allreduce_always_fp32 and tensor is not tensor_to_allreduce:
            tensor.copy_(tensor_to_allreduce)
 
        return tensor
    

    def allreduce_maybe_retain(self, bucket, bucket_idx=-1):
        """
        Allreduce ``bucket``, then either store the flat result in
        ``self.allreduce_buffers[bucket_idx]`` (``retain_allreduce_buffers``) or
        copy it back into the tensors of ``bucket``.

        Raises:
            RuntimeError: if the buffer slot for ``bucket_idx`` is already filled.
        """
        allreduced = self.allreduce_bucket(bucket)
        if self.retain_allreduce_buffers:
            if self.allreduce_buffers[bucket_idx] is not None:
                raise RuntimeError("The backward pass is attempting to replace an already-filled "
                                   "allreduce buffer.  This is almost certainly an error.")
            self.allreduce_buffers[bucket_idx] = allreduced
        else:
            if multi_tensor_applier.available:
                multi_tensor_applier(
                    self.multi_tensor_scale,
                    self._overflow_buf,
                    [unflatten(allreduced, bucket), bucket],
                    1.0)
            else:
                for buf, synced in zip(bucket, unflatten(allreduced, bucket)):
                    buf.copy_(synced)


    def allreduce_fallback(self):
        """Allreduce all existing gradients, one bucket per half/float/double type."""
        grads = [param.grad.data for param in self.module.parameters() if param.grad is not None]

        split_buckets = split_half_float_double(grads)

        # If retain_allreduce_buffers is True and delay_allreduce is False,
        # this will only be done during the first backward pass, ignored by the 
        # training script, and overwritten in the next forward pass.  So it's harmless. 
        if self.retain_allreduce_buffers:
            self.allreduce_buffers = [None for _ in range(len(split_buckets))]
    
        for i, bucket in enumerate(split_buckets):
            self.allreduce_maybe_retain(bucket, i)


    def comm_ready_buckets(self, param):
        """
        Record that ``param``'s gradient is ready and allreduce every bucket that
        has become complete, in bucket order.

        Raises:
            RuntimeError: if the bucket slot of ``param`` is already filled.
        """
        # Need to do this in every hook for compatibility with Ruberry's streaming backward PR.
        # self.reduction_stream.wait_stream(torch.cuda.current_stream())

        bucket_idx, bucket_loc = self.param_id_to_bucket[id(param)]

        if self.buckets[bucket_idx][bucket_loc] is not None:
            raise RuntimeError("The backward pass is attempting to replace an already-filled "
                               "bucket slot.  This is almost certainly an error.")

        self.buckets[bucket_idx][bucket_loc] = param.grad.data
        self.buckets_ready_size[bucket_idx] += 1

        if self.buckets_ready_size[bucket_idx] == self.bucket_sizes[bucket_idx]:
            if bucket_idx == self.next_bucket:
                # Make the reduction stream wait for the work already queued on
                # the current stream before it starts communicating.
                torch.cuda.current_stream().record_event(self.reduction_event)
                self.reduction_stream.wait_event(self.reduction_event)
                with torch.cuda.stream(self.reduction_stream):
                    self.allreduce_maybe_retain(self.buckets[bucket_idx], bucket_idx)

                    self.next_bucket += 1

                    # Reversing upstream's logic here, because we constructed our buckets based on
                    # the order things were received during backward.
                    if len(self.ready_buckets_not_reduced) > 0:
                        sorted_todo = sorted(self.ready_buckets_not_reduced)
                        for i in sorted_todo:
                            # Nothing can be reduced now
                            if i > self.next_bucket:
                                break
                            elif i == self.next_bucket:
                                self.allreduce_maybe_retain(self.buckets[i], i)
                                self.ready_buckets_not_reduced.remove(i)
                                self.next_bucket += 1 
                            else:
                                raise ValueError("i should always be >= next_bucket")
            else:
                # Complete, but an earlier bucket has to be reduced first.
                self.ready_buckets_not_reduced.add(bucket_idx)

        
    def forward(self, *inputs, **kwargs):
        """
        Run the wrapped module and reset the per-iteration allreduce bookkeeping
        used by the backward hooks.
        """
        result = self.module(*inputs, **kwargs)
       
        if not self._disable_allreduce:
            if not self.delay_allreduce:
                param_list = [param for param in self.module.parameters() if param.requires_grad]

                # Conditions under which to refresh self.record
                # Forward has the authority to set needs_refresh to True, but only allreduce_params
                # in backward has the authority to set needs_refresh to False.
                # Parentheses are not necessary for correct order of operations, but make the intent clearer.
                if ((not self.active_params) or 
                    (len(param_list) != len(self.active_params)) or
                    any(param1 is not param2 for param1, param2 in zip(param_list, self.active_params))):
                    self.needs_refresh = True

                if self.needs_refresh:
                    self.active_i_buckets = []
                    self.buckets = []
                    self.tmp_buckets = [[], [], []] # [running half, float, double buckets]
                    self.tmp_numels = [0, 0, 0]
                    self.bucket_sizes = []
                    self.param_id_to_active_i = {id(param) : i for i, param in enumerate(param_list)}  
                    self.param_id_to_bucket = {}
                else:
                    self.buckets = [[None for _ in range(self.bucket_sizes[i])] 
                                   for i in range(self.num_buckets)] 
                    self.buckets_ready_size = [0 for i in range(self.num_buckets)]
                    if(self.retain_allreduce_buffers):
                        self.allreduce_buffers = [None for _ in range(self.num_buckets)]
                    self.next_bucket = 0
                    self.ready_buckets_not_reduced = set()
            
                self.active_params = param_list

            self.callback_queued = False
        
        return result


================================================
FILE: apex/apex/parallel/multiproc.py
================================================
import torch
import sys
import subprocess

def docstring_hack():
    """
    Multiproc file which will launch a set of processes locally for multi-gpu
    usage: python -m apex.parallel.multiproc main.py ...
    """
    # Intentionally empty: this function exists only to carry the docstring above.
    return None

# Everything after the script name is forwarded to the launched processes.
argslist = list(sys.argv)[1:]
world_size = torch.cuda.device_count()

# An explicit --world-size wins; otherwise pass the detected GPU count along.
if '--world-size' in argslist:
    world_size = int(argslist[argslist.index('--world-size')+1])
else:
    argslist.append('--world-size')
    argslist.append(str(world_size))

workers = []
# Log files opened for ranks > 0, tracked so they can be closed afterwards.
log_files = []

try:
    for i in range(world_size):
        # The first iteration appends --rank; later ones overwrite its value.
        if '--rank' in argslist:
            argslist[argslist.index('--rank')+1] = str(i)
        else:
            argslist.append('--rank')
            argslist.append(str(i))
        # Rank 0 inherits this process's stdout; other ranks log to GPU_<rank>.log.
        if i == 0:
            stdout = None
        else:
            stdout = open("GPU_"+str(i)+".log", "w")
            log_files.append(stdout)
        print(argslist)
        p = subprocess.Popen([str(sys.executable)]+argslist, stdout=stdout)
        workers.append(p)

    for p in workers:
        p.wait()
finally:
    # Release the log file handles even if launching or waiting failed.
    for log_file in log_files:
        log_file.close()


================================================
FILE: apex/apex/parallel/optimized_sync_batchnorm.py
================================================
import torch
from torch.nn.modules.batchnorm import _BatchNorm
from torch.nn import functional as F

import syncbn
from .optimized_sync_batchnorm_kernel import SyncBatchnormFunction


class SyncBatchNorm(_BatchNorm):
    """
    Synchronized batch normalization module extended from `torch.nn.BatchNormNd`
    that additionally reduces statistics across multiple processes.
    :class:`apex.parallel.SyncBatchNorm` is designed to work with
    `DistributedDataParallel`.

    In training mode the layer reduces stats across all processes to increase
    the effective batch size seen by the normalization layer. This helps when
    the per-process batch size is small enough to hurt converged accuracy.
    Communication goes through the collective package from `torch.distributed`.

    In evaluation mode the layer falls back to
    `torch.nn.functional.batch_norm` (for channel-first inputs).

    Args:
        num_features: :math:`C` from an expected input of size
            :math:`(N, C, L)` or :math:`L` from input of size :math:`(N, L)`
        eps: a value added to the denominator for numerical stability.
            Default: 1e-5
        momentum: the value used for the running_mean and running_var
            computation. Can be set to ``None`` for cumulative moving average
            (i.e. simple average). Default: 0.1
        affine: a boolean value that when set to ``True``, this module has
            learnable affine parameters. Default: ``True``
        track_running_stats: a boolean value that when set to ``True``, this
            module tracks the running mean and variance, and when set to ``False``,
            this module does not track such statistics and always uses batch
            statistics in both training and eval modes. Default: ``True``
        process_group: the process group within which the mini-batch stats
            are synchronized. ``None`` selects the default process group
        channel_last: a boolean value that when set to ``True``, this module
            treats the last dimension of the input tensor as the channel
            dimension. Default: False

    Examples::
        >>> # channel first tensor
        >>> sbn = apex.parallel.SyncBatchNorm(100).cuda()
        >>> inp = torch.randn(10, 100, 14, 14).cuda()
        >>> out = sbn(inp)
        >>> inp = torch.randn(3, 100, 20).cuda()
        >>> out = sbn(inp)
        >>> # channel last tensor
        >>> sbn = apex.parallel.SyncBatchNorm(100, channel_last=True).cuda()
        >>> inp = torch.randn(10, 14, 14, 100).cuda()
    """

    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, track_running_stats=True, process_group=None, channel_last=False):
        super(SyncBatchNorm, self).__init__(
            num_features,
            eps=eps,
            momentum=momentum,
            affine=affine,
            track_running_stats=track_running_stats,
        )
        self.channel_last = channel_last
        self.process_group = process_group

    def _specify_process_group(self, process_group):
        """Replace the process group used for the stats reduction."""
        self.process_group = process_group

    def _specify_channel_last(self, channel_last):
        """Switch between channel-first and channel-last input layout."""
        self.channel_last = channel_last

    def forward(self, input):
        """Normalize ``input`` using batch or running statistics."""
        # 2-D input is always handled as channel_last for efficient memory access.
        use_channel_last = True if input.dim() == 2 else self.channel_last

        inference_with_running_stats = not self.training and self.track_running_stats
        if inference_with_running_stats and not use_channel_last:
            # Plain PyTorch implementation is sufficient for inference.
            return F.batch_norm(
                input, self.running_mean, self.running_var,
                self.weight, self.bias, False, 0.0, self.eps)

        averaging_factor = 0.0
        if self.training and self.track_running_stats:
            self.num_batches_tracked += 1
            if self.momentum is not None:
                averaging_factor = self.momentum
            else:
                # momentum=None requests a cumulative (simple) moving average.
                averaging_factor = 1.0 / float(self.num_batches_tracked)

        return SyncBatchnormFunction.apply(
            input, self.weight, self.bias, self.running_mean, self.running_var,
            self.eps, self.training or not self.track_running_stats,
            averaging_factor, self.process_group, use_channel_last)


================================================
FILE: apex/apex/parallel/optimized_sync_batchnorm_kernel.py
================================================
import torch
from torch.autograd.function import Function

import syncbn
from apex.parallel import ReduceOp

class SyncBatchnormFunction(Function):
    """
    Autograd function implementing synchronized batch norm on top of the
    ``syncbn`` extension module.

    Statistics, normalization and the backward reductions are delegated to
    ``syncbn.*`` calls; this class gathers/reduces the intermediate results
    across processes through ``torch.distributed`` when it is initialized.
    """

    @staticmethod
    def forward(ctx, input, weight, bias, running_mean, running_variance, eps, track_running_stats = True, momentum = 1.0, process_group = None, channel_last = False):
        """
        Normalize ``input`` and, when requested, update the running buffers.

        Arguments:
            ctx: autograd context; receives the tensors and settings needed
                by :meth:`backward`.
            input (Tensor): tensor to normalize; made contiguous before use.
            weight, bias: affine parameters forwarded to the ``syncbn``
                forward kernels.
            running_mean, running_variance (Tensor): running buffers. Updated
                in place (via ``.data``) when ``track_running_stats`` is
                True, otherwise used directly as the normalization stats.
            eps (float): value added to the variance before the square root.
            track_running_stats (bool): when True, mean/variance are computed
                from ``input`` (and combined across the process group if
                ``torch.distributed`` is initialized). The module in
                optimized_sync_batchnorm.py passes
                ``self.training or not self.track_running_stats`` here.
            momentum (float): weight of the new statistics in the running
                buffer update.
            process_group: group used for the collectives; a falsy value
                selects ``torch.distributed.group.WORLD``.
            channel_last (bool): selects the ``*_c_last`` kernel variants and
                uses the last dimension as the channel dimension.
        Returns:
            out (Tensor): result of the ``syncbn`` batchnorm forward kernel.
        Raises:
            ValueError: if there is a single value per channel and fewer
                than two processes take part.
        """
        torch.cuda.nvtx.range_push("sync_BN_fw")
        input = input.contiguous()
        # Stays 0 unless torch.distributed is initialized and stats are computed.
        world_size = 0

        mean = None
        var_biased = None
        inv_std = None
        var = None
        out = None
        count = None
        if track_running_stats:
            # count = number of elements per channel on this process.
            if channel_last:
                count = int(input.numel()/input.size(-1))
                mean, var_biased = syncbn.welford_mean_var_c_last(input)
            else:
                count = int(input.numel()/input.size(1))
                mean, var_biased = syncbn.welford_mean_var(input)

            if torch.distributed.is_initialized():
                if not process_group:
                    process_group = torch.distributed.group.WORLD
                world_size = torch.distributed.get_world_size(process_group)
                # One row per rank; the per-rank views below are what
                # all_gather fills in.
                mean_all = torch.empty(world_size, mean.size(0), dtype=mean.dtype, device=mean.device)
                var_all = torch.empty(world_size, var_biased.size(0), dtype=var_biased.dtype, device=var_biased.device)
                mean_l = [mean_all.narrow(0, i, 1) for i in range(world_size)]
                var_l = [var_all.narrow(0, i, 1) for i in range(world_size)]
                torch.distributed.all_gather(mean_l, mean, process_group)
                torch.distributed.all_gather(var_l, var_biased, process_group)
                # Combine the per-rank statistics into global ones.
                mean, var, inv_std = syncbn.welford_parallel(mean_all, var_all, count, eps)
                # TODO(Jie): should do fp32 math instead!
            else:
                inv_std = 1.0 / torch.sqrt(var_biased + eps)
                # Rescale the biased variance by count / (count - 1).
                # NOTE(review): with count == 1 this divides by zero before
                # the check below runs -- confirm this cannot raise here.
                var = var_biased * (count) / (count-1) 

            if count == 1 and world_size < 2:
                raise ValueError('Expected more than 1 value per channel when training, got input size{}'.format(input.size()))

            # Match the increment dtype to half-precision running buffers.
            # NOTE(review): running_mean / running_variance are dereferenced
            # unconditionally here -- confirm callers never pass None when
            # track_running_stats is True.
            r_m_inc = mean if running_mean.dtype != torch.float16 else mean.half()
            r_v_inc = var if running_variance.dtype != torch.float16 else var.half()
            running_mean.data = running_mean.data * (1-momentum) + momentum*r_m_inc
            running_variance.data = running_variance.data * (1-momentum) + momentum*r_v_inc
        else:
            # Normalize with the stored running statistics.
            mean = running_mean.data
            inv_std = 1.0 / torch.sqrt(running_variance.data + eps)

        ctx.save_for_backward(input, weight, mean, inv_std)
        ctx.process_group = process_group
        ctx.channel_last = channel_last
        ctx.world_size = world_size

        if channel_last:
            out = syncbn.batchnorm_forward_c_last(input, mean, inv_std, weight, bias)
        else:
            out = syncbn.batchnorm_forward(input, mean, inv_std, weight, bias)

        torch.cuda.nvtx.range_pop()
        return out

    @staticmethod
    def backward(ctx, grad_output):
        """
        Compute gradients for ``input``, ``weight`` and ``bias``.

        Returns a 10-tuple, one entry per argument of :meth:`forward` after
        ``ctx``; only the first three entries can be non-None.
        """
        grad_output = grad_output.contiguous()
        torch.cuda.nvtx.range_push("sync_BN_bw")
        # mini batch mean & var are calculated by forward path.
        # mu = 1./N*np.sum(h, axis = 0)
        # var = 1./N*np.sum((h-mu)**2, axis = 0)
        saved_input, weight, mean, inv_std = ctx.saved_tensors
        process_group = ctx.process_group
        channel_last = ctx.channel_last
        world_size = ctx.world_size
        grad_input = grad_weight = grad_bias = None

        # TODO(jie): why do I have to clone here? life time of grad_output?
        if channel_last:
            mean_dy, mean_dy_xmu, grad_weight, grad_bias = syncbn.reduce_bn_c_last(grad_output, saved_input, mean, inv_std, weight)
        else:
            mean_dy, mean_dy_xmu, grad_weight, grad_bias = syncbn.reduce_bn(grad_output, saved_input, mean, inv_std, weight)

        # calculate grad_input
        if ctx.needs_input_grad[0]:

            if torch.distributed.is_initialized():
                # Average the two reduction terms over all ranks (sum, then
                # divide by the world size recorded in forward).
                torch.distributed.all_reduce(
                    mean_dy, ReduceOp.SUM, process_group)
                mean_dy = mean_dy / world_size
                torch.distributed.all_reduce(
                    mean_dy_xmu, ReduceOp.SUM, process_group)
                mean_dy_xmu = mean_dy_xmu / world_size
            if channel_last:
                grad_input = syncbn.batchnorm_backward_c_last(grad_output, saved_input, mean, inv_std, weight, mean_dy, mean_dy_xmu)
            else:
                grad_input = syncbn.batchnorm_backward(grad_output, saved_input, mean, inv_std, weight, mean_dy, mean_dy_xmu)

        # Drop affine gradients that are not needed or not applicable.
        if weight is None or not ctx.needs_input_grad[1]:
            grad_weight = None

        if weight is None or not ctx.needs_input_grad[2]:
            grad_bias = None

        torch.cuda.nvtx.range_pop()
        return grad_input, grad_weight, grad_bias, None, None, None, None, None, None, None


================================================
FILE: apex/apex/parallel/sync_batchnorm.py
================================================
import torch
from torch.nn.modules.batchnorm import _BatchNorm
from torch.nn import functional as F

from .sync_batchnorm_kernel import SyncBatchnormFunction
from apex.parallel import ReduceOp


class SyncBatchNorm(_BatchNorm):
    """
    synchronized batch normalization module extended from ``torch.nn.BatchNormNd``
    with the added stats reduction across multiple processes.
    :class:`apex.parallel.SyncBatchNorm` is designed to work with
    ``DistributedDataParallel``.

    When running in training mode, the layer reduces stats across all processes
    to increase the effective batchsize for normalization layer. This is useful
    in applications where batch size is small on a given process that would
    diminish converged accuracy of the model. The model uses collective
    communication package from ``torch.distributed``.

    When running in evaluation mode, the layer falls back to
    ``torch.nn.functional.batch_norm``.

    Args:
        num_features: :math:`C` from an expected input of size
            :math:`(N, C, L)` or :math:`L` from input of size :math:`(N, L)`
        eps: a value added to the denominator for numerical stability.
            Default: 1e-5
        momentum: the value used for the running_mean and running_var
            computation. Can be set to ``None`` for cumulative moving average
            (i.e. simple average). Default: 0.1
        affine: a boolean value that when set to ``True``, this module has
            learnable affine parameters. Default: ``True``
        track_running_stats: a boolean value that when set to ``True``, this
            module tracks the running mean and variance, and when set to ``False``,
            this module does not track such statistics and always uses batch
            statistics in both training and eval modes. Default: ``True``
        process_group: process group within which the mini-batch stats are
            reduced. ``None`` selects ``torch.distributed.group.WORLD``.
        channel_last: must be ``False``; this Python implementation only
            supports channel-first input and raises otherwise.

    Example::

        >>> sbn = apex.parallel.SyncBatchNorm(100).cuda()
        >>> inp = torch.randn(10, 100, 14, 14).cuda()
        >>> out = sbn(inp)
        >>> inp = torch.randn(3, 100, 20).cuda()
        >>> out = sbn(inp)
    """

    # Class-wide flag so the fallback warning is printed only once.
    warned = False

    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, track_running_stats=True, process_group=None, channel_last=False):
        if channel_last == True:
            raise AttributeError("channel_last is not supported by primitive SyncBatchNorm implementation. Try install apex with `--cuda_ext` if channel_last is desired.")

        if not SyncBatchNorm.warned:
            # `syncbn_import_error` is not defined in this class; read it
            # defensively so construction still works if it was never set.
            import_error = getattr(type(self), "syncbn_import_error", None)
            print("Warning:  using Python fallback for SyncBatchNorm, possibly because apex was installed without --cuda_ext.  The exception raised when attempting to import the cuda backend was: ", import_error)
            SyncBatchNorm.warned = True

        super(SyncBatchNorm, self).__init__(num_features, eps=eps, momentum=momentum, affine=affine, track_running_stats=track_running_stats)
        self.process_group = process_group

    def _specify_process_group(self, process_group):
        """Replace the process group used for the stats reduction."""
        self.process_group = process_group

    def forward(self, input):
        """
        Normalize ``input`` with (cross-process) batch statistics in training
        mode, or with the running statistics in evaluation mode.

        The input is converted to the layer's dtype when they differ, and the
        result is converted back to the caller's dtype before being returned.
        """
        torch.cuda.nvtx.range_push("sync_bn_fw_with_mean_var")
        mean = None
        var = None
        out = None

        # casting to handle mismatch input type to layer type. The caller's
        # dtype is recorded *before* converting so the output can be cast back.
        cast = None
        layer_dtype = None
        if self.running_mean is not None:
            layer_dtype = self.running_mean.dtype
        elif self.weight is not None:
            layer_dtype = self.weight.dtype
        if layer_dtype is not None and layer_dtype != input.dtype:
            cast = input.dtype
            input = input.to(layer_dtype)

        if not self.training and self.track_running_stats:
            # fall back to pytorch implementation for inference
            torch.cuda.nvtx.range_pop()
            out = F.batch_norm(input, self.running_mean, self.running_var, self.weight, self.bias, False, 0.0, self.eps)
        else:
            process_group = self.process_group
            world_size = 1
            if not self.process_group:
                process_group = torch.distributed.group.WORLD
            # The counter buffer is None when track_running_stats is False.
            if self.num_batches_tracked is not None:
                self.num_batches_tracked += 1
            with torch.no_grad():
                channel_first_input = input.transpose(0, 1).contiguous()
                squashed_input_tensor_view = channel_first_input.view(
                    channel_first_input.size(0), -1)
                # total number of data points for each variance entry. Used to calculate unbiased variance estimate
                m = None
                local_m = float(squashed_input_tensor_view.size()[1])
                local_mean = torch.mean(squashed_input_tensor_view, 1)
                local_sqr_mean = torch.pow(
                    squashed_input_tensor_view, 2).mean(1)
                if torch.distributed.is_initialized():
                    world_size = torch.distributed.get_world_size(process_group)
                    torch.distributed.all_reduce(
                        local_mean, ReduceOp.SUM, process_group)
                    mean = local_mean / world_size
                    torch.distributed.all_reduce(
                        local_sqr_mean, ReduceOp.SUM, process_group)
                    sqr_mean = local_sqr_mean / world_size
                    m = local_m * world_size
                else:
                    m = local_m
                    mean = local_mean
                    sqr_mean = local_sqr_mean
                # var(x) = E (( x - mean_x ) ** 2)
                #        = 1 / N * sum ( x - mean_x ) ** 2
                #        = 1 / N * sum (x**2) - mean_x**2
                var = sqr_mean - mean.pow(2)

                if self.running_mean is not None or self.running_var is not None:
                    # momentum=None requests a cumulative moving average,
                    # matching the CUDA-extension SyncBatchNorm module.
                    if self.momentum is None:
                        momentum = 1.0 / float(self.num_batches_tracked)
                    else:
                        momentum = self.momentum

                if self.running_mean is not None:
                    self.running_mean = momentum * mean + \
                        (1 - momentum) * self.running_mean
                if self.running_var is not None:
                    # as noted by the paper, we used unbiased variance estimate of the mini-batch
                    # Var[x] = m / (m-1) * Eb (sample_variance)
                    self.running_var = m / \
                        (m-1) * momentum * var + \
                        (1 - momentum) * self.running_var
            torch.cuda.nvtx.range_pop()
            out = SyncBatchnormFunction.apply(input, self.weight, self.bias, mean, var, self.eps, process_group, world_size)
        # Hand the result back in the caller's dtype when a cast happened.
        if cast is not None:
            out = out.to(cast)
        return out


================================================
FILE: apex/apex/parallel/sync_batchnorm_kernel.py
================================================
import torch
from torch.autograd.function import Function

from apex.parallel import ReduceOp


class SyncBatchnormFunction(Function):
    """
    Pure-PyTorch autograd function that normalizes with externally supplied
    per-channel mean/variance and reduces the backward terms across
    processes when ``torch.distributed`` is initialized.
    """

    @staticmethod
    def forward(ctx, input, weight, bias, running_mean, running_variance, eps, process_group, world_size):
        """
        Apply ``(input - running_mean) / sqrt(running_variance + eps)``
        followed by the optional affine ``weight`` / ``bias``.

        ``process_group`` and ``world_size`` are only stored on ``ctx`` for
        use in :meth:`backward`.
        """
        torch.cuda.nvtx.range_push("sync_BN_fw")
        # Work in channel-last layout so the per-channel vectors broadcast
        # against inputs of any rank.
        x_cl = input.transpose(1, -1).contiguous().clone()

        ctx.eps = eps
        ctx.process_group = process_group
        ctx.world_size = world_size
        ctx.save_for_backward(x_cl, weight, bias,
                              running_mean, running_variance)

        y_cl = (x_cl - running_mean) / \
            torch.sqrt(running_variance + eps)
        if weight is not None:
            y_cl = y_cl * weight
        if bias is not None:
            y_cl = y_cl + bias

        torch.cuda.nvtx.range_pop()
        # Back to the caller's channel-first layout.
        return y_cl.transpose(1, -1).contiguous().clone()

    @staticmethod
    def backward(ctx, grad_output):
        """
        Return gradients for ``input``, ``weight`` and ``bias`` followed by
        ``None`` for the five remaining forward arguments.
        """
        torch.cuda.nvtx.range_push("sync_BN_bw")
        # The mini-batch mean and variance were computed on the forward path:
        # mu = 1./N*np.sum(h, axis = 0)
        # var = 1./N*np.sum((h-mu)**2, axis = 0)
        x_cl, weight, bias, batch_mean, batch_var = ctx.saved_tensors

        eps = ctx.eps
        process_group = ctx.process_group
        world_size = ctx.world_size
        channels = batch_mean.size()[0]

        # Same channel-last trick as in forward, applied to the gradient.
        torch.cuda.nvtx.range_push("carilli field")
        dy_cl = grad_output.transpose(1, -1).contiguous()
        # Collapse every non-channel dimension so means/sums are one call.
        dy_flat = dy_cl.view(-1, channels).contiguous()
        torch.cuda.nvtx.range_pop()

        grad_input = None
        if ctx.needs_input_grad[0]:
            # dh = gamma * (var + eps)**(-1. / 2.) * (dy - np.mean(dy, axis=0)
            #     - (h - mu) * (var + eps)**(-1.0) * np.mean(dy * (h - mu), axis=0))
            mean_dy = dy_flat.mean(0)
            mean_dy_xmu = (dy_cl * (x_cl -
                                    batch_mean)).view(-1, channels).mean(0)
            if torch.distributed.is_initialized():
                torch.distributed.all_reduce(
                    mean_dy, ReduceOp.SUM, process_group)
                mean_dy = mean_dy / world_size
                torch.distributed.all_reduce(
                    mean_dy_xmu, ReduceOp.SUM, process_group)
                mean_dy_xmu = mean_dy_xmu / world_size
            dx_cl = (dy_cl - mean_dy - (x_cl - batch_mean) / (
                batch_var + eps) * mean_dy_xmu) / torch.sqrt(batch_var + eps)
            if weight is not None:
                dx_cl.mul_(weight)
            grad_input = dx_cl.transpose(1, -1).contiguous()

        grad_weight = None
        if weight is not None and ctx.needs_input_grad[1]:
            # dgamma = np.sum((h - mu) * (var + eps)**(-1. / 2.) * dy, axis=0)
            grad_weight = ((x_cl - batch_mean) / torch.sqrt(
                batch_var + eps) * dy_cl).view(-1, channels).sum(0)

        grad_bias = None
        if bias is not None and ctx.needs_input_grad[2]:
            # dbeta = np.sum(dy, axis=0)
            grad_bias = dy_flat.sum(0)

        torch.cuda.nvtx.range_pop()
        return grad_input, grad_weight, grad_bias, None, None, None, None, None


================================================
FILE: apex/apex/reparameterization/README.md
================================================
Under construction...


================================================
FILE: apex/apex/reparameterization/__init__.py
================================================
from .weight_norm import WeightNorm
from .reparameterization import Reparameterization

def apply_weight_norm(module, name='', dim=0, hook_child=True):
    r"""
    Applies weight normalization to a parameter in the given module.
    If no parameter is provided, applies weight normalization to all
    parameters in model (except 1-d vectors and scalars).

    .. math::
         \mathbf{w} = g \dfrac{\mathbf{v}}{\|\mathbf{v}\|}

    Weight normalization is a reparameterization that decouples the magnitude
    of a weight tensor from its direction. This replaces the parameter specified
    by `name` (e.g. "weight") with two parameters: one specifying the magnitude
    (e.g. "weight_g") and one specifying the direction (e.g. "weight_v").
    Weight normalization is implemented via a hook that recomputes the weight
    tensor from the magnitude and direction before every :meth:`~Module.forward`
    call.

    By default, with `dim=0`, the norm is computed independently per output
    channel/plane. To compute a norm over the entire weight tensor, use
    `dim=None`.

    See https://arxiv.org/abs/1602.07868

    Args:
        module (nn.Module): containing module
        name (str, optional): name of weight parameter
        dim (int, optional): dimension over which to compute the norm
        hook_child (boolean, optional): adds reparameterization hook to direct parent of the 
            parameters. If False, it's added to `module` instead. Default: True

    Returns:
        The original module with the weight norm hook

    Example::

        >>> m = apply_weight_norm(nn.Linear(20, 40), name='weight')
        Linear (20 -> 40)
        >>> m.weight_g.size()
        torch.Size([40, 1])
        >>> m.weight_v.size()
        torch.Size([40, 20])

    """
    # The docstring is a raw string so the LaTeX backslashes above are not
    # parsed as (invalid) escape sequences.
    return apply_reparameterization(module, reparameterization=WeightNorm, hook_child=hook_child,
                                    name=name, dim=dim)

def remove_weight_norm(module, name='', remove_all=False):
    """
    Undo weight normalization on a module.

    Delegates to :func:`remove_reparameterization` with ``WeightNorm`` as the
    reparameterization type. If no parameter name is supplied, all weight
    norm parameterizations are removed.
    Args:
        module (nn.Module): containing module
        name (str, optional): name of weight parameter
        remove_all (bool, optional): forwarded to
            :func:`remove_reparameterization`. Default: False
    Example:
        >>> m = apply_weight_norm(nn.Linear(20, 40))
        >>> remove_weight_norm(m)
    """
    restored = remove_reparameterization(
        module,
        reparameterization=WeightNorm,
        name=name,
        remove_all=remove_all,
    )
    return restored

def apply_reparameterization(module, reparameterization=None, name='', dim=0, hook_child=True):
    """
    Attach a weight reparameterization (for example weight normalization) to
    one parameter of ``module``, or to every entry of its ``state_dict()``
    when no name is given (1-d vectors and scalars are left untouched).

    Args:
        module (nn.Module): containing module
        reparameterization (Reparameterization): reparameterization class to apply
        name (str, optional): name of weight parameter
        dim (int, optional): dimension over which to perform reparameterization op
        hook_child (boolean, optional): adds reparameterization hook to direct parent of the 
            parameters. If False, it's added to `module` instead. Default: True

    Returns:
        The original module with the reparameterization hook

    Example::

        >>> m = apply_reparameterization(nn.Linear(20, 40), WeightNorm)
        Linear (20 -> 40)

    """
    assert reparameterization is not None
    if name == '':
        # Snapshot the names first: applying a reparameterization registers
        # new parameters on the module while we iterate.
        targets = list(module.state_dict().keys())
    else:
        targets = [name]
    for target in targets:
        Reparameterization.apply(module, target, dim, reparameterization, hook_child)
    return module

def remove_reparameterization(module, reparameterization=Reparameterization,
                                name='', remove_all=False):
    """
    Removes the given reparameterization of a parameter from a module.
    If no parameter is supplied then all reparameterizations are removed
    from the module and every submodule.
    Args:
        module (nn.Module): containing module
        reparameterization (Reparameterization): reparameterization class to remove
        name (str, optional): name of weight parameter
        remove_all (bool, optional): if True, remove all reparameterizations of given type
            registered directly on `module`. Default: False
    Returns:
        The original module.
    Raises:
        ValueError: if `name` is given, `remove_all` is False and no matching
            reparameterization hook is registered on `module`.
    Example:
        >>> m = apply_reparameterization(nn.Linear(20, 40),WeightNorm)
        >>> remove_reparameterization(m)
    """
    if name != '' or remove_all:
        # Undo every matching hook first, then drop the forward pre-hooks;
        # the dict must not change size while it is being iterated.
        to_remove = []
        for k, hook in module._forward_pre_hooks.items():
            if isinstance(hook, reparameterization) and (hook.name == name or remove_all):
                hook.remove(module)
                to_remove.append(k)
        for k in to_remove:
            del module._forward_pre_hooks[k]
        if not to_remove and not remove_all:
            raise ValueError("reparameterization of '{}' not found in {}"
                             .format(name, module))
        # Always hand the module back, including when remove_all found nothing.
        return module

    # No name given: sweep the whole tree. `module.modules()` already yields
    # `module` itself, so it does not need to be added separately.
    for m in module.modules():
        remove_reparameterization(m, reparameterization=reparameterization, remove_all=True)
    return module


================================================
FILE: apex/apex/reparameterization/reparameterization.py
================================================
import torch
from torch.nn.parameter import Parameter
import sys
class Reparameterization(object):
    """
    Base interface for weight reparameterizations implemented as module hooks.
    Arguments:
        name (str): name of weight parameter
        dim (int): dimension over which to compute the norm
        module (nn.Module): parent module to which param `name` is registered to
        retain_forward (bool, optional): stored on the instance as
            `retain_forward`. Default: True
    Attributes:
        reparameterization_names (list, str): contains names of all parameters 
            needed to compute reparameterization.
        backward_hook_key (int): torch.utils.hooks.RemovableHandle.id for hook used in module backward pass.
    """

    def __init__(self, name, dim, module, retain_forward=True):
        self.module = module
        self.name = name
        self.dim = dim
        self.retain_forward = retain_forward
        # Cleared by the backward hook so the weight is recomputed afterwards.
        self.evaluated = False
        self.backward_hook_key = None
        self.reparameterization_names = []

    def compute_weight(self, module=None, name=None):
        """
        Return the reparameterized weight to be stored on the module under
        attribute `name`. Subclasses must override this.
        See WeightNorm class for example.
        Arguments:
            module (nn.Module): module with weight we'd like to reparameterize
            name (str, optional): name of the weight attribute
        Returns:
            w (Tensor): Tensor object containing value of reparameterized weight
        """
        raise NotImplementedError

    def reparameterize(self, name, weight, dim):
        """
        Build the Parameters that replace `weight` together with the
        attribute names they are registered under. Subclasses must override
        this.
        See WeightNorm class for example.
        Arguments:
            name (str): name of weight parameter
            weight (Tensor): the original weight being reparameterized
            dim (int): dimension over which to compute parameterization
        Returns:
            names (list, str): names of Parameters to be used for reparameterization
            params (list, Parameter): Parameters to be used for reparameterization
        """
        raise NotImplementedError

    @staticmethod
    def apply(module, name, dim, reparameterization=None, hook_child=True):
        """
        Reparameterize `module`'s parameter `name` and install the hooks.

        With `hook_child` the hooks go on the direct parent of the parameter;
        otherwise they are registered on `module` itself. Returns the hook
        object, or None when the parameter is skipped (unresolvable name,
        embedding module, or a weight with at most one dimension).
        """
        reparam_cls = Reparameterization if reparameterization is None else reparameterization
        owner, leaf = Reparameterization.get_module_and_name(module, name)
        # does not work on sparse
        if leaf is None or isinstance(owner, (torch.nn.Embedding, torch.nn.EmbeddingBag)):
            return

        if hook_child:
            fn = reparam_cls(leaf, dim, owner)
            hook_module = owner
        else:
            fn = reparam_cls(name, dim, module)
            hook_module = module

        weight = getattr(owner, leaf)
        if weight.dim() <= 1:
            return

        # The original weight stops being a parameter ...
        del owner._parameters[leaf]

        # ... and the reparameterization's own parameters take its place.
        names, params = fn.reparameterize(leaf, weight, dim)
        for param_name, param in zip(names, params):
            owner.register_parameter(param_name, param)

        # Remember the names so they can be removed again later.
        fn.reparameterization_names = names

        setattr(owner, leaf, None)

        # Recompute the weight before every forward() ...
        hook_module.register_forward_pre_hook(fn)
        # ... and keep the backward hook's key so it can be deleted later.
        fn.backward_hook_key = hook_module.register_backward_hook(fn.backward_hook).id

        return fn

    @staticmethod
    def get_module_and_name(module, name):
        """
        Resolve a dotted `name` to the (possibly nested) module owning the
        weight and the weight's attribute name. Returns (None, None) for an
        empty name.
        """
        if name == '':
            return None, None
        parts = name.split('.')
        owner = module
        # Walk every component except the last, which names the weight.
        for attr in parts[:-1]:
            owner = getattr(owner, attr)
        return owner, parts[-1]

    def get_params(self, module):
        """gets params of reparameterization based on known attribute names"""
        params = []
        for param_name in self.reparameterization_names:
            params.append(getattr(module, param_name))
        return params

    def remove(self, module):
        """removes reparameterization and backward hook (does not remove forward hook)"""
        owner, leaf = Reparameterization.get_module_and_name(module, self.name)
        for param in self.get_params(owner):
            param.requires_grad = False
        restored = self.compute_weight(owner, leaf)
        delattr(owner, leaf)
        for param_name in self.reparameterization_names:
            del owner._parameters[param_name]
        owner.register_parameter(leaf, Parameter(restored.data))
        del module._backward_hooks[self.backward_hook_key]

    def __call__(self, module, inputs):
        """callable hook for forward pass"""
        owner, leaf = Reparameterization.get_module_and_name(module, self.name)
        current = getattr(owner, leaf)
        if current is None or not self.evaluated:
            setattr(owner, leaf, self.compute_weight(owner, leaf))
            self.evaluated = True

    def backward_hook(self, module, grad_input, grad_output):
        """callable hook for backward pass"""
        owner, leaf = Reparameterization.get_module_and_name(module, self.name)
        # The attribute is looked up but its value is not used.
        getattr(owner, leaf)
        self.evaluated = False


================================================
FILE: apex/apex/reparameterization/weight_norm.py
================================================
import torch
from torch.nn.parameter import Parameter
from ..fp16_utils import Fused_Weight_Norm
import time

from .reparameterization import Reparameterization

def _norm(p, dim):
    """Computes the norm over all dimensions except dim"""
    if dim is None:
        return p.norm()
    elif dim == 0:
        output_size = (p.size(0),) + (1,) * (p.dim() - 1)
        return p.contiguous().view(p.size(0), -1).norm(dim=1).view(*output_size)
    elif dim == p.dim() - 1:
        output_size = (1,) * (p.dim() - 1) + (p.size(-1),)
        return p.contiguous().view(-1, p.size(-1)).norm(dim=0).view(*output_size)
    return _norm(p.transpose(0, dim), 0).transpose(0, dim)

# Half-precision tensor types (CUDA and CPU variants).
HALF_TYPES = (torch.cuda.HalfTensor, torch.HalfTensor)

class WeightNorm(Reparameterization):
    r"""
    Weight normalization is a reparameterization that decouples the magnitude
    of a weight tensor from its direction. This replaces the parameter specified
    by `name` (e.g. "weight") with two parameters: one specifying the magnitude
    (e.g. "weight_g") and one specifying the direction (e.g. "weight_v").
    Weight normalization is implemented via a hook that recomputes the weight
    tensor from the magnitude and direction before every :meth:`~Module.forward`
    call.

    .. math::
         \mathbf{w} = g \dfrac{\mathbf{v}}{\|\mathbf{v}\|}

    By default, with `dim=0`, the norm is computed independently per output
    channel/plane. To compute a norm over the entire weight tensor, use
    `dim=None`.
    """
    # NOTE: the class docstring is a raw string because the math block contains
    # backslashes that are not valid Python escape sequences.

    def compute_weight(self, module=None, name=None):
        """
        Computes the weight-normalized weight value to assign to the module
        attribute with name `name`.
        Arguments:
            module (nn.Module, optional): module with the weight we'd like to
                reparameterize. Defaults to `self.module`.
            name (str, optional): name of the weight attribute. Defaults to
                `self.name`. The pair is resolved through
                `Reparameterization.get_module_and_name` before use.
        Returns:
            w (Tensor): Tensor object containing value of reparameterized weight
        """
        if module is None:
            module = self.module
        if name is None:
            name = self.name
        module, name = Reparameterization.get_module_and_name(module, name)
        g = getattr(module, name + '_g')
        v = getattr(module, name + '_v')

        # The fused op is always handed a contiguous `v`.
        return Fused_Weight_Norm.apply(v.contiguous(), g, self.dim)

    def reparameterize(self, name, weight, dim):
        """
        Creates the Parameters g and v to be used for weight normalization,
        together with the attribute names they should be registered under.
        Arguments:
            name (str): name of the weight parameter
            weight (Tensor): current value of the weight; `v` is initialized
                from it and `g` from its norm
            dim (int, optional): dimension over which to compute the
                parameterization (`None` for a norm over the whole tensor)
        Returns:
            names (list, str): names of Parameters to be used for reparameterization
            params (list, Parameter): Parameters to be used for reparameterization
        """
        names = [name + '_g', name + '_v']
        params = [Parameter(_norm(weight, dim).data), Parameter(weight.data)]
        return names, params


================================================
FILE: apex/apex.patch
================================================
diff --git a/csrc/fused_adam_cuda_kernel.cu b/csrc/fused_adam_cuda_kernel.cu
index 34f7aa2..95581d1 100644
--- a/csrc/fused_adam_cuda_kernel.cu
+++ b/csrc/fused_adam_cuda_kernel.cu
@@ -19,8 +19,8 @@ typedef enum{
 
 template <typename T, typename GRAD_T>
 __global__ void adam_cuda_kernel(
-        T* __restrict__ p,
-        GRAD_T* __restrict__ p_copy, // For mixed precision training, pass NULL if not needed
+        GRAD_T* __restrict__ p,
+        T* __restrict__ p_copy, // For mixed precision training, pass NULL if not needed
         T* __restrict__ m,
         T* __restrict__ v,
         const GRAD_T * __restrict__ g,
@@ -50,7 +50,7 @@ __global__ void adam_cuda_kernel(
                 else // Mode 1
                     denom = sqrtf(v[j]) + eps;
                 float update = (m[j]/denom) + (decay*p[j]);
-                p[j] = p[j] - (step_size*update);
+                p[j] = (GRAD_T) (p[j] - (step_size*update));
                 if (p_copy != NULL) p_copy[j] = (GRAD_T) p[j];
         }
 }
@@ -93,14 +93,14 @@ void fused_adam_cuda(
 
         if (g.scalar_type() == at::ScalarType::Half) {
 //all other values should be fp32 for half gradients
-            AT_ASSERTM(p.scalar_type() == at::ScalarType::Float, "expected parameter to be of float type");
+//            AT_ASSERTM(p.scalar_type() == at::ScalarType::Float, "expected parameter to be of float type");
 //dispatch is done on the gradient type
             using namespace at; // prevents "toString is undefined" errors
             DISPATCH_FLOAT_AND_HALF(g.scalar_type(), 0, "adam_cuda_kernel", 
                 using accscalar_t = at::acc_type<scalar_t_0, true>;
                 adam_cuda_kernel<accscalar_t, scalar_t_0><<<blocks,threadsPerBlock, 0, stream>>>(
-                        p.data<accscalar_t>(),
-                        p_copy.numel() ? p_copy.data<scalar_t_0>() : NULL,
+                        p.data<scalar_t_0>(),
+                        NULL, //don't output p_copy for fp32, it's wasted write
                         m.data<accscalar_t>(),
                         v.data<accscalar_t>(),
                         g.data<scalar_t_0>(),


================================================
FILE: apex/csrc/amp_C_frontend.cpp
================================================
#include <torch/extension.h>

// Forward declarations of the functions bound in PYBIND11_MODULE below.
// Their definitions are not part of this file.

// Bound as "multi_tensor_scale".
void multi_tensor_scale_cuda(
  int chunk_size,
  at::Tensor noop_flag,
  std::vector<std::vector<at::Tensor>> tensor_lists,
  float scale);

// Bound as "multi_tensor_axpby".
void multi_tensor_axpby_cuda(
  int chunk_size,
  at::Tensor noop_flag,
  std::vector<std::vector<at::Tensor>> tensor_lists,
  float a,
  float b,
  int arg_to_check);

// Bound as "multi_tensor_l2norm"; returns a pair of tensors.
std::tuple<at::Tensor, at::Tensor> multi_tensor_l2norm_cuda(
  int chunk_size,
  at::Tensor noop_flag,
  std::vector<std::vector<at::Tensor>> tensor_lists,
  at::optional<bool> per_tensor_python);

// Bound as "multi_tensor_lamb_stage1_cuda".
void multi_tensor_lamb_stage1_cuda(
  int chunk_size,
  at::Tensor noop_flag,
  std::vector<std::vector<at::Tensor>> tensor_lists,
  at::Tensor per_tensor_decay,
  const int step,
  const float beta1,
  const float beta2,
  const float epsilon,
  const float global_grad_norm,
  const float max_global_grad_norm);

// Bound as "multi_tensor_lamb_stage2_cuda".
void multi_tensor_lamb_stage2_cuda(
  int chunk_size,
  at::Tensor noop_flag,
  std::vector<std::vector<at::Tensor>> tensor_lists,
  at::Tensor per_tensor_param_norm,
  at::Tensor per_tensor_update_norm,
  const float step_size);

// Python bindings for the multi-tensor functions declared above.
// Note that the two LAMB entries keep the "_cuda" suffix in their Python
// names, while the other three are exposed without it.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("multi_tensor_scale", &multi_tensor_scale_cuda,
        "Fused overflow check + scale for a list of contiguous tensors");
  m.def("multi_tensor_axpby", &multi_tensor_axpby_cuda,
        "out = a*x + b*y for a list of contiguous tensors");
  m.def("multi_tensor_l2norm", &multi_tensor_l2norm_cuda,
        "Computes L2 norm for a list of contiguous tensors");
  m.def("multi_tensor_lamb_stage1_cuda", &multi_tensor_lamb_stage1_cuda,
        "Computes update part of LAMB optimizer");
  m.def("multi_tensor_lamb_stage2_cuda", &multi_tensor_lamb_stage2_cuda,
        "Completes application of gradient to parameters for LAMB optimizer");
}


================================================
FILE: apex/csrc/flatten_unflatten.cpp
================================================
#include <torch/extension.h>
#include <torch/csrc/utils/tensor_flatten.h>
// https://github.com/pytorch/pytorch/blob/master/torch/csrc/utils/tensor_flatten.h

// Thin wrapper exposed to Python: forwards `tensors` to
// torch::utils::flatten_dense_tensors and returns its result.
at::Tensor flatten(std::vector<at::Tensor> tensors)
{
  return torch::utils::flatten_dense_tensors(tensors);
}

// Thin wrapper exposed to Python: forwards `flat` and `tensors` to
// torch::utils::unflatten_dense_tensors and returns its result.
std::vector<at::Tensor> unflatten(at::Tensor flat, std::vector<at::Tensor> tensors)
{
  return torch::utils::unflatten_dense_tensors(flat, tensors);
}

// Python bindings for the two wrappers defined in this file.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("flatten", &flatten, "Flatten dense tensors");
  m.def("unflatten", &unflatten, "Unflatten dense tensors");
}


================================================
FILE: apex/csrc/fused_adam_cuda.cpp
================================================
#include <torch/extension.h>

// CUDA forward declaration
void fused_adam_cuda(at::Tensor & p, at::Tensor & p_copy, at::Tensor & m, at::Tensor & v, at::Tensor & g, float lr, float beta1, float beta2, float eps, float grad_scale, int step, int mode, int bias_correction, float decay);

// Argument validation helpers; each assertion message names the offending argument.
#define CHECK_CUDA(x) AT_ASSERTM(x.type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) AT_ASSERTM(x.is_contiguous(), #x " must be contiguous")
// NOTE: CHECK_INPUT expands to TWO statements separated by ';'. Used as the
// body of an unbraced `if`, only the CHECK_CUDA part is guarded by the condition.
#define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)

// C++ interface
// Validates the tensor arguments and forwards everything to fused_adam_cuda.
//   p, m, v, g : must be contiguous CUDA tensors with the same number of elements.
//   p_copy     : either empty, or a contiguous CUDA tensor with as many elements as p.
// All remaining arguments are passed through unchanged.
void adam(at::Tensor & p, at::Tensor & p_copy, at::Tensor & m, at::Tensor & v, at::Tensor & g, float lr, float beta1, float beta2, float eps, float grad_scale, int step, int mode, int bias_correction, float decay) {
        CHECK_INPUT(p);
        // CHECK_INPUT expands to two statements, so the braces are needed to keep
        // both checks under the condition.
        if (p_copy.numel() > 0) { CHECK_INPUT(p_copy); }
        CHECK_INPUT(m);
        CHECK_INPUT(v);
        CHECK_INPUT(g);
        int64_t num_elem = p.numel();
        AT_ASSERTM(m.numel() == num_elem, "number of elements in m and p tensors should be equal");
        AT_ASSERTM(v.numel() == num_elem, "number of elements in v and p tensors should be equal");
        AT_ASSERTM(g.numel() == num_elem, "number of elements in g and p tensors should be equal");
        AT_ASSERTM(p_copy.numel() == num_elem || p_copy.numel() == 0, "number of elements in p_copy and p tensors should be equal, or p_copy should be empty");

        fused_adam_cuda(p, p_copy, m, v, g, lr, beta1, beta2, eps, grad_scale, step, mode, bias_correction, decay);
}

// Python binding: exposes the validating `adam` wrapper defined above.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
        m.def("adam", &adam, "Adam optimized CUDA implementation.");
}


================================================
FILE: apex/csrc/fused_adam_cuda_kernel.cu
================================================
#include "ATen/ATen.h"
#include "ATen/cuda/CUDAContext.h"
#include "ATen/cuda/detail/IndexUtils.cuh"
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include <cmath>
#include "ATen/TensorUtils.h"
#include "ATen/Type.h"
#include "ATen/AccumulateType.h"
#include <THC/THCGeneral.h>

#include "type_shim.h"

// Selects how eps enters the denominator of the Adam update:
// sqrt(v + eps) for mode 0, sqrt(v) + eps for mode 1.
typedef enum{
    ADAM_MODE_0   =0, // eps under square root
    ADAM_MODE_1   =1  // eps outside square root
} adamMode_t;

// Element-wise Adam update.
//
// For every element j the kernel divides the gradient by grad_scale, updates
// the moment buffers m and v in place, and writes the new parameter value back
// to p (cast to GRAD_T). When p_copy is non-NULL the updated value is stored
// there as well.
//
// Template parameters:
//   T      - element type of m, v and p_copy
//   GRAD_T - element type of p and g
// Parameters:
//   b1, b2     - decay factors applied to m and v
//   eps        - added under or outside the square root, depending on `mode`
//   grad_scale - the gradient is divided by this value before use
//   step_size  - multiplier applied to the update
//   tsize      - number of elements to process
//   decay      - multiplied with the parameter value and added to the update
template <typename T, typename GRAD_T>
__global__ void adam_cuda_kernel(
        GRAD_T* __restrict__ p,
        T* __restrict__ p_copy, // For mixed precision training, pass NULL if not needed
        T* __restrict__ m,
        T* __restrict__ v,
        const GRAD_T * __restrict__ g,
        const float b1,
        const float b2,
        const float eps,
        const float grad_scale,
        const float step_size,
        const size_t tsize,
        adamMode_t mode,
        const float decay)
{
        //Assuming 2D grids and 2D blocks
        // Flatten block and thread coordinates into one global thread index.
        const int blockId = gridDim.x * blockIdx.y + blockIdx.x;
        const int threadsPerBlock = blockDim.x * blockDim.y;
        const int threadIdInBlock = threadIdx.y * blockDim.x + threadIdx.x;
        const int i = (blockId * threadsPerBlock + threadIdInBlock);
        const int totThreads = gridDim.x*gridDim.y*threadsPerBlock;

        // Each thread starts at its global index and advances by the total
        // number of launched threads until tsize is reached.
        for (int j = i; j < tsize; j+=totThreads) {
                T scaled_grad = g[j]/grad_scale;
                m[j] = b1*m[j] + (1-b1)*scaled_grad;
                v[j] = b2*v[j] + (1-b2)*scaled_grad*scaled_grad;
                // denom and update are held in float regardless of T.
                float denom;
                if (mode == ADAM_MODE_0)
                    denom = sqrtf(v[j] + eps);
                else // Mode 1
                    denom = sqrtf(v[j]) + eps;
                float update = (m[j]/denom) + (decay*p[j]);
                p[j] = (GRAD_T) (p[j] - (step_size*update));
                if (p_copy != NULL) p_copy[j] = (GRAD_T) p[j];
        }
}

// Host-side launcher for adam_cuda_kernel on the current CUDA stream.
//
// Computes the effective step size (with bias correction when
// bias_correction == 1, otherwise lr itself), picks the kernel instantiation
// from the gradient's scalar type and launches it over all elements of p.
//
//   p       - parameters, updated in place; read with the gradient's scalar type
//   p_copy  - accepted for interface compatibility; the kernel is always
//             launched with a NULL copy pointer, so it is never written
//   m, v    - moment buffers, updated in place
//   g       - gradients
//   mode    - cast to adamMode_t and forwarded to the kernel
//
// An empty parameter tensor is a no-op.
void fused_adam_cuda(
        at::Tensor & p,
        at::Tensor & p_copy,
        at::Tensor & m,
        at::Tensor & v,
        at::Tensor & g,
        float lr,
        float beta1,
        float beta2,
        float eps,
        float grad_scale,
        int step,
        int mode,
        int bias_correction,
        float decay)
{
        // Validate the size before narrowing numel() to int below.
        AT_ASSERTM(at::cuda::detail::canUse32BitIndexMath(p), "parameter tensor is too large to be indexed with int32");
        //Get tensor size
        const int tsize = p.numel();
        // Nothing to update. Returning here also avoids launching a kernel with
        // a zero-sized grid, which is not a valid launch configuration.
        if (tsize == 0) return;
        //Determine #threads and #blocks
        const int threadsPerBlock = 512;
        const dim3 blocks((tsize+threadsPerBlock-1)/threadsPerBlock);
        //Constants
        float step_size = 0;
        if (bias_correction == 1) {
            const float bias_correction1 = 1 - std::pow(beta1, step);
            const float bias_correction2 = 1 - std::pow(beta2, step);
            step_size = lr * std::sqrt(bias_correction2)/bias_correction1;
        }
        else {
            step_size = lr;
        }
        cudaStream_t stream = at::cuda::getCurrentCUDAStream();

        if (g.scalar_type() == at::ScalarType::Half) {
            // Half gradients: m and v use the accumulate type, while p is read
            // with the gradient's own scalar type (no float-parameter
            // requirement is enforced here).
            //dispatch is done on the gradient type
            using namespace at; // prevents "toString is undefined" errors
            DISPATCH_FLOAT_AND_HALF(g.scalar_type(), 0, "adam_cuda_kernel", 
                using accscalar_t = at::acc_type<scalar_t_0, true>;
                adam_cuda_kernel<accscalar_t, scalar_t_0><<<blocks,threadsPerBlock, 0, stream>>>(
                        p.data<scalar_t_0>(),
                        NULL, // no parameter copy is written
                        m.data<accscalar_t>(),
                        v.data<accscalar_t>(),
                        g.data<scalar_t_0>(),
                        beta1,
                        beta2,
                        eps,
                        grad_scale,
                        step_size,
                        tsize,
                        (adamMode_t) mode,
                        decay);
                )
      } else {
            using namespace at;
            DISPATCH_DOUBLE_AND_FLOAT(g.scalar_type(), 0, "adam_cuda_kernel",
                adam_cuda_kernel<scalar_t_0, scalar_t_0><<<blocks,threadsPerBlock, 0, stream>>>(
                        p.data<scalar_t_0>(),
                        NULL, //don't output p_copy for fp32, it's wasted write
                        m.data<scalar_t_0>(),
                        v.data<scalar_t_0>(),
                        g.data<scalar_t_0>(),
                        beta1,
                        beta2,
                        eps,
                        grad_scale,
                        step_size,
                        tsize,
                        (adamMode_t) mode,
                        decay);
            );
      }
      // Surface launch errors from the kernel call above.
      THCudaCheck(cudaGetLastError());

}


================================================
FILE: apex/csrc/layer_norm_cuda.cpp
================================================
#include <torch/extension.h>
#include <vector>
#include <cassert>

namespace {
// Splits the input shape into an outer and an inner extent:
//   n2 = product of the sizes in normalized_shape,
//   n1 = product of the leading input dimensions that precede them.
// The trailing input sizes are only compared against normalized_shape via
// assert(), i.e. not at all when assertions are compiled out.
void compute_n1_n2(
    at::Tensor input,
    #ifdef VERSION_GE_1_1
    at::IntArrayRef normalized_shape,
    #else
    at::IntList normalized_shape,
    #endif
    int& n1,
    int& n2)
{
    // Number of leading dimensions that are not normalized over.
    const int lead_ndim = input.ndimension() - normalized_shape.size();
    const int norm_ndim = (int)normalized_shape.size();

    int inner = 1;
    for (int d = 0;  d < norm_ndim;  ++d) {
        assert( input.sizes()[d+lead_ndim] == normalized_shape[d] );
        inner *= normalized_shape[d];
    }
    n2 = inner;

    int outer = 1;
    for (int d = 0;  d < lead_ndim;  ++d) {
        outer *= input.sizes()[d];
    }
    n1 = outer;
}

// Checks the affine parameters: each of gamma and beta must either be
// undefined or have exactly the sizes given by normalized_shape.
void check_args(
    #ifdef VERSION_GE_1_1
    at::IntArrayRef normalized_shape,
    #else
    at::IntList normalized_shape,
    #endif
    at::Tensor gamma,
    at::Tensor beta
    )
{
    AT_CHECK(!gamma.defined() || gamma.sizes().equals(normalized_shape));
    AT_CHECK(!beta.defined() || beta.sizes().equals(normalized_shape));
}

// Validates `input` against `normalized_shape` and computes n1 / n2.
// Throws std::runtime_error when normalized_shape is empty, or when the
// trailing dimensions of input do not equal normalized_shape. On success n1
// and n2 are filled in by compute_n1_n2.
void check_args(
    at::Tensor input,
    #ifdef VERSION_GE_1_1
    at::IntArrayRef normalized_shape,
    #else
    at::IntList normalized_shape,
    #endif
    int& n1,
    int& n2
    )
{
    const int64_t normalized_ndim = normalized_shape.size();

    if (normalized_ndim < 1) {
      std::stringstream msg;
      msg << "Expected normalized_shape to be at least 1-dimensional, i.e., "
          << "containing at least one element, but got normalized_shape="
          << normalized_shape;
      throw std::runtime_error(msg.str());
    }

    auto input_shape = input.sizes();
    auto input_ndim = input.dim();

    // The slice is only evaluated when the input has enough dimensions.
    const bool too_few_dims = input_ndim < normalized_ndim;
    if (too_few_dims ||
        !input_shape.slice(input_ndim - normalized_ndim).equals(normalized_shape)) {
      std::stringstream msg;
      msg << "Given normalized_shape=" << normalized_shape
          << ", expected input with shape [*";
      for (auto extent : normalized_shape) {
        msg << ", " << extent;
      }
      msg << "], but got input of size" << input_shape;
      throw std::runtime_error(msg.str());
    }

    compute_n1_n2(input,normalized_shape,n1,n2);
}


// Convenience overload: runs the input/shape check (which also fills n1 and
// n2) followed by the gamma/beta check.
void check_args(
    at::Tensor input,
    #ifdef VERSION_GE_1_1
    at::IntArrayRef normalized_shape,
    #else
    at::IntList normalized_shape,
    #endif
    at::Tensor gamma,
    at::Tensor beta,
    int& n1,
    int& n2
    )
{
    check_args(input,normalized_shape,n1,n2);
    check_args(normalized_shape,gamma,beta);
}
}

// Forward declaration of the forward-pass launcher; its definition is not part
// of this file. The callers below pass NULL for gamma and beta in the
// non-affine case.
void cuda_layer_norm(
    at::Tensor* output,
    at::Tensor* mean,
    at::Tensor* invvar,
    at::Tensor* input,
    int n1,
    int n2,
    #ifdef VERSION_GE_1_1
    at::IntArrayRef normalized_shape,
    #else
    at::IntList normalized_shape,
    #endif
    at::Tensor* gamma,
    at::Tensor* beta,
    double epsilon);

// Argument validation helpers; each message names the offending argument.
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x " must be contiguous")
// NOTE: expands to two statements; use it only as a full statement, never as
// the body of an unbraced `if`.
#define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)

// Non-affine layer-norm forward.
// Returns {output, mean, invvar}. output has the shape and options of input;
// mean and invvar hold n1 elements each and use Float when the input is Half,
// otherwise the input's scalar type.
std::vector<at::Tensor> layer_norm(
    at::Tensor input,
    #ifdef VERSION_GE_1_1
    at::IntArrayRef normalized_shape,
    #else
    at::IntList normalized_shape,
    #endif
    double epsilon) {
  CHECK_INPUT(input);

  int n1 = 0;
  int n2 = 0;
  check_args(input, normalized_shape, n1, n2);

  // Half inputs keep their statistics in Float.
  const auto stats_dtype = input.scalar_type() == at::ScalarType::Half
      ? at::ScalarType::Float
      : input.scalar_type();

  at::Tensor output = at::empty_like(input);
  at::Tensor mean = at::empty({n1}, input.options().dtype(stats_dtype));
  at::Tensor invvar = at::empty_like(mean);

  cuda_layer_norm(&output, &mean, &invvar, &input, n1, n2,
                  normalized_shape, NULL, NULL, epsilon);
  return {output, mean, invvar};
}
// Affine layer-norm forward (with gamma and beta).
// Returns {output, mean, invvar}. output has the shape and options of input;
// mean and invvar hold n1 elements each and use Float when the input is Half,
// otherwise the input's scalar type.
std::vector<at::Tensor> layer_norm_affine(
    at::Tensor input,
    #ifdef VERSION_GE_1_1
    at::IntArrayRef normalized_shape,
    #else
    at::IntList normalized_shape,
    #endif
    at::Tensor gamma,
    at::Tensor beta,
    double epsilon) {
  CHECK_INPUT(input);
  CHECK_INPUT(gamma);
  CHECK_INPUT(beta);

  int n1 = 0;
  int n2 = 0;
  check_args(input, normalized_shape, gamma, beta, n1, n2);

  // Half inputs keep their statistics in Float.
  const auto stats_dtype = input.scalar_type() == at::ScalarType::Half
      ? at::ScalarType::Float
      : input.scalar_type();

  at::Tensor output = at::empty_like(input);
  at::Tensor mean = at::empty({n1}, input.options().dtype(stats_dtype));
  at::Tensor invvar = at::empty_like(mean);

  cuda_layer_norm(&output, &mean, &invvar, &input, n1, n2,
                  normalized_shape, &gamma, &beta, epsilon);
  return {output, mean, invvar};
}

// Forward declaration of the backward-pass launcher; its definition is not
// part of this file. The callers below pass NULL for gamma, beta, grad_gamma
// and grad_beta in the non-affine case.
void cuda_layer_norm_gradient(
    at::Tensor* dout,
    at::Tensor* mean,
    at::Tensor* invvar,
    at::Tensor* input,
    int n1,
    int n2,
    #ifdef VERSION_GE_1_1
    at::IntArrayRef normalized_shape,
    #else
    at::IntList normalized_shape,
    #endif
    at::Tensor* gamma,
    at::Tensor* beta,
    double epsilon,
    at::Tensor* grad_input,
    at::Tensor* grad_gamma,
    at::Tensor* grad_beta
    );

// Non-affine layer-norm backward.
// Validates the arguments, allocates a gradient tensor shaped like `input`,
// lets cuda_layer_norm_gradient fill it, and returns it.
at::Tensor layer_norm_gradient(
    at::Tensor dout,
    at::Tensor mean,
    at::Tensor invvar,
    at::Tensor input,
    #ifdef VERSION_GE_1_1
    at::IntArrayRef normalized_shape,
    #else
    at::IntList normalized_shape,
    #endif
    double epsilon) {
  CHECK_INPUT(dout);
  CHECK_INPUT(mean);
  CHECK_INPUT(invvar);
  CHECK_INPUT(input);

  int n1 = 0;
  int n2 = 0;
  check_args(input, normalized_shape, n1, n2);

  at::Tensor d_input = at::empty_like(input);
  // No affine parameters: gamma/beta and their gradients are passed as NULL.
  cuda_layer_norm_gradient(&dout, &mean, &invvar, &input, n1, n2,
                           normalized_shape, NULL, NULL, epsilon,
                           &d_input, NULL, NULL);
  return d_input;
}
// Affine layer-norm backward.
// Returns {grad_input, grad_gamma, grad_beta}, allocated with the shapes of
// input, gamma and beta respectively and filled by cuda_layer_norm_gradient.
std::vector<at::Tensor> layer_norm_gradient_affine(
    at::Tensor dout,
    at::Tensor mean,
    at::Tensor invvar,
    at::Tensor input,
    #ifdef VERSION_GE_1_1
    at::IntArrayRef normalized_shape,
    #else
    at::IntList normalized_shape,
    #endif
    at::Tensor gamma,
    at::Tensor beta,
    double epsilon) {
  CHECK_INPUT(dout);
  CHECK_INPUT(mean);
  CHECK_INPUT(invvar);
  CHECK_INPUT(input);
  CHECK_INPUT(gamma);
  CHECK_INPUT(beta);

  int n1 = 0;
  int n2 = 0;
  check_args(input, normalized_shape, gamma, beta, n1, n2);

  at::Tensor d_input = at::empty_like(input);
  at::Tensor d_gamma = at::empty_like(gamma);
  at::Tensor d_beta = at::empty_like(beta);

  cuda_layer_norm_gradient(&dout, &mean, &invvar, &input, n1, n2,
                           normalized_shape, &gamma, &beta, epsilon,
                           &d_input, &d_gamma, &d_beta);
  return {d_input, d_gamma, d_beta};
}

// Python bindings: forward/backward entry points, each in an affine
// (gamma/beta) and a non-affine variant.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward_affine", &layer_norm_affine, "LayerNorm forward (CUDA)");
  m.def("forward", &layer_norm, "LayerNorm forward (CUDA)");
  m.def("backward_affine", &layer_norm_gradient_affine, "LayerNorm backward (CUDA)");
  m.def("backward", &layer_norm_gradient, "LayerNorm backward (CUDA)");
}


================================================
FILE: apex/csrc/layer_norm_cuda_kernel.cu
================================================
#include "ATen/ATen.h"
#include "ATen/AccumulateType.h"
#include "ATen/cuda/CUDAContext.h"
#include <THC/THCDeviceUtils.cuh>

#include <cuda.h>
#include <cuda_runtime.h>

#include "type_shim.h"

// Folds one sample `curr` into the running statistics (mu, sigma2, count):
//   - count is incremented by one,
//   - mu becomes the mean including the new sample,
//   - sigma2 accumulates (curr - old mean) * (curr - new mean).
// sigma2 is an un-normalized sum; this function never divides it by count.
template<typename U> __device__
void cuWelfordOnlineSum(
  const U curr,
  U& mu,
  U& sigma2,
  U& count)
{
  count = count + U(1);
  U delta = curr - mu;
  U lmean = mu + delta / count;
  mu = lmean;
  U delta2 = curr - lmean;
  sigma2 = sigma2 + delta * delta2;
}

// Merges a second set of partial statistics (muB, sigma2B, countB) into the
// running set (mu, sigma2, count), which is updated in place.
// When the combined count is not positive, mu and sigma2 are reset to zero.
template<typename U> __device__
void cuChanOnlineSum(
  const U muB,
  const U sigma2B,
  const U countB,
  U& mu,
  U& sigma2,
  U& count)
{
  U delta = muB - mu;
  U nA = count;
  U nB = countB;
  count = count + countB;
  U nX = count;
  if (nX > U(0)) {
    // nA and nB become the fractions of the combined count contributed by each side.
    nA = nA / nX;
    nB = nB / nX;
    mu = nA*mu + nB*muB;
    sigma2 = sigma2 + sigma2B + delta * delta * nA * nB * nX;
  } else {
    mu = U(0);
    sigma2 = U(0);
  }
}

// Computes the mean and variance of row i1 = blockIdx.y of `vals`, where each
// row holds n2 consecutive elements. On return (for i1 < n1) `mu` holds the
// mean and `sigma2` the accumulated squared-deviation sum divided by n2.
//
//   vals   - input values, row i1 starts at vals + i1*n2
//   n1     - number of rows; blocks with blockIdx.y >= n1 leave mu/sigma2 at zero
//   n2     - number of elements per row
//   buf    - scratch buffer used for the inter-warp reduction when blockDim.y > 1
template<typename T, typename U> __device__
void cuWelfordMuSigma2(
  const T* __restrict__ vals,
  const int n1,
  const int n2,
  U& mu,
  U& sigma2,
  U* buf) 
{
  // Assumptions:
  // 1) blockDim.x == warpSize
  // 2) Tensor is contiguous
  // 3) 2*blockDim.y*sizeof(U)+blockDim.y*sizeof(int) shared memory available.
  //
  // compute variance and mean over n2
  U count = U(0);
  mu= U(0);
  sigma2 = U(0);
  int i1 = blockIdx.y;
  if (i1 < n1) {
    // one warp normalizes one n1 index,
    // synchronization is implicit
    // initialize with standard Welford algorithm
    const int numx = blockDim.x * blockDim.y;
    const int thrx = threadIdx.x + threadIdx.y * blockDim.x;
    const T* lvals = vals + i1*n2;
    // Each thread consumes groups of 4 consecutive elements, advancing by
    // 4*numx between groups.
    int l = 4*thrx;
    for (;  l+3 < n2;  l+=4*numx) {
      for (int k = 0;  k < 4;  ++k) {
        U curr = static_cast<U>(lvals[l+k]);
        cuWelfordOnlineSum<U>(curr,mu,sigma2,count);
      }
    }
    // Leftover elements that do not fill a complete group of 4.
    for (;  l < n2;  ++l) {
      U curr = static_cast<U>(lvals[l]);
      cuWelfordOnlineSum<U>(curr,mu,sigma2,count);
    }
    // intra-warp reductions
    // Five rounds; in round l each lane merges the partial result of the lane
    // (1<<l) positions further on, wrapping around modulo 32.
    for (int l = 0;  l <= 4;  ++l) {
      int srcLaneB = (threadIdx.x+(1<<l))&31;
      U muB = WARP_SHFL(mu, srcLaneB);
      U countB = WARP_SHFL(count, srcLaneB);
      U sigma2B = WARP_SHFL(sigma2, srcLaneB);
      cuChanOnlineSum<U>(muB,sigma2B,countB,mu,sigma2,count);
    }
    // threadIdx.x == 0 has correct values for each warp
    // inter-warp reductions
    if (blockDim.y > 1) {
      // ubuf stores (mu, sigma2) pairs; ibuf, placed blockDim.y elements
      // further into buf, stores the matching counts.
      U* ubuf = (U*)buf;
      U* ibuf = (U*)(ubuf + blockDim.y);
      for (int offset = blockDim.y/2;  offset > 0;  offset /= 2) {
        // upper half of warps write to shared
        if (threadIdx.x == 0 && threadIdx.y >= offset && threadIdx.y < 2*offset) {
          const int wrt_y = threadIdx.y - offset;
          ubuf[2*wrt_y] = mu;
          ubuf[2*wrt_y+1] = sigma2;
          ibuf[wrt_y] = count;
        }
        __syncthreads();
        // lower half merges
        if (threadIdx.x == 0 && threadIdx.y < offset) {
          U muB = ubuf[2*threadIdx.y];
          U sigma2B = ubuf[2*threadIdx.y+1];
          U countB = ibuf[threadIdx.y];
          cuChanOnlineSum<U>(muB,sigma2B,countB,mu,sigma2,count);
        }
        __syncthreads();
      }
      // threadIdx.x = 0 && threadIdx.y == 0 only thread that has correct values
      if (threadIdx.x == 0 && threadIdx.y == 0) {
        ubuf[0] = mu;
        ubuf[1] = sigma2;
      }
      __syncthreads();
      // Every thread reads the final result back from the buffer.
      mu = ubuf[0];
      sigma2 = ubuf[1]/U(n2);
      // don't care about final value of count, we know count == n2
    } else {
      // Single warp per block: take the values held by lane 0.
      mu = WARP_SHFL(mu, 0);
      sigma2 = WARP_SHFL(sigma2/U(n2), 0);
    }
  }
}

// Specialization for at::Half input with float statistics.
// Same contract as the generic version: on return (for i1 < n1) `mu` is the
// mean of row i1 = blockIdx.y and `sigma2` the accumulated squared-deviation
// sum divided by n2. The main loop differs in that it reads the input two
// values at a time as __half2, in groups of 8 elements per thread.
template<> __device__
void cuWelfordMuSigma2(
  const at::Half* __restrict__ vals,
  const int n1,
  const int n2,
  float& mu,
  float& sigma2,
  float* buf) 
{
  // Assumptions:
  // 1) blockDim.x == warpSize
  // 2) Tensor is contiguous
  // 3) 2*blockDim.y*sizeof(U)+blockDim.y*sizeof(int) shared memory available.
  //
  // compute variance and mean over n2
  float count = 0.0f;
  mu= float(0);
  sigma2 = float(0);
  int i1 = blockIdx.y;
  if (i1 < n1) {
    // one warp normalizes one n1 index,
    // synchronization is implicit
    // initialize with standard Welford algorithm
    const int numx = blockDim.x * blockDim.y;
    const int thrx = threadIdx.x + threadIdx.y * blockDim.x;
    const at::Half* lvals = vals + i1*n2;
    int l = 8*thrx;
    // If the row start is not 4-byte aligned, the first element is handled on
    // its own and every thread's start index is shifted by one, so that the
    // paired loads below start on a 4-byte boundary.
    if ((((size_t)lvals)&3) != 0) {
      // 16 bit alignment
      // first thread consumes first point
      if (thrx == 0) {
        float curr = static_cast<float>(lvals[0]);
        cuWelfordOnlineSum(curr,mu,sigma2,count);
      }
      ++l;
    }
    // at this point, lvals[l] are 32 bit aligned for all threads.
    for (;  l+7 < n2;  l+=8*numx) {
      for (int k = 0;  k < 8;  k+=2) {
        // Load two halves at once and convert both to float.
        float2 curr = __half22float2(*((__half2*)(lvals+l+k)));
        cuWelfordOnlineSum(curr.x,mu,sigma2,count);
	cuWelfordOnlineSum(curr.y,mu,sigma2,count);
      }
    }
    // Leftover elements that do not fill a complete group of 8.
    for (;  l < n2;  ++l) {
      float curr = static_cast<float>(lvals[l]);
      cuWelfordOnlineSum(curr,mu,sigma2,count);
    }
    // intra-warp reductions
    // Five rounds; in round l each lane merges the partial result of the lane
    // (1<<l) positions further on, wrapping around modulo 32.
    for (int l = 0;  l <= 4;  ++l) {
      int srcLaneB = (threadIdx.x+(1<<l))&31;
      float muB = WARP_SHFL(mu, srcLaneB);
      float countB = WARP_SHFL(count, srcLaneB);
      float sigma2B = WARP_SHFL(sigma2, srcLaneB);
      cuChanOnlineSum(muB,sigma2B,countB,mu,sigma2,count);
    }
    // threadIdx.x == 0 has correct values for each warp
    // inter-warp reductions
    if (blockDim.y > 1) {
      // ubuf stores (mu, sigma2) pairs; ibuf, placed blockDim.y elements
      // further into buf, stores the matching counts.
      float* ubuf = (float*)buf;
      float* ibuf = (float*)(ubuf + blockDim.y);
      for (int offset = blockDim.y/2;  offset > 0;  offset /= 2) {
        // upper half of warps write to shared
        if (threadIdx.x == 0 && threadIdx.y >= offset && threadIdx.y < 2*offset) {
          const int wrt_y = threadIdx.y - offset;
          ubuf[2*wrt_y] = mu;
          ubuf[2*wrt_y+1] = sigma2;
          ibuf[wrt_y] = count;
        }
        __syncthreads();
        // lower half merges
        if (threadIdx.x == 0 && threadIdx.y < offset) {
          float muB = ubuf[2*threadIdx.y];
          float sigma2B = ubuf[2*threadIdx.y+1];
          float countB = ibuf[threadIdx.y];
          cuChanOnlineSum(muB,sigma2B,countB,mu,sigma2,count);
        }
        __syncthreads();
      }
      // threadIdx.x = 0 && threadIdx.y == 0 only thread that has correct values
      if (threadIdx.x == 0 && threadIdx.y == 0) {
        ubuf[0] = mu;
        ubuf[1] = sigma2;
      }
      __syncthreads();
      // Every thread reads the final result back from the buffer.
      mu = ubuf[0];
      sigma2 = ubuf[1]/float(n2);
      // don't care about final value of count, we know count == n2
    } else {
      // Single warp per block: take the values held by lane 0.
      mu = WARP_SHFL(mu, 0);
      sigma2 = WARP_SHFL(sigma2/float(n2), 0);
    }
  }
}

// Reciprocal square root helpers.
// Generic version: 1 / sqrt(v).
template<typename U> U rsqrt(U v) {
  return U(1) / sqrt(v);
}
// float specialization: forwards to rsqrtf.
template<> float rsqrt(float v) {
  return rsqrtf(v);
}
// double specialization.
// NOTE(review): the unqualified rsqrt(v) call presumably resolves to a
// non-template rsqrt(double) overload supplied by the CUDA math headers rather
// than back to this specialization - confirm.
// NOTE(review): none of these templates carries a __device__ qualifier - confirm
// which overload the kernels in this file actually pick up.
template<> double rsqrt(double v) {
  return rsqrt(v);
}

// Typed accessors for the kernel's `extern __shared__` array.
// Only the float and double specializations are defined; the primary template
// is declared but left undefined, so using any other type fails to compile.
namespace {
// This is the un-specialized struct.  Note that we prevent instantiation of this
// struct by putting an undefined symbol in the function body so it won't compile.
//  template <typename T>
//  struct SharedMemory
//  {
//      // Ensure that we won't compile any un-specialized types
//      __device__ T *getPointer()
//      {
//          extern __device__ void error(void);
//          error();
//          return NULL;
//      }
//  };
// https://github.com/NVIDIA/apex/issues/246
template <typename T>
struct SharedMemory;

template <>
struct SharedMemory <float>
{
    // Returns the extern __shared__ array viewed as float.
    __device__ float *getPointer()
    {
        extern __shared__ float s_float[];
        return s_float;
    }
};

template <>
struct SharedMemory <double>
{
    // Returns the extern __shared__ array viewed as double.
    __device__ double *getPointer()
    {
        extern __shared__ double s_double[];
        return s_double;
    }
};
}

// Layer-norm forward kernel.
//
// Each block handles row i1 = blockIdx.y of `vals` (n2 consecutive elements):
// it obtains the row mean and variance from cuWelfordMuSigma2, writes the
// normalized row to output_vals, and stores the row's mean and
// rsqrt(variance + epsilon) in mean[i1] / invvar[i1].
//
// The scale/shift by gamma and beta is applied only when BOTH pointers are
// non-NULL; otherwise the plain normalized values are written.
//
// T is the element type of the input, output, gamma and beta; U is the type
// used for the statistics.
template<typename T, typename U> __global__
void cuApplyLayerNorm(
  T* __restrict__ output_vals,
  U* __restrict__ mean,
  U* __restrict__ invvar,
  const T* __restrict__ vals,
  const int n1,
  const int n2,
  const U epsilon,
  const T* __restrict__ gamma,
  const T* __restrict__ beta
  ) 
{
  // Assumptions:
  // 1) blockDim.x == warpSize
  // 2) Tensors are contiguous
  //
  int i1 = blockIdx.y;
  if (i1 < n1) {
    // Scratch space handed to the mean/variance reduction.
    SharedMemory<U> shared;
    U* buf = shared.getPointer();
    U mu,sigma2;
    cuWelfordMuSigma2(vals,n1,n2,mu,sigma2,buf);
    const T* lvals = vals + i1*n2;
    T* ovals = output_vals + i1*n2;
    U c_invvar = rsqrt(sigma2 + epsilon);
    // All threads of the block share the row: thread thrx handles elements
    // thrx, thrx+numx, thrx+2*numx, ...
    const int numx = blockDim.x * blockDim.y;
    const int thrx = threadIdx.x + threadIdx.y * blockDim.x;
    if (gamma != NULL && beta != NULL) {
      for (int i = thrx;  i < n2;  i+=numx) {
        U curr = static_cast<U>(lvals[i]);
        ovals[i] = gamma[i] * static_cast<T>(c_invvar * (curr - mu)) + beta[i];
      }
    } else {
      for (int i = thrx;  i < n2;  i+=numx) {
        U curr = static_cast<U>(lvals[i]);
        ovals[i] = static_cast<T>(c_invvar * (curr - mu));
      }
    }
    // One thread per block records the row statistics.
    if (threadIdx.x == 0 && threadIdx.y == 0) {
      mean[i1] = mu;
      invvar[i1] = c_invvar;
    }
  }
}

// Loads blockDim.y consecutive columns of row i1 = i1_block + thr_load_row_off
// and WRITES (overwrites) two per-thread partial terms into the buffers:
//   warp_buf1[...] = dout
//   warp_buf2[...] = dout * (input - mean[i1]) * invvar[i1]
// Entries whose row (i1 >= i1_end) or column (i2 >= n2) is out of range are
// set to zero, so every addressed buffer slot is initialized.
template<typename T, typename U> __device__
void cuLoadWriteStridedInputs(
    const int i1_block,
    const int thr_load_row_off,
    const int thr_load_col_off,
    const int i2_off,
    const int row_stride,
    U* warp_buf1,
    U* warp_buf2,
    const T* input,
    const T* dout,
    const int i1_end,
    const int n2,
    const U* __restrict__ mean,
    const U* __restrict__ invvar
    )
{
  int i1 = i1_block+thr_load_row_off;
  if (i1 < i1_end) {
    U curr_mean = mean[i1];
    U curr_invvar = invvar[i1];
    for (int k = 0;  k < blockDim.y;  ++k) {
      int i2 = i2_off + k;
      // load_idx addresses the (i1, i2) element of the n2-wide input/dout rows;
      // write_idx addresses the buffer slot using row_stride as the row pitch.
      int load_idx = i1*n2+i2;
      int write_idx = thr_load_row_off*row_stride+thr_load_col_off+k;
      if (i2<n2) {
        U curr_input = static_cast<U>(input[load_idx]);
	U curr_dout = static_cast<U>(dout[load_idx]);
	warp_buf1[write_idx] = curr_dout;
	warp_buf2[write_idx] = curr_dout * (curr_input - curr_mean) * curr_invvar;
      } else {
        warp_buf1[write_idx] = U(0);
        warp_buf2[write_idx] = U(0);
      }
    }
  } else {
    // Row out of range: zero-fill this thread's slots.
    for (int k = 0;  k < blockDim.y;  ++k) {
      int write_idx = thr_load_row_off*row_stride+thr_load_col_off+k;
      warp_buf1[write_idx] = U(0);
      warp_buf2[write_idx] = U(0);
    }
  }
}

// Accumulate one more tile of inputs into the two shared-memory tiles.
//
// Same addressing as cuLoadWriteStridedInputs, but the values are added to the
// existing slot contents and out-of-range (row, column) pairs are simply
// skipped:
//   warp_buf1[slot] += dout
//   warp_buf2[slot] += dout * (input - mean[i1]) * invvar[i1]
template<typename T, typename U> __device__
void cuLoadAddStridedInputs(
    const int i1_block,
    const int thr_load_row_off,
    const int thr_load_col_off,
    const int i2_off,
    const int row_stride,
    U* warp_buf1,
    U* warp_buf2,
    const T* input,
    const T* dout,
    const int i1_end,
    const int n2,
    const U* __restrict__ mean,
    const U* __restrict__ invvar
    )
{
  const int row = i1_block + thr_load_row_off;
  if (row >= i1_end) {
    // Row is past the end of this block's row range: nothing to add.
    return;
  }

  const U row_mean = mean[row];
  const U row_invvar = invvar[row];
  const int slot_base = thr_load_row_off*row_stride + thr_load_col_off;

  for (int k = 0;  k < blockDim.y;  ++k) {
    const int col = i2_off + k;
    if (col >= n2) {
      continue;
    }
    const int src = row*n2 + col;
    const U x = static_cast<U>(input[src]);
    const U grad = static_cast<U>(dout[src]);
    warp_buf1[slot_base + k] += grad;
    warp_buf2[slot_base + k] += grad * (x - row_mean) * row_invvar;
  }
}

// First stage of the gamma/beta gradient: per-block partial column sums.
//
// Block (blockIdx.x, blockIdx.y) covers the blockDim.x columns starting at
// blockIdx.x*blockDim.x and the rows [i1_beg, i1_end) assigned to blockIdx.y.
// For each covered column i2 < n2 it writes
//   part_grad_beta [blockIdx.y*n2 + i2] = sum over rows of dout
//   part_grad_gamma[blockIdx.y*n2 + i2] = sum over rows of dout * (input - mean) * invvar
// so the outputs are laid out as (gridDim.y, n2) and still have to be summed
// over their first dimension.
//
// `epsilon` is accepted but not used in this kernel.
// NOTE(review): thr_load_col_off uses `& (blockDim.x-1)`, which equals a modulo
// only when blockDim.x is a power of two.
template<typename T, typename U> __global__
void cuComputePartGradGammaBeta(
    const T* __restrict__ dout,
    const T* __restrict__ input,
    const int n1,
    const int n2,
    const U* __restrict__ mean,
    const U* __restrict__ invvar,
    U epsilon,
    U* part_grad_gamma,
    U* part_grad_beta)
{
    // Rows are handed out in segments of blockDim.y*blockDim.y rows; each
    // blockIdx.y gets segs_per_block consecutive segments, clamped to n1.
    const int numsegs_n1 = (n1+blockDim.y*blockDim.y-1) / (blockDim.y*blockDim.y);
    const int segs_per_block = (numsegs_n1 + gridDim.y - 1) / gridDim.y;
    const int i1_beg = blockIdx.y * segs_per_block * blockDim.y*blockDim.y;
    const int i1_beg_plus_one = (blockIdx.y+1) * segs_per_block * blockDim.y*blockDim.y;
    const int i1_end = i1_beg_plus_one < n1 ? i1_beg_plus_one : n1;
    // Tile rows are padded by one element (presumably to avoid shared-memory
    // bank conflicts -- not stated in the code).
    const int row_stride = blockDim.x+1;
    // Each thread loads blockDim.y consecutive columns of one tile row.
    const int thr_load_col_off = (threadIdx.x*blockDim.y)&(blockDim.x-1);
    const int thr_load_row_off = (threadIdx.x*blockDim.y)/blockDim.x + threadIdx.y*blockDim.y;
    const int i2_off = blockIdx.x * blockDim.x + thr_load_col_off;
    SharedMemory<U> shared;
    U* buf = shared.getPointer(); // buf has at least blockDim.x * blockDim.y * blockDim.y + (blockDim.y - 1)*(blockDim.x/blockDim.y) elements
    // Two tiles of blockDim.y*blockDim.y rows each: warp_buf1 accumulates the
    // beta terms, warp_buf2 the gamma terms.
    U* warp_buf1 = (U*)buf;
    U* warp_buf2 = warp_buf1 + blockDim.y * blockDim.y * row_stride;
    // compute partial sums from strided inputs
    // do this to increase number of loads in flight
    // The first tile initialises every slot (zeros where out of range); the
    // remaining tiles of this block's row range are added on top.
    cuLoadWriteStridedInputs(i1_beg,thr_load_row_off,thr_load_col_off,i2_off,row_stride,warp_buf1,warp_buf2,input,dout,i1_end,n2,mean,invvar);
    for (int i1_block = i1_beg+blockDim.y*blockDim.y;  i1_block < i1_end;  i1_block+=blockDim.y*blockDim.y) {
      cuLoadAddStridedInputs(i1_block,thr_load_row_off,thr_load_col_off,i2_off,row_stride,warp_buf1,warp_buf2,input,dout,i1_end,n2,mean,invvar);
    }
    __syncthreads();
    // inter-warp reductions
    // sum within each warp
    // Thread (x, y) sums column x over tile rows y, y+blockDim.y, y+2*blockDim.y, ...
    U acc1 = U(0);
    U acc2 = U(0);
    for (int k = 0;  k < blockDim.y;  ++k) {
      int row1 = threadIdx.y + k*blockDim.y;
      int idx1 = row1*row_stride + threadIdx.x;
      acc1 += warp_buf1[idx1];
      acc2 += warp_buf2[idx1];
    }
    // Store the per-thread sums back into tile rows 0 .. blockDim.y-1.
    warp_buf1[threadIdx.y*row_stride+threadIdx.x] = acc1;
    warp_buf2[threadIdx.y*row_stride+threadIdx.x] = acc2;
    __syncthreads();
    // sum all warps
    // Tree reduction over tile rows; it stops while two rows are still left
    // (offset > 1) because the final pair is added in the output write below.
    for (int offset = blockDim.y/2;  offset > 1;  offset /= 2) {
      if (threadIdx.y < offset) {
        int row1 = threadIdx.y;
	int row2 = threadIdx.y + offset;
	int idx1 = row1*row_stride + threadIdx.x;
	int idx2 = row2*row_stride + threadIdx.x;
	warp_buf1[idx1] += warp_buf1[idx2];
	warp_buf2[idx1] += warp_buf2[idx2];
      }
      __syncthreads();
    }
    // Threads with threadIdx.y == 0 add tile rows 0 and 1 and write the result
    // for their column, skipping columns past n2.
    int i2 = blockIdx.x * blockDim.x + threadIdx.x;
    if (threadIdx.y == 0 && i2 < n2) {
      int row1 = threadIdx.y;
      int row2 = threadIdx.y + 1;
      int idx1 = row1*row_stride + threadIdx.x;
      int idx2 = row2*row_stride + threadIdx.x;
      part_grad_beta[blockIdx.y*n2+i2] = warp_buf1[idx1] + warp_buf1[idx2];
      part_grad_gamma[blockIdx.y*n2+i2] = warp_buf2[idx1] + warp_buf2[idx2];
    }
}

// Second stage of the gamma/beta gradient: reduce the (part_size, n2) partial
// sums over their first dimension and write grad_gamma[i2] / grad_beta[i2].
//
// Each thread owns column i2 = blockIdx.x*blockDim.x + threadIdx.x.  Thread
// row threadIdx.y first sums part_size / blockDim.y consecutive partial rows
// sequentially, then the blockDim.y per-row sums are combined through shared
// memory with a halving tree.  `n1` is accepted but not used.
//
// Shared memory: blockDim.x * blockDim.y elements of U (first half gamma,
// second half beta).
//
// The barriers are executed by every thread of the block, including those
// whose column is past n2: CUDA requires __syncthreads() to be reached
// uniformly by the whole block, so the bounds check only guards the global
// memory accesses.  Out-of-range threads carry zeros through the reduction and
// only ever exchange data within their own column.
template<typename T, typename U> __global__
void cuComputeGradGammaBeta(
    const U* part_grad_gamma,
    const U* part_grad_beta,
    const int part_size,
    const int n1,
    const int n2,
    T* grad_gamma,
    T* grad_beta)
{
    // sum partial gradients for gamma and beta
    SharedMemory<U> shared;
    U* buf = shared.getPointer();
    const int i2 = blockIdx.x * blockDim.x + threadIdx.x;
    const bool in_range = i2 < n2;
    U sum_gamma = U(0);
    U sum_beta = U(0);
    if (in_range) {
      // each warp does sequential reductions until reduced part_size is num_warps
      const int num_warp_reductions = part_size / blockDim.y;
      const U* part_grad_gamma_ptr = part_grad_gamma + threadIdx.y * num_warp_reductions * n2 + i2;
      const U* part_grad_beta_ptr = part_grad_beta + threadIdx.y * num_warp_reductions * n2 + i2;
      for (int warp_offset = 0;  warp_offset < num_warp_reductions;  ++warp_offset) {
        sum_gamma += part_grad_gamma_ptr[warp_offset*n2];
        sum_beta += part_grad_beta_ptr[warp_offset*n2];
      }
    }
    // inter-warp reductions (block-uniform control flow: every thread hits both barriers)
    const int nbsize3 = blockDim.x * blockDim.y / 2;
    for (int offset = blockDim.y/2;  offset >= 1;  offset /= 2) {
      // top half write to shared memory
      if (threadIdx.y >= offset && threadIdx.y < 2*offset) {
        const int write_idx = (threadIdx.y - offset) * blockDim.x + threadIdx.x;
        buf[write_idx] = sum_gamma;
        buf[write_idx+nbsize3] = sum_beta;
      }
      __syncthreads();
      // bottom half sums
      if (threadIdx.y < offset) {
        const int read_idx = threadIdx.y * blockDim.x + threadIdx.x;
        sum_gamma += buf[read_idx];
        sum_beta += buf[read_idx+nbsize3];
      }
      __syncthreads();
    }
    // write out fully summed gradients
    if (in_range && threadIdx.y == 0) {
      grad_gamma[i2] = sum_gamma;
      grad_beta[i2] = sum_beta;
    }
}

// Gradient of layer norm with respect to its input, one thread block per row.
//
// For row i1 = blockIdx.y, with xhat = (input - mean[i1]) * invvar[i1] and
// g = dout * gamma (or g = dout when gamma is NULL), the block computes
//   sum_loss1 = sum_l g[l]
//   sum_loss2 = sum_l g[l] * xhat[l]
// and then writes
//   grad_input[l] = (invvar[i1] / n2) * (n2 * g[l] - sum_loss1 - xhat[l] * sum_loss2)
//
// `epsilon` is accepted but not used in this kernel.
// Shared memory (only touched when blockDim.y > 1): the indexing below reaches
// up to 2 * (blockDim.y/2) * blockDim.x elements of U.
template<typename T, typename U> __global__
void cuComputeGradInput(
    const T* __restrict__ dout,
    const T* __restrict__ input,
    const int n1,
    const int n2,
    const U* __restrict__ mean,
    const U* __restrict__ invvar,
    U epsilon,
    const T* gamma,
    T* grad_input)
{
  int i1 = blockIdx.y;
  if (i1 < n1) {
    U sum_loss1 = U(0);
    U sum_loss2 = U(0);
    const U c_mean = mean[i1];
    const U c_invvar = invvar[i1];
    const T* k_input = input + i1*n2;
    const T* k_dout = dout + i1*n2;
    // Flattened thread count and thread index within the block.
    const int numx = blockDim.x * blockDim.y;
    const int thrx = threadIdx.x + threadIdx.y * blockDim.x;
    // Per-thread partial sums.  Each thread consumes groups of 4 consecutive
    // elements, stepping by 4*numx; the second loop picks up the 1..3 leftover
    // elements for the one thread whose position ends inside the row's tail.
    if (gamma != NULL) {
      int l = 4*thrx;
      for (;  l+3 < n2;  l+=4*numx) {
        for (int k = 0;  k < 4;  ++k) {
          const U c_h = static_cast<U>(k_input[l+k]);
          const U c_loss = static_cast<U>(k_dout[l+k]);
          sum_loss1 += c_loss * gamma[l+k];
          sum_loss2 += c_loss * gamma[l+k] * (c_h - c_mean) * c_invvar;
        }
      }
      for (;  l < n2;  ++l) {
        const U c_h = static_cast<U>(k_input[l]);
        const U c_loss = static_cast<U>(k_dout[l]);
        sum_loss1 += c_loss * gamma[l];
        sum_loss2 += c_loss * gamma[l] * (c_h - c_mean) * c_invvar;
      }
    } else {
      // Same accumulation without the gamma factor.
      int l = 4*thrx;
      for (;  l+3 < n2;  l+=4*numx) {
        for (int k = 0;  k < 4;  ++k) {
          const U c_h = static_cast<U>(k_input[l+k]);
          const U c_loss = static_cast<U>(k_dout[l+k]);
          sum_loss1 += c_loss;
          sum_loss2 += c_loss * (c_h - c_mean) * c_invvar;
        }
      }
      for (;  l < n2;  ++l) {
        const U c_h = static_cast<U>(k_input[l]);
        const U c_loss = static_cast<U>(k_dout[l]);
        sum_loss1 += c_loss;
        sum_loss2 += c_loss * (c_h - c_mean) * c_invvar;
      }
    }
    // intra-warp reductions
    // XOR-exchange over masks blockDim.x/2 .. 1 via the WARP_SHFL_XOR macro
    // (defined elsewhere; presumably a warp shuffle -- confirm).
    for (int mask = blockDim.x/2;  mask > 0;  mask /= 2) {
      sum_loss1 += WARP_SHFL_XOR(sum_loss1, mask);
      sum_loss2 += WARP_SHFL_XOR(sum_loss2, mask);
    }
    // inter-warp reductions
    if (blockDim.y > 1) {
      SharedMemory<U> shared;
      U* buf = shared.getPointer(); 
      // Halving tree over threadIdx.y; the two sums are interleaved in buf
      // (even index = sum_loss1, odd index = sum_loss2).
      for (int offset = blockDim.y/2;  offset > 0;  offset /= 2) {
        // upper half of warps write to shared
        if (threadIdx.y >= offset && threadIdx.y < 2*offset) {
          const int wrt_i = (threadIdx.y - offset) * blockDim.x + threadIdx.x;
          buf[2*wrt_i] = sum_loss1;
          buf[2*wrt_i+1] = sum_loss2;
        }
        __syncthreads();
        // lower half merges
        if (threadIdx.y < offset) {
          const int read_i = threadIdx.y * blockDim.x + threadIdx.x;
          sum_loss1 += buf[2*read_i];
          sum_loss2 += buf[2*read_i+1];
        }
        __syncthreads();
      }
      // Row threadIdx.y == 0 holds the totals; publish them through buf so
      // every other thread row can read them back.
      if (threadIdx.y == 0) {
        buf[2*threadIdx.x] = sum_loss1;
        buf[2*threadIdx.x+1] = sum_loss2;
      }
      __syncthreads();
      if (threadIdx.y !=0) {
        sum_loss1 = buf[2*threadIdx.x];
        sum_loss2 = buf[2*threadIdx.x+1];
      } 
    }
    // all threads now have the two sums over l
    U fH = (U)n2;
    // Common factor invvar / n2 applied to every output element.
    U term1 = (U(1) / fH) * c_invvar;
    T* k_grad_input = grad_input + i1*n2;
    if (gamma != NULL) {
      for (int l = thrx;  l < n2;  l+=numx) {
        const U c_h = static_cast<U>(k_input[l]);
        const U c_loss = static_cast<U>(k_dout[l]);
        U f_grad_input = fH * c_loss * gamma[l];
        f_grad_input -= sum_loss1;
        f_grad_input -= (c_h - c_mean) * c_invvar * sum_loss2;
        f_grad_input *= term1;
        k_grad_input[l] = static_cast<T>(f_grad_input);
      }
    } else {
      for (int l = thrx;  l < n2;  l+=numx) {
        const U c_h = static_cast<U>(k_input[l]);
        const U c_loss = static_cast<U>(k_dout[l]);
        U f_grad_input = fH * c_loss;
        f_grad_input -= sum_loss1;
        f_grad_input -= (c_h - c_mean) * c_invvar * sum_loss2;
        f_grad_input *= term1;
        k_grad_input[l] = static_cast<T>(f_grad_input);
      }
    }
  }
}

// Host-side launcher for the forward layer-norm kernel.
//
// Launches cuApplyLayerNorm on the current CUDA stream with one 32x4 thread
// block per row (grid = 1 x n1).  `epsilon` is narrowed to the accumulation
// type U before being passed to the kernel; `gamma` and `beta` are forwarded
// unchanged and may be NULL.
template<typename T, typename U> 
void HostApplyLayerNorm(
    T* output,
    U* mean,
    U* invvar,
    const T* input,
    int n1,
    int n2,
    double epsilon,
    const T* gamma,
    const T* beta
    )
{
    const dim3 threads(32,4,1);
    const dim3 blocks(1,n1,1);

    // Dynamic shared memory in bytes: threads.y + threads.y/2 elements of U
    // when the block has more than one thread row, otherwise none.
    int nshared = 0;
    if (threads.y > 1) {
      nshared = threads.y*sizeof(U) + (threads.y/2)*sizeof(U);
    }

    auto stream = at::cuda::getCurrentCUDAStream().stream();
    cuApplyLayerNorm<<<blocks, threads, nshared, stream>>>(
        output,
        mean,
        invvar,
        input,
        n1,n2,
        U(epsilon),
        gamma,beta);
}

// Type-dispatching entry point for the forward pass.
//
// Dispatches on input's scalar type (double, float or half), uses the matching
// accumulation type for mean / invvar, and forwards raw data pointers to
// HostApplyLayerNorm.  `gamma` and `beta` may each be NULL, in which case a
// NULL data pointer is passed on.  `normalized_shape` is not used here.
void cuda_layer_norm(
    at::Tensor* output,
    at::Tensor* mean,
    at::Tensor* invvar,
    at::Tensor* input,
    int n1,
    int n2,
    #ifdef VERSION_GE_1_1
    at::IntArrayRef normalized_shape,
    #else
    at::IntList normalized_shape,
    #endif
    at::Tensor* gamma,
    at::Tensor* beta,
    double epsilon)
{
    using namespace at;
    DISPATCH_DOUBLE_FLOAT_AND_HALF(input->scalar_type(), 0, "layer_norm_cuda_kernel",
        using accscalar_t = at::acc_type<scalar_t_0, true>;
        // Resolve the optional affine parameters up front.
        const scalar_t_0* gamma_ptr = NULL;
        const scalar_t_0* beta_ptr = NULL;
        if (gamma != NULL) { gamma_ptr = gamma->data<scalar_t_0>(); }
        if (beta != NULL) { beta_ptr = beta->data<scalar_t_0>(); }
        HostApplyLayerNorm(
            output->data<scalar_t_0>(),
            mean->data<accscalar_t>(),
            invvar->data<accscalar_t>(),
            input->data<scalar_t_0>(),
            n1,n2,
            epsilon,
            gamma_ptr,
            beta_ptr);
      )
}

template<typename T, typename U> 
void HostLayerNormGradient(
    const T* dout,
    const U* mean,
    const U* invvar,
    at::Tensor* input,
    int n1,
    int n2,
    const T* gamma,
    const T* beta,
    double epsilon,
    T* grad_input,
    T* grad_gamma,
    T* grad_beta
    )
{
    auto stream = at::cuda::getCurrentCUDAStream().stream();

    if (gamma != NULL && beta != NULL) {
      // compute grad_gamma(j) and grad_beta(j)
      const int part_size = 16;
      const dim3 threads2(32,4,1);
      const dim3 blocks2((n2+threads2.x-1)/threads2.x,part_size,1);
      const int nshared2_a = 2 * sizeof(U) * threads2.y * threads2.y * (threads2.x + 1);
      const int nshared2_b = threads2.x * threads2.y * sizeof(U);
      const int nshared2 = nshared2_a > nshared2_b ? nshared2_a : nshared2_b;
      at::Tensor part_grad_gamma = at::empty({part_size,n2}, input->options().dtype(input->scalar_type()==at::ScalarType::Half ? at::ScalarType::Float : input->scalar_type()));
      at::Tensor part_grad_beta = at::empty_like(part_grad_gamma);
      cuComputePartGradGammaBeta<<<blocks2, threads2, nshared2, stream>>>(
		      dout,
		      input->data<T>(),
		      n1,n2,
		      mean,
		      invvar,
		      U(epsilon),
		      part_grad_gamma.data<U>(),
		      part_grad_beta.data<U>());

      const dim3 threads3(32,8,1);
      const dim3 blocks3((n2+threads2.x-1)/threads2.x,1,1);
      const int nshared3 = threads3.x * threads3.y * sizeof(U);
      cuComputeGradGammaBeta<<<blocks3, threads3, nshared3, stream>>>(
		      part_grad_gamma.data<U>(),
		      part_grad_beta.data<U>(),
		      part_size,
		      n1,n2,
		      grad_gamma,
		      grad_beta);
    }

    // compute grad_input
    const dim3 threads1(32,4,1);
    const dim3 blocks1(1,n1,1);
    int nshared =
	    threads1.y > 1 ?
	    threads1.y*threads1.x*sizeof(U) :
	    0;
    cuComputeGradInput<<<blocks1, threads1, nshared, stream>>>(
            dout,
            input->data<T>(),
            n1,n2,
            mean,
            invvar,
            U(epsilon),
            gamma,
            grad_input);
}

// Type-dispatching entry point for the backward pass.
//
// Dispatches on input's scalar type (float or half) and forwards raw data
// pointers to HostLayerNormGradient.  `normalized_shape` is not used here.
//
// `gamma`, `beta`, `grad_gamma` and `grad_beta` are optional: a NULL tensor
// pointer is forwarded as a NULL data pointer instead of being dereferenced.
// HostLayerNormGradient skips the gamma/beta gradient kernels when gamma or
// beta is NULL, matching how cuda_layer_norm treats the forward pass.
void cuda_layer_norm_gradient(
    at::Tensor* dout,
    at::Tensor* mean,
    at::Tensor* invvar,
    at::Tensor* input,
    int n1,
    int n2,
    #ifdef VERSION_GE_1_1
    at::IntArrayRef normalized_shape,
    #else
    at::IntList normalized_shape,
    #endif
    at::Tensor* gamma,
    at::Tensor* beta,
    double epsilon,
    at::Tensor* grad_input,
    at::Tensor* grad_gamma,
    at::Tensor* grad_beta)
{
    using namespace at;
    DISPATCH_FLOAT_AND_HALF(input->scalar_type(), 0, "cuComputeGradInput",
        using accscalar_t = at::acc_type<scalar_t_0, true>;
        HostLayerNormGradient(
            dout->data<scalar_t_0>(),
            mean->data<accscalar_t>(),
            invvar->data<accscalar_t>(),
            input,
            n1,n2,
            // Optional tensors: never call ->data() through a NULL pointer.
            gamma != NULL ? gamma->data<scalar_t_0>() : NULL,
            beta != NULL ? beta->data<scalar_t_0>() : NULL,
            epsilon,
            grad_input->data<scalar_t_0>(),
            grad_gamma != NULL ? grad_gamma->data<scalar_t_0>() : NULL,
            grad_beta != NULL ? grad_beta->data<scalar_t_0>() : NULL);
      )
}


================================================
FILE: apex/csrc/multi_tensor_apply.cuh
================================================
#include <ATen/ATen.h>
#include <ATen/AccumulateType.h>
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/Exceptions.h>

#include <assert.h>

// #include <iostream>

// This header is the one-stop shop for all your multi-tensor apply needs.


// TODO:  Kernel arg size limit may be <4KB for some other cards (ie Jetson)
// Per-launch capacities, indexed by (depth - 1) where depth is the number of
// parallel tensor lists: how many tensors and how many blocks a single
// TensorListMetadata can describe.
constexpr int depth_to_max_tensors[5] = {110, 64, 48, 36, 30};
constexpr int depth_to_max_blocks[5] = {320, 320, 320, 320, 320};

// Description of the work for one kernel launch; it is handed to
// multi_tensor_apply_kernel by value as a kernel argument.
// `n` is the number of parallel tensor lists (the "depth").
template<int n> struct TensorListMetadata
{
  // addresses[d][s]: data pointer of the tensor in slot s of list d.
  void* addresses[n][depth_to_max_tensors[n-1]];
  // sizes[s]: element count of the tensor in slot s.
  int sizes[depth_to_max_tensors[n-1]];
  // block_to_tensor[b]: slot processed by block b.  Stored as unsigned char,
  // so slot numbers must stay below 256 (the largest capacity above is 110).
  unsigned char block_to_tensor[depth_to_max_blocks[n-1]];
  // block_to_chunk[b]: which chunk of that tensor block b processes.
  int block_to_chunk[depth_to_max_blocks[n-1]]; // I fear this needs to be a full int.
  // Index, in the caller's full tensor list, of the tensor held in slot 0.
  int start_tensor_this_launch;
};


// Generic kernel shell: every thread simply invokes `callable` with the chunk
// size, the no-op flag, the launch metadata `tl` and the extra arguments.
// `tl` and `callable` are received by value; all per-block work selection is
// done inside the functor.
template<typename T, typename U, typename... ArgTypes>
__global__ void multi_tensor_apply_kernel(
    int chunk_size,
    volatile int* noop_flag,
    T tl,
    U callable,
    ArgTypes... args)
{
  // Hand the chunk information to the user-supplied functor to process however it likes.
  callable(chunk_size, noop_flag, tl, args...); 
}

// Apply `callable` to every chunk of every tensor in `tensor_lists`.
//
// tensor_lists holds `depth` parallel lists of equal length; tensor t of every
// list must be contiguous, on CUDA, and have the same element count as tensor
// t of list 0.  Each tensor is cut into chunks of `chunk_size` elements and
// one thread block of `block_size` threads is assigned per chunk.  Work is
// packed into a TensorListMetadata<depth> and a kernel is launched on the
// current stream whenever
//   - the tensor table is full and the current tensor is finished,
//   - the block table is full, or
//   - the very last chunk has been queued.
// A tensor may therefore be split across launches; the unfinished tensor is
// then carried over into slot 0 of the next launch.
//
// Zero-element tensors produce no chunks.  They still occupy a metadata slot
// (so start_tensor_this_launch + slot keeps matching the caller's tensor
// index), and they are handled explicitly so that pending blocks are still
// launched when the final tensor is empty and the tensor table is flushed
// before it can overflow.
template<int depth, typename T, typename... ArgTypes>
void multi_tensor_apply(
  int block_size,
  int chunk_size,
  const at::Tensor& noop_flag,
  const std::vector<std::vector<at::Tensor>>& tensor_lists,
  T callable,
  ArgTypes... args)
{
  AT_CHECK(tensor_lists.size() == depth, "tensor_lists.size() != depth");
  int len0 = tensor_lists[0].size();
  AT_CHECK(len0 > 0, "tensor_lists[0].size() is not > 0");

  for(int l = 0; l < tensor_lists.size(); l++) // No range-based for because I need indices
  {
    AT_CHECK(tensor_lists[l].size() == len0, "Size mismatch among tensor lists");
    for(int t = 0; t < tensor_lists[l].size(); t++)
    {
      // TODO:  Print which tensor fails.
      AT_CHECK(tensor_lists[l][t].is_contiguous(), "A tensor was not contiguous.");
      AT_CHECK(tensor_lists[l][t].is_cuda(), "A tensor was not cuda.");
      AT_CHECK(tensor_lists[l][t].numel() == tensor_lists[0][t].numel(), "Size mismatch");
    }
  }

  int ntensors = tensor_lists[0].size();

  TensorListMetadata<depth> tl;

  auto stream = at::cuda::getCurrentCUDAStream();
  
  tl.start_tensor_this_launch = 0;
  int loc_block_info = 0;   // blocks queued for the next launch
  int loc_tensor_info = 0;  // tensor slots used in tl
  for(int t = 0; t < ntensors; t++)
  {
    tl.sizes[loc_tensor_info] = tensor_lists[0][t].numel();
    for(int d = 0; d < depth; d++)
      tl.addresses[d][loc_tensor_info] = tensor_lists[d][t].data_ptr();
    loc_tensor_info++;

    int chunks_this_tensor = (tensor_lists[0][t].numel() + chunk_size - 1)/chunk_size;

    if(chunks_this_tensor == 0)
    {
      // Empty tensor: the chunk loop below would not run, so the flush
      // conditions it evaluates have to be checked here instead.
      bool tensors_full = (loc_tensor_info == depth_to_max_tensors[depth-1]);
      bool last_tensor = (t == ntensors - 1);
      if(tensors_full || last_tensor)
      {
        // A launch with zero blocks is invalid, so only launch pending work.
        if(loc_block_info > 0)
        {
          multi_tensor_apply_kernel<<<loc_block_info, block_size, 0, stream>>>(
            chunk_size,
            noop_flag.data<int>(),
            tl,
            callable,
            args...);

          AT_CUDA_CHECK(cudaGetLastError());
        }
        loc_block_info = 0;
        loc_tensor_info = 0;
        tl.start_tensor_this_launch = t + 1;
      }
      continue;
    }

    for(int chunk = 0; chunk < chunks_this_tensor; chunk++)
    {
      tl.block_to_tensor[loc_block_info] = loc_tensor_info - 1;
      tl.block_to_chunk[loc_block_info] = chunk;
      loc_block_info++;
  
      bool tensors_full = (loc_tensor_info == depth_to_max_tensors[depth-1] &&
                           chunk == chunks_this_tensor - 1);
      bool blocks_full = (loc_block_info == depth_to_max_blocks[depth-1]);
      bool last_chunk = (t == ntensors - 1 && chunk == chunks_this_tensor - 1);
      if(tensors_full || blocks_full || last_chunk)
      {
        multi_tensor_apply_kernel<<<loc_block_info, block_size, 0, stream>>>(
          chunk_size,
          noop_flag.data<int>(),
          tl,
          callable,
          args...);

        AT_CUDA_CHECK(cudaGetLastError());

        // Reset.  The control flow possibilities here make my brain hurt.
        loc_block_info = 0;
        if(chunk == chunks_this_tensor - 1)
        {
          // The current tensor is finished: start the next launch empty.
          loc_tensor_info = 0; 
          tl.start_tensor_this_launch = t + 1;
        }
        else
        {
          // The current tensor still has chunks left: carry it over into
          // slot 0 of the next launch.
          tl.sizes[0] = tl.sizes[loc_tensor_info-1];
          for(int d = 0; d < depth; d++)
            tl.addresses[d][0] = tl.addresses[d][loc_tensor_info-1];
          loc_tensor_info = 1;
          tl.start_tensor_this_launch = t;
        }
      }
    }
  }
}


================================================
FILE: apex/csrc/multi_tensor_axpby_kernel.cu
================================================
#include <ATen/ATen.h>
#include <ATen/AccumulateType.h>
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/Exceptions.h>
// Another possibility:
// #include <torch/all.h>

#include <assert.h>

#include "type_shim.h"
#include "multi_tensor_apply.cuh"

#define BLOCK_SIZE 512
#define ILP 4

// Functor for multi_tensor_apply computing out = a*x + b*y element-wise on one
// chunk of one tensor triple (lists 0, 1, 2 = x, y, out).
//
// Inputs are converted to float, combined, and cast to out_t.  After each
// store the operands selected by arg_to_check are tested for finiteness
// (-1: both x and y, 0: x only, 1: y only, anything else: no check) and
// *noop_gmem is set to 1 if a non-finite value is seen.  The flag is never
// read here, so infs/nans still propagate into `out`.
template<typename x_t, typename y_t, typename out_t>
struct AxpbyFunctor
{
   __device__ __forceinline__ void operator()(
    int chunk_size,
    volatile int* noop_gmem,
    TensorListMetadata<3>& tl,
    float a,
    float b,
    int arg_to_check)
  {
    const int tensor_loc = tl.block_to_tensor[blockIdx.x];
    const int chunk_idx = tl.block_to_chunk[blockIdx.x];
    const int chunk_begin = chunk_idx*chunk_size;

    // Elements remaining in this tensor from the start of the chunk, and the
    // number of them this block actually handles.
    const int n = tl.sizes[tensor_loc] - chunk_begin;
    const int limit = n < chunk_size ? n : chunk_size;

    x_t* x = static_cast<x_t*>(tl.addresses[0][tensor_loc]) + chunk_begin;
    y_t* y = static_cast<y_t*>(tl.addresses[1][tensor_loc]) + chunk_begin;
    out_t* out = static_cast<out_t*>(tl.addresses[2][tensor_loc]) + chunk_begin;

    for(int i_start = 0; i_start < limit; i_start += blockDim.x*ILP)
    {
      // Phase 1: issue all ILP loads before any arithmetic.
      float xs[ILP];
      float ys[ILP];
      #pragma unroll
      for(int ii = 0; ii < ILP; ii++)
      {
        const int i = i_start + threadIdx.x + ii*blockDim.x;
        const bool active = i < limit;
        xs[ii] = active ? static_cast<float>(x[i]) : 0.f;
        ys[ii] = active ? static_cast<float>(y[i]) : 0.f;
      }

      // Phase 2: compute, store and check.
      // see note in multi_tensor_scale_kernel.cu
      #pragma unroll
      for(int ii = 0; ii < ILP; ii++)
      {
        const int i = i_start + threadIdx.x + ii*blockDim.x;
        if(i >= limit)
          continue;

        out[i] = static_cast<out_t>(a*xs[ii] + b*ys[ii]);

        bool finite = true;
        switch(arg_to_check)
        {
          case -1:
            finite = (isfinite(xs[ii]) && isfinite(ys[ii]));
            break;
          case 0:
            finite = isfinite(xs[ii]);
            break;
          case 1:
            finite = isfinite(ys[ii]);
            break;
          default:
            break;
        }
        if(!finite)
          *noop_gmem = 1; // Unsynchronised write; every writer stores the same value.
      }
    }
  }
};

// Host entry point for the fused out = a*x + b*y over lists of tensors.
//
// tensor_lists is expected to hold three parallel lists (x, y, out); the
// element type of each list is taken from its first tensor and dispatched
// independently over float and half.  arg_to_check is forwarded to
// AxpbyFunctor, which uses it to decide which inputs are tested for
// non-finite values.
void multi_tensor_axpby_cuda(
  int chunk_size,
  at::Tensor noop_flag,
  std::vector<std::vector<at::Tensor>> tensor_lists,
  float a,
  float b,
  int arg_to_check)
{
  using namespace at;
  // NOTE(review): an earlier comment here said the output type is always
  // float, but the output list's type is dispatched below like the inputs.
  // If build times suffer, think about where to put this dispatch,
  // and what logic should be moved out of multi_tensor_apply.

  // Dispatch levels 0, 1, 2 define scalar_t_0/1/2 for x, y and out.
  DISPATCH_FLOAT_AND_HALF(tensor_lists[0][0].scalar_type(), 0, "multi_tensor_axpby_cuda",
    DISPATCH_FLOAT_AND_HALF(tensor_lists[1][0].scalar_type(), 1, "multi_tensor_axpby_cuda",
      DISPATCH_FLOAT_AND_HALF(tensor_lists[2][0].scalar_type(), 2, "multi_tensor_axpby_cuda",
           multi_tensor_apply<3>(
             BLOCK_SIZE,
             chunk_size,
             noop_flag,
             tensor_lists,
             AxpbyFunctor<scalar_t_0, scalar_t_1, scalar_t_2>(),
             a,
             b,
             arg_to_check); )))

  // Surface launch errors from the kernels queued above.
  AT_CUDA_CHECK(cudaGetLastError());

  // AT_CUDA_CHECK(cudaDeviceSynchronize());
}


================================================
FILE: apex/csrc/multi_tensor_l2norm_kernel.cu
================================================
#include <ATen/ATen.h>
#include <ATen/AccumulateType.h>
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/Exceptions.h>
// Another possibility:
// #include <torch/all.h>

#include <assert.h>

#include "type_shim.h"
#include "multi_tensor_apply.cuh"

#define BLOCK_SIZE 512
#define ILP 4

// Functor for multi_tensor_apply that accumulates the sum of squares of one
// chunk of one tensor.
//
// Each thread squares and sums its elements in float (ILP independent
// accumulators), the block reduces those sums with reduce_block_into_lanes,
// and thread 0 then
//   - sets *noop_gmem to 1 if the block sum is not finite,
//   - adds the block sum to output[blockIdx.x],
//   - when per_tensor is set, stores it at
//     output_per_tensor[tensor_index * max_chunks_per_tensor + chunk_idx],
//     where tensor_index = tl.start_tensor_this_launch + slot.
// The flag is never read here, so infs/nans still propagate into the sums.
template<typename x_t>
struct L2NormFunctor
{
  __device__ __forceinline__ void operator()(
    int chunk_size,
    volatile int* noop_gmem,
    TensorListMetadata<1>& tl,
    float* output,
    float* output_per_tensor,
    bool per_tensor,
    int max_chunks_per_tensor)
  {
    const int tensor_loc = tl.block_to_tensor[blockIdx.x];
    const int chunk_idx = tl.block_to_chunk[blockIdx.x];
    const int chunk_begin = chunk_idx*chunk_size;

    // Elements left in the tensor from the chunk start, capped at one chunk.
    const int n = tl.sizes[tensor_loc] - chunk_begin;
    const int limit = n < chunk_size ? n : chunk_size;

    x_t* x = static_cast<x_t*>(tl.addresses[0][tensor_loc]) + chunk_begin;

    // Scratch space for the block reduction.
    __shared__ float s_vals[512];

    float partial[ILP];
    for(int ii = 0; ii < ILP; ii++)
      partial[ii] = 0.f;

    for(int i_start = 0; i_start < limit; i_start += blockDim.x*ILP)
    {
      #pragma unroll
      for(int ii = 0; ii < ILP; ii++)
      {
        const int i = i_start + threadIdx.x + ii*blockDim.x;
        if(i < limit)
        {
          const float elem = static_cast<float>(x[i]);
          partial[ii] += elem*elem;
        }
      }
    }

    // Fold the ILP accumulators in index order.
    float thread_sum = 0.f;
    for(int ii = 0; ii < ILP; ii++)
      thread_sum += partial[ii];

    const float block_sum = reduce_block_into_lanes(s_vals, thread_sum);

    if(threadIdx.x == 0)
    {
      if(!isfinite(block_sum))
        *noop_gmem = 1; // Unsynchronised write; every writer stores the same value.
      output[blockIdx.x] += block_sum;
      if(per_tensor)
      {
        const int tensor_index = tl.start_tensor_this_launch + tensor_loc;
        output_per_tensor[tensor_index*max_chunks_per_tensor + chunk_idx] = block_sum;
      }
    }
  }
};


// Final reduction for the L2-norm computation.
//
// Block 0 sums the first 320 entries of `output` and writes the square root of
// the total to *ret.  When per_tensor is set, every block additionally sums
// the max_chunks_per_tensor entries of output_per_tensor belonging to tensor
// blockIdx.x and writes the square root to ret_per_tensor[blockIdx.x].
__global__ void cleanup(
  float* output,
  float* output_per_tensor,
  float* ret,
  float* ret_per_tensor,
  bool per_tensor,
  int max_chunks_per_tensor)
{
  // Scratch space shared by both block reductions below.
  __shared__ float vals[512];

  if(blockIdx.x == 0)
  {
    // Threads beyond the 320 partial sums contribute zero.
    const float partial = (threadIdx.x < 320) ? output[threadIdx.x] : 0.f;
    const float total = reduce_block_into_lanes(vals, partial);

    if(threadIdx.x == 0)
      *ret = sqrt(total);
  }

  if(per_tensor)
  {
    const float* chunk_sums = output_per_tensor + blockIdx.x*max_chunks_per_tensor;

    float partial = 0;
    for(int c = threadIdx.x; c < max_chunks_per_tensor; c += blockDim.x)
      partial += chunk_sums[c];

    const float total = reduce_block_into_lanes(vals, partial);

    if(threadIdx.x == 0)
      ret_per_tensor[blockIdx.x] = sqrt(total);
  }
}


// Compute the L2 norm over all tensors in tensor_lists[0], and optionally the
// L2 norm of each tensor individually.
//
// Returns (ret, ret_per_tensor): `ret` is a 1-element float tensor with the
// global norm; `ret_per_tensor` has one float per tensor when per_tensor_python
// is true and is an empty tensor otherwise (also when it is unset).
// Squared sums are accumulated by L2NormFunctor through multi_tensor_apply and
// finished (sum + sqrt) by the `cleanup` kernel on the current stream.
std::tuple<at::Tensor, at::Tensor> multi_tensor_l2norm_cuda(
  int chunk_size,
  at::Tensor noop_flag,
  std::vector<std::vector<at::Tensor>> tensor_lists,
  at::optional<bool> per_tensor_python)
{
  bool per_tensor = false;
  if(per_tensor_python.has_value())
    per_tensor = per_tensor_python.value();

  auto float_options = tensor_lists[0][0].options().dtype(at::kFloat);
  // One partial sum per block index; zero-initialised because the functor
  // accumulates into it across launches.
  auto output = at::zeros({320}, float_options);

  at::Tensor output_per_tensor;
  at::Tensor ret_per_tensor;

  int ntensors = tensor_lists[0].size();
  int max_chunks_per_tensor = -1;

  if(!per_tensor)
  {
    ret_per_tensor = at::empty({0}, float_options);
  }
  else
  {
    // Row width of the per-tensor scratch buffer: the largest chunk count.
    for(int t = 0; t < ntensors; t++)
    {
      const int chunks = (tensor_lists[0][t].numel() + chunk_size - 1)/chunk_size;
      if(chunks > max_chunks_per_tensor)
        max_chunks_per_tensor = chunks;
    }
    output_per_tensor = at::zeros({ntensors*max_chunks_per_tensor}, float_options);
    ret_per_tensor = at::empty({ntensors}, float_options);
  }

  DISPATCH_FLOAT_AND_HALF(tensor_lists[0][0].scalar_type(), 0, "multi_tensor_l2norm_cuda",
    multi_tensor_apply<1>(
      BLOCK_SIZE,
      chunk_size,
      noop_flag,
      tensor_lists,
      L2NormFunctor<scalar_t_0>(),
      output.data<float>(),
      per_tensor ? output_per_tensor.data<float>() : nullptr,
      per_tensor,
      max_chunks_per_tensor);)

  AT_CUDA_CHECK(cudaGetLastError());

  // AT_CUDA_CHECK(cudaDeviceSynchronize());

  // This costs one more small kernel launch, which is negligible end to end.
  // It could be avoided by adding persistence logic to the functor and the
  // multi-tensor harness, but this keeps things simple for now.
  auto ret = at::empty({1}, output.options());
  auto stream = at::cuda::getCurrentCUDAStream();
  const int cleanup_blocks = per_tensor ? ntensors : 1;
  cleanup<<<cleanup_blocks, 512, 0, stream>>>(
    output.data<float>(),
    per_tensor ? output_per_tensor.data<float>() : nullptr,
    ret.data<float>(),
    per_tensor ? ret_per_tensor.data<float>() : nullptr,
    per_tensor,
    max_chunks_per_tensor);

  return std::tuple<at::Tensor, at::Tensor>(ret, ret_per_tensor);
}


================================================
FILE: apex/csrc/multi_tensor_lamb_stage_1.cu
================================================
#include <ATen/ATen.h>
#include <ATen/AccumulateType.h>
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/Exceptions.h>
// Another possibility:
// #include <torch/all.h>

#include <assert.h>

#include "type_shim.h"
#include "multi_tensor_apply.cuh"

#define BLOCK_SIZE 512
#define ILP 4

// Step 1 computes the 'update' value of regular Adam optimizer.
//
// Tensor lists, in order: 0 = gradient g, 1 = parameter p, 2 = first moment m,
// 3 = second moment v, 4 = update (output).  For every element of the chunk:
//   scaled_grad = g / clipped_global_grad_norm
//   m = m * beta1 + (1 - beta1) * scaled_grad
//   v = v * beta2 + (1 - beta2) * scaled_grad^2
//   update = (m / beta1_correction) / (sqrt(v / beta2_correction) + epsilon)
//            + decay * p
// where decay = per_tensor_decay[tensor index].  m and v are updated in place;
// p is only read.  noop_gmem is not used by this functor.
template<typename GRAD_T, typename T>
struct LAMBStage1Functor
{
   __device__ __forceinline__ void operator()(
    int chunk_size,
    volatile int* noop_gmem,
    TensorListMetadata<5>& tl,
    const float* per_tensor_decay,
    const float beta1,
    const float beta2,
    const float beta1_correction,
    const float beta2_correction,
    const float epsilon,
    const float clipped_global_grad_norm)
  {
    // I'd like this kernel to propagate infs/nans.
    // if(*noop_gmem == 1)
    //   return;

    // tensor_loc is the slot inside this launch; tensor_num is the index in
    // the caller's full tensor list, used for per-tensor arrays.
    int tensor_loc = tl.block_to_tensor[blockIdx.x];
    int tensor_num = tl.start_tensor_this_launch + tensor_loc;
    int chunk_idx = tl.block_to_chunk[blockIdx.x];
    int n = tl.sizes[tensor_loc];

    float decay = per_tensor_decay[tensor_num];

    // Advance every pointer to the start of this block's chunk.
    GRAD_T* g = (GRAD_T*)tl.addresses[0][tensor_loc];
    g += chunk_idx*chunk_size;

    T* p = (T*)tl.addresses[1][tensor_loc];
    p += chunk_idx*chunk_size;

    T* m = (T*)tl.addresses[2][tensor_loc];
    m += chunk_idx*chunk_size;

    T* v = (T*)tl.addresses[3][tensor_loc];
    v += chunk_idx*chunk_size;

    T* update = (T*)tl.addresses[4][tensor_loc];
    update += chunk_idx*chunk_size;

    // n becomes the number of elements from the chunk start to the tensor end.
    n -= chunk_idx*chunk_size;

    // see note in multi_tensor_scale_kernel.cu
    for(int i_start = 0;
            i_start < n && i_start < chunk_size;
            i_start += blockDim.x*ILP)
    {
      GRAD_T r_g[ILP];
      T r_p[ILP];
      T r_m[ILP];
      T r_v[ILP];
      // Load phase: out-of-range lanes are zero-filled so the arithmetic below
      // can run unconditionally.
#pragma unroll
      for(int ii = 0; ii < ILP; ii++)
      {
        int i = i_start + threadIdx.x + ii*blockDim.x;
        if(i < n && i < chunk_size)
        {
          r_g[ii] = g[i];
          r_p[ii] = p[i];
          r_m[ii] = m[i];
          r_v[ii] = v[i];
        } else {
          r_g[ii] = GRAD_T(0);
          r_p[ii] = T(0);
          r_m[ii] = T(0);
          r_v[ii] = T(0);
        }
      }
      // Compute phase: r_p is reused to hold the resulting update value.
#pragma unroll
      for(int ii = 0; ii < ILP; ii++)
      {
        T scaled_grad = r_g[ii] / clipped_global_grad_norm;
        r_m[ii] = r_m[ii] * beta1 + (1-beta1) * scaled_grad;
        r_v[ii] = r_v[ii] * beta2 + (1-beta2) * scaled_grad * scaled_grad;
        T next_m_unbiased = r_m[ii] / beta1_correction;
        T next_v_unbiased = r_v[ii] / beta2_correction;
        T denom = std::sqrt(next_v_unbiased) + epsilon;
        r_p[ii] = (next_m_unbiased/denom) + (decay*r_p[ii]);
      }
      // Store phase: only in-range lanes write back.
#pragma unroll
      for(int ii = 0; ii < ILP; ii++)
      {
        int i = i_start + threadIdx.x + ii*blockDim.x;
        if(i < n && i < chunk_size)
        {
          update[i] = r_p[ii];
          m[i] = r_m[ii];
          v[i] = r_v[ii];
        }
      }
    }
  }
};

// Host entry point for LAMB stage 1.
//
// Derives the scalar inputs of LAMBStage1Functor and runs it over the five
// parallel tensor lists via multi_tensor_apply:
//   - the gradient divisor is global_grad_norm / max_global_grad_norm when the
//     global norm exceeds the maximum, and 1 otherwise;
//   - the bias corrections are 1 - beta^(step + 1).
// The functor's two template types are dispatched from the first tensor of
// list 0 and of list 1 respectively (float or half).
void multi_tensor_lamb_stage1_cuda(
  int chunk_size,
  at::Tensor noop_flag,
  std::vector<std::vector<at::Tensor>> tensor_lists,
  at::Tensor per_tensor_decay,
  const int step,
  const float beta1,
  const float beta2,
  const float epsilon,
  const float global_grad_norm,
  const float max_global_grad_norm)
{
  using namespace at;

  // Gradients are divided by this value inside the functor.
  float clipped_global_grad_norm = 1.0f;
  if(global_grad_norm > max_global_grad_norm)
    clipped_global_grad_norm = global_grad_norm / max_global_grad_norm;

  const float next_step = float(step+1);
  const float beta1_correction = 1.0f - std::pow(beta1, next_step);
  const float beta2_correction = 1.0f - std::pow(beta2, next_step);

  DISPATCH_FLOAT_AND_HALF(tensor_lists[0][0].scalar_type(), 0, "lamb_stage_1",
    DISPATCH_FLOAT_AND_HALF(tensor_lists[1][0].scalar_type(), 1, "lamb_stage_1",
      multi_tensor_apply<5>(
        BLOCK_SIZE,
        chunk_size,
        noop_flag,
        tensor_lists,
        LAMBStage1Functor<scalar_t_0, scalar_t_1>(),
        per_tensor_decay.data<float>(),
        beta1,
        beta2,
        beta1_correction,
        beta2_correction,
        epsilon,
        clipped_global_grad_norm); ))

  // Surface launch errors from the kernels queued above.
  AT_CUDA_CHECK(cudaGetLastError());

  // AT_CUDA_CHECK(cudaDeviceSynchronize());
}


================================================
FILE: apex/csrc/multi_tensor_lamb_stage_2.cu
================================================
#include <ATen/ATen.h>
#include <ATen/AccumulateType.h>
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/Exceptions.h>
// Another possibility:
// #include <torch/all.h>

#include <assert.h>

#include "type_shim.h"
#include "multi_tensor_apply.cuh"

#define BLOCK_SIZE 512
#define ILP 4

// Step 2 reads in 'update' value and per-tensor param_norm and update_norm.
// It computes new parameter value.
//
// Tensor lists, in order: 0 = parameter p (updated in place), 1 = update.
// For every element of the chunk:
//   p = p - ratio * update
// where ratio = learning_rate * (param_norm / update_norm) when both norms of
// the tensor are non-zero, and learning_rate otherwise.  noop_gmem is not used
// by this functor.
template<typename T>
struct LAMBStage2Functor
{
   __device__ __forceinline__ void operator()(
    int chunk_size,
    volatile int* noop_gmem,
    TensorListMetadata<2>& tl,
    const float* per_tensor_param_norm,
    const float* per_tensor_update_norm,
    const float learning_rate)
  {
    // I'd like this kernel to propagate infs/nans.
    // if(*noop_gmem == 1)
    //   return;

    // tensor_loc is the slot inside this launch; tensor_num is the index in
    // the caller's full tensor list, used for the per-tensor norm arrays.
    int tensor_loc = tl.block_to_tensor[blockIdx.x];
    int tensor_num = tl.start_tensor_this_launch + tensor_loc;
    int chunk_idx = tl.block_to_chunk[blockIdx.x];
    int n = tl.sizes[tensor_loc];

    float param_norm = per_tensor_param_norm[tensor_num];
    float update_norm = per_tensor_update_norm[tensor_num];
    T ratio = (update_norm != 0.0f && param_norm != 0.0f) ? learning_rate * (param_norm / update_norm) : learning_rate;

    T* p = (T*)tl.addresses[0][tensor_loc];
    p += chunk_idx*chunk_size;

    T* update = (T*)tl.addresses[1][tensor_loc];
    update += chunk_idx*chunk_size;

    // n becomes the number of elements from the chunk start to the tensor end.
    n -= chunk_idx*chunk_size;

    for(int i_start = 0;
            i_start < n && i_start < chunk_size;
            i_start += blockDim.x*ILP)
    {
      T r_p[ILP];
      T r_update[ILP];
      // Load phase.  Out-of-range lanes are zero-filled (as in
      // LAMBStage1Functor) so the unconditional arithmetic below never reads
      // uninitialised registers.
#pragma unroll
      for(int ii = 0; ii < ILP; ii++)
      {
        int i = i_start + threadIdx.x + ii*blockDim.x;
        if(i < n && i < chunk_size)
        {
          r_p[ii] = p[i];
          r_update[ii] = update[i];
        } else {
          r_p[ii] = T(0);
          r_update[ii] = T(0);
        }
      }
      // Compute phase.
#pragma unroll
      for(int ii = 0; ii < ILP; ii++)
      {
        r_p[ii] = r_p[ii] - (ratio*r_update[ii]);
      }
      // Store phase: only in-range lanes write back.
#pragma unroll
      for(int ii = 0; ii < ILP; ii++)
      {
        int i = i_start + threadIdx.x + ii*blockDim.x;
        if(i < n && i < chunk_size)
        {
          p[i] = r_p[ii];
        }
      }
    }
  }
};

// Host-side launcher for LAMB stage 2.
// Dispatches on the dtype of tensor_lists[0][0] (float or half) and runs
// LAMBStage2Functor over the two tensor lists through multi_tensor_apply<2>.
//   chunk_size             - forwarded to multi_tensor_apply
//   noop_flag              - forwarded to multi_tensor_apply (the functor ignores it)
//   tensor_lists           - list 0 is read/written as parameters, list 1 is read as updates
//   per_tensor_param_norm  - accessed as float through data<float>()
//   per_tensor_update_norm - accessed as float through data<float>()
//   learning_rate          - forwarded to the functor
void multi_tensor_lamb_stage2_cuda(
  int chunk_size,
  at::Tensor noop_flag,
  std::vector<std::vector<at::Tensor>> tensor_lists,
  at::Tensor per_tensor_param_norm,
  at::Tensor per_tensor_update_norm,
  const float learning_rate)
{
  using namespace at;

  // Only the dtype of the first tensor of list 0 selects the instantiation.
  DISPATCH_FLOAT_AND_HALF(tensor_lists[0][0].scalar_type(), 0, "lamb_stage_2",
      multi_tensor_apply<2>(
        BLOCK_SIZE,
        chunk_size,
        noop_flag,
        tensor_lists,
        LAMBStage2Functor<scalar_t_0>(),
        per_tensor_param_norm.data<float>(),
        per_tensor_update_norm.data<float>(),
        learning_rate); )

  // Report any error recorded by the launch above.
  AT_CUDA_CHECK(cudaGetLastError());

  // AT_CUDA_CHECK(cudaDeviceSynchronize());
}


================================================
FILE: apex/csrc/multi_tensor_scale_kernel.cu
================================================
#include <ATen/ATen.h>
#include <ATen/AccumulateType.h>
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/Exceptions.h>
// Another possibility:
// #include <torch/all.h>

#include <assert.h>
// Stringstream is a big hammer, but I want to rely on operator<< for dtype.
#include <sstream>

#include "type_shim.h"
#include "multi_tensor_apply.cuh"

#define BLOCK_SIZE 512
#define ILP 4

// Elementwise scaling functor: for the chunk assigned to this block, reads
// in_t values from tensor list 0, multiplies them by `scale` in float, and
// writes the result as out_t into tensor list 1.
// If any in-range input value is not finite, *noop_gmem is set to 1.
template<typename in_t, typename out_t>
struct ScaleFunctor
{
   __device__ __forceinline__ void operator()(
    int chunk_size,
    volatile int* noop_gmem,
    TensorListMetadata<2>& tl,
    float scale)
  {
    // I'd like this kernel to propagate infs/nans.
    // if(*noop_gmem == 1)
    //   return;

    // Tensor and chunk handled by this block.
    int tensor_loc = tl.block_to_tensor[blockIdx.x];
    int chunk_idx = tl.block_to_chunk[blockIdx.x];
    int n = tl.sizes[tensor_loc];

    in_t* in = (in_t*)tl.addresses[0][tensor_loc];
    in += chunk_idx*chunk_size;
   
    out_t* out = (out_t*)tl.addresses[1][tensor_loc];
    out += chunk_idx*chunk_size;

    // n becomes the number of elements from the chunk start to the tensor end.
    n -= chunk_idx*chunk_size;

    // Non-divergent exit condition for __syncthreads, not necessary here
    float incoming_vals[ILP];
    for(int i_start = 0;
        i_start < n && i_start < chunk_size;
        i_start += blockDim.x*ILP)
    {
      // Load up to ILP values per thread; out-of-range slots stay 0.
      #pragma unroll
      for(int ii = 0; ii < ILP; ii++)
      {
        incoming_vals[ii] = 0;
        int i = i_start + threadIdx.x + ii*blockDim.x;
        if(i < n && i < chunk_size)
          incoming_vals[ii] = static_cast<float>(in[i]);
      }

      // note for clarification to future michael:
      // From a pure memory dependency perspective, there's likely no point unrolling
      // the write loop, since writes just fire off once their LDGs arrive.
      // Put another way, the STGs are dependent on the LDGs, but not on each other.
      // There is still compute ILP benefit from unrolling the loop though.
      #pragma unroll
      for(int ii = 0; ii < ILP; ii++)
      {
        int i = i_start + threadIdx.x + ii*blockDim.x;
        if(i < n && i < chunk_size)
        {
          out[i] = static_cast<out_t>(incoming_vals[ii]*scale);
          // The finiteness check is on the unscaled input, not on the scaled output.
          if(!isfinite(incoming_vals[ii]))
            *noop_gmem = 1; // Blindly fire off a write.  These will race but that's ok.
        }
      }
    }
  }
};

// Host-side launcher for the multi-tensor scale kernel.
// Runs ScaleFunctor over two tensor lists (list 0 = inputs, list 1 = outputs)
// through multi_tensor_apply<2>, multiplying every element by `scale`.
void multi_tensor_scale_cuda(
  int chunk_size,
  at::Tensor noop_flag,
  std::vector<std::vector<at::Tensor>> tensor_lists,
  float scale)
{
  using namespace at;
  // The input dtype is taken from tensor_lists[0][0] and the output dtype from
  // tensor_lists[1][0]; each may independently be float or half.
  // If build times suffer, think about where to put this dispatch,
  // and what logic should be moved out of multi_tensor_apply.

  DISPATCH_FLOAT_AND_HALF(tensor_lists[0][0].scalar_type(), 0, "multi_tensor_scale_cuda",
    DISPATCH_FLOAT_AND_HALF(tensor_lists[1][0].scalar_type(), 1, "multi_tensor_scale_cuda",
      multi_tensor_apply<2>(
        BLOCK_SIZE,
        chunk_size,
        noop_flag,
        tensor_lists,
        ScaleFunctor<scalar_t_0, scalar_t_1>(),
        scale); ))
  // Report any error recorded by the launch above.
  AT_CUDA_CHECK(cudaGetLastError());

  // AT_CUDA_CHECK(cudaDeviceSynchronize());
}


================================================
FILE: apex/csrc/syncbn.cpp
================================================
#include <torch/extension.h>
#include <ATen/ATen.h>

#include <vector>

// Forward declarations of the CUDA entry points that are exposed to Python by
// the PYBIND11_MODULE block below.

// Returns {mean, biased_var}.
// Implemented using Welford's algorithm.
std::vector<at::Tensor> welford_mean_var_CUDA(const at::Tensor input);

// Reduces an array of mean/var gathered across processes.
// Returns global {mean, inv_std, biased_var}.
// Implemented using Welford's algorithm.
std::vector<at::Tensor> welford_parallel_CUDA(const at::Tensor mean_feature_nodes,
                                              const at::Tensor var_biased_feature_nodes,
                                              int numel,
                                              const float eps);

// Elementwise BN operation, returns output.
// input/weight/shift should have identical data type;
// mean/inv_std have promoted data type (dtype==fp16?fp32:dtype)
at::Tensor batchnorm_forward_CUDA(const at::Tensor input,
                                  const at::Tensor mean,
                                  const at::Tensor inv_std,
                                  const at::optional<at::Tensor> weight,
                                  const at::optional<at::Tensor> shift);

// Backward BN operation, returns {mean_dy, mean_dy_xmu, grad_weight, grad_bias}.
// grad_output/input should have identical data type;
// mean/inv_std have promoted data type (dtype==fp16?fp32:dtype)
// Implemented using Kahan summation.
std::vector<at::Tensor> reduce_bn_CUDA(const at::Tensor grad_output,
                                           const at::Tensor input,
                                           const at::Tensor mean,
                                           const at::Tensor inv_std,
                                           const at::optional<at::Tensor> weight);

// Elementwise backward BN operation, returns grad_input.
// grad_output/input/weight precision could be fp16/fp32;
// mean/inv_std/mean_dy/mean_dy_xmu precision is fp32
at::Tensor batchnorm_backward_CUDA(const at::Tensor grad_output,
                                   const at::Tensor input,
                                   const at::Tensor mean,
                                   const at::Tensor inv_std,
                                   const at::optional<at::Tensor> weight,
                                   const at::Tensor mean_dy,
                                   const at::Tensor mean_dy_xmu);

// Returns {mean, biased_var}.
// Implemented using Welford's algorithm.
// Expects data to be in n+c format (channel last) and applies CUDNN_BATCHNORM_SPATIAL.
std::vector<at::Tensor> welford_mean_var_c_last_CUDA(const at::Tensor input);

// Elementwise BN operation, returns output.
// input/weight/shift should have identical data type;
// mean/inv_std have promoted data type (dtype==fp16?fp32:dtype)
// Expects data to be in n+c format (channel last) and applies CUDNN_BATCHNORM_SPATIAL.
at::Tensor batchnorm_forward_c_last_CUDA(const at::Tensor input,
                                         const at::Tensor mean,
                                         const at::Tensor inv_std,
                                         const at::optional<at::Tensor> weight,
                                         const at::optional<at::Tensor> shift);

// Backward BN operation, returns {mean_dy, mean_dy_xmu, grad_weight, grad_bias}.
// grad_output/input should have identical data type;
// mean/inv_std have promoted data type (dtype==fp16?fp32:dtype)
// Expects data to be in n+c format (channel last) and applies CUDNN_BATCHNORM_SPATIAL.
std::vector<at::Tensor> reduce_bn_c_last_CUDA(const at::Tensor grad_output,
                                              const at::Tensor input,
                                              const at::Tensor mean,
                                              const at::Tensor inv_std,
                                              const at::optional<at::Tensor> weight);

// Elementwise backward BN operation, returns grad_input.
// grad_output/input/weight precision could be fp16/fp32;
// mean/inv_std/mean_dy/mean_dy_xmu precision is fp32
// Expects data to be in n+c format (channel last) and applies CUDNN_BATCHNORM_SPATIAL.
at::Tensor batchnorm_backward_c_last_CUDA(const at::Tensor grad_output,
                                          const at::Tensor input,
                                          const at::Tensor mean,
                                          const at::Tensor inv_std,
                                          const at::optional<at::Tensor> weight,
                                          const at::Tensor mean_dy,
                                          const at::Tensor mean_dy_xmu);

// Python bindings: each m.def registers one of the *_CUDA functions declared
// above under a shorter Python-visible name, with a one-line docstring.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  // Default (non channel-last) layout entry points.
  m.def("welford_mean_var", &welford_mean_var_CUDA, "welford mean variance");
  m.def("welford_parallel", &welford_parallel_CUDA, "welford parallel reduce mean variance");
  m.def("batchnorm_forward", &batchnorm_forward_CUDA, "batchnorm forward");
  m.def("reduce_bn", &reduce_bn_CUDA, "batchnorm backward reduce grad sum and bias/weight grad");
  m.def("batchnorm_backward", &batchnorm_backward_CUDA, "batchnorm backward dgrad");
  // Channel-last layout entry points.
  m.def("welford_mean_var_c_last", &welford_mean_var_c_last_CUDA, "welford mean variance nhwc");
  m.def("batchnorm_forward_c_last", &batchnorm_forward_c_last_CUDA, "batchnorm forward nhwc");
  m.def("reduce_bn_c_last", &reduce_bn_c_last_CUDA, "batchnorm backwards reduce grad sum and bias/weight grad nhwc");
  m.def("batchnorm_backward_c_last", &batchnorm_backward_c_last_CUDA, "batchnorm backward dgrad nhwc");
}


================================================
FILE: apex/csrc/type_shim.h
================================================
#include <ATen/ATen.h>

// Forward/backward compatibility hack around
// https://github.com/pytorch/pytorch/commit/3aeb78079bcd68282fe9117088e138b77318e288
// pending more future-proof guidance from upstream.
// struct TypeShim
// {
//   const at::Type& payload;
//   TypeShim(const at::Type& type) : payload(type) {}
//   // Enable trivial conversion to a const at::Type& for pre-3aeb78
//   operator const at::Type&(){ return payload; };
//   // Enable dispatch switch statements to take *this directly for  post-3aeb78
//   //operator at::ScalarType(){ return payload.; };
// };

// Runtime dtype dispatch for float and half.
// Switches on TYPE (an at::ScalarType); in the matching case it defines the
// alias scalar_t_<LEVEL> (float or at::Half) and then expands __VA_ARGS__, so
// the body can use scalar_t_<LEVEL> as a template argument. Using a different
// LEVEL per nesting depth lets invocations be nested without alias clashes.
// Any other dtype raises AT_ERROR; NAME is stringized into that message.
#define DISPATCH_FLOAT_AND_HALF(TYPE, LEVEL, NAME, ...) \
  switch(TYPE) \
  { \
    case at::ScalarType::Float: \
    { \
      using scalar_t_##LEVEL = float; \
      __VA_ARGS__; \
      break; \
    } \
    case at::ScalarType::Half: \
    { \
      using scalar_t_##LEVEL = at::Half; \
      __VA_ARGS__; \
      break; \
    } \
    default: \
      AT_ERROR(#NAME, " not implemented for '", toString(TYPE), "'");  \
  }


// Runtime dtype dispatch for double, float and half.
// Switches on TYPE (an at::ScalarType); in the matching case it defines the
// alias scalar_t_<LEVEL> (double, float or at::Half) and then expands
// __VA_ARGS__. Any other dtype raises AT_ERROR; NAME is stringized into that
// message.
#define DISPATCH_DOUBLE_FLOAT_AND_HALF(TYPE, LEVEL, NAME, ...) \
  switch(TYPE) \
  { \
    case at::ScalarType::Double: \
    { \
      using scalar_t_##LEVEL = double; \
      __VA_ARGS__; \
      break; \
    } \
    case at::ScalarType::Float: \
    { \
      using scalar_t_##LEVEL = float; \
      __VA_ARGS__; \
      break; \
    } \
    case at::ScalarType::Half: \
    { \
      using scalar_t_##LEVEL = at::Half; \
      __VA_ARGS__; \
      break; \
    } \
    default: \
      AT_ERROR(#NAME, " not implemented for '", toString(TYPE), "'");  \
  }


  // Runtime dtype dispatch for double and float only (no half case).
  // Switches on TYPE (an at::ScalarType); in the matching case it defines the
  // alias scalar_t_<LEVEL> (double or float) and then expands __VA_ARGS__.
  // Any other dtype raises AT_ERROR; NAME is stringized into that message.
  #define DISPATCH_DOUBLE_AND_FLOAT(TYPE, LEVEL, NAME, ...) \
  switch(TYPE) \
  { \
    case at::ScalarType::Double: \
    { \
      using scalar_t_##LEVEL = double; \
      __VA_ARGS__; \
      break; \
    } \
    case at::ScalarType::Float: \
    { \
      using scalar_t_##LEVEL = float; \
      __VA_ARGS__; \
      break; \
    } \
    default: \
      AT_ERROR(#NAME, " not implemented for '", toString(TYPE), "'");  \
  }


// Sums `val` across all threads of the block, leaving `lanes` partial sums in
// the first `lanes` threads (with lanes == 1, thread 0 ends up with the total).
//   x            - scratch array indexed by the flattened thread id; also
//                  receives the results in x[0..lanes) when share_result is set
//                  (presumably shared memory, per the comment near the end)
//   val          - this thread's contribution
//   lanes        - number of partial sums to keep
//   share_result - when true, the results are written to x and a barrier is
//                  issued so every thread can read them
// Returns `final`, which is only assigned for threads with tid < 32; for all
// other threads the returned value is uninitialized and must not be used.
// Contains __syncthreads(), so every thread of the block must call it.
template<typename T>
__device__ __forceinline__ T reduce_block_into_lanes
  (T *x,
   T val,
   int lanes=1,
   bool share_result=false) // lanes is intended to be <= 32.
{
  int tid = threadIdx.x + threadIdx.y*blockDim.x;
  int blockSize = blockDim.x*blockDim.y; // blockSize is intended to be a multiple of 32.

  // With fewer than 64 threads the scratch array is not needed at all.
  if(blockSize >= 64)
  {
    x[tid] = val;
    __syncthreads();
  }

  // Tree reduction through the scratch array until 64 partial sums remain.
  #pragma unroll
  for(int i = (blockSize >> 1); i >= 64; i >>= 1)
  {
    if(tid < i)
      x[tid] = x[tid] + x[tid+i];
    __syncthreads();
  }

  T final;

  // The first 32 threads finish the reduction with shuffles.
  if(tid < 32)
  {
    if(blockSize >= 64)
      final = x[tid] + x[tid+32];
    else
      final = val;
    // __SYNCWARP();

    // Stops once the shuffle distance drops below `lanes`.
    #pragma unroll
    for(int i = 16; i >= lanes; i >>= 1)
      final = final + __shfl_down_sync(0xffffffff, final, i);
  }

  if(share_result)
  {
    if(tid < lanes)
      x[tid] = final; // EpilogueOp
    // Make sure the smem result is visible to all warps.
    __syncthreads();
  }

  return final;
}


================================================
FILE: apex/csrc/welford.cu
================================================
#include <iostream>
#include <ATen/ATen.h>
#include <ATen/AccumulateType.h>
#include <ATen/cuda/CUDAContext.h>

#include <cuda.h>
#include <cuda_runtime.h>

#include <vector>

#include "type_shim.h"


// Device helper: returns the largest power of two strictly smaller than n
// (for an exact power of two the next lower one is returned).
__device__ __forceinline__ int lastpow2(int n)
{
  // Power of two that corresponds to the most significant set bit of n.
  const int top_bit_value = 1 << (31 - __clz(n));
  return (top_bit_value == n) ? (top_bit_value >> 1) : top_bit_value;
}

// Host helper: rounds n up to the nearest power of two (a power of two maps to
// itself; 0 maps to 0 through unsigned wrap-around).
__host__ __forceinline__ int h_next_pow2(unsigned int n) {
    unsigned int bits = n - 1;
    // Smear the highest set bit into every lower position.
    for (int shift = 1; shift <= 16; shift <<= 1) {
        bits |= (bits >> shift);
    }
    return bits + 1;
}

// Host helper: rounds n down to the nearest power of two (a power of two maps
// to itself; 0 maps to 0).
__host__ __forceinline__ int h_last_pow2(unsigned int n) {
    unsigned int bits = n;
    // Smear the highest set bit into every lower position.
    for (int shift = 1; shift <= 16; shift <<= 1) {
        bits |= (bits >> shift);
    }
    // Drop everything but the highest set bit.
    return bits - (bits >> 1);
}


#define WARP_SIZE 32

// Warp-level sum using shuffle-down with a full-warp mask; lane 0 ends up
// holding the sum of all 32 lanes' inputs.
template<typename T>
__device__ __forceinline__ T warp_reduce_sum(T val)
{
  #pragma unroll
  for(int distance = WARP_SIZE >> 1; distance >= 1; distance /= 2)
  {
    val = val + __shfl_down_sync(0xffffffff, val, distance);
  }
  return val;
}

// Block-wide sum of `val` over a (possibly 2D) thread block.
//   x   - scratch array with at least one slot per warp of the block
//   val - this thread's contribution
// The complete sum is returned by thread 0 only; other threads return
// intermediate values. Contains __syncthreads() when the block has more than
// 32 threads, so every thread of the block must call it.
template<typename T>
__device__ __forceinline__ T reduce_block(T *x, T val)
{
  int tid = threadIdx.y*blockDim.x + threadIdx.x;
  int blockSize = blockDim.x * blockDim.y;

  if (blockSize > 32) {
    // Stage 1: every warp reduces its own values; lane 0 publishes the result.
    val = warp_reduce_sum(val);
    if (tid % WARP_SIZE == 0)
      x[tid/WARP_SIZE] = val;

    __syncthreads();

    // Stage 2 input: one per-warp sum for each of the first (blockSize / 32)
    // threads, zero for everybody else.
    val = (tid < blockSize / WARP_SIZE? x[tid%WARP_SIZE] : T(0));
  }

  // Only the first warp performs the final reduction.
  if(tid/WARP_SIZE==0) val = warp_reduce_sum(val);

  return val;
}

#define ELEMENTS_PER_ITER 4 // enables concurrency within each thread to hide latency
#define ELEMENTS_PER_THREAD 16
#define OPTIMAL_TILE_W 32
#define MAX_H_BLOCK 128
#define MAX_BLOCK_SIZE 512

// Computes ceil(x / y) as 1 + (x-1)/y and then rounds that quotient DOWN to a
// power of two with h_last_pow2.
// NOTE(review): despite the name ("divide, round up") the result can be
// smaller than ceil(x / y), e.g. div_ru(96, 32) == 2. Callers that need full
// coverage of x should confirm this is what they want.
__host__ int div_ru(int x, int y) {
  return h_last_pow2(1 + (x-1)/y);
}

// Chooses block and grid dimensions for kernels that walk a 2D (reduction,
// stride) problem with x along `stride` and y along `reduction`.
//   reduction - extent of the reduced axis (reduction_size in the c_last kernels)
//   stride    - extent of the contiguous axis (the channel count in the c_last kernels)
//   block     - [out] block.x * block.y never exceeds MAX_BLOCK_SIZE; block.z = 1
//   grid      - [out] grid.x covers `stride`; grid.y is capped at MAX_H_BLOCK; grid.z = 1
//   coop_flag - when true, grid.y is collapsed to 1 unless it is at least 8
__host__ void flexible_launch_configs(
      const int reduction,
      const int stride,
      dim3 &block,
      dim3 &grid,
      const bool coop_flag = false) {
  // Start from a tile that is OPTIMAL_TILE_W wide (or narrower for small
  // strides) and give the remaining threads to the reduction axis.
  int block_x = std::min(h_last_pow2(stride), OPTIMAL_TILE_W);
  int block_y = std::min(h_last_pow2(div_ru(reduction , ELEMENTS_PER_THREAD)),
                         MAX_BLOCK_SIZE / block_x);
  // If the reduction axis could not use its share, widen the block along x.
  if (block_x * block_y != MAX_BLOCK_SIZE) {
    block_x = std::min(h_last_pow2(stride), MAX_BLOCK_SIZE / block_y);
  }

  // The c_last kernels map exactly one thread to each position along `stride`
  // (c_offset = blockIdx.x * blockDim.x + threadIdx.x) and never loop over
  // that axis, so grid.x has to be a true ceiling division. div_ru() rounds
  // its quotient down to a power of two, which left the trailing channels
  // without a thread whenever ceil(stride / block_x) was not a power of two
  // (e.g. stride = 96 with block_x = 32 produced grid.x = 2, i.e. 64 channels).
  int grid_x = 1 + (stride - 1) / block_x;
  // Along the reduction axis the kernels loop, so the power-of-two rounding
  // done by div_ru() only affects how much work each block gets.
  int grid_y = std::min(div_ru(reduction, block_y * ELEMENTS_PER_THREAD), MAX_H_BLOCK);
  if (coop_flag) {
    // it's not worth having a grid reduction if the reduction dimension is not big enough
    grid_y = grid_y < 8 ? 1 : grid_y;
  }

  block.x = block_x;
  block.y = block_y;
  block.z = 1;
  grid.x = grid_x;
  grid.y = grid_y;
  grid.z = 1;
}

// Merges one partial Welford result (num_new, mean_new, m2n_new) into the
// running accumulator (count, mean, m2n), updating the accumulator in place.
//   count / num_new  - number of samples behind each partial result
//   mean  / mean_new - partial means
//   m2n   / m2n_new  - partial sums of squared deviations from the mean
template<typename T, typename C>
__device__ __forceinline__ void welford_merge_element(C& count,
                                                      T& mean,
                                                      T& m2n,
                                                      const C& num_new,
                                                      const T& mean_new,
                                                      const T& m2n_new) {
      // max(1, ...) keeps the division well-defined when both sides are empty.
      T factor = T(1.0) / max(1, (count + num_new));
      T delta0 = mean - mean_new;
      // Count-weighted average of the two means.
      mean = (mean_new * num_new + mean * count) * factor;
      // Sum of both m2n terms plus the correction for the difference in means.
      m2n += m2n_new + delta0 * delta0 * num_new * count * factor;
      count += num_new;
}

// Warp-level Welford reduction: repeatedly pulls the (count, mean, m2n) triple
// from the lane `distance` positions higher and merges it into this lane's
// triple. After the loop lane 0 holds the merged statistics of the whole warp.
template<typename T>
__device__ __forceinline__ void warp_reduce_mean_m2n(T &mean, T &m2n, int &num)
{
  #pragma unroll
  for(int distance = WARP_SIZE >> 1; distance >= 1; distance /= 2) {
    const int peer_num = __shfl_down_sync(0xffffffff, num, distance);
    const T peer_mean = __shfl_down_sync(0xffffffff, mean, distance);
    const T peer_m2n = __shfl_down_sync(0xffffffff, m2n, distance);
    welford_merge_element(num, mean, m2n, peer_num, peer_mean, peer_m2n);
  }
}

// Block-wide Welford reduction of (mean, m2n, num).
//   x          - scratch for per-warp results, two T slots per warp
//                (x[2*w] = mean, x[2*w+1] = m2n)
//   count      - scratch for per-warp sample counts, one int per warp
//   mean, m2n, num - in: this thread's partial statistics;
//                    out: for thread 0, the statistics of the whole block
//   block_size - number of threads in the block
//   thread_id  - flattened thread index within the block
// Contains __syncthreads() when block_size > 32, so every thread of the block
// must call it.
template <typename T>
__device__ void welford_reduce_mean_m2n(
      T* __restrict__ x,
      int* __restrict__ count,
      T &mean,
      T &m2n,
      int &num,
      int block_size,
      int thread_id)
{
  int lane = thread_id % WARP_SIZE;
  int wid = thread_id / WARP_SIZE;

  if (block_size > 32) {
    // Stage 1: reduce inside each warp and publish the result from lane 0.
    warp_reduce_mean_m2n(mean, m2n, num);
    if (lane == 0) {
      x[wid*2] = mean;
      x[wid*2+1] = m2n;
      count[wid] = num;
    }
    __syncthreads();

    // Stage 2 input: the first warp picks up one per-warp result per lane;
    // lanes beyond the number of warps contribute an empty (zero) triple.
    if (wid == 0) {
      mean = (thread_id < block_size / WARP_SIZE)? x[lane*2] : T(0);
      m2n = (thread_id < block_size / WARP_SIZE)? x[lane*2+1] : T(0);
      num = (thread_id < block_size / WARP_SIZE)? count[lane] : int(0);
    }
  }

  // Final reduction is done by the first warp only.
  if (wid==0) warp_reduce_mean_m2n(mean, m2n, num);

  return;
}

// Returns the spatial size of an NC+ tensor, i.e. the product of all
// dimensions after the first two (batch and channel).
// A tensor without any trailing dimensions (e.g. a plain (N, C) input) has a
// spatial size of 1; previously this case indexed dimension 2 unconditionally
// and raised a dimension-out-of-range error.
__host__ int get_tensor_spatial_size(const at::Tensor& input)
{
  int64_t space_size = 1;
  for (int64_t i = 2; i < input.ndimension(); i++) {
    space_size *= input.size(i);
  }
  // Callers consume the spatial size as a plain int.
  return static_cast<int>(space_size);
}

// Returns the scalar type to accumulate in for `input`: half is promoted to
// float, every other type is returned unchanged.
__host__ at::ScalarType promote_scalartype(const at::Tensor& input)
{
  const at::ScalarType dtype = input.scalar_type();
  if (dtype == at::ScalarType::Half) {
    return at::ScalarType::Float;
  }
  return dtype;
}

// Returns the size in bytes of one element of `input`. When `accumulation` is
// true the size of the promoted accumulation type is returned instead
// (see promote_scalartype).
__host__ size_t get_element_data_size(const at::Tensor& input, bool accumulation = false)
{
  if (accumulation) {
    return at::elementSize(promote_scalartype(input));
  }
  return at::elementSize(input.scalar_type());
}

// Merges Welford statistics along threadIdx.y, separately for every
// threadIdx.x column of the block. After the call the threads with
// threadIdx.y == 0 hold the merged (count, mean, m2n) of their column.
//   count, mean, m2n - in: this thread's partial statistics; out: merged value
//                      (complete only for threadIdx.y == 0)
//   shmem_count / shmem_mean / shmem_m2n - scratch arrays with one slot per
//                      thread of the block, indexed by the flattened thread id
// Contains __syncthreads(), so every thread of the block must call it.
template<typename T, typename C>
__device__ __forceinline__ void welford_merge_block_vertical(C& count,
                                                             T& mean,
                                                             T& m2n,
                                                             C* shmem_count,
                                                             T* shmem_mean,
                                                             T* shmem_m2n) {
  // write to shared memory
  auto address_base = threadIdx.x + threadIdx.y * blockDim.x;
  shmem_mean[address_base] = mean;
  shmem_m2n[address_base] = m2n;
  shmem_count[address_base] = count;

  // Halve the number of active rows each step; row y merges row y + offset.
#pragma unroll
  for (int offset = blockDim.y/2; offset > 0; offset >>= 1) {
    // Makes the writes of the previous step (or the initial store) visible.
    __syncthreads();
    if (threadIdx.y < offset && threadIdx.y + offset < blockDim.y) {
      auto address = address_base + offset * blockDim.x;
      // read shared memory back to register for reduction
      auto num_new = shmem_count[address];
      auto mean_new = shmem_mean[address];
      auto m2n_new = shmem_m2n[address];

      welford_merge_element(count, mean, m2n, num_new, mean_new, m2n_new);

      // last write is not necessary
      shmem_mean[address_base] = mean;
      shmem_m2n[address_base] = m2n;
      shmem_count[address_base] = count;
    }
  }
}

// Sums sum_dy and sum_dy_xmu along threadIdx.y, separately for every
// threadIdx.x column of the block. After the call the threads with
// threadIdx.y == 0 hold the column totals.
//   sum_dy, sum_dy_xmu - in: this thread's partial sums; out: merged value
//                        (complete only for threadIdx.y == 0)
//   shmem_sum_dy / shmem_sum_dy_xmu - scratch arrays with one slot per thread
//                        of the block, indexed by the flattened thread id
// Contains __syncthreads(), so every thread of the block must call it.
template<typename T>
__device__ __forceinline__ void merge_block_vertical(T& sum_dy,
                                                     T& sum_dy_xmu,
                                                     T* shmem_sum_dy,
                                                     T* shmem_sum_dy_xmu) {
  // write to shared memory
  auto address_base = threadIdx.x + threadIdx.y * blockDim.x;
  shmem_sum_dy[address_base] = sum_dy;
  shmem_sum_dy_xmu[address_base] = sum_dy_xmu;

  // Halve the number of active rows each step; row y adds row y + offset.
#pragma unroll
  for (int offset = blockDim.y/2; offset > 0; offset >>= 1) {
    // Makes the writes of the previous step (or the initial store) visible.
    __syncthreads();
    if (threadIdx.y < offset && threadIdx.y + offset < blockDim.y) {
      auto address = address_base + offset * blockDim.x;

      sum_dy += shmem_sum_dy[address];
      sum_dy_xmu += shmem_sum_dy_xmu[address];

      // last write is not necessary
      shmem_sum_dy[address_base] = sum_dy;
      shmem_sum_dy_xmu[address_base] = sum_dy_xmu;
    }
  }
}


// Welford kernel calculating the per-feature mean and biased variance.
// One block handles one feature (blockIdx.x); threadIdx.y strides over the
// batch and threadIdx.x strides over the spatial positions.
//   input          - read at blockIdx.x*ss + batch_id*ss*fs + offset
//   out_mean       - [out] one mean per block
//   out_var_biased - [out] one biased variance (m2n / count) per block
//   bs, fs, ss     - batch size, feature count and spatial size used for indexing
template <typename scalar_t, typename accscalar_t, typename outscalar_t>
__global__ void welford_kernel(
      const scalar_t* __restrict__ input,
      outscalar_t* __restrict__ out_mean,
      outscalar_t* __restrict__ out_var_biased,
      const int bs,
      const int fs,
      const int ss) {
  int block_size = blockDim.x * blockDim.y;
  int count = 0;
  accscalar_t x_mean = accscalar_t(0);
  accscalar_t m_2_n = accscalar_t(0);

  int thread_id = threadIdx.y*blockDim.x + threadIdx.x;

  for (int batch_id = threadIdx.y; batch_id < bs; batch_id += blockDim.y) {
    int input_base = blockIdx.x*ss + batch_id*ss*fs;
    // sequential welford
    for (int offset = threadIdx.x; offset < ss ; offset += blockDim.x) {
      count++;
      auto x_n = static_cast<accscalar_t>(input[offset+input_base]);
      auto d = x_n - x_mean;
      x_mean += d / count;
      m_2_n += d * (x_n - x_mean);
    }
  }

  // Scratch for the block reduction: the first 32 ints hold per-warp counts,
  // the remaining 128 ints are reinterpreted as accscalar_t and hold the
  // per-warp (mean, m2n) pairs.
  static __shared__ int s_mem[160];
  accscalar_t* s_mem_ac = (accscalar_t*) &s_mem[32];

  welford_reduce_mean_m2n<accscalar_t>(s_mem_ac, s_mem, x_mean, m_2_n, count, block_size, thread_id);

  // Thread 0 holds the block-wide result.
  if (thread_id == 0) {
    out_mean[blockIdx.x] = static_cast<outscalar_t>(x_mean);
    out_var_biased[blockIdx.x] = static_cast<outscalar_t>(m_2_n/count);
  }
}

// Elementwise BN forward kernel:
//   out = weight * (input - mean) * inv_std + shift
// computed in accscalar_t and stored as scalar_t.
// blockIdx.x selects the feature (gridDim.x is used as the feature count when
// indexing); the y dimension strides over the batch and the x/z dimensions
// stride over the spatial positions.
//   weight / shift - may be NULL, in which case 1 and 0 are used respectively
//   ss, bs         - spatial size and batch size used for indexing
template <typename scalar_t, typename accscalar_t, typename layerscalar_t>
__global__ void batchnorm_forward_kernel(
      const scalar_t* __restrict__ input,
      const accscalar_t* __restrict__ mean,
      const accscalar_t* __restrict__ inv_std,
      const layerscalar_t* __restrict__ weight,
      const layerscalar_t* __restrict__ shift,
      scalar_t* __restrict__ out,
      const int ss,
      const int bs) {
  // Per-feature constants, loaded once per thread.
  auto m_c = mean[blockIdx.x];
  auto inv_std_c = inv_std[blockIdx.x];
  auto w_c = weight == NULL ? accscalar_t(1.0) : static_cast<accscalar_t>(weight[blockIdx.x]);
  auto s_c = shift == NULL ? accscalar_t(0.0) : static_cast<accscalar_t>(shift[blockIdx.x]);

  for (int batch_offset = blockIdx.y*blockDim.y + threadIdx.y; batch_offset < bs; batch_offset += gridDim.y*blockDim.y) {
    int address_base = blockIdx.x*ss + batch_offset*gridDim.x*ss;
    for (int offset = threadIdx.x + blockIdx.z*blockDim.x; offset < ss ; offset+= gridDim.z*blockDim.x) {
      out[address_base+offset] = static_cast<scalar_t>(w_c * (static_cast<accscalar_t>(input[address_base+offset]) - m_c ) * inv_std_c + s_c);
    }
  }
}

// Backward BN kernel: calculates grad_bias and grad_weight as well as the
// intermediate results needed to calculate grad_input.
// The grad_input computation is split into two steps to support sync BN,
// which requires an all-reduce of the intermediate results across processes.
// One block handles one feature (blockIdx.x).
//   mean_dy      - [out] sum(grad_output) / (bs * ss)
//   mean_dy_xmu  - [out] sum(grad_output * (input - mean)) / (bs * ss)
//   grad_weight  - [out, may be NULL] sum(grad_output * (input - mean)) * inv_std
//   grad_bias    - [out, may be NULL] sum(grad_output)
//   bs, fs, ss   - batch size, feature count and spatial size used for indexing
template <typename scalar_t, typename accscalar_t, typename layerscalar_t>
__global__ void reduce_bn_kernel(
      const scalar_t* __restrict__ input,
      const scalar_t* __restrict__ grad_output,
      const accscalar_t* __restrict__ mean,
      const accscalar_t* __restrict__ inv_std,
      accscalar_t* __restrict__ mean_dy,
      accscalar_t* __restrict__ mean_dy_xmu,
      layerscalar_t* __restrict__ grad_weight,
      layerscalar_t* __restrict__ grad_bias,
      const int bs,
      const int fs,
      const int ss) {
  // Scratch for reduce_block, reinterpreted as accscalar_t below.
  static __shared__ int s_mem[64];
  int total_item_num = bs * ss;

  int thread_id = threadIdx.y*blockDim.x + threadIdx.x;

  auto r_mean = mean[blockIdx.x];
  auto factor = inv_std[blockIdx.x];

  // Kahan sum
  // (*_c variables carry the running compensation term of each sum)
  accscalar_t sum_dy = 0.0;
  accscalar_t sum_dy_xmu = 0.0;
  accscalar_t sum_dy_c = 0.0;
  accscalar_t sum_dy_xmu_c = 0.0;
  for (int batch_id = threadIdx.y; batch_id < bs; batch_id += blockDim.y) {
    int input_base = blockIdx.x*ss + batch_id*ss*fs;
    for (int offset = threadIdx.x; offset < ss ; offset += blockDim.x) {
      auto e_grad = static_cast<accscalar_t>(grad_output[offset+input_base]);
      auto e_input = static_cast<accscalar_t>(input[offset+input_base]);
      // calculating sum_dy
      auto sum_dy_y = e_grad - sum_dy_c;
      auto sum_dy_t = sum_dy + sum_dy_y;
      sum_dy_c = (sum_dy_t - sum_dy) - sum_dy_y;
      sum_dy = sum_dy_t;

      // calculating sum_dy_xmu
      auto sum_dy_xmu_y = e_grad * (e_input - r_mean) - sum_dy_xmu_c;
      auto sum_dy_xmu_t = sum_dy_xmu + sum_dy_xmu_y;
      sum_dy_xmu_c = (sum_dy_xmu_t - sum_dy_xmu) - sum_dy_xmu_y;
      sum_dy_xmu = sum_dy_xmu_t;
    }
  }

  // Both reductions share s_mem, hence the barrier between them.
  sum_dy = reduce_block((accscalar_t*)s_mem, sum_dy);
  __syncthreads();
  sum_dy_xmu = reduce_block((accscalar_t*)s_mem, sum_dy_xmu);

  // Thread 0 holds the block-wide sums.
  if (thread_id == 0) {
    if (grad_bias != NULL) {
      grad_bias[blockIdx.x] = static_cast<layerscalar_t>(sum_dy);
    }
    if (grad_weight != NULL) {
      grad_weight[blockIdx.x] = static_cast<layerscalar_t>(sum_dy_xmu * factor);
    }
    mean_dy[blockIdx.x] = sum_dy / total_item_num;
    mean_dy_xmu[blockIdx.x] = sum_dy_xmu / total_item_num;
  }
}

// Elementwise backward BN kernel:
//   grad_input = (grad_output - mean_dy
//                 - (input - mean) * inv_std^2 * mean_dy_xmu) * weight * inv_std
// blockIdx.x selects the feature (gridDim.x is used as the feature count when
// indexing); the y dimension strides over the batch and the x/z dimensions
// stride over the spatial positions.
//   weight - may be NULL, in which case 1 is used
//   ss, bs - spatial size and batch size used for indexing
template <typename scalar_t, typename accscalar_t, typename layerscalar_t>
__global__ void batchnorm_backward_kernel(
      const scalar_t* __restrict__ grad_output,
      const scalar_t* __restrict__ input,
      const accscalar_t* __restrict__ mean,
      const accscalar_t* __restrict__ inv_std,
      const layerscalar_t* __restrict__ weight,
      const accscalar_t* __restrict__ mean_dy,
      const accscalar_t* __restrict__ mean_dy_xmu,
      scalar_t* __restrict__ grad_input,
      const int ss,
      const int bs) {
  auto m_c = static_cast<accscalar_t>(mean[blockIdx.x]);
  auto m_dy_c = static_cast<accscalar_t>(mean_dy[blockIdx.x]);
  // factor_1_c starts as inv_std; factor_2_c = weight * inv_std.
  auto factor_1_c = inv_std[blockIdx.x];
  auto factor_2_c = (weight == NULL ? accscalar_t(1.0) : static_cast<accscalar_t>(weight[blockIdx.x])) * factor_1_c;
  // factor_1_c becomes inv_std^2 * mean_dy_xmu.
  factor_1_c = factor_1_c * factor_1_c * mean_dy_xmu[blockIdx.x];

  for (int batch_offset = blockIdx.y*blockDim.y+threadIdx.y; batch_offset < bs; batch_offset += gridDim.y*blockDim.y) {
    int address_base = blockIdx.x*ss + batch_offset*gridDim.x*ss;
    for (int offset = threadIdx.x + blockIdx.z*blockDim.x; offset < ss ; offset+= gridDim.z*blockDim.x) {
      grad_input[address_base+offset] = (static_cast<accscalar_t>(grad_output[address_base+offset]) - m_dy_c - (static_cast<accscalar_t>(input[address_base+offset]) - m_c) * factor_1_c) * factor_2_c;
    }
  }
}

// Welford kernel for channel-last tensors, calculating the per-channel mean
// and biased variance. The input is addressed as a 2D (m, c) array with
// element (m, c) at input[m * stride + c].
//   input          - values to reduce
//   out_mean       - [out] one mean per channel
//   out_var_biased - [out] one biased variance (m2n / count) per channel
//   staging_data   - global scratch used only when gridDim.y > 1; laid out as
//                    stride*gridDim.y means, then stride*gridDim.y m2n values,
//                    then the counts reinterpreted as int
//   semaphores     - one counter per blockIdx.x, used only when gridDim.y > 1
//                    (assumed to be zero at launch - TODO confirm at the call site)
//   reduction_size - extent of the m axis
//   stride         - extent of the c axis
// PARALLEL_LOADS independent accumulators are kept per thread to hide latency.
template
   <typename scalar_t,
    typename accscalar_t,
    typename outscalar_t,
    int PARALLEL_LOADS>
__global__ void
welford_kernel_c_last(
      const scalar_t* __restrict__ input,
      outscalar_t* __restrict__ out_mean,
      outscalar_t* __restrict__ out_var_biased,
      volatile accscalar_t* staging_data,
      int* semaphores,
      const int reduction_size,
      const int stride) {
  // hide latency with concurrency
  accscalar_t x_mean[PARALLEL_LOADS];
  accscalar_t m_2_n[PARALLEL_LOADS];
  int count[PARALLEL_LOADS];

#pragma unroll
  for (int i = 0; i < PARALLEL_LOADS; i++) {
    x_mean[i] = accscalar_t(0);
    m_2_n[i] = accscalar_t(0);
    count[i] = accscalar_t(0);
  }
  // tensor dimension (m,c)

  // loop along m dimension
  int inner_loop_stride = blockDim.y * gridDim.y;

  // offset along m dimension
  int m_offset = blockIdx.y * blockDim.y + threadIdx.y;
  int c_offset = blockIdx.x * blockDim.x + threadIdx.x;

  // Number of outer iterations needed so that PARALLEL_LOADS rows per
  // iteration cover the whole m axis.
  int loop_count = 1 + (reduction_size - 1) / (inner_loop_stride * PARALLEL_LOADS);
  int address_base = m_offset * stride + c_offset;
  int address_increment = inner_loop_stride * stride;

  for (int i = 0; i < loop_count; i++) {
    accscalar_t x_math[PARALLEL_LOADS];
    accscalar_t x_count_inv[PARALLEL_LOADS];
    accscalar_t is_valid[PARALLEL_LOADS];

    // load multiple data in
#pragma unroll
    for (int j = 0; j < PARALLEL_LOADS; j++) {
      if (c_offset < stride && m_offset < reduction_size) {
        x_math[j] = input[address_base];
        count[j]++;
        x_count_inv[j] = accscalar_t(1) / count[j];
        is_valid[j] = accscalar_t(1);
      } else {
        // Out-of-range slot: zeros make the update below a no-op.
        x_math[j] = accscalar_t(0);
        x_count_inv[j] = accscalar_t(0);
        is_valid[j] = accscalar_t(0);
      }
      m_offset += inner_loop_stride;
      address_base += address_increment;
    }

    // calculate mean/m2n with welford
#pragma unroll
    for (int j = 0; j < PARALLEL_LOADS; j++) {
      accscalar_t delta0 = x_math[j] - x_mean[j];
      x_mean[j] += delta0 * x_count_inv[j];
      accscalar_t delta1 = x_math[j] - x_mean[j];
      m_2_n[j] += delta0 * delta1 * is_valid[j];
    }
  }

  // thread reduction to accumulate mean/m_2_n/count between PARALLEL_LOADS
#pragma unroll
  for (int j = 1; j < PARALLEL_LOADS; j++) {
    welford_merge_element(count[0], x_mean[0], m_2_n[0], count[j], x_mean[j], m_2_n[j]);
  }

  // release x_mean / m_2_n
  auto mean_th = x_mean[0];
  auto m2_th = m_2_n[0];
  auto count_th = count[0];

  // block-wise reduction with shared memory (since reduction cannot be done within a warp)
  static __shared__ accscalar_t shmem_mean[MAX_BLOCK_SIZE];
  static __shared__ accscalar_t shmem_m2n[MAX_BLOCK_SIZE];
  static __shared__ int shmem_count[MAX_BLOCK_SIZE];

  welford_merge_block_vertical(count_th, mean_th, m2_th, shmem_count, shmem_mean, shmem_m2n);

  // grid reduction if needed (coop launch used at the first place)
  if (gridDim.y > 1) {
    volatile accscalar_t* staging_mean = staging_data;
    volatile accscalar_t* staging_m2n = &staging_data[stride*gridDim.y];
    volatile int* staging_count = reinterpret_cast<volatile int*>(&staging_m2n[stride*gridDim.y]);

    address_base = c_offset + blockIdx.y * stride;
    // write data to staging_data;
    if (threadIdx.y == 0 && c_offset < stride) {
      staging_mean[address_base] = mean_th;
      staging_m2n[address_base] = m2_th;
      staging_count[address_base] = count_th;
    }

    __threadfence();
    __syncthreads(); // ensuring writes to staging_ is visible to all blocks

    __shared__ bool is_last_block_done;
    // mark block done
    // The block that observes gridDim.y-1 earlier arrivals is the last one in
    // its column of blocks and performs the final merge.
    if (threadIdx.x == 0 && threadIdx.y == 0) {
      int old = atomicAdd(&semaphores[blockIdx.x], 1);
      is_last_block_done = (old == (gridDim.y-1));
    }

    __syncthreads();

    // check that all data is now available in global memory
    if (is_last_block_done) {
      count_th = 0;
      mean_th = accscalar_t(0.0);
      m2_th = accscalar_t(0.0);

      // Each thread merges the staged results of the blocks y, y + blockDim.y, ...
      for (int y = threadIdx.y; y < gridDim.y; y += blockDim.y) {
        address_base = c_offset + y * stride;
        int num_new = c_offset < stride ? staging_count[address_base] : 0;
        accscalar_t mean_new = c_offset < stride ? staging_mean[address_base] : accscalar_t(0.0);
        accscalar_t m2n_new = c_offset < stride ? staging_m2n[address_base] : accscalar_t(0.0);

        welford_merge_element(count_th, mean_th, m2_th, num_new, mean_new, m2n_new);
      }

      welford_merge_block_vertical(count_th, mean_th, m2_th, shmem_count, shmem_mean, shmem_m2n);
      if (threadIdx.y == 0 && c_offset < stride) {
        out_mean[c_offset] = static_cast<outscalar_t>(mean_th);
        out_var_biased[c_offset] = static_cast<outscalar_t>(m2_th / count_th);
      }
    }
  } else {
    // Single block along y: the block-level result is already final.
    if (blockIdx.y == 0 && threadIdx.y == 0 && c_offset < stride) {
      out_mean[c_offset] = static_cast<outscalar_t>(mean_th);
      out_var_biased[c_offset] = static_cast<outscalar_t>(m2_th / count_th);
    }
  }
}

// Cross-process Welford reduction kernel.
// Merges the per-process mean / biased_var of each feature into a single
// mean / unbiased_var / inv_std.
//
// `mean` and `var_biased` are read at [process * feature_size + feature] for
// process in [0, world_size); every entry is merged with a sample count of
// `numel`.
template <typename scalar_t>
__global__ void welford_kernel_parallel(
      const scalar_t* __restrict__ mean,
      const scalar_t* __restrict__ var_biased,
      scalar_t* __restrict__ out_mean,
      scalar_t* __restrict__ out_var,
      scalar_t* __restrict__ inv_std,
      const int world_size,
      const int feature_size,
      const float eps,
      const int numel) {

  const int first_feature = blockIdx.x * blockDim.x + threadIdx.x;
  const int feature_step = gridDim.x * blockDim.x;

  // each thread walks the features assigned to it, one full launch width apart
  for (int feature = first_feature; feature < feature_size; feature += feature_step) {
    // merged statistics accumulated for this feature
    int merged_count = 0;
    scalar_t merged_mean = 0;
    scalar_t merged_m2n = 0;

    for (int rank = 0; rank < world_size; rank++) {
      const int src = feature + rank * feature_size;
      // var_biased * numel is handed over as the m2n term of the incoming partial result
      welford_merge_element(merged_count, merged_mean, merged_m2n, numel, mean[src], var_biased[src]*numel);
    }

    out_mean[feature] = merged_mean;
    // unbiased variance divides by (count - 1); inv_std uses the biased variance plus eps
    out_var[feature] = merged_m2n/ (merged_count - 1);
    inv_std[feature] = scalar_t(1) / sqrt(merged_m2n/merged_count + eps);
  }
}

// elementwise BN kernel (channels-last layout)
//
// Treats the tensor as a 2-D (m, c) array with `stride` channels per row and
// `reduction_size` rows, and writes
//   out = weight * (input - mean) * inv_std + shift
// per element. `weight` / `shift` may be NULL, in which case 1 / 0 are used.
//
// Thread mapping: x indexes the channel, y (block and thread) indexes the row;
// every thread handles up to loop_count * PARALLEL_LOADS rows, spaced
// blockDim.y * gridDim.y apart.
template <
    typename scalar_t,
    typename accscalar_t,
    typename layerscalar_t,
    int PARALLEL_LOADS>
__global__ void batchnorm_forward_c_last_kernel(
      const scalar_t* __restrict__ input,
      const accscalar_t* __restrict__ mean,
      const accscalar_t* __restrict__ inv_std,
      const layerscalar_t* __restrict__ weight,
      const layerscalar_t* __restrict__ shift,
      scalar_t* __restrict__ out,
      const int reduction_size,
      const int stride) {
  // tensor dimension (m,c)
  // loop along m dimension
  int inner_loop_stride = blockDim.y * gridDim.y;

  // offset along m dimension
  int m_offset = blockIdx.y * blockDim.y + threadIdx.y;
  int c_offset = blockIdx.x * blockDim.x + threadIdx.x;

  // Threads past the last channel have no work. Leave before touching the
  // per-channel arrays so that mean / inv_std / weight / shift are never read
  // out of bounds. (Safe: this kernel contains no block-level synchronization.)
  if (c_offset >= stride) {
    return;
  }

  auto m_c = mean[c_offset];
  auto inv_std_c = static_cast<accscalar_t>(inv_std[c_offset]);
  auto w_c = weight == NULL ? accscalar_t(1.0) : static_cast<accscalar_t>(weight[c_offset]);
  auto s_c = shift == NULL ? accscalar_t(0.0) : static_cast<accscalar_t>(shift[c_offset]);

  int loop_count = 1 + (reduction_size - 1) / (inner_loop_stride * PARALLEL_LOADS);
  int address_base = m_offset * stride + c_offset;
  int address_increment = inner_loop_stride * stride;

  for (int i = 0; i < loop_count; i++) {
#pragma unroll
    for (int j = 0; j < PARALLEL_LOADS; j++) {
      // the channel bound was already checked above; only the row bound remains
      if (m_offset < reduction_size) {
        out[address_base] = static_cast<scalar_t>(
            w_c * (static_cast<accscalar_t>(input[address_base]) - m_c ) * inv_std_c + s_c
          );
      }
      m_offset += inner_loop_stride;
      address_base += address_increment;
    }
  }
}

// batchnorm backward reduction kernel for c last tensor
//
// Treats the tensor as a 2-D (m, c) array with `stride` channels per row and
// `reduction_size` rows. For every channel it reduces over the rows:
//   sum_dy     = sum(grad_output)
//   sum_dy_xmu = sum(grad_output * (input - mean))
// and writes
//   grad_bias   = sum_dy                 (if grad_bias != NULL)
//   grad_weight = sum_dy_xmu * inv_std   (if grad_weight != NULL)
//   mean_dy     = sum_dy / reduction_size
//   mean_dy_xmu = sum_dy_xmu / reduction_size
//
// When gridDim.y > 1 the per-block partial sums are written to `staging_data`
// (sum_dy in the first stride*gridDim.y entries, sum_dy_xmu in the next
// stride*gridDim.y entries) and `semaphores[blockIdx.x]` is used to detect the
// last block of each column of blocks, which performs the final reduction.
template
   <typename scalar_t,
    typename accscalar_t,
    typename layerscalar_t,
    int PARALLEL_LOADS>
__global__ void reduce_bn_c_last_kernel(
      const scalar_t* __restrict__ input,
      const scalar_t* __restrict__ grad_output,
      const accscalar_t* __restrict__ mean,
      const accscalar_t* __restrict__ inv_std,
      accscalar_t* __restrict__ mean_dy,
      accscalar_t* __restrict__ mean_dy_xmu,
      layerscalar_t* __restrict__ grad_weight,
      layerscalar_t* __restrict__ grad_bias,
      volatile accscalar_t* staging_data,
      int* semaphores,
      const int reduction_size,
      const int stride) {

  // hide latency with concurrency
  accscalar_t sum_dy[PARALLEL_LOADS];
  accscalar_t sum_dy_xmu[PARALLEL_LOADS];

#pragma unroll
  for (int i = 0; i < PARALLEL_LOADS; i++) {
    sum_dy[i] = accscalar_t(0);
    sum_dy_xmu[i] = accscalar_t(0);
  }
  // tensor dimension (m,c)

  // loop along m dimension
  int inner_loop_stride = blockDim.y * gridDim.y;

  // offset along m dimension
  int m_offset = blockIdx.y * blockDim.y + threadIdx.y;
  int c_offset = blockIdx.x * blockDim.x + threadIdx.x;

  int loop_count = 1 + (reduction_size - 1) / (inner_loop_stride * PARALLEL_LOADS);
  int address_base = m_offset * stride + c_offset;
  int address_increment = inner_loop_stride * stride;

  // Threads past the last channel must stay alive for the block-level
  // synchronization below, so they cannot return early. Instead, guard the
  // per-channel loads so mean / inv_std are never read out of bounds. The
  // substituted zeros are never observable: such threads only accumulate
  // zero-valued gradients and never write any output.
  const bool c_in_range = c_offset < stride;
  auto r_mean = c_in_range ? mean[c_offset] : accscalar_t(0);
  auto factor = c_in_range ? inv_std[c_offset] : accscalar_t(0);

  for (int i = 0; i < loop_count; i++) {
    accscalar_t x_input[PARALLEL_LOADS];
    accscalar_t x_grad_output[PARALLEL_LOADS];

    // load multiple data in
#pragma unroll
    for (int j = 0; j < PARALLEL_LOADS; j++) {
      if (c_offset < stride && m_offset < reduction_size) {
        x_input[j] = input[address_base];
        x_grad_output[j] = grad_output[address_base];
      } else {
        // out-of-range elements contribute nothing to either sum
        x_input[j] = accscalar_t(0);
        x_grad_output[j] = accscalar_t(0);
      }
      m_offset += inner_loop_stride;
      address_base += address_increment;
    }

    // calculate sum_dy / sum_dy_xmu
#pragma unroll
    for (int j = 0; j < PARALLEL_LOADS; j++) {
      sum_dy[j] += x_grad_output[j];
      sum_dy_xmu[j] += x_grad_output[j] * (x_input[j] - r_mean);
    }
  }

  // thread reduction to accumulate sum_dy / sum_dy_xmu between PARALLEL_LOADS
#pragma unroll
  for (int j = 1; j < PARALLEL_LOADS; j++) {
    sum_dy[0] += sum_dy[j];
    sum_dy_xmu[0] += sum_dy_xmu[j];
  }

  // release array of registers
  auto sum_dy_th = sum_dy[0];
  auto sum_dy_xmu_th = sum_dy_xmu[0];

  // block-wise reduction with shared memory (since reduction cannot be done within a warp)
  static __shared__ accscalar_t shmem_sum_dy[MAX_BLOCK_SIZE];
  static __shared__ accscalar_t shmem_sum_dy_xmu[MAX_BLOCK_SIZE];

  merge_block_vertical(sum_dy_th, sum_dy_xmu_th, shmem_sum_dy, shmem_sum_dy_xmu);

  // grid reduction if needed (coop launch used at the first place)
  if (gridDim.y > 1) {
    // staging layout: [sum_dy: stride*gridDim.y][sum_dy_xmu: stride*gridDim.y]
    volatile accscalar_t* staging_sum_dy = staging_data;
    volatile accscalar_t* staging_sum_dy_xmu = &staging_data[stride*gridDim.y];

    address_base = c_offset + blockIdx.y * stride;
    // write data to staging_data;
    if (threadIdx.y == 0 && c_offset < stride) {
      staging_sum_dy[address_base] = sum_dy_th;
      staging_sum_dy_xmu[address_base] = sum_dy_xmu_th;
    }

    __threadfence();
    __syncthreads(); // ensuring writes to staging_ is visible to all blocks

    __shared__ bool is_last_block_done;
    // mark block done
    if (threadIdx.x == 0 && threadIdx.y == 0) {
      // the block that observes gridDim.y-1 earlier arrivals is the last one
      int old = atomicAdd(&semaphores[blockIdx.x], 1);
      is_last_block_done = (old == (gridDim.y-1));
    }

    __syncthreads();

    // check that all data is now available in global memory
    if (is_last_block_done) {
      sum_dy_th = accscalar_t(0.0);
      sum_dy_xmu_th = accscalar_t(0.0);

      // gather the partial sums written by every block along y
      for (int y = threadIdx.y; y < gridDim.y; y += blockDim.y) {
        address_base = c_offset + y * stride;
        sum_dy_th += (c_offset < stride ? staging_sum_dy[address_base] : accscalar_t(0.0));
        sum_dy_xmu_th += (c_offset < stride ? staging_sum_dy_xmu[address_base] : accscalar_t(0.0));
      }

      merge_block_vertical(sum_dy_th, sum_dy_xmu_th, shmem_sum_dy, shmem_sum_dy_xmu);
      if (threadIdx.y == 0 && c_offset < stride) {
        if (grad_bias != NULL) {
          grad_bias[c_offset] = static_cast<layerscalar_t>(sum_dy_th);
        }
        if (grad_weight != NULL) {
          grad_weight[c_offset] = static_cast<layerscalar_t>(sum_dy_xmu_th * factor);
        }
        mean_dy[c_offset] = sum_dy_th / reduction_size;
        mean_dy_xmu[c_offset] = sum_dy_xmu_th / reduction_size;
      }
    }
  } else {
    // single block along y: the block-level result is already final
    if (blockIdx.y == 0 && threadIdx.y == 0 && c_offset < stride) {
      if (grad_bias != NULL) {
        grad_bias[c_offset] = static_cast<layerscalar_t>(sum_dy_th);
      }
      if (grad_weight != NULL) {
        grad_weight[c_offset] = static_cast<layerscalar_t>(sum_dy_xmu_th * factor);
      }
      mean_dy[c_offset] = sum_dy_th / reduction_size;
      mean_dy_xmu[c_offset] = sum_dy_xmu_th / reduction_size;
    }
  }
}

// elementwise BN backward kernel (channels-last layout)
//
// Treats the tensor as a 2-D (m, c) array with `stride` channels per row and
// `reduction_size` rows, and writes per element
//   grad_input = (grad_output - mean_dy
//                 - (input - mean) * inv_std^2 * mean_dy_xmu)
//                * weight * inv_std
// `weight` may be NULL, in which case 1 is used.
//
// Thread mapping: x indexes the channel, y (block and thread) indexes the row;
// every thread handles up to loop_count * PARALLEL_LOADS rows, spaced
// blockDim.y * gridDim.y apart.
template <
    typename scalar_t,
    typename accscalar_t,
    typename layerscalar_t,
    int PARALLEL_LOADS>
__global__ void batchnorm_backward_c_last_kernel(
      const scalar_t* __restrict__ grad_output,
      const scalar_t* __restrict__ input,
      const accscalar_t* __restrict__ mean,
      const accscalar_t* __restrict__ inv_std,
      const layerscalar_t* __restrict__ weight,
      const accscalar_t* __restrict__ mean_dy,
      const accscalar_t* __restrict__ mean_dy_xmu,
      scalar_t* __restrict__ grad_input,
      const int reduction_size,
      const int stride) {
  // tensor dimension (m,c)
  // loop along m dimension
  int inner_loop_stride = blockDim.y * gridDim.y;

  // offset along m dimension
  int m_offset = blockIdx.y * blockDim.y + threadIdx.y;
  int c_offset = blockIdx.x * blockDim.x + threadIdx.x;

  // Threads past the last channel have no work. Leave before touching the
  // per-channel arrays so that mean / mean_dy / inv_std / weight / mean_dy_xmu
  // are never read out of bounds. (Safe: this kernel contains no block-level
  // synchronization.)
  if (c_offset >= stride) {
    return;
  }

  auto m_c = mean[c_offset];
  auto m_dy_c = mean_dy[c_offset];
  auto factor_1_c = inv_std[c_offset];
  // factor_2_c = weight * inv_std (computed before factor_1_c is overwritten)
  auto factor_2_c = (weight == NULL? accscalar_t(1.0) : static_cast<accscalar_t>(weight[c_offset])) * factor_1_c;
  // factor_1_c = inv_std^2 * mean_dy_xmu
  factor_1_c = factor_1_c * factor_1_c * mean_dy_xmu[c_offset];

  int loop_count = 1 + (reduction_size - 1) / (inner_loop_stride * PARALLEL_LOADS);
  int address_base = m_offset * stride + c_offset;
  int address_increment = inner_loop_stride * stride;

  for (int i = 0; i < loop_count; i++) {
#pragma unroll
    for (int j = 0; j < PARALLEL_LOADS; j++) {
      // the channel bound was already checked above; only the row bound remains
      if (m_offset < reduction_size) {
        grad_input[address_base] = static_cast<scalar_t>(
            (static_cast<accscalar_t>(grad_output[address_base]) - m_dy_c -
            (static_cast<accscalar_t>(input[address_base]) - m_c) * factor_1_c)
            * factor_2_c);
      }
      m_offset += inner_loop_stride;
      address_base += address_increment;
    }
  }
}

// Host launcher for welford_kernel.
// Takes dim 0 of `input` as the batch and dim 1 as the feature dimension and
// returns {mean, biased variance}, each of length feature_size, stored in the
// dtype chosen by promote_scalartype(input).
std::vector<at::Tensor> welford_mean_var_CUDA(const at::Tensor input) {
  const auto batch_size = input.size(0);
  const auto feature_size = input.size(1);

  auto space_size = get_tensor_spatial_size(input);
  auto scalar_type = promote_scalartype(input);

  // both outputs share the input's options, except for the promoted dtype
  const auto stat_options = input.options().dtype(scalar_type);
  at::Tensor out_var_biased = at::empty({feature_size}, stat_options);
  at::Tensor out_mean = at::empty({feature_size}, stat_options);

  // one block per feature; threads are laid out as (spatial, batch)
  int threads_y = min(h_last_pow2(batch_size), int(MAX_BLOCK_SIZE / 32));
  int threads_x = max(1, min(MAX_BLOCK_SIZE / threads_y, h_last_pow2(space_size)));
  const dim3 block(threads_x, threads_y);
  const dim3 grid(feature_size);

  auto stream = at::cuda::getCurrentCUDAStream();

  {
    using namespace at;
    DISPATCH_FLOAT_AND_HALF(input.scalar_type(), 0, "welford_mean_var_kernel",
      using accscalar_t = at::acc_type<scalar_t_0, true>;
      const scalar_t_0* input_ptr = input.data<scalar_t_0>();
      accscalar_t* mean_ptr = out_mean.data<accscalar_t>();
      accscalar_t* var_ptr = out_var_biased.data<accscalar_t>();
      welford_kernel<scalar_t_0, accscalar_t, accscalar_t><<<grid, block, 0, stream>>>(
          input_ptr,
          mean_ptr,
          var_ptr,
          batch_size,
          feature_size,
          space_size);
    );
  }

  return {out_mean, out_var_biased};
}

// Host launcher for batchnorm_forward_kernel.
// Takes dim 0 of `input` as the batch and dim 1 as the feature dimension and
// returns a tensor shaped like `input`. `weight` / `shift` are optional; a
// missing one is passed to the kernel as NULL.
//
// Two instantiations are supported: half input with float weight, or weight
// of the same dtype as the input (enforced with AT_CHECK).
at::Tensor batchnorm_forward_CUDA(
    const at::Tensor input,
    const at::Tensor mean,
    const at::Tensor inv_std,
    const at::optional<at::Tensor> weight,
    const at::optional<at::Tensor> shift) {
  const auto batch_size = input.size(0);
  const auto feature_size = input.size(1);
  at::Tensor out = at::empty_like(input);

  auto space_size = get_tensor_spatial_size(input);

  // launch geometry: x -> feature, y -> batch groups, z -> spatial tiles
  int threads_x = max(32, min(MAX_BLOCK_SIZE, h_last_pow2(space_size)/4));
  int threads_y = max(1, min(MAX_BLOCK_SIZE/threads_x, h_last_pow2(batch_size)/4));
  const dim3 block(threads_x, threads_y);
  int blocks_z = max(1, min(65535, h_last_pow2(space_size)/4/threads_x));
  int blocks_y = max(1, min(65535, h_last_pow2(batch_size)/threads_y));
  const dim3 grid(feature_size, blocks_y, blocks_z);
  auto stream = at::cuda::getCurrentCUDAStream();

  const bool half_input_float_weight =
      input.scalar_type() == at::ScalarType::Half
      && weight.has_value()
      && weight.value().scalar_type() == at::ScalarType::Float;

  if (half_input_float_weight) {
    // mixed precision: layer parameters are read in the accumulation type
    using namespace at;
    DISPATCH_FLOAT_AND_HALF(input.scalar_type(), 0, "batchnorm_forward",
      using accscalar_t = at::acc_type<scalar_t_0, true>;
      const accscalar_t* weight_ptr = weight.has_value() ? weight.value().data<accscalar_t>() : NULL;
      const accscalar_t* shift_ptr = shift.has_value() ? shift.value().data<accscalar_t>() : NULL;
      batchnorm_forward_kernel<scalar_t_0, accscalar_t, accscalar_t><<<grid, block, 0, stream>>>(
          input.data<scalar_t_0>(),
          mean.data<accscalar_t>(),
          inv_std.data<accscalar_t>(),
          weight_ptr,
          shift_ptr,
          out.data<scalar_t_0>(),
          space_size,
          batch_size);
    );
  } else {
    // uniform precision: a present weight must match the input dtype
    if (weight.has_value()) {
      AT_CHECK(input.scalar_type() == weight.value().scalar_type(),
          "input.scalar_type() is not supported with weight.scalar_type()");
    }
    using namespace at;
    DISPATCH_FLOAT_AND_HALF(input.scalar_type(), 0, "batchnorm_forward",
      using accscalar_t = at::acc_type<scalar_t_0, true>;
      const scalar_t_0* weight_ptr = weight.has_value() ? weight.value().data<scalar_t_0>() : NULL;
      const scalar_t_0* shift_ptr = shift.has_value() ? shift.value().data<scalar_t_0>() : NULL;
      batchnorm_forward_kernel<scalar_t_0, accscalar_t, scalar_t_0><<<grid, block, 0, stream>>>(
          input.data<scalar_t_0>(),
          mean.data<accscalar_t>(),
          inv_std.data<accscalar_t>(),
          weight_ptr,
          shift_ptr,
          out.data<scalar_t_0>(),
          space_size,
          batch_size);
    );
  }
  return out;
}

// Host launcher for reduce_bn_kernel (backward reduction).
// Takes dim 0 of `input` as the batch and dim 1 as the feature dimension and
// returns {mean_dy, mean_dy_xmu, grad_weight, grad_bias}. Without a weight,
// grad_weight / grad_bias are zero-sized tensors and the kernel receives NULL
// for both.
//
// Two instantiations are supported: half input with float weight, or weight
// of the same dtype as the input (enforced with AT_CHECK).
std::vector<at::Tensor> reduce_bn_CUDA(
    const at::Tensor grad_output,
    const at::Tensor input,
    const at::Tensor mean,
    const at::Tensor inv_std,
    const at::optional<at::Tensor> weight)
{
  const auto batch_size = input.size(0);
  const auto feature_size = input.size(1);

  // NOTE(review): scalar_type is not referenced anywhere below
  auto scalar_type = promote_scalartype(input);

  // the reduced statistics use the same options as `mean`
  const auto stat_options = mean.options();
  at::Tensor mean_dy = at::empty({feature_size}, stat_options);
  at::Tensor mean_dy_xmu = at::empty({feature_size}, stat_options);

  const bool has_weight = weight.has_value();
  at::Tensor grad_weight;
  at::Tensor grad_bias;
  if (!has_weight) {
    // zero-sized placeholders so that the returned vector holds defined tensors
    grad_weight = at::empty({0}, stat_options);
    grad_bias = at::empty({0}, stat_options);
  } else {
    grad_weight = at::empty({feature_size}, weight.value().options());
    grad_bias = at::empty({feature_size}, weight.value().options());
  }

  auto space_size = get_tensor_spatial_size(input);

  // one block per feature; threads are laid out as (spatial, batch)
  int threads_y = min(h_last_pow2(batch_size), int(MAX_BLOCK_SIZE/ 32));
  int threads_x = max(1, min(MAX_BLOCK_SIZE/ threads_y, h_last_pow2(space_size)));
  const dim3 block(threads_x, threads_y);
  const dim3 grid(feature_size);
  auto stream = at::cuda::getCurrentCUDAStream();

  const bool half_input_float_weight =
      input.scalar_type() == at::ScalarType::Half
      && has_weight
      && weight.value().scalar_type() == at::ScalarType::Float;

  if (half_input_float_weight) {
    // mixed precision: parameter gradients are written in the accumulation type
    using namespace at;
    DISPATCH_FLOAT_AND_HALF(input.scalar_type(), 0, "batchnorm_backward_reduce",
      using accscalar_t = at::acc_type<scalar_t_0, true>;
      accscalar_t* grad_weight_ptr = weight.has_value() ? grad_weight.data<accscalar_t>() : NULL;
      accscalar_t* grad_bias_ptr = weight.has_value() ? grad_bias.data<accscalar_t>() : NULL;
      reduce_bn_kernel<scalar_t_0, accscalar_t, accscalar_t><<<grid, block, 0, stream>>>(
          input.data<scalar_t_0>(),
          grad_output.data<scalar_t_0>(),
          mean.data<accscalar_t>(),
          inv_std.data<accscalar_t>(),
          mean_dy.data<accscalar_t>(),
          mean_dy_xmu.data<accscalar_t>(),
          grad_weight_ptr,
          grad_bias_ptr,
          batch_size,
          feature_size,
          space_size);
    );
  } else {
    // uniform precision: a present weight must match the input dtype
    if (weight.has_value()) {
        AT_CHECK(input.scalar_type() == weight.value().scalar_type(),
            "input.scalar_type() is not supported with weight.scalar_type()");
    }
    using namespace at;
    DISPATCH_FLOAT_AND_HALF(input.scalar_type(), 0, "batchnorm_backward_reduce",
      using accscalar_t = at::acc_type<scalar_t_0, true>;
      scalar_t_0* grad_weight_ptr = weight.has_value() ? grad_weight.data<scalar_t_0>() : NULL;
      scalar_t_0* grad_bias_ptr = weight.has_value() ? grad_bias.data<scalar_t_0>() : NULL;
      reduce_bn_kernel<scalar_t_0, accscalar_t, scalar_t_0><<<grid, block, 0, stream>>>(
          input.data<scalar_t_0>(),
          grad_output.data<scalar_t_0>(),
          mean.data<accscalar_t>(),
          inv_std.data<accscalar_t>(),
          mean_dy.data<accscalar_t>(),
          mean_dy_xmu.data<accscalar_t>(),
          grad_weight_ptr,
          grad_bias_ptr,
          batch_size,
          feature_size,
          space_size);
    );
  }

  return {mean_dy, mean_dy_xmu, grad_weight, grad_bias};
}

// Host launcher for batchnorm_backward_kernel (elementwise input gradient).
// Takes dim 0 of `input` as the batch and dim 1 as the feature dimension and
// returns grad_input, shaped like `input`. A missing `weight` is passed to the
// kernel as NULL.
//
// Two instantiations are supported: half input with float weight, or weight
// of the same dtype as the input (enforced with AT_CHECK).
at::Tensor batchnorm_backward_CUDA(
    const at::Tensor grad_output,
    const at::Tensor input,
    const at::Tensor mean,
    const at::Tensor inv_std,
    const at::optional<at::Tensor> weight,
    const at::Tensor mean_dy,
    const at::Tensor mean_dy_xmu) {
  const auto batch_size = input.size(0);
  const auto feature_size = input.size(1);

  at::Tensor grad_input = at::empty_like(input);

  auto space_size = get_tensor_spatial_size(input);

  // launch geometry: x -> feature, y -> batch groups, z -> spatial tiles
  int threads_x = max(32, min(MAX_BLOCK_SIZE, h_last_pow2(space_size)/4));
  int threads_y = max(1, min(MAX_BLOCK_SIZE/threads_x, h_last_pow2(batch_size)/4));
  const dim3 block(threads_x, threads_y);
  int blocks_z = max(1, min(65535, h_last_pow2(space_size)/4/threads_x));
  int blocks_y = max(1, min(65535, h_last_pow2(batch_size)/threads_y));
  const dim3 grid(feature_size, blocks_y, blocks_z);

  auto stream = at::cuda::getCurrentCUDAStream();

  const bool half_input_float_weight =
      input.scalar_type() == at::ScalarType::Half
      && weight.has_value()
      && weight.value().scalar_type() == at::ScalarType::Float;

  if (half_input_float_weight) {
    // mixed precision: the weight is read in the accumulation type
    using namespace at;
    DISPATCH_FLOAT_AND_HALF(input.scalar_type(), 0, "batchnorm_backward",
      using accscalar_t = at::acc_type<scalar_t_0, true>;
      const accscalar_t* weight_ptr = weight.has_value() ? weight.value().data<accscalar_t>() : NULL;
      batchnorm_backward_kernel<scalar_t_0, accscalar_t, accscalar_t><<<grid, block, 0, stream>>>(
          grad_output.data<scalar_t_0>(),
          input.data<scalar_t_0>(),
          mean.data<accscalar_t>(),
          inv_std.data<accscalar_t>(),
          weight_ptr,
          mean_dy.data<accscalar_t>(),
          mean_dy_xmu.data<accscalar_t>(),
          grad_input.data<scalar_t_0>(),
          space_size,
          batch_size);
    );
  } else {
    // uniform precision: a present weight must match the input dtype
    if (weight.has_value()) {
      AT_CHECK(input.scalar_type() == weight.value().scalar_type(),
          "input.scalar_type() is not supported with weight.scalar_type()");
    }
    using namespace at;
    DISPATCH_FLOAT_AND_HALF(input.scalar_type(), 0, "batchnorm_backward",
      using accscalar_t = at::acc_type<scalar_t_0, true>;
      const scalar_t_0* weight_ptr = weight.has_value() ? weight.value().data<scalar_t_0>() : NULL;
      batchnorm_backward_kernel<scalar_t_0, accscalar_t, scalar_t_0><<<grid, block, 0, stream>>>(
          grad_output.data<scalar_t_0>(),
          input.data<scalar_t_0>(),
          mean.data<accscalar_t>(),
          inv_std.data<accscalar_t>(),
          weight_ptr,
          mean_dy.data<accscalar_t>(),
          mean_dy_xmu.data<accscalar_t>(),
          grad_input.data<scalar_t_0>(),
          space_size,
          batch_size);
    );
  }

  return grad_input;
}

// Host launcher for welford_kernel_parallel.
// `mean_feature_nodes` is read as (world_size, feature_size); `var_biased` is
// passed alongside it, and `numel` is the sample count applied to every entry.
// Returns {mean, unbiased variance, inv_std}, each of length feature_size and
// created with the options of `var_biased`.
std::vector<at::Tensor> welford_parallel_CUDA(const at::Tensor mean_feature_nodes,
                                              const at::Tensor var_biased,
                                              int numel,
                                              const float eps) {
  const auto world_size = mean_feature_nodes.size(0);
  const auto feature_size = mean_feature_nodes.size(1);

  at::Tensor out_var = at::empty({feature_size}, var_biased.options());
  at::Tensor inv_std = at::empty_like(out_var);
  at::Tensor out_mean = at::empty_like(out_var);

  // TODO(jie): tile this for memory coalescing!
  // 1-D launch over features; the kernel loops when the grid is smaller than
  // the feature count
  const int threads_per_block = std::min(h_last_pow2(feature_size), MAX_BLOCK_SIZE);
  const int num_blocks = std::max<int>(1, feature_size / threads_per_block);

  auto stream = at::cuda::getCurrentCUDAStream();

  {
    using namespace at;
    DISPATCH_FLOAT_AND_HALF(mean_feature_nodes.scalar_type(), 0, "welford_parallel_kernel",
      const scalar_t_0* mean_ptr = mean_feature_nodes.data<scalar_t_0>();
      const scalar_t_0* var_ptr = var_biased.data<scalar_t_0>();
      welford_kernel_parallel<scalar_t_0><<<num_blocks, threads_per_block, 0, stream>>>(
          mean_ptr,
          var_ptr,
          out_mean.data<scalar_t_0>(),
          out_var.data<scalar_t_0>(),
          inv_std.data<scalar_t_0>(),
          world_size,
          feature_size,
          eps,
          numel);
    );
  }

  return {out_mean, out_var, inv_std};
}

// Host launcher for welford_kernel_c_last (channels-last layout).
// The last dimension of `input` is the channel dimension (`stride`); all other
// dimensions are flattened into `reduction_size`. Returns
// {mean, biased variance}, each of length `stride`, in the dtype chosen by
// promote_scalartype(input).
std::vector<at::Tensor> welford_mean_var_c_last_CUDA(const at::Tensor input) {
  const auto stride = input.size(input.ndimension()-1);
  const auto reduction_size = input.numel() / stride;

  auto scalar_type = promote_scalartype(input);
  auto option = input.options().dtype(scalar_type);

  at::Tensor out_var_biased = at::empty({stride}, option);
  at::Tensor out_mean = at::empty({stride}, option);

  dim3 block;
  dim3 grid;
  flexible_launch_configs(reduction_size, stride, block, grid, true);

  // Scratch space is only needed when several blocks along y cooperate on the
  // same channels: a staging buffer for their partial results and one zeroed
  // counter per block column.
  const bool multi_block_reduce = grid.y > 1;
  at::Tensor staging_data;
  at::Tensor semaphores;
  if (multi_block_reduce) {
    staging_data = at::empty({4*stride*grid.y}, option);
    semaphores = at::zeros({grid.x}, input.options().dtype(at::kInt));
  }

  auto stream = at::cuda::getCurrentCUDAStream();

  {
    using namespace at;
    DISPATCH_FLOAT_AND_HALF(input.scalar_type(), 0, "welford_mean_var_c_last",
      using accscalar_t = at::acc_type<scalar_t_0, true>;
      accscalar_t* staging_data_ptr = nullptr;
      int* semaphores_ptr = nullptr;
      if (grid.y > 1) {
        staging_data_ptr = staging_data.data<accscalar_t>();
        semaphores_ptr = semaphores.data<int>();
      }
      welford_kernel_c_last<scalar_t_0, accscalar_t, accscalar_t, ELEMENTS_PER_ITER>
          <<<grid, block, 0, stream>>>(
          input.data<scalar_t_0>(),
          out_mean.data<accscalar_t>(),
          out_var_biased.data<accscalar_t>(),
          staging_data_ptr,
          semaphores_ptr,
          reduction_size,
          stride);
    );
  }

  return {out_mean, out_var_biased};
}

// Host launcher for batchnorm_forward_c_last_kernel (channels-last layout).
// The last dimension of `input` is the channel dimension (`stride`); all other
// dimensions are flattened into `reduction_size`. Returns a tensor shaped like
// `input`. A missing `weight` / `shift` is passed to the kernel as NULL.
//
// Two instantiations are supported: half input with float weight, or weight
// of the same dtype as the input (enforced with AT_CHECK).
at::Tensor batchnorm_forward_c_last_CUDA(
    const at::Tensor input,
    const at::Tensor mean,
    const at::Tensor inv_std,
    const at::optional<at::Tensor> weight,
    const at::optional<at::Tensor> shift) {
  const auto stride = input.size(input.ndimension()-1);
  const auto reduction_size = input.numel() / stride;

  at::Tensor out = at::empty_like(input);

  dim3 block;
  dim3 grid;
  flexible_launch_configs(reduction_size, stride, block, grid);

  auto stream = at::cuda::getCurrentCUDAStream();

  const bool half_input_float_weight =
      input.scalar_type() == at::ScalarType::Half
      && weight.has_value()
      && weight.value().scalar_type() == at::ScalarType::Float;

  if (half_input_float_weight) {
    // mixed precision: layer parameters are read in the accumulation type
    using namespace at;
    DISPATCH_FLOAT_AND_HALF(input.scalar_type(), 0, "batchnorm_forward",
      using accscalar_t = at::acc_type<scalar_t_0, true>;
      const accscalar_t* weight_ptr = weight.has_value() ? weight.value().data<accscalar_t>() : NULL;
      const accscalar_t* shift_ptr = shift.has_value() ? shift.value().data<accscalar_t>(): NULL;
      batchnorm_forward_c_last_kernel<scalar_t_0, accscalar_t, accscalar_t, ELEMENTS_PER_ITER>
          <<<grid, block, 0, stream>>>(
          input.data<scalar_t_0>(),
          mean.data<accscalar_t>(),
          inv_std.data<accscalar_t>(),
          weight_ptr,
          shift_ptr,
          out.data<scalar_t_0>(),
          reduction_size,
          stride);
    );
  } else {
    // uniform precision: a present weight must match the input dtype
    if (weight.has_value()) {
      AT_CHECK(input.scalar_type() == weight.value().scalar_type(),
          "input.scalar_type() is not supported with weight.scalar_type()");
    }
    using namespace at;
    DISPATCH_FLOAT_AND_HALF(input.scalar_type(), 0, "batchnorm_forward",
      using accscalar_t = at::acc_type<scalar_t_0, true>;
      const scalar_t_0* weight_ptr = weight.has_value() ? weight.value().data<scalar_t_0>() : NULL;
      const scalar_t_0* shift_ptr = shift.has_value() ? shift.value().data<scalar_t_0>(): NULL;
      batchnorm_forward_c_last_kernel<scalar_t_0, accscalar_t, scalar_t_0, ELEMENTS_PER_ITER>
          <<<grid, block, 0, stream>>>(
          input.data<scalar_t_0>(),
          mean.data<accscalar_t>(),
          inv_std.data<accscalar_t>(),
          weight_ptr,
          shift_ptr,
          out.data<scalar_t_0>(),
          reduction_size,
          stride);
    );
  }
  return out;
}

// Host launcher for reduce_bn_c_last_kernel (channels-last backward reduction).
// The last dimension of `input` is the channel dimension (`stride`); all other
// dimensions are flattened into `reduction_size`. Returns
// {mean_dy, mean_dy_xmu, grad_weight, grad_bias}. Without a weight,
// grad_weight / grad_bias are zero-sized tensors and the kernel receives NULL
// for both.
//
// Two instantiations are supported: half input with float weight, or weight
// of the same dtype as the input (enforced with AT_CHECK).
std::vector<at::Tensor> reduce_bn_c_last_CUDA(
    const at::Tensor grad_output,
    const at::Tensor input,
    const at::Tensor mean,
    const at::Tensor inv_std,
    const at::optional<at::Tensor> weight) {
  const auto stride = input.size(input.ndimension()-1);
  const auto reduction_size = input.numel() / stride;

  // the reduced statistics use the same options as `mean`
  const auto stat_options = mean.options();
  at::Tensor mean_dy = at::empty({stride}, stat_options);
  at::Tensor mean_dy_xmu = at::empty({stride}, stat_options);

  const bool has_weight = weight.has_value();
  at::Tensor grad_weight;
  at::Tensor grad_bias;
  if (!has_weight) {
    // because I cannot return an uninitialized at::Tensor
    grad_weight = at::empty({0}, stat_options);
    grad_bias = at::empty({0}, stat_options);
  } else {
    grad_weight = at::empty({stride}, weight.value().options());
    grad_bias = at::empty({stride}, weight.value().options());
  }

  dim3 block;
  dim3 grid;
  flexible_launch_configs(reduction_size, stride, block, grid, true);

  // Scratch space is only needed when several blocks along y cooperate on the
  // same channels: two staged partial sums per (block row, channel) and one
  // zeroed counter per block column.
  at::Tensor staging_data;
  at::Tensor semaphores;
  if (grid.y > 1) {
    staging_data = at::empty({2*stride*grid.y}, stat_options);
    semaphores = at::zeros({grid.x}, input.options().dtype(at::kInt));
  }
  auto stream = at::cuda::getCurrentCUDAStream();

  const bool half_input_float_weight =
      input.scalar_type() == at::ScalarType::Half
      && has_weight
      && weight.value().scalar_type() == at::ScalarType::Float;

  if (half_input_float_weight) {
    // mixed precision: parameter gradients are written in the accumulation type
    using namespace at;
    DISPATCH_FLOAT_AND_HALF(input.scalar_type(), 0, "batchnorm_backward_reduce",
      using accscalar_t = at::acc_type<scalar_t_0, true>;
      accscalar_t* staging_data_ptr = grid.y > 1 ? staging_data.data<accscalar_t>() : nullptr;
      int* semaphores_ptr = grid.y > 1 ? semaphores.data<int>() : nullptr;
      accscalar_t* grad_weight_ptr = weight.has_value() ? grad_weight.data<accscalar_t>() : NULL;
      accscalar_t* grad_bias_ptr = weight.has_value() ?grad_bias.data<accscalar_t>() : NULL;
      reduce_bn_c_last_kernel<scalar_t_0, accscalar_t, accscalar_t, ELEMENTS_PER_ITER>
          <<<grid, block, 0, stream>>>(
          input.data<scalar_t_0>(),
          grad_output.data<scalar_t_0>(),
          mean.data<accscalar_t>(),
          inv_std.data<accscalar_t>(),
          mean_dy.data<accscalar_t>(),
          mean_dy_xmu.data<accscalar_t>(),
          grad_weight_ptr,
          grad_bias_ptr,
          staging_data_ptr,
          semaphores_ptr,
          reduction_size,
          stride);
    );
  } else {
    // uniform precision: a present weight must match the input dtype
    if (weight.has_value()) {
      AT_CHECK(input.scalar_type() == weight.value().scalar_type(),
          "input.scalar_type() is not supported with weight.scalar_type()");
    }
    using namespace at;
    DISPATCH_FLOAT_AND_HALF(input.scalar_type(), 0, "batchnorm_backward_reduce",
      using accscalar_t = at::acc_type<scalar_t_0, true>;
      accscalar_t* staging_data_ptr = grid.y > 1 ? staging_data.data<accscalar_t>() : nullptr;
      int* semaphores_ptr = grid.y > 1 ? semaphores.data<int>() : nullptr;
      scalar_t_0* grad_weight_ptr = weight.has_value() ? grad_weight.data<scalar_t_0>() : NULL;
      scalar_t_0* grad_bias_ptr = weight.has_value() ?grad_bias.data<scalar_t_0>() : NULL;
      reduce_bn_c_last_kernel<scalar_t_0, accscalar_t, scalar_t_0, ELEMENTS_PER_ITER>
          <<<grid, block, 0, stream>>>(
          input.data<scalar_t_0>(),
          grad_output.data<scalar_t_0>(),
          mean.data<accscalar_t>(),
          inv_std.data<accscalar_t>(),
          mean_dy.data<accscalar_t>(),
          mean_dy_xmu.data<accscalar_t>(),
          grad_weight_ptr,
          grad_bias_ptr,
          staging_data_ptr,
          semaphores_ptr,
          reduction_size,
          stride);
    );
  }

  return {mean_dy, mean_dy_xmu, grad_weight, grad_bias};
}

// Host launcher for batchnorm_backward_c_last_kernel (channels-last layout,
// elementwise input gradient).
// The last dimension of `input` is the channel dimension (`stride`); all other
// dimensions are flattened into `reduction_size`. Returns grad_input, shaped
// like `input`. A missing `weight` is passed to the kernel as NULL.
//
// Two instantiations are supported: half input with float weight, or weight
// of the same dtype as the input (enforced with AT_CHECK).
at::Tensor batchnorm_backward_c_last_CUDA(
    const at::Tensor grad_output,
    const at::Tensor input,
    const at::Tensor mean,
    const at::Tensor inv_std,
    const at::optional<at::Tensor> weight,
    const at::Tensor mean_dy,
    const at::Tensor mean_dy_xmu) {
  const auto stride = input.size(input.ndimension()-1);
  const auto reduction_size = input.numel() / stride;

  at::Tensor grad_input = at::empty_like(input);

  dim3 block;
  dim3 grid;
  flexible_launch_configs(reduction_size, stride, block, grid);

  auto stream = at::cuda::getCurrentCUDAStream();

  if (input.scalar_type() == at::ScalarType::Half
      && weight.has_value() && weight.value().scalar_type() == at::ScalarType::Float) {
    // mixed precision: the weight is read in the accumulation type
    using namespace at;
    // The dispatch name labels this call in dispatch diagnostics; it must say
    // "backward" so that errors are not attributed to the forward pass.
    DISPATCH_FLOAT_AND_HALF(input.scalar_type(), 0, "batchnorm_backward",
      using accscalar_t = at::acc_type<scalar_t_0, true>;
      batchnorm_backward_c_last_kernel<scalar_t_0, accscalar_t, accscalar_t, ELEMENTS_PER_ITER>
          <<<grid, block, 0, stream>>>(
          grad_output.data<scalar_t_0>(),
          input.data<scalar_t_0>(),
          mean.data<accscalar_t>(),
          inv_std.data<accscalar_t>(),
          weight.has_value() ? weight.value().data<accscalar_t>() : NULL,
          mean_dy.data<accscalar_t>(),
          mean_dy_xmu.data<accscalar_t>(),
          grad_input.data<scalar_t_0>(),
          reduction_size,
          stride);
    );
  } else {
    // uniform precision: a present weight must match the input dtype
    if (weight.has_value()) {
      AT_CHECK(input.scalar_type() == weight.value().scalar_type(),
          "input.scalar_type() is not supported with weight.scalar_type()");
    }
    using namespace at;
    DISPATCH_FLOAT_AND_HALF(input.scalar_type(), 0, "batchnorm_backward",
      using accscalar_t = at::acc_type<scalar_t_0, true>;
      batchnorm_backward_c_last_kernel<scalar_t_0, accscalar_t, scalar_t_0, ELEMENTS_PER_ITER>
          <<<grid, block, 0, stream>>>(
          grad_output.data<scalar_t_0>(),
          input.data<scalar_t_0>(),
          mean.data<accscalar_t>(),
          inv_std.data<accscalar_t>(),
          weight.has_value() ? weight.value().data<scalar_t_0>() : NULL,
          mean_dy.data<accscalar_t>(),
          mean_dy_xmu.data<accscalar_t>(),
          grad_input.data<scalar_t_0>(),
          reduction_size,
          stride);
    );
  }

  return grad_input;
}


================================================
FILE: apex/docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
SPHINXPROJ    = NVIDIAAPEX
SOURCEDIR     = source
BUILDDIR      = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# Regenerate the HTML documentation from master and publish it on the gh-pages
# branch: switch to gh-pages, pull the current sources from master, build the
# html target, move the output one directory up, then commit, push and switch
# back to master.
# The nested build goes through $(MAKE) so that command-line flags and
# variables are passed on to the sub-make.
gh-pages:
	git checkout gh-pages
	rm -rf build
	rm -rf source
	git checkout master -- .
	$(MAKE) html
	rm -rf ../_modules ../_sources ../_static
	mv -fv build/html/* ../
	rm -rf build
	git add -A
	git commit -m "Generated gh-pages for `git log master -1 --pretty=short --abbrev-commit`" && git push origin gh-pages ; git checkout master

# gh-pages is listed as phony so that a file or directory of that name can
# never make the target look up to date.
.PHONY: help gh-pages Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


================================================
FILE: apex/docs/source/_static/css/pytorch_theme.css
================================================
/* Body text and header-like elements share one font stack
   (default header fonts are ugly) */
body,
h1, h2, .rst-content .toctree-wrapper p.caption, h3, h4, h5, h6, legend, p.caption {
    font-family: "Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;
}

/* Use white for docs background */
.wy-side-nav-search,
.wy-nav-content-wrap,
.wy-menu li.current > a {
    background-color: #fff;
}

/* On wide screens, tint the wrapper and keep the content column white */
@media screen and (min-width: 1400px) {
    .wy-nav-content-wrap {
        background-color: rgba(0, 0, 0, 0.0470588);
    }

    .wy-nav-content {
        background-color: #fff;
    }
}

/* Fixes for mobile */
.wy-nav-top {
    background-color: #fff;
    background-image: url('../img/apex.jpg');
    background-repeat: no-repeat;
    background-position: center;
    padding: 0;
    margin: 0.4045em 0.809em;
    color: #333;
}

.wy-nav-top > a {
    display: none;
}

@media screen and (max-width: 768px) {
    .wy-side-nav-search>a img.logo {
        height: 60px;
    }
}

/* This is needed to ensure that logo above search scales properly */
.wy-side-nav-search a {
    display: block;
}

/* This ensures that multiple constructors will remain in separate lines. */
.rst-content dl:not(.docutils) dt {
    display: table;
}

/* Use our red for literals (it's very similar to the original color) */
.rst-content tt.literal,
.rst-content code.literal {
    color: #F05732;
}

/* Cross-references and code inside links use dark grey */
.rst-content tt.xref,
.rst-content code.xref,
a .rst-content tt,
a .rst-content code {
    color: #404040;
}

/* Change link colors (except for the menu) */
a,
a:hover {
    color: #F05732;
}

a:visited {
    color: #D44D2C;
}

.wy-menu a,
.wy-menu a:hover {
    color: #b3b3b3;
}

/* Default footer text is quite big */
footer {
    font-size: 80%;
}

footer .rst-footer-buttons {
    font-size: 125%; /* revert footer settings - 1/80% = 125% */
}

footer p {
    font-size: 100%;
}

/* For hidden headers that appear in TOC tree */
/* see http://stackoverflow.com/a/32363545/3343043 */
.rst-content .hidden-section {
    display: none;
}

nav .hidden-section {
    display: inherit;
}

.wy-side-nav-search>div.version {
    color: #000;
}


================================================
FILE: apex/docs/source/_templates/layout.html
================================================
{% extends "!layout.html" %}
{# Child of the theme's own layout template. Two blocks are overridden; each
   keeps the parent's output via super() and appends an inline <style>. #}
  {% block sidebartitle %} {{ super() }}

  <style>
    /* Sidebar header (and topbar for mobile) */
    .wy-side-nav-search, .wy-nav-top {
      background: #76b900;
    }

    /* Links inside the header stay white in every state */
    .wy-side-nav-search a:link, .wy-nav-top a:link {
      color: #fff;
    }
    .wy-side-nav-search a:visited, .wy-nav-top a:visited {
      color: #fff;
    }
    .wy-side-nav-search a:hover, .wy-nav-top a:hover {
      color: #fff;
    }

    /* Entries of the vertical navigation menu */
    .wy-menu-vertical a:link, .wy-menu-vertical a:visited {
      color: #d9d9d9
    }

    .wy-menu-vertical a:active {
      background-color: #76b900
    }

    /* Version label under the sidebar title */
    .wy-side-nav-search>div.version {
      color: rgba(0, 0, 0, 0.3)
    }
  </style>
  {% endblock %}

  {% block footer %} {{ super() }}

  <style>
  /* Page-wide link colours */
  a:link, a:visited {
    color: #76b900;
  }

  a:hover {
    color: #8c0;
  }

  /* Definition-list terms outside docutils-classed lists: tinted background
     with a solid top border */
  .rst-content dl:not(.docutils) dt {
    background: rgba(118, 185, 0, 0.1);
    color: rgba(59,93,0,1);
    border-top: solid 3px rgba(59,93,0,1);
  }
  </style>
  {% endblock %}


================================================
FILE: apex/docs/source/advanced.rst
================================================
.. role:: hidden
    :class: hidden-section

Advanced Amp Usage
===================================

GANs
----

GANs are an interesting synthesis of several topics below.  A `comprehensive example`_
is under construction.

.. _`comprehensive example`:
    https://github.com/NVIDIA/apex/tree/master/examples/dcgan

Gradient clipping
-----------------
Amp calls the params owned directly by the optimizer's ``param_groups`` the "master params."

These master params may be fully or partially distinct from ``model.parameters()``.
For example, with `opt_level="O2"`_, ``amp.initialize`` casts most model params to FP16,
creates an FP32 master param outside the model for each newly-FP16 model param,
and updates the optimizer's ``param_groups`` to point to these FP32 params.

The master params owned by the optimizer's ``param_groups`` may also fully coincide with the
model params, which is typically true for ``opt_level``\s ``O0``, ``O1``, and ``O3``.

In all cases, correct practice is to clip the gradients of the params that are guaranteed to be
owned **by the optimizer's** ``param_groups``, instead of those retrieved via ``model.parameters()``.

Also, if Amp uses loss scaling, gradients must be clipped after they have been unscaled
(which occurs during exit from the ``amp.scale_loss`` context manager).

The following pattern should be correct for any ``opt_level``::

    with amp.scale_loss(loss, optimizer) as scaled_loss:
        scaled_loss.backward()
        # Gradients are unscaled during context manager exit.
    # Now it's safe to clip.  Replace
    # torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
    # with
    torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), max_norm)
    # or
    torch.nn.utils.clip_grad_value_(amp.master_params(optimizer), clip_value)

Note the use of the utility function ``amp.master_params(optimizer)``,
which returns a generator-expression that iterates over the
params in the optimizer's ``param_groups``.

Also note that ``clip_grad_norm_(amp.master_params(optimizer), max_norm)`` is invoked
*instead of*, not *in addition to*, ``clip_grad_norm_(model.parameters(), max_norm)``.

.. _`opt_level="O2"`:
    https://nvidia.github.io/apex/amp.html#o2-fast-mixed-precision

Custom/user-defined autograd functions
--------------------------------------

The old Amp API for `registering user functions`_ is still considered correct.  Functions must
be registered before calling ``amp.initialize``.

.. _`registering user functions`:
    https://github.com/NVIDIA/apex/tree/master/apex/amp#annotating-user-functions

Forcing particular layers/functions to a desired type
-----------------------------------------------------

A generalizable interface for this, one that won't require user-side code divergence
across different ``opt_level``\ s, is still under development.

Multiple models/optimizers/losses
---------------------------------

Initialization with multiple models/optimizers
**********************************************

``amp.initialize``'s optimizer argument may be a single optimizer or a list of optimizers,
as long as the output you accept has the same type.
Similarly, the ``model`` argument may be a single model or a list of models, as long as the accepted
output matches.  The following calls are all legal::

    model, optim = amp.initialize(model, optim,...)
    model, [optim0, optim1] = amp.initialize(model, [optim0, optim1],...)
    [model0, model1], optim = amp.initialize([model0, model1], optim,...)
    [model0, model1], [optim0, optim1] = amp.initialize([model0, model1], [optim0, optim1],...)

Backward passes with multiple optimizers
****************************************

Whenever you invoke a backward pass, the ``amp.scale_loss`` context manager must receive
**all the optimizers that own any params for which the current backward pass is creating gradients.**
This is true even if each optimizer owns only some, but not all, of the params that are about to
receive gradients.

If, for a given backward pass, there's only one optimizer whose params are about to receive gradients,
you may pass that optimizer directly to ``amp.scale_loss``.  Otherwise, you must pass the
list of optimizers whose params are about to receive gradients::

    # loss0 accumulates gradients only into params owned by optim0:
    with amp.scale_loss(loss0, optim0) as scaled_loss:
        scaled_loss.backward()

    # loss1 accumulates gradients only into params owned by optim1:
    with amp.scale_loss(loss1, optim1) as scaled_loss:
        scaled_loss.backward()

    # loss2 accumulates gradients into some params owned by optim0
    # and some params owned by optim1
    with amp.scale_loss(loss2, [optim0, optim1]) as scaled_loss:
        scaled_loss.backward()

Optionally have Amp use a different loss scaler per-loss
********************************************************

By default, Amp maintains a single global loss scaler that will be used for all backward passes
(all invocations of ``with amp.scale_loss(...)``).  No additional arguments to ``amp.initialize``
or ``amp.scale_loss`` are required to use the global loss scaler.  The code snippets above with
multiple optimizers/backward passes use the single global loss scaler under the hood,
and they should "just work."

However, you can optionally tell Amp to maintain a loss scaler per-loss, which gives Amp increased
numerical flexibility.  This is accomplished by supplying the ``num_losses`` argument to
``amp.initialize`` (which tells Amp how many backward passes you plan to invoke, and therefore
how many loss scalers Amp should create), then supplying the ``loss_id`` argument to each of your
backward passes (which tells Amp the loss scaler to use for this particular backward pass)::

    model, [optim0, optim1] = amp.initialize(model, [optim0, optim1], ..., num_losses=3)

    with amp.scale_loss(loss0, optim0, loss_id=0) as scaled_loss:
        scaled_loss.backward()

    with amp.scale_loss(loss1, optim1, loss_id=1) as scaled_loss:
        scaled_loss.backward()

    with amp.scale_loss(loss2, [optim0, optim1], loss_id=2) as scaled_loss:
        scaled_loss.backward()

``num_losses`` and ``loss_id``\ s should be specified purely based on the set of
losses/backward passes.  The use of multiple optimizers, or association of single or
multiple optimizers with each backward pass, is unrelated.

Gradient accumulation across iterations
---------------------------------------

The following should "just work," and properly accommodate multiple models/optimizers/losses, as well as
gradient clipping via the `instructions above`_::

    if iter%iters_to_accumulate == 0:
        # Every iters_to_accumulate iterations, unscale and step
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        # Gradient clipping if desired:
        # torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), max_norm)
        optimizer.step()
        optimizer.zero_grad()
    else:
        # Otherwise, accumulate gradients, don't unscale or step.
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()

As a minor performance optimization, you can pass ``delay_unscale=True``
to ``amp.scale_loss`` until you're ready to ``step()``.  You should only attempt ``delay_unscale=True``
if you're sure you know what you're doing, because the interaction with gradient clipping and
multiple models/optimizers/losses can become tricky::

    if iter%iters_to_accumulate == 0:
        # Every iters_to_accumulate iterations, unscale and step
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    else:
        # Otherwise, accumulate gradients, don't unscale or step.
        with amp.scale_loss(loss, optimizer, delay_unscale=True) as scaled_loss:
            scaled_loss.backward()

.. _`instructions above`:
    https://nvidia.github.io/apex/advanced.html#gradient-clipping

Custom data batch types
-----------------------

The intention of Amp is that you never need to cast your input data manually, regardless of
``opt_level``.  Amp accomplishes this by patching any models' ``forward`` methods to cast
incoming data appropriately for the ``opt_level``.  But to cast incoming data,
Amp needs to know how.  The patched ``forward`` will recognize and cast floating-point Tensors
(non-floating-point Tensors like IntTensors are not touched) and
Python containers of floating-point Tensors.  However, if you wrap your Tensors in a custom class,
the casting logic doesn't know how to drill
through the tough custom shell to access and cast the juicy Tensor meat within.  You need to tell
Amp how to cast your custom batch class, by assigning it a ``to`` method that accepts a ``torch.dtype``
(e.g., ``torch.float16`` or ``torch.float32``) and returns an instance of the custom batch cast to
``dtype``.  The patched ``forward`` checks for the presence of your ``to`` method, and will
invoke it with the correct type for the ``opt_level``.

Example::

    class CustomData(object):
        def __init__(self):
            self.tensor = torch.cuda.FloatTensor([1,2,3])

        def to(self, dtype):
            self.tensor = self.tensor.to(dtype)
            return self

.. warning::

    Amp also forwards numpy ndarrays without casting them.  If you send input data as a raw, unwrapped
    ndarray, then later use it to create a Tensor within your ``model.forward``, this Tensor's type will
    not depend on the ``opt_level``, and may or may not be correct.  Users are encouraged to pass
    castable data inputs (Tensors, collections of Tensors, or custom classes with a ``to`` method)
    wherever possible.

.. note::

    Amp does not call ``.cuda()`` on any Tensors for you.  Amp assumes that your original script
    is already set up to move Tensors from the host to the device as needed.


================================================
FILE: apex/docs/source/amp.rst
================================================
.. role:: hidden
    :class: hidden-section

apex.amp
===================================

This page documents the updated API for Amp (Automatic Mixed Precision),
a tool to enable Tensor Core-accelerated training in only 3 lines of Python.

A `runnable, comprehensive Imagenet example`_ demonstrating good practices can be found
on the Github page.

GANs are a tricky case that many people have requested.  A `comprehensive DCGAN example`_
is under construction.

If you already implemented Amp based on the instructions below, but it isn't behaving as expected,
please review `Advanced Amp Usage`_ to see if any topics match your use case.  If that doesn't help,
`file an issue`_.

.. _`file an issue`:
    https://github.com/NVIDIA/apex/issues

``opt_level``\ s and Properties
-------------------------------

Amp allows users to easily experiment with different pure and mixed precision modes.
Commonly-used default modes are chosen by
selecting an "optimization level" or ``opt_level``; each ``opt_level`` establishes a set of
properties that govern Amp's implementation of pure or mixed precision training.
Finer-grained control of how a given ``opt_level`` behaves can be achieved by passing values for
particular properties directly to ``amp.initialize``.  These manually specified values
override the defaults established by the ``opt_level``.

Example::

        # Declare model and optimizer as usual, with default (FP32) precision
        model = torch.nn.Linear(D_in, D_out).cuda()
        optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

        # Allow Amp to perform casts as required by the opt_level
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
        ...
        # loss.backward() becomes:
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        ...

Users **should not** manually cast their model or data to ``.half()``, regardless of what ``opt_level``
or properties are chosen.  Amp intends that users start with an existing default (FP32) script,
add the three lines corresponding to the Amp API, and begin training with mixed precision.
Amp can also be disabled, in which case the original script will behave exactly as it used to.
In this way, there's no risk in adhering to the Amp API, and a lot of potential performance benefit.

.. note::
    Because it's never necessary to manually cast your model (aside from the call to ``amp.initialize``)
    or input data, a script that adheres to the new API
    can switch between different ``opt_level``\ s without having to make any other changes.

.. _`runnable, comprehensive Imagenet example`:
    https://github.com/NVIDIA/apex/tree/master/examples/imagenet

.. _`comprehensive DCGAN example`:
    https://github.com/NVIDIA/apex/tree/master/examples/dcgan

.. _`Advanced Amp Usage`:
    https://nvidia.github.io/apex/advanced.html

Properties
**********

Currently, the under-the-hood properties that govern pure or mixed precision training are the following:

- ``cast_model_type``:  Casts your model's parameters and buffers to the desired type.
- ``patch_torch_functions``: Patch all Torch functions and Tensor methods to perform Tensor Core-friendly ops like GEMMs and convolutions in FP16, and any ops that benefit from FP32 precision in FP32.
- ``keep_batchnorm_fp32``:  To enhance precision and enable cudnn batchnorm (which improves performance), it's often beneficial to keep batchnorm weights in FP32 even if the rest of the model is FP16.
- ``master_weights``:  Maintain FP32 master weights to accompany any FP16 model weights.  FP32 master weights are stepped by the optimizer to enhance precision and capture small gradients.
- ``loss_scale``:  If ``loss_scale`` is a float value, use this value as the static (fixed) loss scale.  If ``loss_scale`` is the string ``"dynamic"``, adaptively adjust the loss scale over time.  Dynamic loss scale adjustments are performed by Amp automatically.

Again, you often don't need to specify these properties by hand.  Instead, select an ``opt_level``,
which will set them up for you.  After selecting an ``opt_level``, you can optionally pass property
kwargs as manual overrides.

If you attempt to override a property that does not make sense for the selected ``opt_level``,
Amp will raise an error with an explanation.  For example, selecting ``opt_level="O1"`` combined with
the override ``master_weights=True`` does not make sense.  ``O1`` inserts casts
around Torch functions rather than model weights.  Data, activations, and weights are recast
out-of-place on the fly as they flow through patched functions.  Therefore, the model weights themselves
can (and should) remain FP32, and there is no need to maintain separate FP32 master weights.

``opt_level``\ s
****************

Recognized ``opt_level``\ s are ``"O0"``, ``"O1"``, ``"O2"``, and ``"O3"``.

``O0`` and ``O3`` are not true mixed precision, but they are useful for establishing accuracy and
speed baselines, respectively.

``O1`` and ``O2`` are different implementations of mixed precision.  Try both, and see
what gives the best speedup and accuracy for your model.

``O0``:  FP32 training
^^^^^^^^^^^^^^^^^^^^^^
Your incoming model should be FP32 already, so this is likely a no-op.
``O0`` can be useful to establish an accuracy baseline.

| Default properties set by ``O0``:
| ``cast_model_type=torch.float32``
| ``patch_torch_functions=False``
| ``keep_batchnorm_fp32=None`` (effectively, "not applicable," everything is FP32)
| ``master_weights=False``
| ``loss_scale=1.0``
|
|

``O1``:  Mixed Precision (recommended for typical use)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Patch all Torch functions and Tensor methods to cast their inputs according to a whitelist-blacklist
model.  Whitelist ops (for example, Tensor Core-friendly ops like GEMMs and convolutions) are performed
in FP16.  Blacklist ops that benefit from FP32 precision (for example, softmax)
are performed in FP32.  ``O1`` also uses dynamic loss scaling, unless overridden.

| Default properties set by ``O1``:
| ``cast_model_type=None`` (not applicable)
| ``patch_torch_functions=True``
| ``keep_batchnorm_fp32=None`` (again, not applicable, all model weights remain FP32)
| ``master_weights=None`` (not applicable, model weights remain FP32)
| ``loss_scale="dynamic"``
|
|

``O2``:  "Almost FP16" Mixed Precision
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
``O2`` casts the model weights to FP16,
patches the model's ``forward`` method to cast input
data to FP16, keeps batchnorms in FP32, maintains FP32 master weights,
updates the optimizer's ``param_groups`` so that the ``optimizer.step()``
acts directly on the FP32 weights (followed by FP32 master weight->FP16 model weight
copies if necessary),
and implements dynamic loss scaling (unless overridden).
Unlike ``O1``, ``O2`` does not patch Torch functions or Tensor methods.

| Default properties set by ``O2``:
| ``cast_model_type=torch.float16``
| ``patch_torch_functions=False``
| ``keep_batchnorm_fp32=True``
| ``master_weights=True``
| ``loss_scale="dynamic"``
|
|

``O3``:  FP16 training
^^^^^^^^^^^^^^^^^^^^^^
``O3`` may not achieve the stability of the true mixed precision options ``O1`` and ``O2``.
However, it can be useful to establish a speed baseline for your model, against which
the performance of ``O1`` and ``O2`` can be compared.  If your model uses batch normalization,
to establish "speed of light" you can try ``O3`` with the additional property override
``keep_batchnorm_fp32=True`` (which enables cudnn batchnorm, as stated earlier).

| Default properties set by ``O3``:
| ``cast_model_type=torch.float16``
| ``patch_torch_functions=False``
| ``keep_batchnorm_fp32=False``
| ``master_weights=False``
| ``loss_scale=1.0``
|
|

Unified API
-----------

.. automodule:: apex.amp
.. currentmodule:: apex.amp

.. autofunction:: initialize

.. autofunction:: scale_loss

.. autofunction:: master_params

Advanced use cases
------------------

The unified Amp API supports gradient accumulation across iterations,
multiple backward passes per iteration, multiple models/optimizers,
custom/user-defined autograd functions, and custom data batch classes.  Gradient clipping and GANs also
require special treatment, but this treatment does not need to change
for different ``opt_level``\ s.  Further details can be found here:

.. toctree::
   :maxdepth: 1

   advanced

Transition guide for old API users
----------------------------------

We strongly encourage moving to the new Amp API, because it's more versatile, easier to use, and future proof.  The original :class:`FP16_Optimizer` and the old "Amp" API are deprecated, and subject to removal at any time.

For users of the old "Amp" API
******************************

In the new API, ``opt-level O1`` performs the same patching of the Torch namespace as the old thing
called "Amp."
However, the new API allows static or dynamic loss scaling, while the old API only allowed dynamic loss scaling.

In the new API, the old call to ``amp_handle = amp.init()``, and the returned ``amp_handle``, are no
longer exposed or necessary.  The new ``amp.initialize()`` does the duty of ``amp.init()`` (and more).
Therefore, any existing calls to ``amp_handle = amp.init()`` should be deleted.

The functions formerly exposed through ``amp_handle`` are now free
functions accessible through the ``amp`` module.

The backward context manager must be changed accordingly::

    # old API
    with amp_handle.scale_loss(loss, optimizer) as scaled_loss:
        scaled_loss.backward()
    ->
    # new API
    with amp.scale_loss(loss, optimizer) as scaled_loss:
        scaled_loss.backward()

For now, the deprecated "Amp" API documentation can still be found on the Github README:  https://github.com/NVIDIA/apex/tree/master/apex/amp.  The old API calls that `annotate user functions`_ to run
with a particular precision are still honored by the new API.

.. _`annotate user functions`:
    https://github.com/NVIDIA/apex/tree/master/apex/amp#annotating-user-functions


For users of the old FP16_Optimizer
***********************************

``opt-level O2`` is equivalent to :class:`FP16_Optimizer` with ``dynamic_loss_scale=True``.
Once again, the backward pass must be changed to the unified version::

    optimizer.backward(loss)
    ->
    with amp.scale_loss(loss, optimizer) as scaled_loss:
        scaled_loss.backward()

One annoying aspect of FP16_Optimizer was that the user had to manually convert their model to half
(either by calling ``.half()`` on it, or using a function or module wrapper from
``apex.fp16_utils``), and also manually call ``.half()`` on input data.  **Neither of these is
necessary in the new API.  No matter what --opt-level
you choose, you can and should simply build your model and pass input data in the default FP32 format.**
The new Amp API will perform the right conversions during
``model, optimizer = amp.initialize(model, optimizer, opt_level=....)`` based on the ``--opt-level``
and any overridden flags.  Floating point input data may be FP32 or FP16, but you may as well just
let it be FP32, because the ``model`` returned by ``amp.initialize`` will have its ``forward``
method patched to cast the input data appropriately.


================================================
FILE: apex/docs/source/conf.py
================================================
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# PyTorch documentation build configuration file, created by
# sphinx-quickstart on Fri Dec 23 13:31:47 2016.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
sys.path.insert(0, os.path.abspath('.'))
# sys.path.insert(0, os.path.abspath('../../apex/parallel/'))
import apex
# import multiproc
import sphinx_rtd_theme


# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.autosummary',
    'sphinx.ext.doctest',
    'sphinx.ext.intersphinx',
    'sphinx.ext.todo',
    'sphinx.ext.coverage',
    'sphinx.ext.mathjax',
    'sphinx.ext.napoleon',
    'sphinx.ext.viewcode',
    'sphinx.ext.extlinks',
]

# Option of sphinx.ext.napoleon: document instance variables with the
# ``:ivar:`` role.
napoleon_use_ivar = True

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = 'Apex'
copyright = '2018'
author = 'Christian Sarofeen, Natalia Gimelshein, Michael Carilli, Raul Puri'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
# TODO: change to [:2] at v1.0
# version = 'master (' + torch.__version__ + ' )'
version = '0.1'
# The full version, including alpha/beta/rc tags.
# TODO: verify this works as expected
release = '0.1.0'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
#
# Spelled out as 'en' rather than None: English is what Sphinx falls back to
# anyway, and newer Sphinx releases emit a configuration warning for None.
language = 'en'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path
exclude_patterns = []

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = True


# -- Options for HTML output -------------------------------------------------

# HTML and HTML Help pages are rendered with the Read the Docs theme; the
# theme package is asked where its files are installed.
html_theme = 'sphinx_rtd_theme'
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]

# Settings understood by the selected theme (see the theme's documentation
# for the full list of options).
html_theme_options = dict(
    collapse_navigation=False,
    display_version=True,
    logo_only=True,
)

# html_logo = '_static/img/nv-pytorch2.png'

# Directories with custom static files (style sheets and the like), relative
# to this directory.  They are copied after the builtin static files, so a
# file named "default.css" here overwrites the builtin "default.css".
html_static_path = ['_static']

# html_style_path = 'css/pytorch_theme.css'

# Extra values handed to the HTML templates: the style sheets to link.
html_context = dict(
    css_files=[
        'https://fonts.googleapis.com/css?family=Lato',
        '_static/css/pytorch_theme.css',
    ],
)


# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
# Named after this project ('Apex'); the previous value 'PyTorchdoc' named a
# different project.
htmlhelp_basename = 'Apexdoc'


# -- Options for LaTeX output ------------------------------------------------

# No LaTeX overrides are active; the commented entries show the available
# keys.
latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
# The author field reuses `author`, like the man page and Texinfo entries
# below, instead of a separate hard-coded string.
latex_documents = [
    (master_doc, 'apex.tex', 'Apex Documentation',
     author, 'manual'),
]


# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'Apex', 'Apex Documentation',
     [author], 1)
]


# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'Apex', 'Apex Documentation',
     author, 'Apex', 'One line description of project.',
     'Miscellaneous'),
]


# Example configuration for intersphinx: refer to the Python standard library.
# Each entry maps an inventory name to (base URL, inventory location); None
# means the inventory is fetched from the base URL itself.  Both URLs point
# straight at the HTTPS location of the docs: the Python 3 tree and the
# NumPy stable docs on numpy.org.
intersphinx_mapping = {
    'python': ('https://docs.python.org/3', None),
    'numpy': ('https://numpy.org/doc/stable/', None),
}

# -- A patch that prevents Sphinx from cross-referencing ivar tags -------
# See http://stackoverflow.com/a/41184353/3343043

from docutils import nodes
from sphinx.util.docfields import TypedField
from sphinx import addnodes


def patched_make_field(self, types, domain, items, **kw):
    """Build a docutils field node without cross-referencing the field names.

    Drop-in replacement for ``TypedField.make_field``.  Each field argument
    name is emitted as a plain ``literal_strong`` node rather than being run
    through ``make_xrefs``.  When an argument has an entry in ``types``, the
    type is appended in parentheses: a single plain-text type has the
    substrings ``int``/``long``/``float``/``type`` prefixed with ``python:``
    and is cross-referenced via ``make_xrefs``; any other type node list is
    appended as-is.

    ``kw`` catches ``env=None`` needed for newer sphinx while maintaining
    backwards compatibility; it is forwarded unchanged to ``make_xrefs``.
    """
    # Signature hint kept from the original: (List, unicode, Tuple) -> nodes.field

    # Substrings rewritten to carry the ``python:`` prefix, applied in this order.
    builtin_names = ('int', 'long', 'float', 'type')

    def render_item(fieldarg, content):
        paragraph = nodes.paragraph()
        # Patch: the name is added as literal text instead of via make_xrefs.
        paragraph += addnodes.literal_strong('', fieldarg)
        if fieldarg in types:
            paragraph += nodes.Text(' (')
            # NOTE: .pop() keeps a single type node from being inserted twice
            # into the doctree, which would lead to inconsistencies later when
            # references are resolved.
            type_nodes = types.pop(fieldarg)
            is_plain_text = (len(type_nodes) == 1
                             and isinstance(type_nodes[0], nodes.Text))
            if is_plain_text:
                typename = u''.join(node.astext() for node in type_nodes)
                for builtin in builtin_names:
                    typename = typename.replace(builtin, 'python:' + builtin)
                paragraph.extend(
                    self.make_xrefs(self.typerolename, domain, typename,
                                    addnodes.literal_emphasis, **kw))
            else:
                paragraph += type_nodes
            paragraph += nodes.Text(')')
        paragraph += nodes.Text(' -- ')
        paragraph += content
        return paragraph

    name_node = nodes.field_name('', self.label)
    if len(items) == 1 and self.can_collapse:
        # A lone item is rendered inline rather than as a one-element list.
        only_arg, only_content = items[0]
        body = render_item(only_arg, only_content)
    else:
        body = self.list_type()
        for fieldarg, content in items:
            body += nodes.list_item('', render_item(fieldarg, content))
    return nodes.field('', name_node, nodes.field_body('', body))

# Install the patch on every TypedField.
TypedField.make_field = patched_make_field


================================================
FILE: apex/docs/source/fp16_utils.rst
================================================
.. role:: hidden
    :class: hidden-section

apex.fp16_utils
===================================

This submodule contains utilities designed to streamline the mixed precision training recipe 
presented by NVIDIA `on Parallel Forall`_ and in GTC 2018 Sessions 
`Training Neural Networks with Mixed Precision: Theory and Practice`_ and 
`Training Neural Networks with Mixed Precision: Real Examples`_.
For Pytorch users, Real Examples in particular is recommended.

Full runnable Python scripts demonstrating ``apex.fp16_utils`` 
can be found on the Github page:

| `Simple FP16_Optimizer demos`_
|
| `Distributed Mixed Precision Training with imagenet`_
|
| `Mixed Precision Training with word_language_model`_
|
|

.. _`on Parallel Forall`:
    https://devblogs.nvidia.com/mixed-precision-training-deep-neural-networks/
.. _`Training Neural Networks with Mixed Precision: Theory and Practice`:
    http://on-demand.gputechconf.com/gtc/2018/video/S8923/
.. _`Training Neural Networks with Mixed Precision: Real Examples`:
    http://on-demand.gputechconf.com/gtc/2018/video/S81012/
.. _`Simple FP16_Optimizer demos`:
    https://github.com/NVIDIA/apex/tree/master/examples/FP16_Optimizer_simple
.. _`Distributed Mixed Precision Training with imagenet`:
    https://github.com/NVIDIA/apex/tree/master/examples/imagenet
.. _`Mixed Precision Training with word_language_model`:
    https://github.com/NVIDIA/apex/tree/master/examples/word_language_model

.. automodule:: apex.fp16_utils
.. currentmodule:: apex.fp16_utils

Automatic management of master params + loss scaling
----------------------------------------------------

.. autoclass:: FP16_Optimizer
    :members:

.. autoclass:: LossScaler
    :members:

.. autoclass:: DynamicLossScaler
    :members:

Manual master parameter management
----------------------------------

.. autofunction:: prep_param_lists

.. autofunction:: master_params_to_model_params

.. autofunction:: model_grads_to_master_grads


================================================
FILE: apex/docs/source/index.rst
================================================
.. PyTorch documentation master file, created by
   sphinx-quickstart on Fri Dec 23 13:31:47 2016.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

:github_url: https://github.com/nvidia/apex

Apex (A PyTorch Extension)
===================================

This site contains the API documentation for Apex (https://github.com/nvidia/apex),
a Pytorch extension with NVIDIA-maintained utilities to streamline mixed precision and distributed training.  Some of the code here will be included in upstream Pytorch eventually. The intention of Apex is to make up-to-date utilities available to users as quickly as possible.

Installation instructions can be found here:  https://github.com/NVIDIA/apex#quick-start.

.. toctree::
   :maxdepth: 1
   :caption: AMP:  Automatic Mixed Precision

   amp

.. toctree::
   :maxdepth: 1
   :caption: Distributed Training

   parallel

.. toctree::
   :maxdepth: 1
   :caption: Fused Optimizers

   optimizers

.. toctree::
   :maxdepth: 1
   :caption: Fused Layer Norm

   layernorm

..   .. toctree::
     :maxdepth: 1
     :caption: Deprecated mixed precision API
     fp16_utils

..   reparameterization
..   RNN
   
Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`


================================================
FILE: apex/docs/source/layernorm.rst
================================================
.. role:: hidden
    :class: hidden-section

apex.normalization.fused_layer_norm
===================================

.. automodule:: apex.normalization
.. currentmodule:: apex.normalization

.. FusedLayerNorm
   --------------

.. autoclass:: FusedLayerNorm
    :members:


================================================
FILE: apex/docs/source/optimizers.rst
================================================
.. role:: hidden
    :class: hidden-section

apex.optimizers
===================================

.. automodule:: apex.optimizers
.. currentmodule:: apex.optimizers

.. FusedAdam
   ----------

.. autoclass:: FusedAdam
    :members:


================================================
FILE: apex/docs/source/parallel.rst
================================================
.. role:: hidden
    :class: hidden-section

apex.parallel
===================================

.. automodule:: apex.parallel
.. currentmodule:: apex.parallel

.. DistributedDataParallel
   ----------

.. autoclass:: DistributedDataParallel
    :members:

.. autoclass:: Reducer
    :members:

.. autoclass:: SyncBatchNorm
    :members:

Utility functions
----------------------------------

.. autofunction:: convert_syncbn_model


================================================
FILE: apex/examples/README.md
================================================
This directory contains examples illustrating Apex mixed precision and distributed tools.

**Note for users of the pre-unification API**:
`deprecated_api` contains examples illustrating the old (pre-unified) APIs.  These APIs will be removed soon, and users are strongly encouraged to switch.  The separate mixed precision tools called `Amp` and `FP16_Optimizer` in the old API are exposed via different flags/optimization levels in the new API.


================================================
FILE: apex/examples/dcgan/README.md
================================================
Under construction...


================================================
FILE: apex/examples/docker/Dockerfile
================================================
# Base image must at least have pytorch and CUDA installed.
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:19.03-py3
FROM $BASE_IMAGE
# Re-declare the build arg after FROM so the echo below can expand it
# (presumably required because ARGs declared before FROM are not visible
# inside the build stage -- confirm against the Dockerfile reference).
ARG BASE_IMAGE
RUN echo "Installing Apex on top of ${BASE_IMAGE}"
# make sure we don't overwrite some existing directory called "apex"
WORKDIR /tmp/unique_for_apex
# uninstall Apex if present, twice to make absolutely sure :)
# ("|| :" keeps the build going when there is nothing to uninstall)
RUN pip uninstall -y apex || :
RUN pip uninstall -y apex || :
# SHA is something the user can touch to force recreation of this Docker layer,
# and therefore force cloning of the latest version of Apex
RUN SHA=ToUcHMe git clone https://github.com/NVIDIA/apex.git
WORKDIR /tmp/unique_for_apex/apex
# Install Apex from the fresh clone with both the C++ and CUDA extension flags.
RUN pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
# Leave the image with /workspace as its working directory.
WORKDIR /workspace


================================================
FILE: apex/examples/docker/README.md
================================================
## Option 1:  Create a new container with Apex

**Dockerfile** installs the latest Apex on top of an existing image.  Run
```
docker build -t new_image_with_apex .
```
By default, **Dockerfile** uses NVIDIA's Pytorch container as the base image,
which requires an NVIDIA GPU Cloud (NGC) account.  If you don't have an NGC account, you can sign up for free by following the instructions [here](https://docs.nvidia.com/ngc/ngc-getting-started-guide/index.html#generating-api-key).

Alternatively, you can supply your own base image via the `BASE_IMAGE` build-arg.
`BASE_IMAGE` must have Pytorch and Cuda installed.  For example, any
`-devel` image for Pytorch 1.0 and later from the
[official Pytorch Dockerhub](https://hub.docker.com/r/pytorch/pytorch) may be used:
```
docker build --build-arg BASE_IMAGE=pytorch/pytorch:nightly-devel-cuda10.0-cudnn7 -t new_image_with_apex .
```

If you want to rebuild your image, and force the latest Apex to be cloned and installed, make any small change to the `SHA` variable in **Dockerfile**.

**Warning:**
Currently, the non-`-devel` images on Pytorch Dockerhub do not contain the Cuda compiler `nvcc`.  Therefore,
images whose name does not contain `-devel` are not eligible candidates for `BASE_IMAGE`.

### Running your Apex container

Like any Cuda-enabled Pytorch container, a container with Apex should be run via [nvidia-docker](https://github.com/NVIDIA/nvidia-docker), for example:
```
docker run --runtime=nvidia -it --rm --ipc=host new_image_with_apex
```

## Option 2:  Install Apex in a running container

Instead of building a new container, you can also `git clone https://github.com/NVIDIA/apex.git` on bare metal and mount the Apex repo into your container at launch by running, for example,
```
docker run --runtime=nvidia -it --rm --ipc=host -v /bare/metal/apex:/apex/in/container <base image>
```
then go to /apex/in/container within the running container and
```
pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
```


================================================
FILE: apex/examples/imagenet/README.md
================================================
# Mixed Precision ImageNet Training in PyTorch

`main_amp.py` is based on [https://github.com/pytorch/examples/tree/master/imagenet](https://github.com/pytorch/examples/tree/master/imagenet).
It implements Automatic Mixed Precision (Amp) training of popular model architectures, such as ResNet, AlexNet, and VGG, on the ImageNet dataset.  Command-line flags forwarded to `amp.initialize` are used to easily manipulate and switch between various pure and mixed precision "optimization levels" or `opt_level`s.  For a detailed explanation of `opt_level`s, see the [updated API guide](https://nvidia.github.io/apex/amp.html).

Three lines enable Amp:
```
# Added after model and optimizer construction
model, optimizer = amp.initialize(model, optimizer, flags...)
...
# loss.backward() changed to:
with amp.scale_loss(loss, optimizer) as scaled_loss:
    scaled_loss.backward()
```

With the new Amp API **you never need to explicitly convert your model, or the input data, to half().**

## Requirements

- Download the ImageNet dataset and move validation images to labeled subfolders
    - The following script may be helpful: https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh

## Training

To train a model, create softlinks to the Imagenet dataset, then run `main_amp.py` with the desired model architecture, as shown in `Example commands` below.

The default learning rate schedule is set for ResNet50.  `main_amp.py` script rescales the learning rate according to the global batch size (number of distributed processes \* per-process minibatch size).

## Example commands

**Note:**  batch size `--b 224` assumes your GPUs have >=16GB of onboard memory.  You may be able to increase this to 256, but that's cutting it close, so it may run out of memory on some Pytorch versions.

**Note:**  All of the following use 4 dataloader subprocesses (`--workers 4`) to reduce potential
CPU data loading bottlenecks.

**Note:**  `--opt-level` `O1` and `O2` both use dynamic loss scaling by default unless manually overridden.
`--opt-level` `O0` and `O3` (the "pure" training modes) do not use loss scaling by default.
`O0` and `O3` can be told to use loss scaling via manual overrides, but using loss scaling with `O0`
(pure FP32 training) does not really make sense, and will trigger a warning.

Softlink training and validation datasets into the current directory:
```
$ ln -sf /data/imagenet/train-jpeg/ train
$ ln -sf /data/imagenet/val-jpeg/ val
```

### Summary

Amp allows easy experimentation with various pure and mixed precision options.
```
$ python main_amp.py -a resnet50 --b 128 --workers 4 --opt-level O0 ./
$ python main_amp.py -a resnet50 --b 224 --workers 4 --opt-level O3 ./
$ python main_amp.py -a resnet50 --b 224 --workers 4 --opt-level O3 --keep-batchnorm-fp32 True ./
$ python main_amp.py -a resnet50 --b 224 --workers 4 --opt-level O1 ./
$ python main_amp.py -a resnet50 --b 224 --workers 4 --opt-level O1 --loss-scale 128.0 ./
$ python -m torch.distributed.launch --nproc_per_node=2 main_amp.py -a resnet50 --b 224 --workers 4 --opt-level O1 ./
$ python main_amp.py -a resnet50 --b 224 --workers 4 --opt-level O2 ./
$ python main_amp.py -a resnet50 --b 224 --workers 4 --opt-level O2 --loss-scale 128.0 ./
$ python -m torch.distributed.launch --nproc_per_node=2 main_amp.py -a resnet50 --b 224 --workers 4 --opt-level O2 ./
```
Options are explained below.  Again, the [updated API guide](https://nvidia.github.io/apex/amp.html) provides more detail.

#### `--opt-level O0` (FP32 training) and `O3` (FP16 training)

"Pure FP32" training:
```
$ python main_amp.py -a resnet50 --b 128 --workers 4 --opt-level O0 ./
```
"Pure FP16" training:
```
$ python main_amp.py -a resnet50 --b 224 --workers 4 --opt-level O3 ./
```
FP16 training with FP32 batchnorm:
```
$ python main_amp.py -a resnet50 --b 224 --workers 4 --opt-level O3 --keep-batchnorm-fp32 True ./
```
Keeping the batchnorms in FP32 improves stability and allows Pytorch
to use cudnn batchnorms, which significantly increases speed in Resnet50.

The `O3` options might not converge, because they are not true mixed precision.
However, they can be useful to establish "speed of light" performance for
your model, which provides a baseline for comparison with `O1` and `O2`.
For Resnet50 in particular, `--opt-level O3 --keep-batchnorm-fp32 True` establishes
the "speed of light."  (Without `--keep-batchnorm-fp32`, it's slower, because it does
not use cudnn batchnorm.)

#### `--opt-level O1` ("conservative mixed precision")

`O1` patches Torch functions to cast inputs according to a whitelist-blacklist model.
FP16-friendly (Tensor Core) ops like gemms and convolutions run in FP16, while ops
that benefit from FP32, like batchnorm and softmax, run in FP32.
Also, dynamic loss scaling is used by default.
```
$ python main_amp.py -a resnet50 --b 224 --workers 4 --opt-level O1 ./
```
`O1` overridden to use static loss scaling:
```
$ python main_amp.py -a resnet50 --b 224 --workers 4 --opt-level O1 --loss-scale 128.0 ./
```
Distributed training with 2 processes (1 GPU per process, see **Distributed training** below
for more detail)
```
$ python -m torch.distributed.launch --nproc_per_node=2 main_amp.py -a resnet50 --b 224 --workers 4 --opt-level O1 ./
```
For best performance, set `--nproc_per_node` equal to the total number of GPUs on the node
to use all available resources.

#### `--opt-level O2` ("fast mixed precision")

`O2` casts the model to FP16, keeps batchnorms in FP32,
maintains master weights in FP32, and implements
dynamic loss scaling by default. (Unlike --opt-level O1, --opt-level O2
does not patch Torch functions.)
```
$ python main_amp.py -a resnet50 --b 224 --workers 4 --opt-level O2 ./
```
"Fast mixed precision" overridden to use static loss scaling:
```
$ python main_amp.py -a resnet50 --b 224 --workers 4 --opt-level O2 --loss-scale 128.0 ./
```
Distributed training with 2 processes (1 GPU per process)
```
$ python -m torch.distributed.launch --nproc_per_node=2 main_amp.py -a resnet50 --b 224 --workers 4 --opt-level O2 ./
```

## Distributed training

`main_amp.py` optionally uses `apex.parallel.DistributedDataParallel` (DDP) for multiprocess training with one GPU per process.
```
model = apex.parallel.DistributedDataParallel(model)
```
is a drop-in replacement for
```
model = torch.nn.parallel.DistributedDataParallel(model,
                                                  device_ids=[args.local_rank],
                                                  output_device=args.local_rank)
```
(because Torch DDP permits multiple GPUs per process, with Torch DDP you are required to
manually specify the device to run on and the output device.
With Apex DDP, it uses only the current device by default).

The choice of DDP wrapper (Torch or Apex) is orthogonal to the use of Amp and other Apex tools.  It is safe to use `apex.amp` with either `torch.nn.parallel.DistributedDataParallel` or `apex.parallel.DistributedDataParallel`.  In the future, I may add some features that permit optional tighter integration between `Amp` and `apex.parallel.DistributedDataParallel` for marginal performance benefits, but currently, there's no compelling reason to use Apex DDP versus Torch DDP for most models.

To use DDP with `apex.amp`, the only gotcha is that
```
model, optimizer = amp.initialize(model, optimizer, flags...)
```
must precede
```
model = DDP(model)
```
If DDP wrapping occurs before `amp.initialize`, `amp.initialize` will raise an error.

With both Apex DDP and Torch DDP, you must also call `torch.cuda.set_device(args.local_rank)` within
each process prior to initializing your model or any other tensors.
More information can be found in the docs for the
Pytorch multiprocess launcher module [torch.distributed.launch](https://pytorch.org/docs/stable/distributed.html#launch-utility).

`main_amp.py` is written to interact with 
[torch.distributed.launch](https://pytorch.org/docs/master/distributed.html#launch-utility),
which spawns multiprocess jobs using the following syntax:
```
python -m torch.distributed.launch --nproc_per_node=NUM_GPUS main_amp.py args...
```
`NUM_GPUS` should be less than or equal to the number of visible GPU devices on the node.  The use of `torch.distributed.launch` is unrelated to the choice of DDP wrapper.  It is safe to use either apex DDP or torch DDP with `torch.distributed.launch`.

Optionally, one can run imagenet with synchronized batch normalization across processes by adding
`--sync_bn` to the `args...`

## Deterministic training (for debugging purposes)

Running with the `--deterministic` flag should produce bitwise identical outputs run-to-run,
regardless of what other options are used (see [Pytorch docs on reproducibility](https://pytorch.org/docs/stable/notes/randomness.html)).
Since `--deterministic` disables `torch.backends.cudnn.benchmark`, `--deterministic` may
cause a modest performance decrease.


================================================
FILE: apex/examples/imagenet/main_amp.py
================================================
import argparse
import os
import shutil
import time

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models

import numpy as np

try:
    from apex.parallel import DistributedDataParallel as DDP
    from apex.fp16_utils import *
    from apex import amp, optimizers
    from apex.multi_tensor_apply import multi_tensor_applier
except ImportError:
    raise ImportError("Please install apex from https://www.github.com/nvidia/apex to run this example.")

# Candidate architectures: every lowercase, non-dunder, callable attribute of
# torchvision.models, sorted by name.  Used as the allowed --arch choices.
model_names = sorted(name for name in models.__dict__
                     if name.islower() and not name.startswith("__")
                     and callable(models.__dict__[name]))

# Command-line interface.  The positional `data` argument is the dataset root;
# main() looks for `train` and `val` subdirectories underneath it.
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('data', metavar='DIR',
                    help='path to dataset')
parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet18',
                    choices=model_names,
                    help='model architecture: ' +
                    ' | '.join(model_names) +
                    ' (default: resnet18)')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                    help='number of data loading workers (default: 4)')
parser.add_argument('--epochs', default=90, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('-b', '--batch-size', default=256, type=int,
                    metavar='N', help='mini-batch size per process (default: 256)')
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                    metavar='LR', help='Initial learning rate.  Will be scaled by <global batch size>/256: args.lr = args.lr*float(args.batch_size*args.world_size)/256.  A warmup schedule will also be applied over the first 5 epochs.')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)')
parser.add_argument('--print-freq', '-p', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                    help='evaluate model on validation set')
parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                    help='use pre-trained model')

parser.add_argument('--prof', dest='prof', action='store_true',
                    help='Only run 10 iterations for profiling.')
parser.add_argument('--deterministic', action='store_true')

# In distributed runs main() uses local_rank as this process's CUDA device index.
parser.add_argument("--local_rank", default=0, type=int)
parser.add_argument('--sync_bn', action='store_true',
                    help='enabling apex sync BN.')

# Amp options.  They are kept as strings (or None) and passed straight through
# to amp.initialize in main().
parser.add_argument('--opt-level', type=str)
parser.add_argument('--keep-batchnorm-fp32', type=str, default=None)
parser.add_argument('--loss-scale', type=str, default=None)

# Default setting; switched back off further down when --deterministic is given.
cudnn.benchmark = True

def fast_collate(batch):
    """Collate ``(image, label)`` samples into a uint8 image batch and int64 labels.

    The output spatial size is taken from the first image's ``size`` attribute
    (read as ``(width, height)``).  Each image is converted to a uint8 numpy
    array, given a trailing channel axis if it has fewer than three
    dimensions, and moved to channels-first layout before being accumulated
    into its slot of a zero-initialised ``(N, 3, H, W)`` tensor.

    Returns:
        ``(images, labels)`` where ``images`` is a ``torch.uint8`` tensor of
        shape ``(N, 3, H, W)`` and ``labels`` is a ``torch.int64`` tensor.
    """
    images = [sample[0] for sample in batch]
    labels = torch.tensor([sample[1] for sample in batch], dtype=torch.int64)

    width, height = images[0].size[0], images[0].size[1]
    collated = torch.zeros((len(images), 3, height, width), dtype=torch.uint8)

    for index, image in enumerate(images):
        pixels = np.asarray(image, dtype=np.uint8)
        if pixels.ndim < 3:
            # Single-channel image: add a channel axis so the layout is H x W x C.
            pixels = np.expand_dims(pixels, axis=-1)
        channels_first = np.rollaxis(pixels, 2)
        # The slot starts at zero, so += stores the pixels (and broadcasts a
        # single channel across all three output channels).
        collated[index] += torch.from_numpy(channels_first)

    return collated, labels

# Best top-1 precision seen so far; main() declares it `global` and updates it.
best_prec1 = 0
# Arguments are parsed at module level, so `args` is available to every function here.
args = parser.parse_args()

# Echo the Amp-related options; the last two lines also print the Python type
# of the parsed value.
print("opt_level = {}".format(args.opt_level))
print("keep_batchnorm_fp32 = {}".format(args.keep_batchnorm_fp32), type(args.keep_batchnorm_fp32))
print("loss_scale = {}".format(args.loss_scale), type(args.loss_scale))

print("\nCUDNN VERSION: {}\n".format(torch.backends.cudnn.version()))

if args.deterministic:
    # Turn off cudnn autotuning and request deterministic cudnn behaviour.
    cudnn.benchmark = False
    cudnn.deterministic = True
    # Each process seeds the RNG with its own local rank.
    torch.manual_seed(args.local_rank)
    torch.set_printoptions(precision=10)

def main():
    """Run ImageNet training or evaluation as configured by the module-level ``args``.

    Steps, in order:
      1. Detect distributed mode from the ``WORLD_SIZE`` environment variable and,
         if enabled, bind this process to ``args.local_rank`` and join the NCCL
         process group.
      2. Build the model (optionally pretrained / with apex sync BN), scale the
         learning rate by the global batch size, and create the SGD optimizer.
      3. Initialize Amp, then (distributed only) wrap the model in apex DDP.
      4. Optionally restore model, optimizer, start epoch and best precision
         from ``args.resume``.
      5. Build the train/val loaders and either evaluate once (``--evaluate``)
         or run the epoch loop, checkpointing from the ``local_rank == 0`` process.

    Raises:
        RuntimeError: if ``args.arch`` is ``inception_v3`` (unsupported here).
    """
    global best_prec1, args

    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1

    args.gpu = 0
    args.world_size = 1

    if args.distributed:
        args.gpu = args.local_rank
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()

    assert torch.backends.cudnn.enabled, "Amp requires cudnn backend to be enabled."

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if args.sync_bn:
        import apex
        print("using apex synced BN")
        model = apex.parallel.convert_syncbn_model(model)

    model = model.cuda()

    # Scale learning rate based on global batch size
    args.lr = args.lr*float(args.batch_size*args.world_size)/256.
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Initialize Amp.  Amp accepts either values or strings for the optional override arguments,
    # for convenient interoperation with argparse.
    model, optimizer = amp.initialize(model, optimizer,
                                      opt_level=args.opt_level,
                                      keep_batchnorm_fp32=args.keep_batchnorm_fp32,
                                      loss_scale=args.loss_scale
                                      )

    # For distributed training, wrap the model with apex.parallel.DistributedDataParallel.
    # This must be done AFTER the call to amp.initialize.  If model = DDP(model) is called
    # before model, ... = amp.initialize(model, ...), the call to amp.initialize may alter
    # the types of model's parameters in a way that disrupts or destroys DDP's allreduce hooks.
    if args.distributed:
        # By default, apex.parallel.DistributedDataParallel overlaps communication with
        # computation in the backward pass.
        # model = DDP(model)
        # delay_allreduce delays all communication to the end of the backward pass.
        model = DDP(model, delay_allreduce=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    # Optionally resume from a checkpoint
    if args.resume:
        # Use a local scope to avoid dangling references
        def resume():
            # best_prec1 is a module-level global.  Without this declaration the
            # assignment below would only bind a local of resume(), and the
            # checkpoint's best precision would be silently discarded.
            global best_prec1
            if os.path.isfile(args.resume):
                print("=> loading checkpoint '{}'".format(args.resume))
                checkpoint = torch.load(args.resume, map_location = lambda storage, loc: storage.cuda(args.gpu))
                args.start_epoch = checkpoint['epoch']
                best_prec1 = checkpoint['best_prec1']
                model.load_state_dict(checkpoint['state_dict'])
                optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded checkpoint '{}' (epoch {})"
                      .format(args.resume, checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(args.resume))
        resume()

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')

    if(args.arch == "inception_v3"):
        raise RuntimeError("Currently, inception_v3 is not supported by this example.")
        # crop_size = 299
        # val_size = 320 # I chose this value arbitrarily, we can adjust.
    else:
        crop_size = 224
        val_size = 256

    # No ToTensor()/normalize here: the datasets yield images that fast_collate
    # packs into uint8 tensors; data_prefetcher does the float conversion and
    # mean/std normalization.
    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(crop_size),
            transforms.RandomHorizontalFlip(),
            # transforms.ToTensor(), Too slow
            # normalize,
        ]))
    val_dataset = datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Resize(val_size),
            transforms.CenterCrop(crop_size),
        ]))

    train_sampler = None
    val_sampler = None
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)

    # shuffle and sampler are mutually exclusive: shuffle only when no sampler is used.
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler, collate_fn=fast_collate)

    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True,
        sampler=val_sampler,
        collate_fn=fast_collate)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
        if args.prof:
            # Profiling runs stop after the first (truncated) training epoch.
            break
        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        if args.local_rank == 0:
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer' : optimizer.state_dict(),
            }, is_best)

class data_prefetcher():
    """Overlap host-to-device copies of the next batch with work on the current one.

    Wraps an iterable ``loader`` that yields ``(input, target)`` tensor pairs.
    The upcoming batch is copied to the GPU, converted to float and normalized
    on a dedicated CUDA stream while the caller works on the current stream.

    ``next()`` returns ``(input, target)``, or ``(None, None)`` once the loader
    is exhausted, which is the condition the training loop polls for.
    """

    def __init__(self, loader):
        self.loader = iter(loader)
        # Side stream on which the copy and normalization of the next batch run.
        self.stream = torch.cuda.Stream()
        # Per-channel mean/std scaled by 255, i.e. expressed in raw pixel units,
        # shaped (1, 3, 1, 1) to broadcast over a channels-first batch.
        self.mean = torch.tensor([0.485 * 255, 0.456 * 255, 0.406 * 255]).cuda().view(1,3,1,1)
        self.std = torch.tensor([0.229 * 255, 0.224 * 255, 0.225 * 255]).cuda().view(1,3,1,1)
        # With Amp, it isn't necessary to manually convert data to half.
        # if args.fp16:
        #     self.mean = self.mean.half()
        #     self.std = self.std.half()
        self.preload()

    def preload(self):
        """Fetch the next batch and start moving it to the GPU on the side stream.

        Sets ``next_input``/``next_target`` to ``None`` when the loader is exhausted.
        """
        try:
            self.next_input, self.next_target = next(self.loader)
        except StopIteration:
            self.next_input = None
            self.next_target = None
            return
        # if record_stream() doesn't work, another option is to make sure device inputs are created
        # on the main stream.
        # self.next_input_gpu = torch.empty_like(self.next_input, device='cuda')
        # self.next_target_gpu = torch.empty_like(self.next_target, device='cuda')
        # Need to make sure the memory allocated for next_* is not still in use by the main stream
        # at the time we start copying to next_*:
        # self.stream.wait_stream(torch.cuda.current_stream())
        with torch.cuda.stream(self.stream):
            self.next_input = self.next_input.cuda(non_blocking=True)
            self.next_target = self.next_target.cuda(non_blocking=True)
            # more code for the alternative if record_stream() doesn't work:
            # copy_ will record the use of the pinned source tensor in this side stream.
            # self.next_input_gpu.copy_(self.next_input, non_blocking=True)
            # self.next_target_gpu.copy_(self.next_target, non_blocking=True)
            # self.next_input = self.next_input_gpu
            # self.next_target = self.next_target_gpu

            # With Amp, it isn't necessary to manually convert data to half.
            # if args.fp16:
            #     self.next_input = self.next_input.half()
            # else:
            self.next_input = self.next_input.float()
            self.next_input = self.next_input.sub_(self.mean).div_(self.std)

    def next(self):
        """Return the preloaded ``(input, target)`` pair and start loading the following one.

        Returns ``(None, None)`` once the underlying loader is exhausted.
        """
        # Make the current stream wait for the side stream's copy/normalization.
        torch.cuda.current_stream().wait_stream(self.stream)
        input = self.next_input
        target = self.next_target
        # After the last batch, preload() leaves both as None; calling
        # record_stream on None would raise AttributeError instead of letting
        # the caller see the end-of-data marker.
        if input is not None:
            input.record_stream(torch.cuda.current_stream())
        if target is not None:
            target.record_stream(torch.cuda.current_stream())
        self.preload()
        return input, target


def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training epoch over ``train_loader``.

    Batches are pulled through ``data_prefetcher``.  The learning rate is
    adjusted every iteration, and the backward pass goes through
    ``amp.scale_loss``.  Loss/accuracy meters are only updated (and, on
    ``args.local_rank == 0``, printed) every ``args.print_freq`` iterations.
    When ``args.prof`` is set, NVTX ranges are emitted and the loop stops after
    10 iterations.

    Args:
        train_loader: Iterable of ``(input, target)`` batches; ``len()`` of it
            is used for the LR schedule and for logging.
        model: Module called as ``model(input)``.
        criterion: Callable computing the loss from ``(output, target)``.
        optimizer: Optimizer passed to ``amp.scale_loss`` and stepped each iteration.
        epoch: Current epoch index, used for the LR schedule and for logging.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()
    end = time.time()

    prefetcher = data_prefetcher(train_loader)
    input, target = prefetcher.next()
    i = 0
    # The prefetcher signals the end of the data with input == None.
    while input is not None:
        i += 1

        adjust_learning_rate(optimizer, epoch, i, len(train_loader))

        if args.prof:
            # Profiling runs only need a handful of iterations.
            if i > 10:
                break

        # compute output
        if args.prof: torch.cuda.nvtx.range_push("forward")
        output = model(input)
        if args.prof: torch.cuda.nvtx.range_pop()
        loss = criterion(output, target)

        # compute gradient and do SGD step
        optimizer.zero_grad()

        if args.prof: torch.cuda.nvtx.range_push("backward")
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        if args.prof: torch.cuda.nvtx.range_pop()

        # for param in model.parameters():
        #     print(param.data.double().sum().item(), param.grad.data.double().sum().item())

        if args.prof: torch.cuda.nvtx.range_push("step")
        optimizer.step()
        if args.prof: torch.cuda.nvtx.range_pop()

        if i%args.print_freq == 0:
            # Every print_freq iterations, check the loss, accuracy, and speed.
            # For best performance, it doesn't make sense to print these metrics every
            # iteration, since they incur an allreduce and some host<->device syncs.

            # Measure accuracy
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))

            # Average loss and accuracy across processes for logging
            if args.distributed:
                reduced_loss = reduce_tensor(loss.data)
                prec1 = reduce_tensor(prec1)
                prec5 = reduce_tensor(prec5)
            else:
                reduced_loss = loss.data

            # to_python_float incurs a host<->device sync
            losses.update(to_python_float(reduced_loss), input.size(0))
            top1.update(to_python_float(prec1), input.size(0))
            top5.update(to_python_float(prec5), input.size(0))

            # Wait for outstanding GPU work so the wall-clock measurement below
            # covers the whole window of print_freq iterations.
            torch.cuda.synchronize()
            # Average time per iteration over the last print_freq iterations.
            batch_time.update((time.time() - end)/args.print_freq)
            end = time.time()

            if args.local_rank == 0:
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Speed {3:.3f} ({4:.3f})\t'
                      'Loss {loss.val:.10f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                       epoch, i, len(train_loader),
                       args.world_size*args.batch_size/batch_time.val,
                       args.world_size*args.batch_size/batch_time.avg,
                       batch_time=batch_time,
                       loss=losses, top1=top1, top5=top5))

        input, target = prefetcher.next()


def validate(val_loader, model, criterion):
    """Evaluate ``model`` on ``val_loader`` and return the average top-1 precision.

    Unlike ``train``, the loss and accuracy meters are updated on every
    iteration.  Progress lines are printed every ``args.print_freq``
    iterations on ``args.local_rank == 0``; the final summary line is printed
    by every process (it is not gated on ``args.local_rank``).

    Args:
        val_loader: Iterable of ``(input, target)`` batches.
        model: Module called as ``model(input)``.
        criterion: Callable computing the loss from ``(output, target)``.

    Returns:
        ``top1.avg``, the running average of the top-1 precision.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()

    prefetcher = data_prefetcher(val_loader)
    input, target = prefetcher.next()
    i = 0
    # The prefetcher signals the end of the data with input == None.
    while input is not None:
        i += 1

        # compute output
        with torch.no_grad():
            output = model(input)
            loss = criterion(output, target)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))

        # Average loss and accuracy across processes when running distributed.
        if args.distributed:
            reduced_loss = reduce_tensor(loss.data)
            prec1 = reduce_tensor(prec1)
            prec5 = reduce_tensor(prec5)
        else:
            reduced_loss = loss.data

        losses.update(to_python_float(reduced_loss), input.size(0))
        top1.update(to_python_float(prec1), input.size(0))
        top5.update(to_python_float(prec5), input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # TODO:  Change timings to mirror train().
        if args.local_rank == 0 and i % args.print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Speed {2:.3f} ({3:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                   i, len(val_loader),
                   args.world_size * args.batch_size / batch_time.val,
                   args.world_size * args.batch_size / batch_time.avg,
                   batch_time=batch_time, loss=losses,
                   top1=top1, top5=top5))

        input, target = prefetcher.next()

    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))

    return top1.avg


def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Serialize ``state`` to ``filename`` and optionally keep a "best" copy.

    Args:
        state: Object handed to ``torch.save``.
        is_best: When true, the file just written is also copied to
            ``model_best.pth.tar`` in the current working directory.
        filename: Destination path of the checkpoint.
    """
    torch.save(state, filename)
    if not is_best:
        return
    # Duplicate the file on disk rather than serializing the state twice.
    shutil.copyfile(filename, 'model_best.pth.tar')


class AverageMeter(object):
    """Tracks the most recent value and the weighted running average of a metric.

    Attributes are ``val`` (last value), ``sum`` (weighted sum of all values),
    ``count`` (total weight) and ``avg`` (``sum / count``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running average."""
        self.val = val
        weighted = val * n
        self.sum += weighted
        self.count += n
        self.avg = self.sum / self.count


def adjust_learning_rate(optimizer, epoch, step, len_epoch):
    """Set the learning rate of every param group for the given epoch/step.

    The base rate ``args.lr`` is divided by 10 for every 30 completed epochs,
    with one extra division from epoch 80 on.  During the first 5 epochs the
    rate is additionally scaled linearly with the number of iterations seen.
    The original docstring states this schedule "should yield 76% converged
    accuracy with batch size 256".

    Args:
        optimizer: Optimizer whose ``param_groups`` receive the new ``'lr'``.
        epoch: Current epoch index.
        step: Iteration index within the epoch.
        len_epoch: Number of iterations per epoch.
    """
    decay_steps = epoch // 30 + (1 if epoch >= 80 else 0)
    lr = args.lr * (0.1 ** decay_steps)

    # Warmup: ramp up linearly over the iterations of the first 5 epochs.
    if epoch < 5:
        iterations_seen = float(1 + step + epoch * len_epoch)
        lr = lr * iterations_seen / (5. * len_epoch)

    # if(args.local_rank == 0):
    #     print("epoch = {}, step = {}, lr = {}".format(epoch, step, lr))

    for group in optimizer.param_groups:
        group['lr'] = lr


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k

    Args:
        output: 2-D tensor of per-class scores; dimension 1 is searched for
            the top-k entries, dimension 0 is the batch.
        target: 1-D tensor with the expected class index of each sample.
        topk: Iterable of the ``k`` values to evaluate.

    Returns:
        A list with one single-element tensor per entry of ``topk`` holding
        the percentage of samples whose target is among the top-k predictions.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the maxk highest scores per sample, transposed to (maxk, batch).
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # reshape() instead of view(): `correct` can inherit the transposed,
        # non-contiguous layout of `pred`, in which case view(-1) raises a
        # RuntimeError for k > 1.  reshape() copies only when it has to.
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res


def reduce_tensor(tensor):
    """Return a copy of ``tensor`` summed over all processes and divided by
    ``args.world_size``; the argument itself is not modified."""
    averaged = tensor.clone()
    # all_reduce works in place on the clone.
    dist.all_reduce(averaged, op=dist.reduce_op.SUM)
    averaged /= args.world_size
    return averaged

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()


================================================
FILE: apex/examples/simple/distributed/README.md
================================================
**distributed_data_parallel.py** and **run.sh** show an example using Amp with
[apex.parallel.DistributedDataParallel](https://nvidia.github.io/apex/parallel.html) or
[torch.nn.parallel.DistributedDataParallel](https://pytorch.org/docs/stable/nn.html#distributeddataparallel)
and the PyTorch multiprocess launcher script,
[torch.distributed.launch](https://pytorch.org/docs/master/distributed.html#launch-utility).
Using `Amp` with `DistributedDataParallel` requires no changes compared to ordinary
single-process use.  The only gotcha is that your model must be wrapped with `DistributedDataParallel`
**after** the call to `amp.initialize`.  To test the example, run
```bash
bash run.sh
```

**This is intended purely as an instructional example, not a performance showcase.**


================================================
FILE: apex/examples/simple/distributed/distributed_data_parallel.py
================================================
import torch
import argparse
import os
from apex import amp
# FOR DISTRIBUTED: (can also use torch.nn.parallel.DistributedDataParallel instead)
from apex.parallel import DistributedDataParallel

# Minimal Amp + DistributedDataParallel example: parse the launcher arguments,
# set up the process group when running distributed, then train a single Linear
# layer on a fixed fake batch.
parser = argparse.ArgumentParser()
# FOR DISTRIBUTED:  Parse for the local_rank argument, which will be supplied
# automatically by torch.distributed.launch.
parser.add_argument("--local_rank", default=0, type=int)
args = parser.parse_args()

# FOR DISTRIBUTED:  If we are running under torch.distributed.launch,
# the 'WORLD_SIZE' environment variable will also be set automatically.
# Without it (plain single-process run) the script stays non-distributed.
args.distributed = False
if 'WORLD_SIZE' in os.environ:
    args.distributed = int(os.environ['WORLD_SIZE']) > 1

if args.distributed:
    # FOR DISTRIBUTED:  Set the device according to local_rank.
    torch.cuda.set_device(args.local_rank)

    # FOR DISTRIBUTED:  Initialize the backend.  torch.distributed.launch will provide
    # environment variables, and requires that you use init_method=`env://`.
    torch.distributed.init_process_group(backend='nccl',
                                         init_method='env://')

torch.backends.cudnn.benchmark = True

# Batch size, input features and output features of the toy problem.
N, D_in, D_out = 64, 1024, 16

# Each process receives its own batch of "fake input data" and "fake target data."
# The "training loop" in each process just uses this fake batch over and over.
# https://github.com/NVIDIA/apex/tree/master/examples/imagenet provides a more realistic
# example of distributed data sampling for both training and validation.
x = torch.randn(N, D_in, device='cuda')
y = torch.randn(N, D_out, device='cuda')

model = torch.nn.Linear(D_in, D_out).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

# amp.initialize must see the bare model; the DistributedDataParallel wrapper
# is applied afterwards (see below).
model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

if args.distributed:
    # FOR DISTRIBUTED:  After amp.initialize, wrap the model with
    # apex.parallel.DistributedDataParallel.
    model = DistributedDataParallel(model)
    # torch.nn.parallel.DistributedDataParallel is also fine, with some added args:
    # model = torch.nn.parallel.DistributedDataParallel(model,
    #                                                   device_ids=[args.local_rank],
    #                                                   output_device=args.local_rank)

loss_fn = torch.nn.MSELoss()

# 500 optimization steps on the same fake batch; the backward pass goes through
# amp.scale_loss exactly as in single-process use.
for t in range(500):
    optimizer.zero_grad()
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    with amp.scale_loss(loss, optimizer) as scaled_loss:
        scaled_loss.backward()
    optimizer.step()

# Only the process with local_rank 0 reports (this is also the default value
# of --local_rank, so a non-distributed run prints as well).
if args.local_rank == 0:
    print("final loss = ", loss)


================================================
FILE: apex/examples/simple/distributed/run.sh
================================================
#!/bin/bash
# Start distributed_data_parallel.py through the torch.distributed.launch
# utility with two processes on this node (--nproc_per_node=2).
python -m torch.distributed.launch --nproc_per_node=2 distributed_data_parallel.py


================================================
FILE: apex/setup.py
================================================
import torch
from setuptools import setup, find_packages
import subprocess

import sys

# A missing GPU is only worth a warning: the build may be a cross-compile.
if not torch.cuda.is_available():
    print("\nWarning: Torch did not find available GPUs on this system.\n",
          "If your intention is to cross-compile, this is not an error.\n")

print("torch.__version__  = ", torch.__version__)
# Only the leading "major.minor" components are needed for the checks below.
_version_fields = torch.__version__.split('.')
TORCH_MAJOR = int(_version_fields[0])
TORCH_MINOR = int(_version_fields[1])

if (TORCH_MAJOR, TORCH_MINOR) < (0, 4):
    raise RuntimeError("Apex requires Pytorch 0.4 or newer.\n" +
                       "The latest stable release can be obtained from https://pytorch.org/")

# Filled in below depending on the extension flags found on the command line,
# then handed to setup().
cmdclass = {}
ext_modules = []

# Either extension flag needs Pytorch's BuildExtension command.
if any(flag in sys.argv for flag in ("--cpp_ext", "--cuda_ext")):
    if TORCH_MAJOR == 0:
        raise RuntimeError("--cpp_ext requires Pytorch 1.0 or later, "
                           "found torch.__version__ = {}".format(torch.__version__))
    from torch.utils.cpp_extension import BuildExtension
    cmdclass['build_ext'] = BuildExtension

if "--cpp_ext" in sys.argv:
    from torch.utils.cpp_extension import CppExtension
    # Consume the custom flag so that setuptools never sees it.
    sys.argv.remove("--cpp_ext")
    apex_c_sources = ['csrc/flatten_unflatten.cpp']
    ext_modules.append(CppExtension('apex_C', apex_c_sources))

def check_cuda_torch_binary_vs_bare_metal(cuda_dir):
    """Verify that the local nvcc matches the Cuda version Pytorch was built with.

    Runs ``<cuda_dir>/bin/nvcc -V``, extracts the ``release <major>.<minor>``
    number it reports and compares it with ``torch.version.cuda``.

    Args:
        cuda_dir: Root of the Cuda toolkit installation, i.e. the directory
            that contains ``bin/nvcc``.

    Raises:
        RuntimeError: If no release number can be found in the nvcc output,
            if this Pytorch build reports no Cuda version, or if the
            major/minor versions differ.
    """
    import re  # only needed for parsing the nvcc banner

    raw_output = subprocess.check_output([cuda_dir + "/bin/nvcc", "-V"], universal_newlines=True)
    # Match the complete numeric components.  Slicing the token (e.g. taking
    # only the first character of the minor part) would truncate multi-digit
    # minor versions.
    release = re.search(r"release\s+(\d+)\.(\d+)", raw_output)
    if release is None:
        raise RuntimeError("Could not find a Cuda release number in the output of " +
                           cuda_dir + "/bin/nvcc -V:\n" + raw_output)
    bare_metal_major, bare_metal_minor = release.groups()

    # torch.version.cuda is None for Pytorch builds without Cuda support; fail
    # with an explicit message instead of an AttributeError on None.
    if torch.version.cuda is None:
        raise RuntimeError("Cuda extensions were requested, but this Pytorch binary was not " +
                           "compiled with Cuda support (torch.version.cuda is None).")
    torch_binary_version = torch.version.cuda.split(".")
    torch_binary_major = torch_binary_version[0]
    torch_binary_minor = torch_binary_version[1]

    print("\nCompiling cuda extensions with")
    print(raw_output + "from " + cuda_dir + "/bin\n")

    if (bare_metal_major != torch_binary_major) or (bare_metal_minor != torch_binary_minor):
        raise RuntimeError("Cuda extensions are being compiled with a version of Cuda that does " +
                           "not match the version used to compile Pytorch binaries.  " +
                           "Pytorch binaries were compiled with Cuda {}.\n".format(torch.version.cuda) +
                           "In some cases, a minor-version mismatch will not cause later errors:  " +
                           "https://github.com/NVIDIA/apex/pull/323#discussion_r287021798.  "
                           "You can try commenting out this check (at your own risk).")

# Register the Cuda extensions when --cuda_ext was passed on the command line.
if "--cuda_ext" in sys.argv:
    from torch.utils.cpp_extension import CUDAExtension
    # Consume the custom flag so that setuptools never sees it.
    sys.argv.remove("--cuda_ext")

    if torch.utils.cpp_extension.CUDA_HOME is None:
        raise RuntimeError("--cuda_ext was requested, but nvcc was not found.  Are you sure your environment has nvcc available?  If you're installing within a container from https://hub.docker.com/r/pytorch/pytorch, only images whose names contain 'devel' will provide nvcc.")
    else:
        # Raises if the toolkit's nvcc and the Pytorch binary disagree on the Cuda version.
        check_cuda_torch_binary_vs_bare_metal(torch.utils.cpp_extension.CUDA_HOME)

        # Set up macros for forward/backward compatibility hack around
        # https://github.com/pytorch/pytorch/commit/4404762d7dd955383acee92e6f06b48144a0742e
        # The macro is only passed to the fused_layer_norm_cuda extension below.
        version_ge_1_1 = []
        if (TORCH_MAJOR > 1) or (TORCH_MAJOR == 1 and TORCH_MINOR > 0):
            version_ge_1_1 = ['-DVERSION_GE_1_1']

        # Multi-tensor kernels (scale, axpby, l2norm, lamb stages).
        ext_modules.append(
            CUDAExtension(name='amp_C',
                          sources=['csrc/amp_C_frontend.cpp',
                                   'csrc/multi_tensor_scale_kernel.cu',
                                   'csrc/multi_tensor_axpby_kernel.cu',
                                   'csrc/multi_tensor_l2norm_kernel.cu',
                                   'csrc/multi_tensor_lamb_stage_1.cu',
                                   'csrc/multi_tensor_lamb_stage_2.cu'],
                          extra_compile_args={'cxx': ['-O3'],
                                              'nvcc':['-lineinfo',
                                                      '-O3',
                                                      # '--resource-usage',
                                                      '--use_fast_math']}))
        # Fused Adam optimizer kernel.
        ext_modules.append(
            CUDAExtension(name='fused_adam_cuda',
                          sources=['csrc/fused_adam_cuda.cpp',
                                   'csrc/fused_adam_cuda_kernel.cu'],
                          extra_compile_args={'cxx': ['-O3',],
                                              'nvcc':['-O3',
                                                      '--use_fast_math']}))
        # Synchronized batch norm kernels; built with default compile flags.
        ext_modules.append(
            CUDAExtension(name='syncbn',
                          sources=['csrc/syncbn.cpp',
                                   'csrc/welford.cu']))
        # Fused layer norm; the only extension that receives version_ge_1_1.
        ext_modules.append(
            CUDAExtension(name='fused_layer_norm_cuda',
                          sources=['csrc/layer_norm_cuda.cpp',
                                   'csrc/layer_norm_cuda_kernel.cu'],
                          extra_compile_args={'cxx': ['-O3'] + version_ge_1_1,
                                              'nvcc':['-maxrregcount=50',
                                                      '-O3',
                                                      '--use_fast_math'] + version_ge_1_1}))

# Package metadata.  ext_modules/cmdclass are empty unless --cpp_ext and/or
# --cuda_ext were given on the command line (handled above).
setup(
    name='apex',
    version='0.1',
    # Exclude build artifacts, native sources and non-library directories from
    # the installed packages ('tests' used to be listed twice).
    packages=find_packages(exclude=('build',
                                    'csrc',
                                    'include',
                                    'tests',
                                    'dist',
                                    'docs',
                                    'examples',
                                    'apex.egg-info',)),
    description='PyTorch Extensions written by NVIDIA',
    ext_modules=ext_modules,
    cmdclass=cmdclass,
)


================================================
FILE: apex/tests/L0/run_amp/__init__.py
================================================


================================================
FILE: apex/tests/L0/run_amp/test_add_param_group.py
================================================
import unittest

import functools as ft
import itertools as it

from apex import amp
from apex.amp import _amp_state
import torch
from torch import nn
import torch.nn.functional as F
from torch.nn import Parameter

from utils import common_init, HALF, FLOAT,\
    ALWAYS_HALF, ALWAYS_FLOAT, MATCH_INPUT

class MyModel(torch.nn.Module):
    """Tiny two-parameter model with one fp32 and one fp16 weight.

    ``unique`` offsets the initial values so that separate instances start
    from different weights.
    """

    def __init__(self, unique):
        super().__init__()
        fp32_ramp = torch.arange(2, device='cuda', dtype=torch.float32)
        fp16_ramp = torch.arange(2, device='cuda', dtype=torch.float16)
        self.weight0 = Parameter(unique + fp32_ramp)
        self.weight1 = Parameter(1. + unique + fp16_ramp)

    @staticmethod
    def ops(input, weight0, weight1):
        """Multiply ``input`` by both weights (cast to float) and sum the result."""
        scaled = input * weight0.float()
        scaled = scaled * weight1.float()
        return scaled.sum()

    def forward(self, input):
        return self.ops(input, self.weight0, self.weight1)


# Abandon all hope, ye who enter here.


class TestAddParamGroup(unittest.TestCase):
    """Checks that ``optimizer.add_param_group`` works after ``amp.initialize``.

    For every combination of options, a plain (non-amp) run produces reference
    parameters; the same sequence of steps is then repeated through amp and
    the final parameters are compared with the reference.
    """
    def setUp(self):
        self.x = torch.ones((2), device='cuda', dtype=torch.float32)
        # common_init comes from the local utils module (not visible here).
        common_init(self)

    def tearDown(self):
        pass

    def zero_grad(self, models, optimizer, how_to_zero):
        """Clear gradients using one of three strategies.

        ``"none"`` sets every ``param.grad`` to None, ``"model"`` calls
        ``model.zero_grad()`` on each model and ``"optimizer"`` calls
        ``optimizer.zero_grad()``.  Any other value does nothing.
        """
        if how_to_zero == "none":
            for model in models:
                for param in model.parameters():
                    param.grad = None
        elif how_to_zero == "model":
            for model in models:
                model.zero_grad()
        elif how_to_zero == "optimizer":
            optimizer.zero_grad()

    def test_add_param_group(self):
        for opt_level in ("O0", "O1", "O2", "O3"):
          for zero_before_add in (True, False):
            for try_accumulation in (True, False):
              # ---- Reference run without amp ----
              model0 = MyModel(1)
              model1 = MyModel(2)

              optimizer = torch.optim.SGD([{'params' : model0.parameters(), 'lr' : 0.25}],
                                          momentum=0.125)

              # One step with only model0's parameters registered.
              optimizer.zero_grad()
              loss = model0(self.x)
              loss.backward()
              optimizer.step()

              # Add model1's parameters, zeroing grads either before or after.
              if zero_before_add:
                  optimizer.zero_grad()
              optimizer.add_param_group({'params' : model1.parameters(), 'lr' : 0.5})
              if not zero_before_add:
                  optimizer.zero_grad()

              # With try_accumulation a second backward pass accumulates into
              # the same gradients before the step.
              loss = model0(self.x) + model1(self.x)
              loss.backward(retain_graph=try_accumulation)
              if try_accumulation:
                  loss.backward()
              optimizer.step()

              # Once more to make sure the new params pick up momentums properly
              optimizer.zero_grad()
              loss = model0(self.x) + model1(self.x)
              loss.backward(retain_graph=try_accumulation)
              if try_accumulation:
                  loss.backward()
              optimizer.step()

              reference_params = [param.data.clone() for param in model0.parameters()] + \
                                 [param.data.clone() for param in model1.parameters()]

              # ---- Same sequence through amp, once per zeroing strategy ----
              for how_to_zero in "none", "model", "optimizer":
                  model0 = MyModel(1)
                  model1 = MyModel(2)

                  optimizer = torch.optim.SGD([{'params' : model0.parameters(), 'lr' : 0.25}],
                                              momentum=0.125)

                  # MyModel holds an fp16 parameter, so the flag is raised only
                  # around the initialize call and reset right after.
                  _amp_state.allow_incoming_model_not_fp32 = True
                  [model0, model1], optimizer = amp.initialize([model0, model1],
                      optimizer,
                      opt_level=opt_level,
                      verbosity=0,
                      cast_model_type=False)
                  _amp_state.allow_incoming_model_not_fp32 = False

                  # Pin the loss scale to a fixed value for this run.
                  _amp_state.loss_scalers[0]._loss_scale = 4.0

                  self.zero_grad([model0, model1], optimizer, how_to_zero)
                  loss = model0(self.x)
                  with amp.scale_loss(loss, optimizer) as scaled_loss:
                      scaled_loss.backward()
                  optimizer.step()

                  if zero_before_add:
                      self.zero_grad([model0, model1], optimizer, how_to_zero)
                  optimizer.add_param_group({'params' : model1.parameters(), 'lr' : 0.5})
                  if not zero_before_add:
                      self.zero_grad([model0, model1], optimizer, how_to_zero)

                  loss = model0(self.x) + model1(self.x)
                  with amp.scale_loss(loss, optimizer) as scaled_loss:
                      scaled_loss.backward(retain_graph=try_accumulation)
                  if try_accumulation:
                      with amp.scale_loss(loss, optimizer) as scaled_loss:
                          scaled_loss.backward()
                  optimizer.step()

                  # Once more to make sure the new params pick up momentums properly
                  self.zero_grad([model0, model1], optimizer, how_to_zero)
                  loss = model0(self.x) + model1(self.x)
                  with amp.scale_loss(loss, optimizer) as scaled_loss:
                      scaled_loss.backward(retain_graph=try_accumulation)
                  if try_accumulation:
                      with amp.scale_loss(loss, optimizer) as scaled_loss:
                          scaled_loss.backward()
                  optimizer.step()

                  final_params = [param.data.clone() for param in model0.parameters()] + \
                                 [param.data.clone() for param in model1.parameters()]

                  # Compare in the dtype of the amp-run parameter.
                  # NOTE(review): the failure message does not include try_accumulation.
                  for reference, final in zip(reference_params, final_params):
                      self.assertTrue(torch.allclose(reference.to(final.dtype), final),
                                      "opt_level = {}, how_to_zero = {}, zero_before_add = {}".format(
                                      opt_level, how_to_zero, zero_before_add))


# Allow running this test file directly as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: apex/tests/L0/run_amp/test_basic_casts.py
================================================
import unittest

import functools as ft
import itertools as it

from apex import amp
import torch
from torch import nn
import torch.nn.functional as F

from utils import common_init, HALF, FLOAT,\
    ALWAYS_HALF, ALWAYS_FLOAT, MATCH_INPUT

def run_layer_test(test_case, fns, expected, input_shape, test_backward=True):
    """Check the output (and optionally gradient) tensor type of each callable.

    Every callable in ``fns`` is applied to a fresh random tensor of shape
    ``input_shape`` for each dtype key of ``expected``; the type string of the
    result must equal the corresponding ``expected`` value.  With
    ``test_backward`` the gradient that reaches the input must have the type
    ``MATCH_INPUT`` lists for that dtype.
    """
    for fn in fns:
        for dtype, expected_type in expected.items():
            x = torch.randn(input_shape, dtype=dtype).requires_grad_()
            y = fn(x)
            test_case.assertEqual(y.type(), expected_type)
            if not test_backward:
                continue
            y.float().sum().backward()
            test_case.assertEqual(x.grad.type(), MATCH_INPUT[dtype])

class TestBasicCasts(unittest.TestCase):
    """Checks the output tensor type of common layers while amp is enabled.

    Each test passes the module form and/or the functional form of an op to
    ``run_layer_test`` together with one of the expectation tables imported
    from the local ``utils`` module (ALWAYS_HALF, ALWAYS_FLOAT, MATCH_INPUT).
    """
    def setUp(self):
        # Enable amp for each test; the handle is kept so tearDown can undo it.
        self.handle = amp.init(enabled=True)
        # presumably sets the size attributes (self.b, self.h, self.c, self.k)
        # used below -- TODO confirm against utils.common_init
        common_init(self)

    def tearDown(self):
        self.handle._deactivate()

    def test_linear_is_half(self):
        m = nn.Linear(self.h, self.h)
        f = ft.partial(F.linear, weight=m.weight, bias=m.bias)
        run_layer_test(self, [m, f], ALWAYS_HALF, (self.b, self.h))

    def test_conv2d_is_half(self):
        m = nn.Conv2d(self.c, self.c, self.k)
        f = ft.partial(F.conv2d, weight=m.weight, bias=m.bias)
        run_layer_test(self, [m, f], ALWAYS_HALF, (self.b, self.c, self.h, self.h))

    def test_softmax_is_float(self):
        m = nn.Softmax(dim=1)
        f = ft.partial(F.softmax, dim=1)
        run_layer_test(self, [m, f], ALWAYS_FLOAT, (self.b, self.h))

    def test_group_norm_is_float(self):
        m = nn.GroupNorm(num_groups=4, num_channels=self.c)
        run_layer_test(self, [m], ALWAYS_FLOAT, (self.b, self.c, self.h, self.h))

    def test_mse_loss_is_float(self):
        shape = (self.b, self.h)
        target = torch.randn(shape)
        mod = nn.MSELoss()
        m = lambda x: mod(x, target)
        # NOTE(review): `f` is built but only `m` is passed to run_layer_test.
        f = ft.partial(F.mse_loss, target=target)
        run_layer_test(self, [m], ALWAYS_FLOAT, shape)

    def test_relu_is_match(self):
        run_layer_test(self, [nn.ReLU(), F.relu], MATCH_INPUT, (self.b, self.h))

    def test_batch_norm_is_match(self):
        m = nn.BatchNorm2d(num_features=self.c)
        # NOTE(review): this training-mode `f` is never used; only `m` is
        # checked here and `f` is rebound below for the inference check.
        f = ft.partial(F.batch_norm, running_mean=m.running_mean, running_var=m.running_var,
                       weight=m.weight, bias=m.bias, training=True)
        run_layer_test(self, [m], MATCH_INPUT, (self.b, self.c, self.h, self.h))

        # Test forward-only for BN inference
        m.eval()
        f = ft.partial(F.batch_norm, running_mean=m.running_mean, running_var=m.running_var,
                       weight=m.weight, bias=m.bias, training=False)
        run_layer_test(self, [m, f], MATCH_INPUT, (self.b, self.c, self.h, self.h),
                            test_backward=False)

class TestBannedMethods(unittest.TestCase):
    """Checks how amp treats binary cross entropy on half-precision input:
    it raises by default and returns a float result with ``allow_banned=True``."""
    def setUp(self):
        # Enable amp for each test; the handle is kept so tearDown can undo it.
        self.handle = amp.init(enabled=True)
        common_init(self)

    def tearDown(self):
        self.handle._deactivate()

    def bce_common(self, assertion):
        """Call ``assertion(fn, x)`` for the module and functional BCE forms,
        each with a fresh random half-precision input ``x``."""
        shape = (self.b, self.h)
        target = torch.rand(shape)
        mod = nn.BCELoss()
        m = lambda x: mod(x, target)
        f = ft.partial(F.binary_cross_entropy, target=target)
        for fn in [m, f]:
            x = torch.rand(shape, dtype=torch.half)
            assertion(fn, x)

    def test_bce_raises_by_default(self):
        assertion = lambda fn, x: self.assertRaises(NotImplementedError, fn, x)
        self.bce_common(assertion)

    def test_bce_is_float_with_allow_banned(self):
        # Replace the handle created in setUp with one that allows banned functions.
        self.handle._deactivate()
        self.handle = amp.init(enabled=True, allow_banned=True)
        assertion = lambda fn, x: self.assertEqual(fn(x).type(), FLOAT)
        self.bce_common(assertion)

class TestTensorCasts(unittest.TestCase):
    """Checks the output tensor type of Tensor methods and operators (matmul,
    pow, cpu, sum) while amp is enabled."""
    def setUp(self):
        # Enable amp for each test; the handle is kept so tearDown can undo it.
        self.handle = amp.init(enabled=True)
        common_init(self)

    def tearDown(self):
        self.handle._deactivate()

    def test_matmul_method_is_half(self):
        other = torch.randn(self.h, self.h)
        # Exercise the tensor under test on both sides of the product.
        lhs = lambda x: x.matmul(other)
        rhs = lambda x: other.matmul(x)
        run_layer_test(self, [lhs, rhs], ALWAYS_HALF, (self.h, self.h))

    def test_matmul_op_is_half(self):
        other = torch.randn(self.h, self.h)
        # Same as above, but through the @ operator.
        lhs = lambda x: x @ other
        rhs = lambda x: other @ x
        run_layer_test(self, [lhs, rhs], ALWAYS_HALF, (self.h, self.h))

    def test_pow_method_is_float(self):
        fn = lambda x: x.pow(2.)
        run_layer_test(self, [fn], ALWAYS_FLOAT, (self.b, self.h))

    def test_pow_op_is_float(self):
        fn = lambda x: x ** 2.
        run_layer_test(self, [fn], ALWAYS_FLOAT, (self.b, self.h))

    def test_cpu_is_float(self):
        fn = lambda x: x.cpu()
        # Local expectation table: a CPU float tensor for both input dtypes.
        always_cpu_float = {torch.float: 'torch.FloatTensor',
                            torch.half: 'torch.FloatTensor'}
        run_layer_test(self, [fn], always_cpu_float, (self.b, self.h))

    def test_sum_is_float(self):
        fn = lambda x: x.sum()
        run_layer_test(self, [fn], ALWAYS_FLOAT, (self.b, self.h))

class TestDisabledCasts(unittest.TestCase):
    """Checks that no casting happens when amp is initialized with ``enabled=False``."""
    def setUp(self):
        # NOTE(review): unlike the other test classes in this file, there is
        # no tearDown that deactivates the handle -- confirm this is intended.
        self.handle = amp.init(enabled=False)
        common_init(self)

    def test_disabled_linear(self):
        m = nn.Linear(self.h, self.h)
        f = ft.partial(F.linear, weight=m.weight, bias=m.bias)
        input_shape = (self.b, self.h)

        for fn in [m, f]:
            # Float input: output and gradient stay float.
            x = torch.randn(input_shape, dtype=torch.float).requires_grad_()
            y = fn(x)
            self.assertEqual(y.type(), FLOAT)
            y.sum().backward()
            self.assertEqual(x.grad.type(), FLOAT)

            # Half input is expected to fail with a RuntimeError.
            x = torch.randn(input_shape, dtype=torch.half).requires_grad_()
            self.assertRaises(RuntimeError, fn, x)

    # TODO: maybe more tests on disabled casting?

# Allow running this test file directly as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: apex/tests/L0/run_amp/test_cache.py
================================================
import unittest

import functools as ft
import itertools as it

from apex import amp
from apex.amp import _amp_state
import torch
from torch import nn
import torch.nn.functional as F

from utils import common_init, HALF, FLOAT,\
    ALWAYS_HALF, ALWAYS_FLOAT, MATCH_INPUT

def get_reference_grad(i, w, ops):
    """Return the fp32 gradient of ``ops(i, w)`` with respect to ``w``.

    The computation runs on detached float copies, so neither ``i`` nor ``w``
    (nor their ``.grad``) is touched.
    """
    # Fresh tensors cannot already be in the cache, and because they are not
    # torch.nn.Parameters they are guaranteed not to use the cache either.
    input_fp32 = i.detach().clone().float()
    weight_fp32 = w.detach().clone().float()
    weight_fp32.requires_grad_()
    ops(input_fp32, weight_fp32).backward()
    return weight_fp32.grad

class WhitelistModule(torch.nn.Module):
    """Module whose forward pass uses its single 8x8 weight in two matrix products."""

    def __init__(self, dtype):
        super().__init__()
        values = torch.arange(8*8, device='cuda', dtype=dtype)
        self.weight = torch.nn.Parameter(values.view(8, 8))

    @staticmethod
    def ops(input, weight):
        """Return ``sum((input @ weight) @ weight)``; the weight is used twice on purpose."""
        first = input.mm(weight)
        second = first.mm(weight)
        return second.sum()

    def forward(self, input):
        return self.ops(input, self.weight)


class BlacklistModule(torch.nn.Module):
    """Module whose forward pass squares its single 2x8 weight twice with ``torch.pow``."""

    def __init__(self, dtype):
        super().__init__()
        values = torch.arange(2*8, device='cuda', dtype=dtype)
        self.weight = torch.nn.Parameter(values.view(2, 8))

    @staticmethod
    def ops(input, weight):
        """Return ``sum(input + weight**2 + weight**2)``.

        ``torch.pow`` is deliberately called twice on the same weight, in the
        same order as the single-expression form.
        """
        partial = input + torch.pow(weight, 2)
        total = partial + torch.pow(weight, 2)
        return total.sum()

    def forward(self, input):
        return self.ops(input, self.weight)


class PromoteModule(torch.nn.Module):
    """Module whose forward pass multiplies the input by its single 2x8 weight twice."""

    def __init__(self, dtype):
        super().__init__()
        values = torch.arange(2*8, device='cuda', dtype=dtype)
        self.weight = torch.nn.Parameter(values.view(2, 8))

    @staticmethod
    def ops(input, weight):
        """Return ``sum((input * weight) * weight)``; the weight is used twice on purpose."""
        scaled = input * weight
        scaled = scaled * weight
        return scaled.sum()

    def forward(self, input):
        return self.ops(input, self.weight)

class TestCache(unittest.TestCase):
    def setUp(self):
        self.x = torch.ones((2, 8), device='cuda', dtype=torch.float32)
        common_init(self)

    def tearDown(self):
        pass

    def train_eval_train_test(self, module, t):
        model = module(t).cuda()
        optimizer = torch.optim.SGD(model.parameters(), lr=1.0)

        _amp_state.allow_incoming_model_not_fp32 = True
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)
        _amp_state.allow_incoming_model_not_fp32 = False
        
        def training_step():
            for param in model.parameters():
                param.grad = None
        
            loss = model(self.x).sum()
            _amp_state.loss_scalers[0]._loss_scale = 4.0
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        
            self.assertEqual(len([p.grad for p in model.parameters() if p.grad is not None]), 1)
            self.assertEqual(model.weight.grad.type(), model.weight.type())
        
            reference_grad = get_reference_grad(self.x, model.weight, model.ops)
        
            # Currently there's no difference in the allclose calls, so no need for branching,
            # but I'm keeping this in case we want different tolerances for fp16 and fp32 checks. 
            if model.weight.grad.type() == "torch.cuda.HalfTensor":
                self.assertTrue(torch.allclose(model.weight.grad.float(), reference_grad))
            elif model.weight.grad.type() == "torch.cuda.FloatTensor":
                self.assertTrue(torch.allclose(model.weight.grad.float(), reference_grad))
            else:
                raise RuntimeError("model.weight.grad.type = {}".format(model.weight.grad.type()))

            model.weight.data -= 1.
        
        # Simulates first epoch
        training_step()
        
        # Simulates eval
        with torch.no_grad():
            loss = model(self.x).sum()
        
        # Simulates resuming training after eval
        training_step()

        _amp_state.handle._deactivate()
   
    # I could easily have these as a set of for loops in a single test,
    # instead of going for granularity.
    def test_whitelist_module_fp16_weight(self):
        self.train_eval_train_test(WhitelistModule, torch.float16)

    def test_whitelist_module_fp32_weight(self):
        self.train_eval_train_test(WhitelistModule, torch.float32)

    def test_blacklist_module_fp16_weight(self):
        self.train_eval_train_test(BlacklistModule, torch.float16)

    def test_blacklist_module_fp32_weight(self):
        self.train_eval_train_test(BlacklistModule, torch.float32)

    def test_promote_module_fp16_weight(self):
        self.train_eval_train_test(PromoteModule, torch.float16)

    def test_promote_module_fp32_weight(self):
        self.train_eval_train_test(PromoteModule, torch.float32)


# Run this file's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: apex/tests/L0/run_amp/test_multi_tensor_axpby.py
================================================
import unittest

import functools as ft
import itertools as it

from apex import amp
import torch
from torch import nn
import torch.nn.functional as F

from utils import common_init, HALF, FLOAT,\
    ALWAYS_HALF, ALWAYS_FLOAT, MATCH_INPUT

# The fused kernel comes from the amp_C extension module.  If it (or the
# MultiTensorApply wrapper) cannot be imported, record that in `disabled` so
# the test in this file is skipped instead of failing at import time.
try:
    import amp_C
    from amp_C import multi_tensor_axpby
    from apex.multi_tensor_apply import MultiTensorApply
    disabled = False
except ImportError as err:
    print("amp_C fused kernels unavailable, disabling TestMultiTensorAxpby.  ImportError was ", err)
    disabled = True


class TestMultiTensorAxpby(unittest.TestCase):
    """Fuzz test of the fused ``multi_tensor_axpby`` kernel.

    The kernel is run through ``MultiTensorApply`` over many tensor sizes,
    chunk sizes, tensor counts and fp32/fp16 dtype combinations.
    """

    def setUp(self):
        common_init(self)

        # Scalars handed to the kernel, and the fill values of the x / y tensors.
        self.a = 2.0
        self.b = 8.0
        self.xval = 4.0
        self.yval = 16.0
        self.overflow_buf = torch.cuda.IntTensor(1).zero_()
        # Value every output element is expected to hold.  Note that
        # 136 == a*xval + b*yval == 2*4 + 8*16.
        self.ref = torch.cuda.FloatTensor([136.0])

    def tearDown(self):
        pass

    # The tensor creation here is written for convenience, not speed.
    def axpby(self, sizea, sizeb, applier, repeat_tensors,
              x_type, y_type, out_type, inplace=False):
        """Run the kernel once and assert every output equals ``self.ref``.

        Args:
            sizea, sizeb: element counts of the two base tensors.
            applier: ``MultiTensorApply`` instance used to launch the kernel.
            repeat_tensors: how many (sizea, sizeb) tensor pairs go in each list.
            x_type, y_type, out_type: dtypes of the x, y and output lists.
            inplace: when true, the y list itself is passed as the output list.
        """
        self.overflow_buf.zero_()
        ones_a = torch.cuda.FloatTensor(sizea).fill_(1.0)
        ones_b = torch.cuda.FloatTensor(sizeb).fill_(1.0)

        # y tensors alternate a-sized / b-sized and are filled with yval.
        y_list = [base.clone().to(y_type)*self.yval
                  for _ in range(repeat_tensors)
                  for base in (ones_a, ones_b)]

        # x tensors are derived from y by rescaling yval down to xval.
        x_list = [y.clone().to(x_type)*(self.xval/self.yval) for y in y_list]

        # Out-of-place outputs start from a different value (3 * yval) so a
        # kernel that wrote nothing would not accidentally match self.ref.
        out_list = y_list if inplace else [y.clone().to(out_type)*3.0 for y in y_list]

        # NOTE(review): the meaning of the trailing -1 argument is defined by
        # the kernel and is not visible from this file.
        applier(multi_tensor_axpby, self.overflow_buf, [x_list, y_list, out_list], self.a, self.b, -1)

        failure_msg = "{} {} {} {} {} {} {}".format(sizea, sizeb, repeat_tensors,
                                                    x_type, y_type, out_type, inplace)
        expected = self.ref.to(out_type)
        self.assertTrue(all([torch.allclose(out, expected) for out in out_list]),
                        msg=failure_msg)
        self.assertTrue(self.overflow_buf.item() == 0,
                        msg=failure_msg)

    # TODO: inf/nan overflow-detection checks (a find_inf helper modelled on the
    # one in test_multi_tensor_scale.py) have not been ported to axpby yet.

    @unittest.skipIf(disabled, "amp_C is unavailable")
    def test_fuzz(self):
        input_size_pairs = (
            (7777*77, 555*555),
            (777, 555),
            (555, 2048*32+1),
            (2048*32+1, 555),
            (555, 2048*32),
            (2048*32, 555),
            (33333, 555),
            (555, 33333))
        appliers = (
            MultiTensorApply(2048*32),
            MultiTensorApply(333),
            MultiTensorApply(33333))
        repeat_tensors = (
            1,
            55)
        dtypes = (torch.float32, torch.float16)

        # Same iteration order as the equivalent nest of for-loops
        # (the rightmost factor varies fastest).
        combinations = it.product(input_size_pairs, appliers, repeat_tensors,
                                  dtypes, dtypes, dtypes, (True, False))
        for (sizea, sizeb), applier, repeat, x_type, y_type, out_type, inplace in combinations:
            # In-place runs reuse y as the output, so the dtypes must agree.
            if inplace and y_type is not out_type:
                continue
            self.axpby(sizea, sizeb, applier, repeat,
                       x_type, y_type, out_type, inplace=inplace)


# Run this file's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: apex/tests/L0/run_amp/test_multi_tensor_l2norm.py
================================================
import unittest

import functools as ft
import itertools as it

from apex import amp
import torch
from torch import nn
import torch.nn.functional as F

from utils import common_init, HALF, FLOAT,\
    ALWAYS_HALF, ALWAYS_FLOAT, MATCH_INPUT

# The fused kernel comes from the amp_C extension module.  If it (or the
# MultiTensorApply wrapper) cannot be imported, record that in `disabled` so
# the test in this file is skipped instead of failing at import time.
try:
    import amp_C
    from amp_C import multi_tensor_l2norm
    from apex.multi_tensor_apply import MultiTensorApply
    disabled = False
except ImportError as err:
    print("amp_C fused kernels unavailable, disabling TestMultiTensorL2Norm.  ImportError was ", err)
    disabled = True


class TestMultiTensorL2Norm(unittest.TestCase):
    """Fuzz test of the fused ``multi_tensor_l2norm`` kernel.

    The kernel's overall norm (and optionally its per-tensor norms) is
    compared with ``Tensor.norm()`` computed on fp32 tensors.
    """

    def setUp(self):
        common_init(self)
        # Every input element is filled with this value.
        self.val = 4.0
        self.overflow_buf = torch.cuda.IntTensor(1).zero_()

    def tearDown(self):
        pass

    # The tensor creation here is written for convenience, not speed.
    def l2norm(self, sizea, sizeb, applier, repeat_tensors, in_type, per_tensor):
        """Run the kernel once and check the norms it returns.

        Args:
            sizea, sizeb: element counts of the two base tensors.
            applier: ``MultiTensorApply`` instance used to launch the kernel.
            repeat_tensors: how many (sizea, sizeb) tensor pairs to pass.
            in_type: dtype of the tensors handed to the kernel.
            per_tensor: when true, also verify the per-tensor norms.
        """
        self.overflow_buf.zero_()
        a = torch.cuda.FloatTensor(sizea).fill_(self.val)
        b = torch.cuda.FloatTensor(sizeb).fill_(self.val)

        # Inputs alternate a-sized / b-sized: a, b, a, b, ...
        in_list = [src.clone().to(in_type)
                   for _ in range(repeat_tensors)
                   for src in (a, b)]

        # NOTE(review): the kernel is invoked with a trailing True whether or
        # not per_tensor is set, so per_tensor only decides if the second
        # return value is checked.  Confirm whether the per_tensor=False case
        # was meant to pass False here.
        norm, per_tensor_norms = applier(multi_tensor_l2norm, self.overflow_buf, [in_list], True)
        if per_tensor:
            # Expected norms of one (a, b) pair; reshaping the kernel output to
            # (-1, 2) lets allclose broadcast this pair across all repeats.
            normab = torch.cat((a.norm().view(1), b.norm().view(1)))
            per_tensor_norms = per_tensor_norms.view(-1, 2)

        # Norm of one flat fp32 tensor with as many elements as all inputs combined.
        reference = torch.cuda.FloatTensor((sizea + sizeb)*repeat_tensors).fill_(self.val).norm()

        self.assertTrue(torch.allclose(norm, reference))
        if per_tensor:
            self.assertTrue(torch.allclose(per_tensor_norms, normab))
        self.assertTrue(self.overflow_buf.item() == 0)

    @unittest.skipIf(disabled, "amp_C is unavailable")
    def test_fuzz(self):
        input_size_pairs = (
            (7777*77, 555*555),
            (777, 555),
            (555, 2048*32+1),
            (2048*32+1, 555),
            (555, 2048*32),
            (2048*32, 555),
            (33333, 555),
            (555, 33333))
        appliers = (
            MultiTensorApply(2048*32),
            MultiTensorApply(333),
            MultiTensorApply(33333))
        repeat_tensors = (
            1,
            55)

        # Same iteration order as the equivalent nest of for-loops
        # (the rightmost factor varies fastest).
        combinations = it.product(input_size_pairs, appliers, repeat_tensors,
                                  (torch.float32, torch.float16), (False, True))
        for (sizea, sizeb), applier, repeat, in_type, per_tensor in combinations:
            self.l2norm(sizea, sizeb, applier, repeat, in_type, per_tensor)


# Run this file's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: apex/tests/L0/run_amp/test_multi_tensor_scale.py
================================================
import unittest

import functools as ft
import itertools as it

from apex import amp
import torch
from torch import nn
import torch.nn.functional as F

from utils import common_init, HALF, FLOAT,\
    ALWAYS_HALF, ALWAYS_FLOAT, MATCH_INPUT

# The fused kernel comes from the amp_C extension module.  If it (or the
# MultiTensorApply wrapper) cannot be imported, record that in `disabled` so
# the test in this file is skipped instead of failing at import time.
try:
    import amp_C
    from amp_C import multi_tensor_scale
    from apex.multi_tensor_apply import MultiTensorApply
    disabled = False
except ImportError as err:
    print("amp_C fused kernels unavailable, disabling TestMultiTensorScale.  ImportError was ", err)
    disabled = True


class TestMultiTensorScale(unittest.TestCase):
    """Fuzz test of the fused ``multi_tensor_scale`` kernel.

    Covers plain downscaling and inf/nan detection through the overflow
    buffer, over many tensor sizes, chunk sizes, tensor counts and dtypes.
    """

    def setUp(self):
        common_init(self)
        # Inputs are filled with `scale` and the kernel is given 1/scale, so
        # the tests expect every output element to equal `ref` (1.0).
        self.scale = 4.0
        self.overflow_buf = torch.cuda.IntTensor(1).zero_()
        self.ref = torch.cuda.FloatTensor([1.0])

    def tearDown(self):
        pass

    # The tensor creation here is written for convenience, not speed.
    def _make_tensor_lists(self, sizea, sizeb, repeat_tensors, in_type, out_type, inplace):
        """Build and return ``(in_list, out_list)`` for one kernel launch.

        Outputs alternate a-sized / b-sized tensors filled with ``self.scale``.
        With ``inplace`` the very same list object serves as the input list;
        otherwise the inputs are clones of the outputs cast to ``in_type``.
        """
        a = torch.cuda.FloatTensor(sizea).fill_(self.scale)
        b = torch.cuda.FloatTensor(sizeb).fill_(self.scale)

        out_list = [src.clone().to(out_type)
                    for _ in range(repeat_tensors)
                    for src in (a, b)]

        if inplace:
            return out_list, out_list
        return [out.clone().to(in_type) for out in out_list], out_list

    def downscale(self, sizea, sizeb, applier, repeat_tensors, in_type, out_type, inplace=False):
        """Scale by ``1/self.scale`` and assert all outputs equal ``self.ref``."""
        self.overflow_buf.zero_()
        in_list, out_list = self._make_tensor_lists(
            sizea, sizeb, repeat_tensors, in_type, out_type, inplace)

        applier(multi_tensor_scale, self.overflow_buf, [in_list, out_list], 1./self.scale)

        expected = self.ref.to(out_type)
        self.assertTrue(all([torch.allclose(out, expected) for out in out_list]))
        self.assertTrue(self.overflow_buf.item() == 0)

    def find_inf(self, sizea, sizeb, applier, repeat_tensors, in_type, out_type, t, ind, val, inplace=False):
        """Assert the overflow buffer is set once an input holds ``val``.

        The kernel first runs on clean data.  The overflow buffer is then
        cleared, element ``ind`` of input tensor ``t`` is overwritten with
        ``val`` (an inf or nan), and the kernel runs again; afterwards the
        overflow buffer must be non-zero.
        """
        self.overflow_buf.zero_()
        in_list, out_list = self._make_tensor_lists(
            sizea, sizeb, repeat_tensors, in_type, out_type, inplace)

        applier(multi_tensor_scale, self.overflow_buf, [in_list, out_list], 1./self.scale)

        self.overflow_buf.zero_()
        in_list[t][ind] = val
        applier(multi_tensor_scale, self.overflow_buf, [in_list, out_list], 1./self.scale)
        self.assertTrue(self.overflow_buf.item())

    # Historical note from the original author: the fused kernel was said to
    # give a hard error when downscaling into fp16 output (described as the
    # desired behavior), so dedicated fp16->fp16 and fp32->fp16 tests were
    # left disabled.
    # NOTE(review): test_fuzz below does iterate out_type over torch.float16,
    # so that statement may be stale -- confirm.

    @unittest.skipIf(disabled, "amp_C is unavailable")
    def test_fuzz(self):
        input_size_pairs = (
            (7777*77, 555*555),
            (777, 555),
            (555, 2048*32+1),
            (2048*32+1, 555),
            (555, 2048*32),
            (2048*32, 555),
            (33333, 555),
            (555, 33333))
        appliers = (
            MultiTensorApply(2048*32),
            MultiTensorApply(333),
            MultiTensorApply(33333))
        repeat_tensors = (
            1,
            55)
        dtypes = (torch.float32, torch.float16)

        # Same iteration order as the equivalent nest of for-loops
        # (the rightmost factor varies fastest).
        combinations = it.product(input_size_pairs, appliers, repeat_tensors,
                                  dtypes, dtypes, (True, False))
        for (sizea, sizeb), applier, repeat, in_type, out_type, inplace in combinations:
            # In-place runs share one list for input and output, so the
            # dtypes must agree.
            if inplace and out_type is not in_type:
                continue

            self.downscale(sizea, sizeb, applier, repeat, in_type, out_type, inplace=inplace)

            # (tensor index, element index, bad value): first element of the
            # first tensor, last element of the last (b-sized) tensor, and the
            # middle element of an a-sized tensor.
            poison_cases = (
                (0, 0, float('nan')),
                (2*repeat-1, sizeb-1, float('inf')),
                (2*(repeat//2), sizea//2, float('inf')))
            for t, ind, val in poison_cases:
                self.find_inf(sizea, sizeb, applier, repeat, in_type, out_type,
                              t, ind, val, inplace=inplace)


# Run this file's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: apex/tests/L0/run_amp/test_multiple_models_optimizers_losses.py
================================================
import unittest

import functools as ft
import itertools as it

from apex import amp
from apex.amp import _amp_state
import torch
from torch import nn
import torch.nn.functional as F
from torch.nn import Parameter

from utils import common_init, HALF, FLOAT,\
    ALWAYS_HALF, ALWAYS_FLOAT, MATCH_INPUT

class MyModel(torch.nn.Module):
    """Two-parameter toy model with one fp32 and one fp16 weight.

    ``weight0`` is float32 and ``weight1`` is float16; both are length-2 CUDA
    parameters offset by ``unique`` so separately built models have
    different values.
    """
    def __init__(self, unique):
        super(MyModel, self).__init__()
        fp32_values = unique + torch.arange(2, device='cuda', dtype=torch.float32)
        self.weight0 = Parameter(fp32_values)
        fp16_values = 1. + unique + torch.arange(2, device='cuda', dtype=torch.float16)
        self.weight1 = Parameter(fp16_values)

    @staticmethod
    def ops(input, weight0, weight1):
        # Both weights are cast to float before each elementwise multiply.
        scaled_by_w0 = input*(weight0.float())
        scaled_by_both = scaled_by_w0*(weight1.float())
        return scaled_by_both.sum()

    def forward(self, input):
        return self.ops(input, self.weight0, self.weight1)

# Abandon all hope, ye who enter here.

# This is hands down the ugliest code I have ever written, but it succeeds in testing
# multiple models/optimizers/losses fairly thoroughly.  Many of the different test cases
# require slightly divergent code in a way that seems near-impossible to genericize into a simple
# cross product or nested loops.

class TestMultipleModelsOptimizersLosses(unittest.TestCase):
    def setUp(self):
        # Length-2 fp32 CUDA input shared by every test; created before the
        # common per-test initialisation runs.
        self.x = torch.ones(2, device='cuda', dtype=torch.float32)
        common_init(self)

    def tearDown(self):
        # Nothing to clean up between tests.
        return None

    def test_2models2losses1optimizer(self):
        # Two models, two losses, one shared SGD optimizer (one param group per
        # model).  A reference run without amp records per-iteration gradients
        # and final parameters; the same training is then repeated under amp
        # for many configurations and compared against that reference.
        model0 = MyModel(1)
        model1 = MyModel(2)

        optimizer = torch.optim.SGD([{'params' : model0.parameters(), 'lr' : 0.25},
                                     {'params' : model1.parameters(), 'lr' : 0.5}],
                                    momentum=0.125)

        # Reference run: two plain PyTorch iterations, no amp involved.
        reference_grads = []
        for i in range(2):
            optimizer.zero_grad()
            loss0 = model0(self.x)
            loss1 = model1(self.x)
            loss0.backward()
            loss1.backward()

            # Gradients are stored in model0-then-model1 parameter order.
            reference_grads.append([param.grad.data.clone() for param in model0.parameters()] +
                                   [param.grad.data.clone() for param in model1.parameters()])

            optimizer.step()

        final_params = [param.data.clone() for param in model0.parameters()] + \
                       [param.data.clone() for param in model1.parameters()]

        # Sweep: opt level x how gradients are cleared x one-or-two loss scalers.
        for opt_level in ("O0", "O1", "O2", "O3"):
          for how_to_zero in ("none", "model", "optimizer"):
            for use_multiple_loss_scalers in (True, False):
              # Inf injection is only exercised for O1 and O2.  -1 never
              # equals a loop index below, so it means "inject nothing".
              if opt_level == "O1" or opt_level == "O2":
                  inject_inf_iters = (-1, 0, 1)
              else:
                  inject_inf_iters = (-1,)

              for inject_inf in inject_inf_iters:
                if inject_inf >= 0:
                   inject_inf_locs = ("fp16", "fp32")
                   which_backwards = (0, 1)
                else:
                   # Placeholders so the inner loops run exactly once; "fdsa"
                   # matches neither "fp32" nor "fp16" below.
                   inject_inf_locs = ("fdsa",)
                   which_backwards = (None,)

                for inject_inf_loc in inject_inf_locs:
                  for which_backward in which_backwards:
                      # With a single loss scaler both losses use loss_id 0.
                      if use_multiple_loss_scalers:
                          num_losses = 2
                          loss_ids = [0, 1]
                      else:
                          num_losses = 1
                          loss_ids = [0, 0]

                      # One extra iteration when injecting, so that two
                      # non-injected iterations remain to compare against the
                      # two reference iterations.
                      if inject_inf >= 0:
                          iters = 3
                      else:
                          iters = 2

                      # Fresh models and optimizer for every configuration.
                      model0 = MyModel(1)
                      model1 = MyModel(2)

                      models = [model0, model1]

                      optimizer = torch.optim.SGD([{'params' : model0.parameters(), 'lr' : 0.25},
                                                   {'params' : model1.parameters(), 'lr' : 0.5}],
                                                  momentum=0.125)

                      # MyModel holds an fp16 parameter, so the incoming-model
                      # fp32 check is relaxed only for the initialize call.
                      _amp_state.allow_incoming_model_not_fp32 = True
                      [model0, model1], optimizer = amp.initialize(
                          [model0, model1],
                          optimizer,
                          opt_level=opt_level,
                          verbosity=0,
                          cast_model_type=False,
                          num_losses=num_losses)
                      _amp_state.allow_incoming_model_not_fp32 = False

                      # Pin the loss scales (different per scaler when two are used).
                      _amp_state.loss_scalers[0]._loss_scale = 4.0
                      if use_multiple_loss_scalers:
                          _amp_state.loss_scalers[1]._loss_scale = 16.0

                      # Counts non-injected iterations; indexes reference_grads.
                      unskipped = 0
                      for i in range(iters):
                          # Clear gradients using the strategy under test.
                          if how_to_zero == "none":
                              for model in models:
                                  for param in model.parameters():
                                      param.grad = None
                          elif how_to_zero == "model":
                              for model in models:
                                  model.zero_grad()
                          else:
                              optimizer.zero_grad()

                          loss0 = model0(self.x)
                          loss1 = model1(self.x)

                          # The inf is written into the gradient inside the
                          # scale_loss block, after backward() and before the
                          # context manager exits.  weight0 is the fp32
                          # parameter and weight1 the fp16 one.
                          with amp.scale_loss(loss0, optimizer, loss_id=loss_ids[0]) as scaled_loss:
                              scaled_loss.backward()
                              if i == inject_inf and which_backward == 0:
                                  if inject_inf_loc == "fp32":
                                      model0.weight0.grad[0] = float('inf')
                                  elif inject_inf_loc == "fp16":
                                      model0.weight1.grad[0] = float('inf')
                          with amp.scale_loss(loss1, optimizer, loss_id=loss_ids[1]) as scaled_loss:
                              scaled_loss.backward()
                              if i == inject_inf and which_backward == 1:
                                  if inject_inf_loc == "fp32":
                                      model1.weight0.grad[0] = float('inf')
                                  elif inject_inf_loc == "fp16":
                                      model1.weight1.grad[0] = float('inf')

                          # Gradients are only compared on non-injected iterations.
                          # NOTE(review): presumably amp skips the optimizer step
                          # on the injected iteration, which is why the final
                          # parameters are still expected to match the
                          # two-iteration reference -- confirm.
                          if i != inject_inf:
                              for param, reference_grad in zip(amp.master_params(optimizer),
                                                               reference_grads[unskipped]):
                                  self.assertTrue(torch.allclose(param.grad.float(), reference_grad.float()))
                              unskipped += 1
                          optimizer.step()

                      # Final model parameters must match the reference run and
                      # the optimizer's master parameters.
                      model_params = [p for p in model0.parameters()] + [p for p in model1.parameters()]
                      for model, master, reference in zip(
                              model_params,
                              amp.master_params(optimizer),
                              final_params):
                          self.assertTrue(torch.allclose(model, reference))
                          self.assertTrue(torch.allclose(model, master.to(model.dtype)))

                      # Only O1 deactivates the amp handle before the next configuration.
                      if opt_level == "O1":
                          _amp_state.handle._deactivate()

    def test_3models2losses1optimizer(self):
        # Three models, two losses, one shared SGD optimizer (one param group
        # per model).  model2 contributes to both losses.  A reference run
        # without amp records per-iteration gradients and final parameters;
        # the same training is then repeated under amp for many
        # configurations and compared against that reference.

        model0 = MyModel(1)
        model1 = MyModel(2)
        model2 = MyModel(3)

        optimizer = torch.optim.SGD([{'params' : model0.parameters(), 'lr' : 0.25},
                                     {'params' : model1.parameters(), 'lr' : 0.5},
                                     {'params' : model2.parameters(), 'lr' : 0.125}],
                                     momentum=0.125)

        # Reference run: two plain PyTorch iterations, no amp involved.
        reference_grads = []
        for i in range(2):
            optimizer.zero_grad()
            loss0 = model0(self.x) + model2(self.x)
            loss1 = model1(self.x) + model2(self.x)
            loss0.backward()
            loss1.backward()

            # Gradients are stored in model0, model1, model2 parameter order.
            reference_grads.append([param.grad.data.clone() for param in model0.parameters()] +
                                   [param.grad.data.clone() for param in model1.parameters()] +
                                   [param.grad.data.clone() for param in model2.parameters()])

            optimizer.step()


        final_params = [param.data.clone() for param in model0.parameters()] + \
                       [param.data.clone() for param in model1.parameters()] + \
                       [param.data.clone() for param in model2.parameters()]

        # Sweep: opt level x how gradients are cleared x one-or-two loss scalers.
        for opt_level in ("O0", "O1", "O2", "O3"):
          for how_to_zero in ("none", "model", "optimizer"):
            for use_multiple_loss_scalers in (True, False):
              # Inf injection is only exercised for O1 and O2.  -1 never
              # equals a loop index below, so it means "inject nothing".
              if opt_level == "O1" or opt_level == "O2":
                  inject_inf_iters = (-1, 0, 1)
              else:
                  inject_inf_iters = (-1,)

              for inject_inf in inject_inf_iters:
                if inject_inf >= 0:
                   inject_inf_locs = ("fp16", "fp32")
                   which_backwards = (0, 1)
                else:
                   # Placeholders so the inner loops run exactly once; "fdsa"
                   # matches neither "fp32" nor "fp16" below.
                   inject_inf_locs = ("fdsa",)
                   which_backwards = (None,)

                for inject_inf_loc in inject_inf_locs:
                  for which_backward in which_backwards:
                    # With a single loss scaler both losses use loss_id 0.
                    if use_multiple_loss_scalers:
                        num_losses = 2
                        loss_ids = [0, 1]
                    else:
                        num_losses = 1
                        loss_ids = [0, 0]

                    # One extra iteration when injecting, so that two
                    # non-injected iterations remain to compare against the
                    # two reference iterations.  The inf goes into a model
                    # that feeds the chosen loss: loss 0 uses models 0 and 2,
                    # loss 1 uses models 1 and 2.
                    if inject_inf >= 0:
                        iters = 3
                        if which_backward == 0:
                            which_models = (0, 2)
                        elif which_backward == 1:
                            which_models = (1, 2)
                    else:
                        iters = 2
                        which_models = (None,)

                    for which_model in which_models:
                        # Fresh models and optimizer for every configuration.
                        model0 = MyModel(1)
                        model1 = MyModel(2)
                        model2 = MyModel(3)

                        models = [model0, model1, model2]

                        optimizer = torch.optim.SGD([{'params' : model0.parameters(), 'lr' : 0.25},
                                                     {'params' : model1.parameters(), 'lr' : 0.5},
                                                     {'params' : model2.parameters(), 'lr' : 0.125}],
                                                     momentum=0.125)

                        # MyModel holds an fp16 parameter, so the incoming-model
                        # fp32 check is relaxed only for the initialize call.
                        _amp_state.allow_incoming_model_not_fp32 = True
                        [model0, model1, model2], optimizer = amp.initialize(
                            [model0, model1, model2],
                            optimizer,
                            opt_level=opt_level,
                            verbosity=0,
                            cast_model_type=False,
                            num_losses=num_losses)
                        _amp_state.allow_incoming_model_not_fp32 = False

                        # Pin the loss scales (different per scaler when two are used).
                        _amp_state.loss_scalers[0]._loss_scale = 4.0
                        if use_multiple_loss_scalers:
                            _amp_state.loss_scalers[1]._loss_scale = 16.0

                        # Counts non-injected iterations; indexes reference_grads.
                        unskipped = 0
                        for i in range(iters):
                            # Clear gradients using the strategy under test.
                            if how_to_zero == "none":
                                for model in models:
                                    for param in model.parameters():
                                        param.grad = None
                            elif how_to_zero == "model":
                                for model in models:
                                    model.zero_grad()
                            else:
                                optimizer.zero_grad()

                            # print("opt_level {} i {} inject_inf {} which_backward {} inject_inf_loc {} which_model {} use_multiple_loss_scalers {}".format(opt_level, i, inject_inf, which_backward, inject_inf_loc, which_model, use_multiple_loss_scalers))

                            loss0 = model0(self.x) + model2(self.x)
                            loss1 = model1(self.x) + model2(self.x)

                            # The inf is written into the gradient inside the
                            # scale_loss block, after backward() and before the
                            # context manager exits.  weight0 is the fp32
                            # parameter and weight1 the fp16 one.
                            with amp.scale_loss(loss0, optimizer, loss_id=loss_ids[0]) as scaled_loss:
                                scaled_loss.backward()
                                if i == inject_inf and which_backward == 0:
                                    if which_model == 0:
                                        inj_model = model0
                                    elif which_model == 2:
                                        inj_model = model2
                                    else:
                                        # format() is used because which_model is an
                                        # int or None; concatenating it to a str would
                                        # raise TypeError instead of this error.
                                        raise RuntimeError("{} invalid for loss 0".format(which_model))
                                    if inject_inf_loc == "fp32":
                                        inj_model.weight0.grad[0] = float('inf')
                                    elif inject_inf_loc == "fp16":
                                        inj_model.weight1.grad[0] = float('inf')
                            with amp.scale_loss(loss1, optimizer, loss_id=loss_ids[1]) as scaled_loss:
                                scaled_loss.backward()
                                if i == inject_inf and which_backward == 1:
                                    if which_model == 1:
                                        inj_model = model1
                                    elif which_model == 2:
                                        inj_model = model2
                                    else:
                                        # See the note on the loss-0 branch: format()
                                        # avoids a TypeError from int/None + str.
                                        raise RuntimeError("{} invalid for loss 1 ".format(which_model))
                                    if inject_inf_loc == "fp32":
                                        inj_model.weight0.grad[0] = float('inf')
                                    elif inject_inf_loc == "fp16":
                                        inj_model.weight1.grad[0] = float('inf')

                            # Gradients are only compared on non-injected iterations.
                            # NOTE(review): presumably amp skips the optimizer step
                            # on the injected iteration, which is why the final
                            # parameters are still expected to match the
                            # two-iteration reference -- confirm.
                            if i != inject_inf:
                                for param, reference_grad in zip(amp.master_params(optimizer),
                                                                 reference_grads[unskipped]):
                                    self.assertTrue(torch.allclose(param.grad.float(), reference_grad.float()))
                                unskipped += 1

                            optimizer.step()

                        # Final model parameters must match the reference run
                        # and the optimizer's master parameters.
                        model_params = [p for p in model0.parameters()] + \
                                       [p for p in model1.parameters()] + \
                                       [p for p in model2.parameters()]
                        for model, master, reference in zip(
                                model_params,
                                amp.master_params(optimizer),
                                final_params):
                            self.assertTrue(torch.allclose(model, reference))
                            self.assertTrue(torch.allclose(model, master.to(model.dtype)))

                        # Only O1 deactivates the amp handle before the next configuration.
                        if opt_level == "O1":
                            _amp_state.handle._deactivate()

    def test_2models2losses2optimizers(self):
        """Compare amp training of two models/losses/optimizers with plain SGD references.

        The test runs in three phases:

        1. A reference run without amp (2 iterations) records per-iteration
           gradients in ``reference_grads[0]`` and the final parameters in
           ``final_params[0]``.
        2. For every ``(which_iter, which_backward)`` pair a 3-iteration reference
           run is made in which, on iteration ``which_iter``, only one optimizer
           steps (optimizer1 when ``which_backward == 0``, optimizer0 otherwise).
           Results are stored at the index returned by ``what_got_skipped``.
        3. The same training is repeated through ``amp.initialize`` /
           ``amp.scale_loss`` for every opt level, zeroing strategy and loss-scaler
           configuration.  For O1/O2 an ``inf`` is additionally written into a
           gradient on one iteration, and the master gradients / final parameters
           are compared against the matching reference from phases 1 and 2.
        """
        # ---- Phase 1: reference run without amp, nothing skipped ----
        model0 = MyModel(1)
        model1 = MyModel(2)

        optimizer0 = torch.optim.SGD([{'params' : model0.parameters(), 'lr' : 0.25}],
                                      momentum=0.125)
        optimizer1 = torch.optim.SGD([{'params' : model1.parameters(), 'lr' : 0.5}],
                                      momentum=0.25)

        # Don't do it like this:  reference_grads = [[]]*5
        # because then it creates a list of 5 references to the same "[]" and appending
        # to any of them effectively makes you append to all of them, which multiplies
        # the resulting size of reference_grads by 5x and needless to say makes the test fail.
        # Slot 0 holds the "nothing skipped" reference; slots 1-4 are filled in phase 2.
        reference_grads = [[], [], [], [], []]
        final_params = [None, None, None, None, None]
        for i in range(2):
            optimizer0.zero_grad()
            optimizer1.zero_grad()
            loss0 = model0(self.x)
            loss1 = model1(self.x)
            loss0.backward()
            loss1.backward()

            # One entry per iteration: model0's grads followed by model1's grads.
            reference_grads[0].append([param.grad.data.clone() for param in model0.parameters()] +
                                   [param.grad.data.clone() for param in model1.parameters()])

            optimizer0.step()
            optimizer1.step()

        final_params[0] = [param.data.clone() for param in model0.parameters()] + \
                          [param.data.clone() for param in model1.parameters()]

        def what_got_skipped(which_iter, which_backward):
            # Map a (skipped iteration, skipped backward) pair to its slot in
            # reference_grads / final_params.  Any other combination (e.g. the
            # (-1, None) pair used when no inf is injected) maps to slot 0.
            if which_iter == 0 and which_backward == 0:
                return 1
            if which_iter == 0 and which_backward == 1:
                return 2
            if which_iter == 1 and which_backward == 0:
                return 3
            if which_iter == 1 and which_backward == 1:
                return 4
            return 0

        # ---- Phase 2: reference runs with exactly one optimizer step skipped ----
        for which_iter in (0,1):
            for which_backward in (0,1):
                model0 = MyModel(1)
                model1 = MyModel(2)

                optimizer0 = torch.optim.SGD([{'params' : model0.parameters(), 'lr' : 0.25}],
                                              momentum=0.125)
                optimizer1 = torch.optim.SGD([{'params' : model1.parameters(), 'lr' : 0.5}],
                                              momentum=0.25)

                # Three iterations so that two non-skipped iterations remain.
                for i in range(3):
                    optimizer0.zero_grad()
                    optimizer1.zero_grad()
                    loss0 = model0(self.x)
                    loss1 = model1(self.x)
                    loss0.backward()
                    loss1.backward()

                    # Gradients are only recorded for iterations without a skip.
                    if i != which_iter:
                        reference_grads[what_got_skipped(which_iter, which_backward)].append(
                            [param.grad.data.clone() for param in model0.parameters()] +
                            [param.grad.data.clone() for param in model1.parameters()])

                    if i == which_iter:
                        # Only the optimizer that does not belong to the "skipped"
                        # backward pass takes a step on this iteration.
                        if which_backward == 0:
                            optimizer1.step()
                        else:
                            optimizer0.step()
                    else:
                        optimizer0.step()
                        optimizer1.step()

                final_params[what_got_skipped(which_iter, which_backward)] = \
                    [param.data.clone() for param in model0.parameters()] + \
                    [param.data.clone() for param in model1.parameters()]

        # ---- Phase 3: the same training through amp, compared with the references ----
        for opt_level in ("O0", "O1", "O2", "O3"):
          for how_to_zero in ("none", "model", "optimizer"):
            for use_multiple_loss_scalers in (True, False):
              # inf injection is only exercised for O1 and O2; -1 means "never inject".
              if opt_level == "O1" or opt_level == "O2":
                  inject_inf_iters = (-1, 0, 1)
              else:
                  inject_inf_iters = (-1,)

              for inject_inf in inject_inf_iters:
                if inject_inf >= 0:
                   inject_inf_locs = ("fp16", "fp32")
                   which_backwards = (0, 1)
                else:
                   # Placeholder values: neither matches an injection branch below.
                   inject_inf_locs = ("fdsa",)
                   which_backwards = (None,)

                for inject_inf_loc in inject_inf_locs:
                  for which_backward in which_backwards:
                      # Either one loss scaler per loss, or a single shared scaler.
                      if use_multiple_loss_scalers:
                          num_losses = 2
                          loss_ids = [0, 1]
                      else:
                          num_losses = 1
                          loss_ids = [0, 0]

                      # Match the iteration counts of the reference runs
                      # (3 with a skipped step, 2 without).
                      if inject_inf >= 0:
                          iters = 3
                      else:
                          iters = 2

                      model0 = MyModel(1)
                      model1 = MyModel(2)

                      models = [model0, model1]

                      optimizer0 = torch.optim.SGD([{'params' : model0.parameters(), 'lr' : 0.25}],
                                                    momentum=0.125)
                      optimizer1 = torch.optim.SGD([{'params' : model1.parameters(), 'lr' : 0.5}],
                                                    momentum=0.25)

                      # The flag is raised only for the duration of amp.initialize.
                      _amp_state.allow_incoming_model_not_fp32 = True
                      [model0, model1], [optimizer0, optimizer1] = amp.initialize(
                          [model0, model1],
                          [optimizer0, optimizer1],
                          opt_level=opt_level,
                          verbosity=0,
                          cast_model_type=False,
                          num_losses=num_losses)
                      _amp_state.allow_incoming_model_not_fp32 = False

                      # Force fixed, distinct loss scales for the scaler(s).
                      _amp_state.loss_scalers[0]._loss_scale = 4.0
                      if use_multiple_loss_scalers:
                          _amp_state.loss_scalers[1]._loss_scale = 16.0

                      # Index of the next reference gradient entry to compare against.
                      unskipped = 0
                      for i in range(iters):
                          # Three different ways of clearing gradients between iterations.
                          if how_to_zero == "none":
                              for model in models:
                                  for param in model.parameters():
                                      param.grad = None
                          elif how_to_zero == "model":
                              for model in models:
                                  model.zero_grad()
                          else:
                              optimizer0.zero_grad()
                              optimizer1.zero_grad()

                          loss0 = model0(self.x)
                          loss1 = model1(self.x)

                          # The inf is written into grad element 0 of weight0 ("fp32") or
                          # weight1 ("fp16") inside the scale_loss context.
                          # NOTE(review): presumably weight0/weight1 are MyModel's fp32/fp16
                          # parameters -- MyModel is defined outside this view; confirm.
                          with amp.scale_loss(loss0, optimizer0, loss_id=loss_ids[0]) as scaled_loss:
                              scaled_loss.backward()
                              if i == inject_inf and which_backward == 0:
                                  if inject_inf_loc == "fp32":
                                      model0.weight0.grad[0] = float('inf')
                                  elif inject_inf_loc == "fp16":
                                      model0.weight1.grad[0] = float('inf')
                          with amp.scale_loss(loss1, optimizer1, loss_id=loss_ids[1]) as scaled_loss:
                              scaled_loss.backward()
                              if i == inject_inf and which_backward == 1:
                                  if inject_inf_loc == "fp32":
                                      model1.weight0.grad[0] = float('inf')
                                  elif inject_inf_loc == "fp16":
                                      model1.weight1.grad[0] = float('inf')

                          # print("opt_level {} i {} inject_inf {} which_backward {} inject_inf_loc {} use_multiple_loss_scalers {}".format(opt_level, i, inject_inf, which_backward, inject_inf_loc, use_multiple_loss_scalers))

                          # On iterations without an injected inf, the master gradients
                          # must match the reference gradients recorded earlier.
                          if i != inject_inf:
                              master_params = list(amp.master_params(optimizer0)) + \
                                              list(amp.master_params(optimizer1))
                              for param, reference_grad in zip(master_params,
                                      reference_grads[what_got_skipped(inject_inf, which_backward)][unskipped]):
                                  self.assertTrue(torch.allclose(param.grad.float(), reference_grad.float()))
                              unskipped += 1

                          optimizer0.step()
                          optimizer1.step()

                      # Final check: model params equal the reference params and agree
                      # with the master params once cast to the model dtype.
                      model_params = [p for p in model0.parameters()] + [p for p in model1.parameters()]
                      master_params = [p for p in amp.master_params(optimizer0)] + \
                                      [p for p in amp.master_params(optimizer1)]
                      for model, master, reference in zip(
                              model_params,
                              master_params,
                              final_params[what_got_skipped(inject_inf, which_backward)]):
                          self.assertTrue(torch.allclose(model, reference))
                          self.assertTrue(torch.allclose(model, master.to(model.dtype)))

                      # Deactivate the amp handle before the next O1 configuration.
                      if opt_level == "O1":
                          _amp_state.handle._deactivate()

    def test_3models2losses2optimizers(self):
        """Compare amp training of three models, two losses and two optimizers with references.

        optimizer0 owns model0 and model1, optimizer1 owns model2.  ``loss0`` is
        built from model0 and model1, ``loss1`` from model2 and model1, so model1
        receives gradients from both backward passes.

        The test runs in three phases:

        1. A reference run without amp (2 iterations) fills slot 0 of
           ``reference_grads`` / ``final_params``.
        2. For every ``(which_iter, which_backward, which_model)`` combination a
           3-iteration reference run is made in which the optimizer steps of
           iteration ``which_iter`` are (partially) skipped; results go to the
           slot returned by ``what_got_skipped``.
        3. The same training is repeated through ``amp.initialize`` /
           ``amp.scale_loss`` for every opt level, zeroing strategy and loss-scaler
           configuration.  For O1/O2 an ``inf`` is written into a gradient of
           ``which_model`` on one iteration, and master gradients / final
           parameters are compared against the matching reference.
        """
        # ---- Phase 1: reference run without amp, nothing skipped ----
        model0 = MyModel(1)
        model1 = MyModel(2)
        model2 = MyModel(3)

        optimizer0 = torch.optim.SGD([{'params' : model0.parameters(), 'lr' : 0.25},
                                      {'params' : model1.parameters(), 'lr' : 1.0}],
                                     momentum=0.5)
        optimizer1 = torch.optim.SGD([{'params' : model2.parameters(), 'lr' : 0.5}],
                                     momentum=0.25)

        # Again, can't do this:  reference_grads = [[]]*9
        # (it would create nine references to the same list).
        reference_grads = [[], [], [], [], [], [], [], [], []]
        final_params = [None, None, None, None, None, None, None, None, None]
        for i in range(2):
            optimizer0.zero_grad()
            optimizer1.zero_grad()
            loss0 = model0(self.x) + model1(self.x)
            loss1 = model2(self.x) + model1(self.x)
            loss0.backward()
            loss1.backward()

            # NOTE(review): only model0's and model1's gradients are recorded, so the
            # zip() in the amp phase stops before any model2 gradient is compared.
            reference_grads[0].append([param.grad.data.clone() for param in model0.parameters()] +
                                   [param.grad.data.clone() for param in model1.parameters()])

            optimizer0.step()
            optimizer1.step()

        final_params[0] = \
            [param.data.clone() for param in model0.parameters()] + \
            [param.data.clone() for param in model1.parameters()] + \
            [param.data.clone() for param in model2.parameters()]

        def what_got_skipped(which_iter, which_backward, which_model):
            # Map (skipped iteration, skipped backward, affected model) to a slot in
            # reference_grads / final_params.  Unlisted combinations (e.g. the
            # (-1, None, None) triple used when no inf is injected) map to slot 0.
            if which_iter == 0:
                if which_backward == 0:
                    if which_model == 0:
                        return 1
                    if which_model == 1:
                        return 2
                if which_backward == 1:
                    if which_model == 2:
                        return 3
                    if which_model == 1:
                        return 4
            if which_iter == 1:
                if which_backward == 0:
                    if which_model == 0:
                        return 5
                    if which_model == 1:
                        return 6
                if which_backward == 1:
                    if which_model == 2:
                        return 7
                    if which_model == 1:
                        return 8
            return 0

        # ---- Phase 2: reference runs with the steps of one iteration skipped ----
        for which_iter in (0,1):
            for which_backward in (0,1):
                # Models that contribute to the selected loss.
                if which_backward == 0:
                    which_models = (0,1)
                if which_backward == 1:
                    which_models = (2,1)
                for which_model in which_models:

                    model0 = MyModel(1)
                    model1 = MyModel(2)
                    model2 = MyModel(3)

                    optimizer0 = torch.optim.SGD([{'params' : model0.parameters(), 'lr' : 0.25},
                                                  {'params' : model1.parameters(), 'lr' : 1.0}],
                                                 momentum=0.5)
                    optimizer1 = torch.optim.SGD([{'params' : model2.parameters(), 'lr' : 0.5}],
                                                 momentum=0.25)

                    # Three iterations so that two non-skipped iterations remain.
                    for i in range(3):
                        optimizer0.zero_grad()
                        optimizer1.zero_grad()
                        loss0 = model0(self.x) + model1(self.x)
                        loss1 = model2(self.x) + model1(self.x)
                        loss0.backward()
                        loss1.backward()

                        # Gradients are only recorded for iterations without a skip.
                        if i != which_iter:
                            reference_grads[what_got_skipped(which_iter,
                                    which_backward, which_model)].append(
                                [param.grad.data.clone() for param in model0.parameters()] +
                                [param.grad.data.clone() for param in model1.parameters()])

                        if i == which_iter:
                            # In the amp phase loss0 is scaled for optimizer0 only, while
                            # loss1 is scaled for both optimizers.  The reference mirrors
                            # that: skipping backward 0 still lets optimizer1 step,
                            # skipping backward 1 lets neither optimizer step,
                            # regardless of which_model.
                            if which_backward == 0:
                                optimizer1.step()
                        else:
                            optimizer0.step()
                            optimizer1.step()

                    final_params[what_got_skipped(which_iter, which_backward, which_model)] = \
                        [param.data.clone() for param in model0.parameters()] + \
                        [param.data.clone() for param in model1.parameters()] + \
                        [param.data.clone() for param in model2.parameters()]

        # ---- Phase 3: the same training through amp, compared with the references ----
        for opt_level in ("O0", "O1", "O2", "O3"):
          for how_to_zero in ("none", "model", "optimizer"):
            for use_multiple_loss_scalers in (True, False):
              # inf injection is only exercised for O1 and O2; -1 means "never inject".
              if opt_level == "O1" or opt_level == "O2":
                  inject_inf_iters = (-1, 0, 1)
              else:
                  inject_inf_iters = (-1,)

              for inject_inf in inject_inf_iters:
                if inject_inf >= 0:
                   inject_inf_locs = ("fp16", "fp32")
                   which_backwards = (0, 1)
                else:
                   # Placeholder values: neither matches an injection branch below.
                   inject_inf_locs = ("fdsa",)
                   which_backwards = (None,)

                for inject_inf_loc in inject_inf_locs:
                  for which_backward in which_backwards:
                    # Either one loss scaler per loss, or a single shared scaler.
                    if use_multiple_loss_scalers:
                        num_losses = 2
                        loss_ids = [0, 1]
                    else:
                        num_losses = 1
                        loss_ids = [0, 0]

                    # Match the iteration counts of the reference runs
                    # (3 with a skipped step, 2 without).
                    if inject_inf >= 0:
                        iters = 3
                        if which_backward == 0:
                            which_models = (0, 1)
                        elif which_backward == 1:
                            which_models = (2, 1)
                    else:
                        iters = 2
                        which_models = (None,)

                    for which_model in which_models:
                        model0 = MyModel(1)
                        model1 = MyModel(2)
                        model2 = MyModel(3)

                        models = [model0, model1, model2]

                        optimizer0 = torch.optim.SGD([{'params' : model0.parameters(), 'lr' : 0.25},
                                                      {'params' : model1.parameters(), 'lr' : 1.0}],
                                                     momentum=0.5)
                        optimizer1 = torch.optim.SGD([{'params' : model2.parameters(), 'lr' : 0.5}],
                                                     momentum=0.25)

                        # The flag is raised only for the duration of amp.initialize.
                        _amp_state.allow_incoming_model_not_fp32 = True
                        [model0, model1, model2], [optimizer0, optimizer1] = amp.initialize(
                            [model0, model1, model2],
                            [optimizer0, optimizer1],
                            opt_level=opt_level,
                            verbosity=0,
                            cast_model_type=False,
                            num_losses=num_losses)
                        _amp_state.allow_incoming_model_not_fp32 = False

                        # Force fixed, distinct loss scales for the scaler(s).
                        _amp_state.loss_scalers[0]._loss_scale = 4.0
                        if use_multiple_loss_scalers:
                            _amp_state.loss_scalers[1]._loss_scale = 16.0

                        # Index of the next reference gradient entry to compare against.
                        unskipped = 0
                        for i in range(iters):
                            # Three different ways of clearing gradients between iterations.
                            if how_to_zero == "none":
                                for model in models:
                                    for param in model.parameters():
                                        param.grad = None
                            elif how_to_zero == "model":
                                for model in models:
                                    model.zero_grad()
                            else:
                                optimizer0.zero_grad()
                                optimizer1.zero_grad()

                            loss0 = model0(self.x) + model1(self.x)
                            loss1 = model2(self.x) + model1(self.x)

                            # The inf is written into grad element 0 of weight0 ("fp32") or
                            # weight1 ("fp16") of the selected model.
                            # NOTE(review): presumably weight0/weight1 are MyModel's fp32/fp16
                            # parameters -- MyModel is defined outside this view; confirm.
                            with amp.scale_loss(loss0, optimizer0, loss_id=loss_ids[0]) as scaled_loss:
                                scaled_loss.backward()
                                if i == inject_inf and which_backward == 0:
                                    if which_model == 0:
                                        inj_model = model0
                                    elif which_model == 1:
                                        inj_model = model1
                                    else:
                                        # Format instead of "+": which_model is not a str, so
                                        # concatenation would raise TypeError here.
                                        raise RuntimeError("{} invalid for loss 0".format(which_model))
                                    if inject_inf_loc == "fp32":
                                        inj_model.weight0.grad[0] = float('inf')
                                    elif inject_inf_loc == "fp16":
                                        inj_model.weight1.grad[0] = float('inf')
                            # loss1 touches parameters of both optimizers (model1 and model2).
                            with amp.scale_loss(loss1, [optimizer0, optimizer1], loss_id=loss_ids[1]) as scaled_loss:
                                scaled_loss.backward()
                                if i == inject_inf and which_backward == 1:
                                    if which_model == 2:
                                        inj_model = model2
                                    elif which_model == 1:
                                        inj_model = model1
                                    else:
                                        raise RuntimeError("{} invalid for loss 1 ".format(which_model))
                                    if inject_inf_loc == "fp32":
                                        inj_model.weight0.grad[0] = float('inf')
                                    elif inject_inf_loc == "fp16":
                                        inj_model.weight1.grad[0] = float('inf')

                            # On iterations without an injected inf, the master gradients
                            # must match the reference gradients recorded earlier.
                            if i != inject_inf:
                                master_params = list(amp.master_params(optimizer0)) + \
                                                list(amp.master_params(optimizer1))
                                for param, reference_grad in zip(master_params,
                                      reference_grads[what_got_skipped(inject_inf,
                                          which_backward, which_model)][unskipped]):
                                    self.assertTrue(torch.allclose(param.grad.float(), reference_grad.float()))
                                unskipped += 1

                            optimizer0.step()
                            optimizer1.step()

                        # Final check: model params equal the reference params and agree
                        # with the master params once cast to the model dtype.
                        model_params = [p for p in model0.parameters()] + \
                                       [p for p in model1.parameters()] + \
                                       [p for p in model2.parameters()]
                        master_params = [p for p in amp.master_params(optimizer0)] + \
                                        [p for p in amp.master_params(optimizer1)]

                        # print("opt_level {} i {} inject_inf {} which_backward {} inject_inf_loc {} use_multiple_loss_scalers {} which_model {}".format(opt_level, i, inject_inf, which_backward, inject_inf_loc, use_multiple_loss_scalers, which_model))

                        for model, master, reference in zip(
                                model_params,
                                master_params,
                                final_params[what_got_skipped(inject_inf, which_backward, which_model)]):
                            self.assertTrue(torch.allclose(model, reference))
                            self.assertTrue(torch.allclose(model, master.to(model.dtype)))

                        # Deactivate the amp handle before the next O1 configuration.
                        if opt_level == "O1":
                            _amp_state.handle._deactivate()

if __name__ == '__main__':
    unittest.main()


================================================
FILE: apex/tests/L0/run_amp/test_promotion.py
================================================
import unittest

import itertools as it

from apex import amp
import torch
from torch import nn
import torch.nn.functional as F

from utils import common_init, HALF, FLOAT, DTYPES

class TestPromotion(unittest.TestCase):
    """Checks the result types of ops that mix half and float inputs under amp."""

    def setUp(self):
        # Activate amp and install the shared sizes / default tensor type.
        self.handle = amp.init(enabled=True)
        common_init(self)

    def tearDown(self):
        self.handle._deactivate()

    def run_binary_promote_test(self, fns, input_shape, x_inplace=False):
        """Run every binary ``fn(x, y)`` over all dtype pairs and check result types.

        Args:
            fns: callables taking two tensors.
            input_shape: shape used for both operands.
            x_inplace: if True, ``fn`` modifies its first argument in place and the
                result must keep that argument's type; otherwise the result must
                have the widest of the two input types.
        """
        for fn in fns:
            for xtype in DTYPES:
                for ytype in DTYPES:
                    leaf = torch.randn(input_shape, dtype=xtype).requires_grad_()
                    # An in-place op needs a non-leaf tensor to operate on.
                    lhs = leaf.clone() if x_inplace else leaf
                    rhs = torch.randn(input_shape, dtype=ytype)
                    result = fn(lhs, rhs)

                    if x_inplace:
                        # In place: always match the first operand's type.
                        expected = lhs.type()
                    elif torch.float in (xtype, ytype):
                        # Out of place: match the widest type.
                        expected = FLOAT
                    else:
                        expected = HALF
                    self.assertEqual(result.type(), expected)

                    result.float().sum().backward()
                    self.assertEqual(leaf.grad.dtype, xtype)

    def test_atan2_matches_widest(self):
        def functional(x, y):
            return torch.atan2(x, y)

        def method(x, y):
            return x.atan2(y)

        self.run_binary_promote_test([functional, method], (self.b,))

    def test_mul_matches_widest(self):
        def functional(x, y):
            return torch.mul(x, y)

        def method(x, y):
            return x.mul(y)

        self.run_binary_promote_test([functional, method], (self.b,))

    def test_cat_matches_widest(self):
        size = self.b
        halves = [torch.randn(size, dtype=torch.half) for _ in range(5)]

        # A single float tensor in the sequence widens the result to float.
        wide = torch.randn(size)
        joined = torch.cat(halves + [wide])
        self.assertEqual(joined.type(), FLOAT)

        # With only half tensors the result stays half.
        narrow = torch.randn(size, dtype=torch.half)
        joined = torch.cat(halves + [narrow])
        self.assertEqual(joined.type(), HALF)

    def test_inplace_exp_is_error_for_half(self):
        as_float = torch.randn(self.b)
        as_float.exp_()
        self.assertEqual(as_float.type(), FLOAT)

        as_half = torch.randn(self.b, dtype=torch.half)
        with self.assertRaises(NotImplementedError):
            as_half.exp_()

    def test_inplace_add_matches_self(self):
        def add_inplace(x, y):
            return x.add_(y)

        self.run_binary_promote_test([add_inplace], (self.b,), x_inplace=True)

if __name__ == '__main__':
    unittest.main()


================================================
FILE: apex/tests/L0/run_amp/test_rnn.py
================================================
import unittest

from apex import amp
import random
import torch
from torch import nn

from utils import common_init, HALF

class TestRnnCells(unittest.TestCase):
    """Checks that RNN/GRU/LSTM cells return half outputs under amp."""

    def setUp(self):
        # Activate amp and install the shared sizes / default tensor type.
        self.handle = amp.init(enabled=True)
        common_init(self)

    def tearDown(self):
        self.handle._deactivate()

    def run_cell_test(self, cell, state_tuple=False):
        """Unroll ``cell`` over ``self.t`` steps for float and half inputs.

        Args:
            cell: recurrent cell called as ``cell(input, hidden)``.
            state_tuple: True when the hidden state is a pair of tensors, in
                which case the first element is treated as the step output.
        """
        step_shape = (self.b, self.h)
        for dtype in (torch.float, torch.half):
            inputs = [torch.randn(step_shape, dtype=dtype).requires_grad_()
                      for _ in range(self.t)]

            def zero_state():
                return torch.zeros(step_shape, dtype=dtype)

            state = (zero_state(), zero_state()) if state_tuple else zero_state()

            step_outputs = []
            for step_input in inputs:
                state = cell(step_input, state)
                step_outputs.append(state[0] if state_tuple else state)

            # Every step output must be half, whatever the input dtype was.
            for out in step_outputs:
                self.assertEqual(out.type(), HALF)

            # Gradients must come back in each input's own dtype.
            step_outputs[-1].float().sum().backward()
            for inp in inputs:
                self.assertEqual(inp.grad.dtype, inp.dtype)

    def test_rnn_cell_is_half(self):
        self.run_cell_test(nn.RNNCell(self.h, self.h))

    def test_gru_cell_is_half(self):
        self.run_cell_test(nn.GRUCell(self.h, self.h))

    def test_lstm_cell_is_half(self):
        self.run_cell_test(nn.LSTMCell(self.h, self.h), state_tuple=True)

class TestRnns(unittest.TestCase):
    """Checks that multi-layer RNN/GRU/LSTM modules return half outputs under amp."""

    def setUp(self):
        # Activate amp and install the shared sizes / default tensor type.
        self.handle = amp.init(enabled=True)
        common_init(self)

    def tearDown(self):
        self.handle._deactivate()

    def run_rnn_test(self, rnn, layers, bidir, state_tuple=False):
        """Run ``rnn`` on float and half inputs and check output/grad dtypes.

        Args:
            rnn: recurrent module called as ``rnn(x, hidden)``.
            layers: number of layers in ``rnn``.
            bidir: whether ``rnn`` is bidirectional (doubles the first dimension
                of the hidden state).
            state_tuple: True when the hidden state is a pair of tensors.
        """
        for typ in [torch.float, torch.half]:
            x = torch.randn((self.t, self.b, self.h), dtype=typ).requires_grad_()
            hidden_fn = lambda: torch.zeros((layers + (layers * bidir),
                                             self.b, self.h), dtype=typ)
            if state_tuple:
                hidden = (hidden_fn(), hidden_fn())
            else:
                hidden = hidden_fn()
            output, _ = rnn(x, hidden)
            self.assertEqual(output.type(), HALF)
            output[-1, :, :].float().sum().backward()
            self.assertEqual(x.grad.dtype, x.dtype)

    def test_rnn_is_half(self):
        configs = [(1, False), (2, False), (2, True)]
        for layers, bidir in configs:
            rnn = nn.RNN(input_size=self.h, hidden_size=self.h, num_layers=layers,
                         nonlinearity='relu', bidirectional=bidir)
            self.run_rnn_test(rnn, layers, bidir)

    def test_gru_is_half(self):
        configs = [(1, False), (2, False), (2, True)]
        for layers, bidir in configs:
            rnn = nn.GRU(input_size=self.h, hidden_size=self.h, num_layers=layers,
                         bidirectional=bidir)
            self.run_rnn_test(rnn, layers, bidir)

    def test_lstm_is_half(self):
        configs = [(1, False), (2, False), (2, True)]
        for layers, bidir in configs:
            rnn = nn.LSTM(input_size=self.h, hidden_size=self.h, num_layers=layers,
                         bidirectional=bidir)
            self.run_rnn_test(rnn, layers, bidir, state_tuple=True)

    def test_rnn_packed_sequence(self):
        num_layers = 2
        rnn = nn.RNN(input_size=self.h, hidden_size=self.h, num_layers=num_layers)
        for typ in [torch.float, torch.half]:
            x = torch.randn((self.t, self.b, self.h), dtype=typ).requires_grad_()
            lens = sorted([random.randint(self.t // 2, self.t) for _ in range(self.b)],
                          reverse=True)
            # `pack_padded_sequence` breaks if default tensor type is non-CPU
            torch.set_default_tensor_type(torch.FloatTensor)
            try:
                lens = torch.tensor(lens, dtype=torch.int64, device=torch.device('cpu'))
                packed_seq = nn.utils.rnn.pack_padded_sequence(x, lens)
            finally:
                # Restore the CUDA default even if packing fails, so the global
                # setting does not leak out of this test.
                torch.set_default_tensor_type(torch.cuda.FloatTensor)
            hidden = torch.zeros((num_layers, self.b, self.h), dtype=typ)
            output, _ = rnn(packed_seq, hidden)
            self.assertEqual(output.data.type(), HALF)
            output.data.float().sum().backward()
            self.assertEqual(x.grad.dtype, x.dtype)

if __name__ == '__main__':
    unittest.main()


================================================
FILE: apex/tests/L0/run_amp/utils.py
================================================
import torch

# Type strings that the tests compare against the result of `tensor.type()`.
HALF = 'torch.cuda.HalfTensor'
FLOAT = 'torch.cuda.FloatTensor'

# Input dtypes the tests iterate over.
DTYPES = [torch.half, torch.float]

# Lookup tables from an input dtype to a type string: always half, always
# float, or the same precision as the input.
# NOTE(review): presumably the values are the expected output types of an op --
# the tables are not used in the visible tests; confirm against callers.
ALWAYS_HALF = {torch.float: HALF,
               torch.half: HALF}
ALWAYS_FLOAT = {torch.float: FLOAT,
                torch.half: FLOAT}
MATCH_INPUT = {torch.float: FLOAT,
               torch.half: HALF}

def common_init(test_case):
    """Attach the shared size constants to ``test_case`` and default to CUDA floats.

    Sets the attributes ``h``, ``b``, ``c``, ``k`` and ``t`` on ``test_case`` and
    then makes ``torch.cuda.FloatTensor`` the global default tensor type.
    """
    shared_sizes = (("h", 64), ("b", 16), ("c", 16), ("k", 3), ("t", 10))
    for attr_name, size in shared_sizes:
        setattr(test_case, attr_name, size)
    torch.set_default_tensor_type(torch.cuda.FloatTensor)


================================================
FILE: apex/tests/L0/run_fp16util/__init__.py
================================================


================================================
FILE: apex/tests/L0/run_fp16util/test_fp16util.py
================================================
import unittest

import torch
import torch.nn as nn

from apex.fp16_utils import FP16Model


class DummyBlock(nn.Module):
    """Affine BatchNorm2d followed by a Conv2d with kernel size 2, both over 10 channels."""

    def __init__(self):
        super(DummyBlock, self).__init__()

        self.conv = nn.Conv2d(in_channels=10, out_channels=10, kernel_size=2)
        self.bn = nn.BatchNorm2d(num_features=10, affine=True)

    def forward(self, x):
        # Normalize first, then convolve.
        normalized = self.bn(x)
        return self.conv(normalized)


class DummyNet(nn.Module):
    """Conv2d (3 -> 10 channels), non-affine BatchNorm2d, then two DummyBlocks in sequence."""

    def __init__(self):
        super(DummyNet, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=10, kernel_size=2)
        self.bn1 = nn.BatchNorm2d(num_features=10, affine=False)
        self.db1 = DummyBlock()
        self.db2 = DummyBlock()

    def forward(self, x):
        # Feed the input through each stage in registration order.
        out = x
        for stage in (self.conv1, self.bn1, self.db1, self.db2):
            out = stage(out)
        return out


class DummyNetWrapper(nn.Module):
    """Affine BatchNorm2d over 3 channels placed in front of a DummyNet."""

    def __init__(self):
        super(DummyNetWrapper, self).__init__()

        self.bn = nn.BatchNorm2d(num_features=3, affine=True)
        self.dn = DummyNet()

    def forward(self, x):
        # Normalize the raw input before handing it to the wrapped network.
        normalized = self.bn(x)
        return self.dn(normalized)


class TestFP16Model(unittest.TestCase):
    """Checks parameter/buffer dtypes and output dtype of an FP16Model-wrapped network."""

    def setUp(self):
        self.N = 64
        self.C_in = 3
        self.H_in = 16
        self.W_in = 32
        self.in_tensor = torch.randn((self.N, self.C_in, self.H_in, self.W_in)).cuda()
        self.orig_model = DummyNetWrapper().cuda()
        self.fp16_model = FP16Model(self.orig_model)

    def test_params_and_buffers(self):
        # The affine batch-norm layers are expected to stay in float; every
        # other module is expected to hold half parameters and buffers.
        exempted_modules = [
            self.fp16_model.network.bn,
            self.fp16_model.network.dn.db1.bn,
            self.fp16_model.network.dn.db2.bn,
        ]
        for m in self.fp16_model.modules():
            expected_dtype = torch.float if (m in exempted_modules) else torch.half
            # recurse=False: each module is checked on its own tensors only.
            # unittest assertions (rather than bare assert) survive `python -O`
            # and report both values on failure.
            for p in m.parameters(recurse=False):
                self.assertEqual(p.dtype, expected_dtype)
            for b in m.buffers(recurse=False):
                # int64 buffers are accepted regardless of the expected dtype.
                self.assertIn(b.dtype, (expected_dtype, torch.int64))

    def test_output_is_half(self):
        out_tensor = self.fp16_model(self.in_tensor)
        self.assertEqual(out_tensor.dtype, torch.half)


================================================
FILE: apex/tests/L0/run_fused_layer_norm/test_fused_layer_norm.py
================================================
import unittest
import os
import random

import torch
import apex

        
class TestFusedLayerNorm(unittest.TestCase):
    """Checks that FusedLayerNorm runs on CPU and CUDA and that both agree."""

    def setUp(self):
        self.module = apex.normalization.FusedLayerNorm(normalized_shape=[32, 64], elementwise_affine=False)
        self.input_ = torch.randn(16, 32, 64)
        torch.cuda.manual_seed(42)

    def forward_cpu(self, input_):
        """Move the module to the CPU and run it on a CPU copy of ``input_``."""
        self.module.cpu()
        return self.module(input_.cpu())

    def forward_cuda(self, input_):
        """Move the module to CUDA and run it on a CUDA copy of ``input_``."""
        self.module.cuda()
        return self.module(input_.cuda())

    def test_forward_cuda(self):
        out_ = self.forward_cuda(self.input_)
        # unittest assertions survive `python -O`, unlike bare assert.
        self.assertTrue(out_.is_cuda)

    def test_forward_cpu(self):
        out_ = self.forward_cpu(self.input_)
        self.assertFalse(out_.is_cuda)

    def test_same_output(self):
        out_cpu = self.forward_cpu(self.input_)
        out_cuda = self.forward_cuda(self.input_)
        torch.testing.assert_allclose(out_cpu, out_cuda.cpu())
        
        
class TestFusedLayerNormElemWise(TestFusedLayerNorm):
    """Re-runs the inherited FusedLayerNorm tests with ``elementwise_affine=True``."""

    def setUp(self):
        norm_shape = [32, 64]
        self.module = apex.normalization.FusedLayerNorm(
            normalized_shape=norm_shape, elementwise_affine=True)
        self.input_ = torch.randn(16, *norm_shape)
        torch.cuda.manual_seed(42)

================================================
FILE: apex/tests/L0/run_mixed_adam/__init__.py
================================================


================================================
FILE: apex/tests/L0/run_mixed_adam/test_fp16_optimizer.py
================================================
import unittest
import torch
import apex

class TestFP16Optimizer(unittest.TestCase):
    """Compare ``apex.optimizers.FP16_Optimizer`` wrapping ``FusedAdam`` with
    ``apex.fp16_utils.FP16_Optimizer`` wrapping ``torch.optim.Adam``.

    Two half-precision linear models start from identical weights; after every
    optimizer step their parameters must stay within the configured absolute
    and relative tolerances of each other.
    """

    def setUp(self, max_abs_diff=1e-3, max_rel_diff=1, iters=7):
        self.max_abs_diff = max_abs_diff
        self.max_rel_diff = max_rel_diff
        self.iters = iters
        torch.cuda.manual_seed(13337)

        self.N, self.D_in, self.D_out = 64, 1024, 16
        self.x = torch.randn((self.N, self.D_in), dtype=torch.float16, device='cuda')
        self.ref_model = torch.nn.Linear(self.D_in, self.D_out).cuda().half()
        self.tst_model = torch.nn.Linear(self.D_in, self.D_out).cuda().half()
        # Start the model under test from exactly the reference weights.
        for tst_p, ref_p in zip(self.tst_model.parameters(), self.ref_model.parameters()):
            tst_p.data.copy_(ref_p.data)

    def get_max_diff(self, ref_param, tst_param):
        """Return the largest absolute and relative element-wise differences
        found across the paired parameters."""
        max_abs_diff = max_rel_diff = 0
        for p_ref, p_tst in zip(ref_param, tst_param):
            delta = p_ref - p_tst
            max_abs_diff = max(max_abs_diff, delta.abs().max().item())
            max_rel_diff = max(max_rel_diff, (delta / p_ref).abs().max().item())

        return max_abs_diff, max_rel_diff

    def _step_and_compare(self, ref_optim, tst_optim, ref_clip=None):
        """Run ``self.iters`` training steps on both models, checking the
        parameter differences after each one.

        When ``ref_clip`` is given, the reference optimizer's master gradients
        are clipped to that norm before its step.
        """
        for _ in range(self.iters):
            ref_loss = self.ref_model(self.x).sum()
            ref_optim.backward(ref_loss)
            if ref_clip is not None:
                ref_optim.clip_master_grads(ref_clip)
            ref_optim.step()

            tst_loss = self.tst_model(self.x).sum()
            tst_optim.backward(tst_loss)
            tst_optim.step()

            abs_diff, rel_diff = self.get_max_diff(self.ref_model.parameters(), self.tst_model.parameters())
            self.assertLessEqual(abs_diff, self.max_abs_diff)
            self.assertLessEqual(rel_diff, self.max_rel_diff)

    def test_fp16_optimizer(self):
        """Default settings on both optimizers."""
        ref_optim = apex.fp16_utils.FP16_Optimizer(
            torch.optim.Adam(self.ref_model.parameters()), verbose=False)
        tst_optim = apex.optimizers.FP16_Optimizer(
            apex.optimizers.FusedAdam(self.tst_model.parameters()))

        self._step_and_compare(ref_optim, tst_optim)

    def test_loss_scaling(self):
        """Both optimizers use a static loss scale of 128."""
        ref_optim = apex.fp16_utils.FP16_Optimizer(
            torch.optim.Adam(self.ref_model.parameters()), static_loss_scale=128.0, verbose=False)
        tst_optim = apex.optimizers.FP16_Optimizer(
            apex.optimizers.FusedAdam(self.tst_model.parameters()), static_loss_scale=128.0)

        self._step_and_compare(ref_optim, tst_optim)

    def test_parameter_groups(self):
        """Weight and bias are placed in separate parameter groups."""
        ref_groups = [{'params': [self.ref_model.weight]}, {'params': [self.ref_model.bias]}]
        ref_optim = apex.fp16_utils.FP16_Optimizer(torch.optim.Adam(ref_groups), verbose=False)

        tst_groups = [{'params': [self.tst_model.weight]}, {'params': [self.tst_model.bias]}]
        tst_optim = apex.optimizers.FP16_Optimizer(apex.optimizers.FusedAdam(tst_groups))

        self._step_and_compare(ref_optim, tst_optim)

    def test_grad_clip(self):
        """FusedAdam's ``max_grad_norm`` versus explicit clipping on the reference."""
        ref_optim = apex.fp16_utils.FP16_Optimizer(
            torch.optim.Adam(self.ref_model.parameters()), verbose=False)
        tst_optim = apex.optimizers.FP16_Optimizer(
            apex.optimizers.FusedAdam(self.tst_model.parameters(), max_grad_norm=0.01))

        self._step_and_compare(ref_optim, tst_optim, ref_clip=0.01)

    @unittest.skip('Not support grad being None')
    def test_grad_None(self):
        self.fail()

    @unittest.skip('Not support same weight decay as pytorch')
    def test_weight_decay(self):
        self.fail()

    @unittest.skip('Not support empty parameter groups')
    def test_group_empty(self):
        self.fail()

if __name__ == '__main__':
    # Run this file's tests when it is executed directly as a script.
    unittest.main()


================================================
FILE: apex/tests/L0/run_mixed_adam/test_mixed_adam.py
================================================
import unittest
import os
import random

import torch
import apex

class TestFusedAdam(unittest.TestCase):
    """Check ``apex.optimizers.FusedAdam`` against ``torch.optim.Adam``.

    Each test builds two identical parameter sets, steps one with each
    optimizer and requires the parameters to stay within ``max_abs_diff`` and
    ``max_rel_diff`` of each other after every iteration.
    """

    def setUp(self, max_abs_diff=1e-3, max_rel_diff=1, iters=7):
        self.max_abs_diff = max_abs_diff
        self.max_rel_diff = max_rel_diff
        self.iters = iters
        torch.cuda.manual_seed(9876)

    def tearDown(self):
        pass

    def gen_param_optim(self, tensors, adam_option):
        """Clone ``tensors`` into reference and test parameters and build one
        optimizer of each kind over them."""
        ref_param, tst_param = [], []
        for source in tensors:
            ref_param.append(torch.nn.Parameter(source.clone()))
            tst_param.append(torch.nn.Parameter(source.clone()))

        ref_optim = torch.optim.Adam(ref_param, **adam_option)
        tst_optim = apex.optimizers.FusedAdam(tst_param, **adam_option)

        return (ref_param, tst_param, ref_optim, tst_optim)

    def gen_grad(self, ref_param, tst_param):
        """Give every parameter pair the same freshly drawn random gradient."""
        for p_ref, p_tst in zip(ref_param, tst_param):
            shared_grad = torch.rand_like(p_ref)
            p_ref.grad = shared_grad
            p_tst.grad = shared_grad

    def gen_mixed_grad(self, ref_param, tst_param, scale=1.0):
        """Draw half-precision gradients, store their float version divided by
        ``scale`` on the reference parameters, and return the half tensors."""
        half_grads = []
        for p_ref, _ in zip(ref_param, tst_param):
            half_grad = torch.rand_like(p_ref).half()
            half_grads.append(half_grad)
            p_ref.grad = half_grad.float() / scale
        return half_grads

    def get_max_diff(self, ref_param, tst_param):
        """Return the largest absolute and relative element-wise differences
        found across the paired parameters."""
        max_abs_diff = max_rel_diff = 0
        for p_ref, p_tst in zip(ref_param, tst_param):
            delta = p_ref - p_tst
            max_abs_diff = max(max_abs_diff, delta.abs().max().item())
            max_rel_diff = max(max_rel_diff, (delta / p_ref).abs().max().item())

        return max_abs_diff, max_rel_diff

    def _assert_within_tolerance(self, ref_param, tst_param):
        """Assert that both difference measures are within the configured limits."""
        abs_diff, rel_diff = self.get_max_diff(ref_param, tst_param)
        self.assertLessEqual(abs_diff, self.max_abs_diff)
        self.assertLessEqual(rel_diff, self.max_rel_diff)

    def gen_single_type_test(self, param_type=torch.float):
        """Step both optimizers on one tensor of dtype ``param_type`` with shared gradients."""
        nelem = 278011
        adam_option = {
            'lr': 5e-4, 'betas': (0.9, 0.999), 'eps': 1e-08,
            'weight_decay': 0, 'amsgrad': False,
        }

        tensor = torch.rand(nelem, dtype=param_type, device='cuda')
        ref_param, tst_param, ref_optim, tst_optim = self.gen_param_optim([tensor], adam_option)

        for _ in range(self.iters):
            self.gen_grad(ref_param, tst_param)
            ref_optim.step()
            tst_optim.step()
            self._assert_within_tolerance(ref_param, tst_param)

    def test_double(self):
        self.gen_single_type_test(param_type=torch.double)

    def test_float(self):
        self.gen_single_type_test(param_type=torch.float)

    def test_half(self):
        """Float parameters; FusedAdam receives half-precision gradients via ``grads=``."""
        nelem = 278011
        adam_option = {
            'lr': 5e-4, 'betas': (0.9, 0.999), 'eps': 1e-08,
            'weight_decay': 0, 'amsgrad': False,
        }

        tensor = torch.rand(nelem, dtype=torch.float, device='cuda')
        ref_param, tst_param, ref_optim, tst_optim = self.gen_param_optim([tensor], adam_option)

        for _ in range(self.iters):
            half_grads = self.gen_mixed_grad(ref_param, tst_param)
            ref_optim.step()
            tst_optim.step(grads=half_grads)
            self._assert_within_tolerance(ref_param, tst_param)

    def test_multi_params(self):
        """Several parameters of different shapes, stepped with half-precision gradients."""
        sizes = [[4096, 1024], [4096], [4096, 2048], [32320, 1024], [1]]
        adam_option = {
            'lr': 5e-4, 'betas': (0.9, 0.999), 'eps': 1e-08,
            'weight_decay': 0, 'amsgrad': False,
        }

        tensors = [torch.rand(size, dtype=torch.float, device='cuda') for size in sizes]
        ref_param, tst_param, ref_optim, tst_optim = self.gen_param_optim(tensors, adam_option)

        for _ in range(self.iters):
            half_grads = self.gen_mixed_grad(ref_param, tst_param)
            ref_optim.step()
            tst_optim.step(grads=half_grads)
            self._assert_within_tolerance(ref_param, tst_param)

    def test_scale(self):
        """A random gradient scale is passed to FusedAdam and pre-divided for the reference."""
        nelem = 278011
        adam_option = {
            'lr': 5e-4, 'betas': (0.9, 0.999), 'eps': 1e-08,
            'weight_decay': 0, 'amsgrad': False,
        }

        tensor = torch.rand(nelem, dtype=torch.float, device='cuda')
        ref_param, tst_param, ref_optim, tst_optim = self.gen_param_optim([tensor], adam_option)

        for _ in range(self.iters):
            scale = random.random() * 1000
            half_grads = self.gen_mixed_grad(ref_param, tst_param, scale)
            ref_optim.step()
            tst_optim.step(grads=half_grads, scale=scale)
            self._assert_within_tolerance(ref_param, tst_param)

    def test_fp16_output(self):
        """FusedAdam additionally writes into a half-precision ``output_params`` copy."""
        nelem = 278011
        adam_option = {
            'lr': 5e-4, 'betas': (0.9, 0.999), 'eps': 1e-08,
            'weight_decay': 0, 'amsgrad': False,
        }

        tensor = torch.rand(nelem, dtype=torch.float, device='cuda')
        ref_param, tst_param, ref_optim, tst_optim = self.gen_param_optim([tensor], adam_option)

        fp16_param = torch.nn.Parameter(tensor.clone().half())

        for _ in range(self.iters):
            half_grads = self.gen_mixed_grad(ref_param, tst_param)
            ref_optim.step()
            tst_optim.step(grads=half_grads, output_params=[fp16_param])

            # Reference versus test parameters.
            self._assert_within_tolerance(ref_param, tst_param)
            # Test parameters versus the half-precision output copy.
            self._assert_within_tolerance(tst_param, [fp16_param.float()])

    def test_adam_option(self):
        """Non-default learning rate, betas and eps on a single-element tensor."""
        nelem = 1
        adam_option = {
            'lr': 0.01, 'betas': (0.6, 0.9), 'eps': 3e-06,
            'weight_decay': 0, 'amsgrad': False,
        }

        tensor = torch.rand(nelem, dtype=torch.float, device='cuda')
        ref_param, tst_param, ref_optim, tst_optim = self.gen_param_optim([tensor], adam_option)

        for _ in range(self.iters):
            self.gen_grad(ref_param, tst_param)
            ref_optim.step()
            tst_optim.step()
            self._assert_within_tolerance(ref_param, tst_param)


if __name__ == '__main__':
    # Run this file's tests when it is executed directly as a script.
    unittest.main()


================================================
FILE: apex/tests/L0/run_test.py
================================================
import unittest
import sys

# Directories (resolved against the current working directory) whose tests
# are discovered and run, in this order.
test_dirs = ["run_amp", "run_fp16util", "run_mixed_adam", "run_fused_layer_norm"]

runner = unittest.TextTestRunner(verbosity=2)

# One success flag per directory; every directory is run even after a failure.
outcomes = []
for test_dir in test_dirs:
    suite = unittest.TestLoader().discover(test_dir)

    print("\nExecuting tests from {}".format(test_dir))

    outcomes.append(runner.run(suite).wasSuccessful())

# Exit with 1 if any directory had a failing run, otherwise 0.
errcode = 0 if all(outcomes) else 1

sys.exit(errcode)


================================================
FILE: apex/tests/L1/common/compare.py
================================================
import argparse
import torch

# Command-line options identify which pair (or triple) of saved run files to compare.
parser = argparse.ArgumentParser(description='Compare')
parser.add_argument('--opt-level', type=str)
parser.add_argument('--keep-batchnorm-fp32', type=str, default=None)
parser.add_argument('--loss-scale', type=str, default=None)
parser.add_argument('--fused-adam', action='store_true')
parser.add_argument('--use_baseline', action='store_true')
args = parser.parse_args()

# File names are "<True|False>_<opt_level>_<loss_scale>_<keep_batchnorm_fp32>_<fused_adam>".
base_file = "_".join(str(part) for part in (args.opt_level,
                                            args.loss_scale,
                                            args.keep_batchnorm_fp32,
                                            args.fused_adam))

file_e = "True_" + base_file
file_p = "False_" + base_file
if args.use_baseline:
    file_b = "baselines/True_" + base_file

dict_e = torch.load(file_e)
dict_p = torch.load(file_p)
if args.use_baseline:
    dict_b = torch.load(file_b)

torch.set_printoptions(precision=10)

print(file_e)
print(file_p)
if args.use_baseline:
    print(file_b)

# Walk the recorded iterations of both runs in lockstep. Losses must match
# exactly; speeds are only printed. With --use_baseline the stored baseline
# run is checked and printed as well.
for n, (i_e, i_p) in enumerate(zip(dict_e["Iteration"], dict_p["Iteration"])):
    assert i_e == i_p, "i_e = {}, i_p = {}".format(i_e, i_p)

    loss_e = dict_e["Loss"][n]
    loss_p = dict_p["Loss"][n]
    if args.use_baseline:
        loss_b = dict_b["Loss"][n]

    assert loss_e == loss_p, "Iteration {}, loss_e = {}, loss_p = {}".format(i_e, loss_e, loss_p)

    if args.use_baseline:
        assert loss_e == loss_b, "Iteration {}, loss_e = {}, loss_b = {}".format(i_e, loss_e, loss_b)
        print("{:4} {:15.10f} {:15.10f} {:15.10f} {:15.10f} {:15.10f} {:15.10f}".format(
              i_e,
              loss_b,
              loss_e,
              loss_p,
              dict_b["Speed"][n],
              dict_e["Speed"][n],
              dict_p["Speed"][n]))
    else:
        print("{:4} {:15.10f} {:15.10f} {:15.10f} {:15.10f}".format(
              i_e,
              loss_e,
              loss_p,
              dict_e["Speed"][n],
              dict_p["Speed"][n]))


================================================
FILE: apex/tests/L1/common/main_amp.py
================================================
import argparse
import os
import shutil
import time

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models

import numpy as np

try:
    from apex.parallel import DistributedDataParallel as DDP
    from apex.fp16_utils import *
    from apex import amp, optimizers
    from apex.multi_tensor_apply import multi_tensor_applier
except ImportError:
    raise ImportError("Please install apex from https://www.github.com/nvidia/apex to run this example.")

# Lower-case, non-dunder callables exposed by torchvision.models; these are
# the values accepted by --arch.
model_names = sorted(
    name for name, obj in models.__dict__.items()
    if name.islower() and not name.startswith("__") and callable(obj)
)

parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')

# Dataset location and model choice.
parser.add_argument('data', metavar='DIR', help='path to dataset')
parser.add_argument(
    '--arch', '-a', metavar='ARCH', default='resnet18', choices=model_names,
    help='model architecture: ' + ' | '.join(model_names) + ' (default: resnet18)')

# Training schedule and optimizer hyper-parameters.
parser.add_argument(
    '-j', '--workers', default=4, type=int, metavar='N',
    help='number of data loading workers (default: 4)')
parser.add_argument(
    '--epochs', default=90, type=int, metavar='N',
    help='number of total epochs to run')
parser.add_argument(
    '--start-epoch', default=0, type=int, metavar='N',
    help='manual epoch number (useful on restarts)')
parser.add_argument(
    '-b', '--batch-size', default=256, type=int, metavar='N',
    help='mini-batch size per process (default: 256)')
parser.add_argument(
    '--lr', '--learning-rate', default=0.1, type=float, metavar='LR',
    help='Initial learning rate.  Will be scaled by <global batch size>/256: '
         'args.lr = args.lr*float(args.batch_size*args.world_size)/256.  '
         'A warmup schedule will also be applied over the first 5 epochs.')
parser.add_argument(
    '--momentum', default=0.9, type=float, metavar='M',
    help='momentum')
parser.add_argument(
    '--weight-decay', '--wd', default=1e-4, type=float, metavar='W',
    help='weight decay (default: 1e-4)')

# Logging, checkpointing and run mode.
parser.add_argument(
    '--print-freq', '-p', default=10, type=int, metavar='N',
    help='print frequency (default: 10)')
parser.add_argument(
    '--resume', default='', type=str, metavar='PATH',
    help='path to latest checkpoint (default: none)')
parser.add_argument(
    '-e', '--evaluate', dest='evaluate', action='store_true',
    help='evaluate model on validation set')
parser.add_argument(
    '--pretrained', dest='pretrained', action='store_true',
    help='use pre-trained model')

parser.add_argument(
    '--prof', dest='prof', action='store_true',
    help='Only run 10 iterations for profiling.')
parser.add_argument('--deterministic', action='store_true')

# Distributed execution.
parser.add_argument("--local_rank", default=0, type=int)
parser.add_argument(
    '--sync_bn', action='store_true',
    help='enabling apex sync BN.')

# Options describing the Amp configuration under test; they are also used to
# name the file of recorded results.
parser.add_argument('--has-ext', action='store_true')
parser.add_argument('--opt-level', type=str)
parser.add_argument('--keep-batchnorm-fp32', type=str, default=None)
parser.add_argument('--loss-scale', type=str, default=None)
parser.add_argument('--fused-adam', action='store_true')

# Number of recorded samples after which the run is saved and stopped.
parser.add_argument('--prints-to-process', type=int, default=10)

cudnn.benchmark = True

def fast_collate(batch):
    """Collate ``(image, label)`` samples into a uint8 image tensor and an int64 label tensor.

    The output image tensor has shape ``(len(batch), 3, h, w)``, where ``w``
    and ``h`` are read from ``size[0]`` and ``size[1]`` of the first image.
    Each image is converted to a uint8 array, given a trailing channel axis if
    it has fewer than three dimensions, and rearranged so that axis 2 comes
    first before being added into its slot.

    Returns:
        A ``(tensor, targets)`` tuple.
    """
    images = [sample[0] for sample in batch]
    targets = torch.tensor([sample[1] for sample in batch], dtype=torch.int64)
    width, height = images[0].size[0], images[0].size[1]
    tensor = torch.zeros((len(images), 3, height, width), dtype=torch.uint8)

    for index, image in enumerate(images):
        pixels = np.asarray(image, dtype=np.uint8)
        if pixels.ndim < 3:
            # Two-dimensional input: append a channel axis of length one.
            pixels = np.expand_dims(pixels, axis=-1)
        # Move the channel axis to the front.
        pixels = np.rollaxis(pixels, 2)

        tensor[index] += torch.from_numpy(pixels)

    return tensor, targets

# Best validation precision@1 seen so far; updated by main().
best_prec1 = 0
# Arguments are parsed at import time so every function below can read `args`.
args = parser.parse_args()

# Let multi_tensor_applier be the canary in the coalmine
# that verifies if the backend is what we think it is
assert multi_tensor_applier.available == args.has_ext

# Echo the Amp-related options (and the Python type of the two optional ones).
print("opt_level = {}".format(args.opt_level))
print("keep_batchnorm_fp32 = {}".format(args.keep_batchnorm_fp32), type(args.keep_batchnorm_fp32))
print("loss_scale = {}".format(args.loss_scale), type(args.loss_scale))


print("\nCUDNN VERSION: {}\n".format(torch.backends.cudnn.version()))

# --deterministic overrides the cudnn.benchmark setting made above, seeds the
# RNG with the local rank and raises print precision to 10 digits.
if args.deterministic:
    cudnn.benchmark = False
    cudnn.deterministic = True
    torch.manual_seed(args.local_rank)
    torch.set_printoptions(precision=10)

def main():
    """Build the model, optimizer and data loaders, then train and validate.

    Reads all configuration from the module-level ``args``. Distributed mode
    is enabled when the ``WORLD_SIZE`` environment variable is greater than 1.
    With ``--evaluate`` only a validation pass is run. Otherwise each epoch is
    trained and validated, and the process with ``local_rank == 0`` saves a
    checkpoint, tracking the best precision@1 in the module-level
    ``best_prec1``.
    """
    global best_prec1, args

    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1

    args.gpu = 0
    args.world_size = 1

    if args.distributed:
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()

    assert torch.backends.cudnn.enabled, "Amp requires cudnn backend to be enabled."

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if args.sync_bn:
        import apex
        print("using apex synced BN")
        model = apex.parallel.convert_syncbn_model(model)

    model = model.cuda()

    # Scale learning rate based on global batch size
    args.lr = args.lr*float(args.batch_size*args.world_size)/256.
    if args.fused_adam:
        optimizer = optimizers.FusedAdam(model.parameters())
    else:
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

    model, optimizer = amp.initialize(
        model, optimizer,
        # enabled=False,
        opt_level=args.opt_level,
        keep_batchnorm_fp32=args.keep_batchnorm_fp32,
        loss_scale=args.loss_scale
        )

    if args.distributed:
        # By default, apex.parallel.DistributedDataParallel overlaps communication with
        # computation in the backward pass.
        # model = DDP(model)
        # delay_allreduce delays all communication to the end of the backward pass.
        model = DDP(model, delay_allreduce=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    # Optionally resume from a checkpoint
    if args.resume:
        # Use a local scope to avoid dangling references
        def resume():
            # Without this declaration the assignment below would only create
            # a local variable and the restored value would be discarded.
            global best_prec1
            if os.path.isfile(args.resume):
                print("=> loading checkpoint '{}'".format(args.resume))
                checkpoint = torch.load(args.resume, map_location = lambda storage, loc: storage.cuda(args.gpu))
                args.start_epoch = checkpoint['epoch']
                best_prec1 = checkpoint['best_prec1']
                model.load_state_dict(checkpoint['state_dict'])
                optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded checkpoint '{}' (epoch {})"
                      .format(args.resume, checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(args.resume))
        resume()

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')

    if(args.arch == "inception_v3"):
        crop_size = 299
        val_size = 320 # I chose this value arbitrarily, we can adjust.
    else:
        crop_size = 224
        val_size = 256

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(crop_size),
            transforms.RandomHorizontalFlip(),
            # transforms.ToTensor(), Too slow
            # normalize,
        ]))
    val_dataset = datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Resize(val_size),
            transforms.CenterCrop(crop_size),
        ]))

    train_sampler = None
    val_sampler = None
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)

    # Shuffling is left to the sampler when one is in use.
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler, collate_fn=fast_collate)

    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True,
        sampler=val_sampler,
        collate_fn=fast_collate)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
        if args.prof:
            break
        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        if args.local_rank == 0:
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer' : optimizer.state_dict(),
            }, is_best)

class data_prefetcher():
    """Wrap a data loader and stage the next batch on the GPU ahead of time.

    Each batch is copied to the GPU on a dedicated CUDA stream, converted to
    float and normalized with per-channel mean/std constants (scaled by 255).
    Once the loader is exhausted, ``next()`` returns ``(None, None)``.
    """

    def __init__(self, loader):
        self.loader = iter(loader)
        self.stream = torch.cuda.Stream()
        channel_mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
        channel_std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
        # Shaped (1, 3, 1, 1) so they broadcast over batch, height and width.
        self.mean = torch.tensor(channel_mean).cuda().view(1, 3, 1, 1)
        self.std = torch.tensor(channel_std).cuda().view(1, 3, 1, 1)
        # With Amp, it isn't necessary to manually convert data to half.
        self.preload()

    def preload(self):
        """Fetch the next batch and start moving and normalizing it on the side stream."""
        try:
            self.next_input, self.next_target = next(self.loader)
        except StopIteration:
            # Signal exhaustion to the caller of next().
            self.next_input = None
            self.next_target = None
            return

        with torch.cuda.stream(self.stream):
            self.next_input = self.next_input.cuda(non_blocking=True)
            self.next_target = self.next_target.cuda(non_blocking=True)
            # With Amp, it isn't necessary to manually convert data to half.
            as_float = self.next_input.float()
            self.next_input = as_float.sub_(self.mean).div_(self.std)

    def next(self):
        """Return the staged ``(input, target)`` pair and begin staging the following one."""
        # Make the current stream wait for the side stream's pending work.
        torch.cuda.current_stream().wait_stream(self.stream)
        staged = (self.next_input, self.next_target)
        self.preload()
        return staged


def train(train_loader, model, criterion, optimizer, epoch):
    """Train ``model`` over ``train_loader`` and record loss/speed samples.

    Every ``args.print_freq`` iterations (for ``i > 1``) the current iteration
    index, loss value and throughput are appended to a run-info dictionary.
    Once ``args.prints_to_process`` samples have been collected, the process
    with ``local_rank == 0`` saves the dictionary with ``torch.save`` to a
    file named from the has_ext/opt_level/loss_scale/keep_batchnorm_fp32/
    fused_adam options, and the process then exits via ``quit()``.

    ``epoch`` is only used in the progress message.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()
    end = time.time()

    # Samples collected for later comparison between runs.
    run_info_dict = {"Iteration" : [],
                     "Loss" : [],
                     "Speed" : []}

    prefetcher = data_prefetcher(train_loader)
    input, target = prefetcher.next()
    i = -1
    # The prefetcher yields None once the loader is exhausted.
    while input is not None:
        i += 1

        # No learning rate warmup for this test, to expose bitwise inaccuracies more quickly
        # adjust_learning_rate(optimizer, epoch, i, len(train_loader))

        if args.prof:
            if i > 10:
                break
        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))

        if args.distributed:
            # Average the metrics across all processes.
            reduced_loss = reduce_tensor(loss.data)
            prec1 = reduce_tensor(prec1)
            prec5 = reduce_tensor(prec5)
        else:
            reduced_loss = loss.data

        # NOTE(review): to_python_float is not defined in this file; presumably
        # it comes from the `apex.fp16_utils` star import - confirm.
        losses.update(to_python_float(reduced_loss), input.size(0))
        top1.update(to_python_float(prec1), input.size(0))
        top5.update(to_python_float(prec5), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()

        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()

        # for param in model.parameters():
        #     print(param.data.double().sum().item(), param.grad.data.double().sum().item())

        # torch.cuda.synchronize()
        torch.cuda.nvtx.range_push("step")
        optimizer.step()
        torch.cuda.nvtx.range_pop()

        torch.cuda.synchronize()
        # measure elapsed time
        batch_time.update(time.time() - end)

        end = time.time()

        # If you decide to refactor this test, like examples/imagenet, to sample the loss every
        # print_freq iterations, make sure to move this prefetching below the accuracy calculation.
        input, target = prefetcher.next()

        if i % args.print_freq == 0 and i > 1:
            if args.local_rank == 0:
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Speed {3:.3f} ({4:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.10f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                       epoch, i, len(train_loader),
                       args.world_size * args.batch_size / batch_time.val,
                       args.world_size * args.batch_size / batch_time.avg,
                       batch_time=batch_time,
                       data_time=data_time, loss=losses, top1=top1, top5=top5))
            # Samples are recorded on every rank; only rank 0 writes them out.
            run_info_dict["Iteration"].append(i)
            run_info_dict["Loss"].append(losses.val)
            run_info_dict["Speed"].append(args.world_size * args.batch_size / batch_time.val)
            if len(run_info_dict["Loss"]) == args.prints_to_process:
                if args.local_rank == 0:
                    torch.save(run_info_dict,
                               str(args.has_ext) + "_" + str(args.opt_level) + "_" +
                               str(args.loss_scale) + "_" + str(args.keep_batchnorm_fp32) + "_" +
                               str(args.fused_adam))
                # Enough samples collected: stop the whole process.
                quit()


def validate(val_loader, model, criterion):
    """Evaluate ``model`` over ``val_loader`` and return the average precision@1.

    Loss and precision@1/@5 are accumulated per batch (averaged across
    processes in distributed mode). The process with ``local_rank == 0``
    prints progress every ``args.print_freq`` batches, and a summary line is
    printed at the end.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # Put the model in evaluation mode.
    model.eval()

    end = time.time()

    prefetcher = data_prefetcher(val_loader)
    images, target = prefetcher.next()
    step = -1
    # The prefetcher yields None once the loader is exhausted.
    while images is not None:
        step += 1

        # Forward pass only; no gradients are needed.
        with torch.no_grad():
            output = model(images)
            loss = criterion(output, target)

        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))

        if not args.distributed:
            reduced_loss = loss.data
        else:
            reduced_loss = reduce_tensor(loss.data)
            prec1 = reduce_tensor(prec1)
            prec5 = reduce_tensor(prec5)

        samples = images.size(0)
        losses.update(to_python_float(reduced_loss), samples)
        top1.update(to_python_float(prec1), samples)
        top5.update(to_python_float(prec5), samples)

        # Time spent on this batch.
        batch_time.update(time.time() - end)
        end = time.time()

        if args.local_rank == 0 and step % args.print_freq == 0:
            global_batch = args.world_size * args.batch_size
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Speed {2:.3f} ({3:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                   step, len(val_loader),
                   global_batch / batch_time.val,
                   global_batch / batch_time.avg,
                   batch_time=batch_time, loss=losses,
                   top1=top1, top5=top5))

        images, target = prefetcher.next()

    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))

    return top1.avg


def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Write ``state`` to ``filename``; when ``is_best``, also copy it to ``model_best.pth.tar``."""
    torch.save(state, filename)
    if not is_best:
        return
    shutil.copyfile(filename, 'model_best.pth.tar')


class AverageMeter(object):
    """Keep the most recent sample and the weighted running mean of all samples."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set ``val``, ``avg``, ``sum`` and ``count`` back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the latest sample, weighted by ``n``, and refresh ``avg``."""
        weighted = val * n
        self.val = val
        self.sum += weighted
        self.count += n
        self.avg = self.sum / self.count


def adjust_learning_rate(optimizer, epoch, step, len_epoch):
    """Write the scheduled learning rate into every param group of ``optimizer``.

    The base rate ``args.lr`` is multiplied by 0.1 once per 30 completed
    epochs, with one extra factor of 0.1 from epoch 80 onwards.  During the
    first 5 epochs the result is additionally scaled linearly by
    ``(1 + step + epoch*len_epoch) / (5*len_epoch)``.  The original authors
    describe this schedule as one that should yield 76% converged accuracy
    with batch size 256.
    """
    extra_decay = 1 if epoch >= 80 else 0
    decay_count = epoch // 30 + extra_decay

    new_lr = args.lr*(0.1**decay_count)

    # Warmup
    if epoch < 5:
        new_lr = new_lr*float(1 + step + epoch*len_epoch)/(5.*len_epoch)

    for group in optimizer.param_groups:
        group['lr'] = new_lr


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k.

    Args:
        output: 2-D tensor of scores; the top-k entries are taken along dim 1.
        target: tensor of labels with ``target.size(0)`` entries, compared
            against the indices selected from ``output``.
        topk: iterable of ``k`` values to evaluate.

    Returns:
        A list with one single-element float tensor per ``k``, holding the
        percentage of rows whose label is among the ``k`` highest scores.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # ``correct`` can carry the transposed (non-contiguous) layout of
        # ``pred``; ``view(-1)`` raises on such a multi-row slice, whereas
        # ``reshape(-1)`` copies only when it has to.
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res


def reduce_tensor(tensor):
    """Return a copy of ``tensor`` summed over all ranks and divided by ``args.world_size``."""
    averaged = tensor.clone()
    dist.all_reduce(averaged, op=dist.reduce_op.SUM)
    averaged /= args.world_size
    return averaged

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()


================================================
FILE: apex/tests/L1/common/run_test.sh
================================================
#!/bin/bash

# Print $1 as a black-on-green banner surrounded by blank lines.
# The message is passed as a printf argument instead of being spliced into
# the format string, so any '%' or backslash in it is printed literally.
print_banner() {
  printf "\n\n\n\e[30m\e[42m%s\e[0m\n\n\n\n" "$1"
}

print_banner "Distributed status:  $1"

# $2 is the dataset directory that is appended to every training command.
echo "$2"
DATADIR=$2

# NOTE(review): USE_BASELINE is derived from $3 but nothing later in this
# script reads it (the compare.py call hard-codes --use_baseline) -- confirm
# whether it should be wired in or removed.
if [ -n "$3" ]
then
  USE_BASELINE=""
else
  USE_BASELINE="--use_baseline"
fi

# Select the launch command from the mode in $1.  An unrecognized mode used
# to leave BASE_CMD unset, so the script only failed after apex had already
# been reinstalled; reject it up front instead.
case "$1" in
  single_gpu)
    BASE_CMD="python main_amp.py -a resnet50 --b 128 --workers 4 --deterministic --prints-to-process 5"
    ;;
  distributed)
    BASE_CMD="python -m torch.distributed.launch --nproc_per_node=2 main_amp.py -a resnet50 --b 128 --workers 4 --deterministic --prints-to-process 5"
    ;;
  *)
    echo "Unknown mode '$1' (expected single_gpu or distributed)" >&2
    exit 1
    ;;
esac

# Fixed arguments for the FusedAdam runs.
ADAM_ARGS="--opt-level O2 --keep-batchnorm-fp32 False --fused-adam"

# Option values swept by the loops below; an empty entry means "flag omitted".
keep_batchnorms=(
""
"--keep-batchnorm-fp32 True"
"--keep-batchnorm-fp32 False"
)

loss_scales=(
""
"--loss-scale 1.0"
"--loss-scale 128.0"
"--loss-scale dynamic"
)

opt_levels=(
"O0"
"O1"
"O2"
"O3"
)

# Remove files named True*/False* (presumably left by an earlier run -- TODO
# confirm).  -f keeps a clean directory from printing "No such file" errors.
rm -f True* False*

# From here on, abort on the first failing command.
set -e

print_banner "Installing Apex with --cuda_ext and --cpp_ext"

pushd ../../..
pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
popd

# Pass 1: run every opt-level / loss-scale / keep-batchnorm combination with
# --has-ext, i.e. against the install that was built with the extensions.
for opt_level in "${opt_levels[@]}"
do
  for loss_scale in "${loss_scales[@]}"
  do
    for keep_batchnorm in "${keep_batchnorms[@]}"
    do
      # O1 is only run with the keep-batchnorm flag omitted.
      if [ "$opt_level" == "O1" ] && [ -n "${keep_batchnorm}" ]
      then
        print_banner "Skipping ${opt_level} ${loss_scale} ${keep_batchnorm}"
        continue
      fi
      print_banner "${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} --has-ext $DATADIR"
      # set -x echoes the exact command line that is executed.
      set -x
      ${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} --has-ext $DATADIR
      set +x
    done
  done
done

# Handle FusedAdam separately due to limited support.
# FusedAdam will not be tested for bitwise accuracy against the Python implementation.
# The L0 tests already do so.  These tests are here to ensure that it actually runs,
# and get an idea of performance.
for loss_scale in "${loss_scales[@]}"
do
  print_banner "${BASE_CMD} ${ADAM_ARGS} ${loss_scale} --has-ext $DATADIR"
  set -x
  ${BASE_CMD} ${ADAM_ARGS} ${loss_scale} --has-ext $DATADIR
  set +x
done

print_banner "Reinstalling apex without extensions"

pushd ../../..
pip install -v --no-cache-dir .
popd

# Pass 2: the same sweep as pass 1, now without --has-ext, against the
# Python-only install.
for opt_level in "${opt_levels[@]}"
do
  for loss_scale in "${loss_scales[@]}"
  do
    for keep_batchnorm in "${keep_batchnorms[@]}"
    do
      if [ "$opt_level" == "O1" ] && [ -n "${keep_batchnorm}" ]
      then
        print_banner "Skipping ${opt_level} ${loss_scale} ${keep_batchnorm}"
        continue
      fi
      print_banner "${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} $DATADIR"
      set -x
      ${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} $DATADIR
      set +x
    done
  done
done

print_banner "Checking for bitwise accuracy between Python-only and cpp/cuda extension installs"

# Pass 3: run compare.py once per configuration exercised above.
for opt_level in "${opt_levels[@]}"
do
  for loss_scale in "${loss_scales[@]}"
  do
    for keep_batchnorm in "${keep_batchnorms[@]}"
    do
      echo ""
      if [ "$opt_level" == "O1" ] && [ -n "${keep_batchnorm}" ]
      then
        echo "Skipping ${opt_level} ${loss_scale} ${keep_batchnorm}"
        continue
      fi
      echo "${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} [--has-ext] $DATADIR"
      set -x
      # NOTE(review): --use_baseline is hard-coded here; the USE_BASELINE
      # variable computed at the top of the script is not consulted.
      python compare.py --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} --use_baseline
      set +x
    done
  done
done

# Leave the environment with the extension build installed again.
print_banner "Reinstalling Apex with --cuda_ext and --cpp_ext"

pushd ../../..
pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
popd


================================================
FILE: apex/tests/L1/cross_product/run.sh
================================================
#!/bin/bash

# Dataset location; the active value is a path on the original author's machine.
DATADIR="/home/mcarilli/Desktop/pt18data/apex_stale/examples/imagenet/bare_metal_train_val/"
# DATADIR="/opt/home/apex/examples/imagenet/"
# Copy the shared driver and helper files into this directory before running.
cp ../common/* .
# NOTE(review): $1 is unquoted, so when it is empty run_test.sh receives
# DATADIR as its $2 and "yes" as its $3; when it is set, $1 becomes $2 and
# DATADIR shifts to $3 -- confirm which is intended.
bash run_test.sh single_gpu $1 $DATADIR yes


================================================
FILE: apex/tests/L1/cross_product_distributed/run.sh
================================================
#!/bin/bash

# Copy the shared driver and helper files here, then run the sweep in
# "distributed" mode; $1 is forwarded as run_test.sh's second argument.
cp ../common/* .
bash run_test.sh distributed $1


================================================
FILE: apex/tests/distributed/DDP/ddp_race_condition_test.py
================================================
import torch
import torch.distributed as dist
from torch.nn import Parameter
from torch.nn import Module
from apex.parallel import DistributedDataParallel as DDP
import argparse
import os


# Command-line and process-group setup for the DDP race-condition test.
parser = argparse.ArgumentParser(description='allreduce hook example')
parser.add_argument("--local_rank", default=0, type=int)
args = parser.parse_args()

# Distributed mode is enabled only when WORLD_SIZE is set and greater than 1.
args.distributed = False
if 'WORLD_SIZE' in os.environ:
    args.distributed = int(os.environ['WORLD_SIZE']) > 1

if args.distributed:
    # Map the local rank onto one of the visible CUDA devices.
    args.gpu = args.local_rank % torch.cuda.device_count()
    torch.cuda.set_device(args.gpu)
    torch.distributed.init_process_group(backend='nccl',
                                         init_method='env://')
    args.world_size = torch.distributed.get_world_size()

torch.set_printoptions(precision=10)
# Seed each process differently, by local rank.
torch.manual_seed(args.local_rank)

class Model(Module):
    """Multiply the input elementwise by two CUDA parameters of 4096*4096 elements each."""

    def __init__(self):
        super(Model, self).__init__()
        numel = 4096*4096
        # ``a`` starts filled with 1.0 and ``b`` with 2.0.
        self.a = Parameter(torch.cuda.FloatTensor(numel).fill_(1.0))
        self.b = Parameter(torch.cuda.FloatTensor(numel).fill_(2.0))

    def forward(self, input):
        scaled = input*self.a
        return scaled*self.b

# Wrap the model in apex DDP and check the gradient sums after each backward.
model = Model()
# model = DDP(model, message_size=1, gradient_predivide_factor=8.0)
model = DDP(model, delay_allreduce=True)
# model = DDP(model, message_size=1, allreduce_trigger_params=[model.b])

x = torch.cuda.FloatTensor(4096*4096)

passed = True
torch.cuda.cudart().cudaProfilerStart()
for i in range(10):
    x.fill_(i + args.local_rank) # fill x with new values every iteration for sanity
    model.zero_grad()
    out = model(x)
    loss = out.sum()
    # torch.cuda.nvtx.range_push("backward")
    loss.backward()
    # torch.cuda.nvtx.range_pop()
    
    # torch.cuda.nvtx.range_push("synchronize() + info")
    # torch.cuda.synchronize()
    print("i = {}".format(i))
    # Print and check one parameter's gradient sum.  ``val`` is the value the
    # *other* parameter is filled with.  The (2*i+1)/2 factor is the mean of
    # x's fill values i and i+1, i.e. it assumes a 2-process launch with
    # gradients averaged across ranks -- TODO confirm for other world sizes.
    def info(name, param, val):
        expected = val*4096*4096*(2.*i+1)/2.
        actual = param.grad.data.sum().item()
        print(name+": grad.data_ptr() = {}, expected sum {}, got {}".format(
              param.grad.data_ptr(), expected, actual))
        # Exact floating-point equality is required for a pass.
        return (expected == actual)
    if not info("model.a", model.module.a, 2.):  passed = False
    if not info("model.b", model.module.b, 1.):  passed = False
    # torch.cuda.nvtx.range_pop()
torch.cuda.cudart().cudaProfilerStop()

print("passed = ", passed)


================================================
FILE: apex/tests/distributed/DDP/run_race_test.sh
================================================
#!/bin/bash

# Launch the DDP race-condition test as two processes on GPUs 0 and 1.
CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 ddp_race_condition_test.py


================================================
FILE: apex/tests/distributed/amp_master_params/amp_master_params.py
================================================
import torch
import argparse
import os
from apex import amp
# FOR DISTRIBUTED: (can also use torch.nn.parallel.DistributedDataParallel instead)
from apex.parallel import DistributedDataParallel

# Train a single Linear layer under amp O2 on a fixed random batch.
parser = argparse.ArgumentParser()
# FOR DISTRIBUTED:  Parse for the local_rank argument, which will be supplied
# automatically by torch.distributed.launch.
parser.add_argument("--local_rank", default=0, type=int)
args = parser.parse_args()

# FOR DISTRIBUTED:  If we are running under torch.distributed.launch,
# the 'WORLD_SIZE' environment variable will also be set automatically.
args.distributed = False
if 'WORLD_SIZE' in os.environ:
    args.distributed = int(os.environ['WORLD_SIZE']) > 1

if args.distributed:
    # FOR DISTRIBUTED:  Set the device according to local_rank.
    torch.cuda.set_device(args.local_rank)

    # FOR DISTRIBUTED:  Initialize the backend.  torch.distributed.launch will provide
    # environment variables, and requires that you use init_method=`env://`.
    torch.distributed.init_process_group(backend='nccl',
                                         init_method='env://')

    # Seed each process with its own rank.
    torch.manual_seed(torch.distributed.get_rank())

torch.backends.cudnn.benchmark = True

# Batch size, input width and output width of the toy problem.
N, D_in, D_out = 64, 1024, 16

# Each process receives its own batch of "fake input data" and "fake target data."
# The "training loop" in each process just uses this fake batch over and over.
# https://github.com/NVIDIA/apex/tree/master/examples/imagenet provides a more realistic
# example of distributed data sampling for both training and validation.
x = torch.randn(N, D_in, device='cuda')
y = torch.randn(N, D_out, device='cuda')

model = torch.nn.Linear(D_in, D_out).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

model, optimizer = amp.initialize(model, optimizer, opt_level="O2")

if args.distributed:
    # FOR DISTRIBUTED:  After amp.initialize, wrap the model with
    # apex.parallel.DistributedDataParallel.
    model = DistributedDataParallel(model)
    # torch.nn.parallel.DistributedDataParallel is also fine, with some added args:
    # model = torch.nn.parallel.DistributedDataParallel(model,
    #                                                   device_ids=[args.local_rank],
    #                                                   output_device=args.local_rank)

loss_fn = torch.nn.MSELoss()

# 500 SGD steps on the same batch; backward runs on the amp-scaled loss.
for t in range(500):
    optimizer.zero_grad()
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    with amp.scale_loss(loss, optimizer) as scaled_loss:
        scaled_loss.backward()
    optimizer.step()

if args.local_rank == 0:
    print("final loss = ", loss)

# torch.distributed.get_rank() needs an initialized process group, and this
# script only calls init_process_group() when args.distributed is set.  Fall
# back to rank 0 so a plain single-process run can still write its files.
rank = torch.distributed.get_rank() if args.distributed else 0

# Save the model params and the amp master params under per-rank filenames.
torch.save(list(model.parameters()), "rank{}model.pth".format(rank))
torch.save(list(amp.master_params(optimizer)), "rank{}master.pth".format(rank))


================================================
FILE: apex/tests/distributed/amp_master_params/compare.py
================================================
import torch

def _load_on_gpu0(path):
    """Load a saved parameter list, mapping every storage onto CUDA device 0."""
    return torch.load(path, map_location=lambda storage, loc: storage.cuda(0))


model_params_rank0 = _load_on_gpu0("rank0model.pth")
model_params_rank1 = _load_on_gpu0("rank1model.pth")
master_params_rank0 = _load_on_gpu0("rank0master.pth")
master_params_rank1 = _load_on_gpu0("rank1master.pth")

# Walk the four parameter lists in lockstep.
param_sets = zip(model_params_rank0,
                 model_params_rank1,
                 master_params_rank0,
                 master_params_rank1)

for model_rank0, model_rank1, master_rank0, master_rank1 in param_sets:
    # Both ranks must hold the same model params and the same master params.
    assert torch.allclose(model_rank0, model_rank1), "Model param mismatch"
    assert torch.allclose(master_rank0, master_rank1), "Master param mismatch"
    # Some debugging/investigation assistance code:
    # maxval, maxind = torch.max(((torch.abs(model_rank0).float())/torch.abs(master_rank0)).view(-1), 0)
    # offending_val_half = model_rank0.view(-1)[maxind.item()]
    # offending_val_float = master_rank0.view(-1)[maxind.item()]
    # print(maxval.item(), maxind.item(), offending_val_half.item(), offending_val_float.item(),
    #       offending_val_float.half().item())
    # rtol needs to be > 2^-11 because of denormals...
    master_as_half = master_rank0.half()
    assert torch.allclose(model_rank0, master_as_half, rtol=.005), "Model-master mismatch"

print("OK:  Model and master params match across ranks.")


================================================
FILE: apex/tests/distributed/amp_master_params/run.sh
================================================
#!/bin/bash
# Train with two processes; each writes rank{N}model.pth and rank{N}master.pth.
python -m torch.distributed.launch --nproc_per_node=2 amp_master_params.py

# Check that the saved parameters agree across the two ranks.
python compare.py


================================================
FILE: apex/tests/distributed/synced_batchnorm/single_gpu_unit_test.py
================================================
import torch
import numpy as np
import apex
if True:
    print("using setup tools")
    import syncbn
else:
    print("using jit")
    from torch.utils.cpp_extension import load
    syncbn = load(name='syncbn', sources=['../../csrc/syncbn.cpp', '../../csrc/welford.cu'])

def compare(desc, inp1, inp2, error):
    """Return whether two tensors agree within ``error`` (used as both rtol and atol).

    On a mismatch, ``desc`` is printed together with the differing elements
    of both inputs.
    """
    lhs = inp1.clone().detach().cpu().numpy()
    rhs = inp2.clone().detach().cpu().numpy()
    close = np.allclose(lhs, rhs, rtol=error, atol=error)
    if close:
        return close
    print(desc, close)
    delta = lhs - rhs
    # Positions whose difference exceeds the combined tolerance.
    bad = (np.abs(delta) >= error + error * np.abs(rhs)).nonzero()
    print("dif    : ", delta[bad])
    print("inp1   : ", lhs[bad])
    print("inp2   : ", rhs[bad])
    return close

# Problem size for the single-GPU SyncBatchNorm checks.
feature_size = 10
space_size = 16
batch_size = 5


# Tolerance passed to compare() for every check.
error = 1e-5

# Fixed-seed random input, upstream gradient, weight and bias.
np.random.seed(1)
dtype = np.float32
inp = (np.random.randn(batch_size, feature_size, space_size, space_size)).astype(dtype)
grad = (np.random.randn(batch_size, feature_size, space_size, space_size)).astype(dtype)
weight = (np.random.randn(feature_size)).astype(dtype)
bias = (np.random.randn(feature_size)).astype(dtype)

# Tensors under test are float32; reference values are computed in float64.
type_tensor = torch.cuda.FloatTensor
ref_tensor = torch.cuda.DoubleTensor

inp_t = type_tensor(inp)
weight_t = type_tensor(weight)
bias_t = type_tensor(bias)

# inp_r: input with the feature axis first and the remaining axes flattened.
inp_r = ref_tensor(inp.transpose(1, 0, 2, 3).reshape(feature_size, -1))
inp2_r = ref_tensor(inp)
weight_r = ref_tensor(weight).view(-1, 1, 1)
bias_r = ref_tensor(bias).view(-1, 1, 1)

grad_output_t = type_tensor(grad)

# Reference per-feature mean and variances.
m = inp_r.mean(1)
b_v = inp_r.var(1, unbiased=False)
unb_v = inp_r.var(1, unbiased=True)

eps = 1e-5

#mean, var, var_biased = syncbn.welford_mean_var(inp_t)
mean, var_biased = syncbn.welford_mean_var(inp_t)
inv_std = 1.0 / torch.sqrt(var_biased + eps)

# Baseline: torch BatchNorm2d, forward and backward.
bn = torch.nn.BatchNorm2d(feature_size).cuda()
bn.momentum = 1.0
bn.weight.data = weight_t.clone()
bn.bias.data = bias_t.clone()
inp_bn = inp_t.clone().requires_grad_()
grad_bn = grad_output_t.clone().detach()
out_bn = bn(inp_bn)
out_bn.backward(grad_bn)

# Module under test: apex SyncBatchNorm with the same weights and inputs.
sbn = apex.parallel.SyncBatchNorm(feature_size).cuda()
sbn.momentum = 1.0
sbn.weight.data = weight_t.clone()
sbn.bias.data = bias_t.clone()
inp_sbn = inp_t.clone().requires_grad_()
grad_sbn = grad_output_t.clone().detach()
out_sbn = sbn(inp_sbn)
out_sbn.backward(grad_sbn)

# Same module with channel_last=True; input and gradient have dims 1 and -1 swapped.
sbn_c_last = apex.parallel.SyncBatchNorm(feature_size, channel_last=True).cuda()
sbn_c_last.momentum = 1.0
sbn_c_last.weight.data = weight_t.clone()
sbn_c_last.bias.data = bias_t.clone()
inp_sbn_c_last = inp_t.clone().transpose(-1, 1).contiguous().requires_grad_()
grad_sbn_c_last = grad_output_t.clone().transpose(-1, 1).contiguous().detach()
out_sbn_c_last = sbn_c_last(inp_sbn_c_last)
out_sbn_c_last.backward(grad_sbn_c_last)

# Pass/fail flags.  Only compare() calls whose result is assigned feed into
# them; the bare compare() calls below only print diagnostics on mismatch.
sbn_result = True
sbn_result_c_last = True
bn_result = True

sbn_result = compare("comparing mean: ", mean, m, error) and sbn_result
#sbn_result = compare("comparing variance: ", var, unb_v, error) and sbn_result
sbn_result = compare("comparing biased variance: ", var_biased, b_v, error) and sbn_result


# Forward kernel output against the float64 reference formula.
out = syncbn.batchnorm_forward(inp_t, mean, inv_std, weight_t, bias_t)
out_r = weight_r * (inp2_r - m.view(-1, 1, 1)) * torch.rsqrt(b_v.view(-1,1,1) + eps) + bias_r

sbn_result = compare("comparing output: ", out, out_r, error) and sbn_result
compare("comparing bn output: ", out_bn, out_r, error)

grad_output_t = type_tensor(grad)

grad_output_r = ref_tensor(grad.transpose(1, 0, 2, 3).reshape(feature_size, -1))
grad_output2_r = ref_tensor(grad)

# Reference gradients for bias, weight and input.
grad_bias_r = grad_output_r.sum(1)
grad_weight_r = ((inp2_r - m.view(-1, 1, 1)) * torch.rsqrt(b_v.view(-1,1,1) + eps) * grad_output2_r).transpose(1,0).contiguous().view(feature_size, -1).sum(1)

mean_dy_r = grad_output_r.mean(1)
mean_dy_xmu_r = ((inp2_r - m.view(-1, 1, 1)) * grad_output2_r).transpose(1,0).contiguous().view(feature_size, -1).mean(1)

grad_input_r = (grad_output2_r - mean_dy_r.view(-1, 1, 1) - (inp2_r - m.view(-1, 1, 1)) / (b_v.view(-1,1,1) + eps) * mean_dy_xmu_r.view(-1, 1, 1) ) * torch.rsqrt(b_v.view(-1,1,1) + eps) * weight_r.view(-1,1,1)

# Backward kernels against the references.
mean_dy, mean_dy_xmu, grad_weight, grad_bias = syncbn.reduce_bn(grad_output_t, inp_t, mean, inv_std, weight_t)
grad_input = syncbn.batchnorm_backward(grad_output_t, inp_t, mean, inv_std, weight_t, mean_dy, mean_dy_xmu)
sbn_result = compare("comparing bias grad: ", grad_bias, grad_bias_r, error) and sbn_result
sbn_result = compare("comparing weight grad: ", grad_weight, grad_weight_r, error) and sbn_result
sbn_result = compare("comparing mean_dy grad: ", mean_dy, mean_dy_r, error) and sbn_result
sbn_result = compare("comparing mean_dy_xmu grad: ", mean_dy_xmu, mean_dy_xmu_r, error) and sbn_result
sbn_result = compare("comparing input grad: ", grad_input, grad_input_r, error) and sbn_result
compare("comparing bn input grad: ", inp_bn.grad, grad_input_r, error)
sbn_result = compare("comparing sbn input grad: ", inp_sbn.grad, grad_input_r, error) and sbn_result

# Module-level comparison: BatchNorm2d against SyncBatchNorm.
compare("comparing bn/sbn output: ", out_bn, out_sbn, error)
sbn_result = compare("comparing running_mean: ", bn.running_mean.data, sbn.running_mean.data, error) and sbn_result
sbn_result = compare("comparing running_variance: ", bn.running_var.data, sbn.running_var.data, error) and sbn_result
compare("comparing grad_input: ", inp_bn.grad, inp_sbn.grad, error)
compare("comparing grad_bias: ", bn.bias.grad, sbn.bias.grad, error)
compare("comparing grad_bias bn to ref: ", bn.bias.grad, grad_bias_r, error)
sbn_result = compare("comparing grad_bias sbn to ref: ", sbn.bias.grad, grad_bias_r, error) and sbn_result
compare("comparing grad_weight: ", bn.weight.grad, sbn.weight.grad, error)
compare("comparing grad_weight bn to ref: ", bn.weight.grad, grad_weight_r, error)
sbn_result = compare("comparing grad_weight sbn to ref: ", sbn.weight.grad, grad_weight_r, error) and sbn_result

# Same module-level comparison for the channel_last variant.
compare("comparing channel last bn/sbn output: ", out_bn, out_sbn_c_last.transpose(-1, 1).contiguous(), error)
sbn_result_c_last = compare("comparing channel last running_mean: ", bn.running_mean.data, sbn_c_last.running_mean.data, error) and sbn_result_c_last
sbn_result_c_last = compare("comparing channel last running_variance: ", bn.running_var.data, sbn_c_last.running_var.data, error) and sbn_result_c_last
compare("comparing channel last grad_input: ", inp_bn.grad, inp_sbn_c_last.grad.transpose(-1, 1).contiguous(), error)
compare("comparing channel last grad_bias: ", bn.bias.grad, sbn_c_last.bias.grad, error)
sbn_result_c_last = compare("comparing channel last grad_bias sbn to ref: ", sbn_c_last.bias.grad, grad_bias_r, error) and sbn_result_c_last
compare("comparing channel last grad_weight: ", bn.weight.grad, sbn_c_last.weight.grad, error)
sbn_result_c_last = compare("comparing channel last grad_weight sbn to ref: ", sbn_c_last.weight.grad, grad_weight_r, error) and sbn_result_c_last

# Report the two verdicts.
if sbn_result:
    print("====SBN single gpu passed tests")
else:
    print("*SBN single gpu failed*")

if sbn_result_c_last:
    print("====SBN channel last single gpu passed tests")
else:
    print("*SBN channel last single gpu failed*")


================================================
FILE: apex/tests/distributed/synced_batchnorm/test_groups.py
================================================
import torch
import numpy as np
import apex
import syncbn
import os
import argparse
import torch.optim as optim

def compare(desc, inp1, inp2, error):
    """Return whether two tensors agree within ``error`` (used as both rtol and atol).

    On a mismatch, ``desc`` is printed together with the differing elements
    of both inputs.
    """
    lhs = inp1.clone().detach().cpu().numpy()
    rhs = inp2.clone().detach().cpu().numpy()
    close = np.allclose(lhs, rhs, rtol=error, atol=error)
    if close:
        return close
    print(desc, close)
    delta = lhs - rhs
    # Positions whose difference exceeds the combined tolerance.
    bad = (np.abs(delta) >= error + error * np.abs(rhs)).nonzero()
    print("dif    : ", delta[bad])
    print("inp1   : ", lhs[bad])
    print("inp2   : ", rhs[bad])
    return close

# Problem size for the grouped SyncBatchNorm test.
feature_size = 10
space_size = 40
batch_size = 32


from apex.parallel import DistributedDataParallel as DDP
parser = argparse.ArgumentParser()
parser.add_argument("--local_rank", default=0, type=int)
parser.add_argument("--fp16", action='store_true', default=False)
parser.add_argument("--fp64", action='store_true', default=False)
parser.add_argument("--group_size", default=0, type=int)
args = parser.parse_args()

# WORLD_SIZE must be present and numeric.  Catch only those two failures so
# unrelated errors (and the exit below) are not swallowed by a bare except.
try:
    args.world_size = int(os.environ['WORLD_SIZE'])
except (KeyError, ValueError):
    print("This is a multi-gpu test. To run it please use 'python -m torch.distributed.launch --nproc_per_node=<num gpus> test_groups.py <more options>'")
    exit(1)

torch.cuda.set_device(args.local_rank)
torch.distributed.init_process_group(backend='nccl', init_method='env://')

# --group_size defaults to 0, and every later `% args.group_size` or
# `/ args.group_size` would then raise ZeroDivisionError.  Treat 0 as "one
# group spanning all processes".
# NOTE(review): assumes apex.parallel.create_syncbn_process_group accepts a
# group size equal to the world size -- TODO confirm.
if args.group_size == 0:
    args.group_size = args.world_size

# Slice of the batch this process feeds to SyncBatchNorm, by position in its group.
start = (args.local_rank%args.group_size) * batch_size//args.group_size
finish = (args.local_rank%args.group_size + 1) * batch_size//args.group_size

# Tolerance and numpy dtype depend on the requested precision.
error = 1e-5
dtype = np.float32
if args.fp16:
    error = 1e-3
    dtype = np.float16
elif args.fp64:
    error = 1e-8
    dtype = np.float64


# Processes in the same group share a seed; different groups get different seeds.
np.random.seed(18 + args.local_rank//args.group_size)

inp = np.random.randn(batch_size, feature_size, space_size, space_size).astype(dtype)
grad = np.random.randn(batch_size, feature_size, space_size, space_size).astype(dtype)
weight = np.random.randn(feature_size).astype(dtype)
bias = np.random.randn(feature_size).astype(dtype)


type_tensor = torch.cuda.FloatTensor
if args.fp16:
    type_tensor = torch.cuda.HalfTensor
if args.fp64:
    type_tensor = torch.cuda.DoubleTensor

# Reference values are always computed in float64.
ref_tensor = torch.cuda.DoubleTensor

inp_t = type_tensor(inp)
weight_t = type_tensor(weight)
bias_t = type_tensor(bias)

# inp_r: input with the feature axis first and the remaining axes flattened.
inp_r = ref_tensor(inp.transpose(1, 0, 2, 3).reshape(feature_size, -1))
inp2_r = ref_tensor(inp)
weight_r = ref_tensor(weight).view(-1, 1, 1)
bias_r = ref_tensor(bias).view(-1, 1, 1)

grad_output_t = type_tensor(grad)

# Reference per-feature mean and variances.
m = inp_r.mean(1)
b_v = inp_r.var(1, unbiased=False)
unb_v = inp_r.var(1, unbiased=True)

eps = 1e-5

mean, var_biased = syncbn.welford_mean_var(inp_t)
inv_std = 1.0 / torch.sqrt(var_biased + eps)

# Baseline: DDP-wrapped torch BatchNorm2d run on the full batch.
bn = torch.nn.BatchNorm2d(feature_size).cuda()
bn.momentum = 1.0
bn.weight.data = weight_t.clone()
bn.bias.data = bias_t.clone()
if args.fp16:
    bn.half()
if args.fp64:
    bn.double()
bn = DDP(bn)
inp_bn = inp_t.clone().requires_grad_()
grad_bn = grad_output_t.clone().detach()
out_bn = bn(inp_bn)
out_bn.backward(grad_bn)
# compensating the averaging over processes done by DDP
# in order to produce mathematically equivalent result
# https://github.com/NVIDIA/apex/issues/134#issuecomment-458307368
for param in bn.parameters():
    param.grad = param.grad / args.group_size
bn_opt = optim.SGD(bn.parameters(), lr=1.0)

# Module under test: SyncBatchNorm with a process group of args.group_size,
# fed only this process's [start:finish] slice of the batch.
sbn = apex.parallel.SyncBatchNorm(feature_size, process_group=apex.parallel.create_syncbn_process_group(args.group_size)).cuda()
sbn.momentum = 1.0
sbn.weight.data = weight_t.clone()
sbn.bias.data = bias_t.clone()
if args.fp16:
    sbn.half()
if args.fp64:
    sbn.double()
sbn = DDP(sbn)
sbn_opt = optim.SGD(sbn.parameters(), lr=1.0)
inp_sbn = inp_t.clone().requires_grad_()
grad_sbn = grad_output_t.clone().detach()
out_sbn = sbn(inp_sbn[start:finish])
out_sbn.backward(grad_sbn[start:finish])

# Pass/fail flag.  Only compare() calls whose result is assigned feed into
# it; the bare compare() calls only print diagnostics on mismatch.
sbn_result = True
bn_result = True

# Kernel-level checks are evaluated on local rank 0 only.
if args.local_rank == 0:
    sbn_result = compare("comparing mean: ", mean, m, error) and sbn_result
    sbn_result = compare("comparing biased variance: ", var_biased, b_v, error) and sbn_result

# Forward kernel output against the float64 reference formula.
out = syncbn.batchnorm_forward(inp_t, mean, inv_std, weight_t, bias_t)
out_r = weight_r * (inp2_r - m.view(-1, 1, 1)) * torch.rsqrt(b_v.view(-1,1,1) + eps) + bias_r

if args.local_rank == 0:
    sbn_result = compare("comparing output: ", out, out_r, error) and sbn_result
    compare("comparing bn output: ", out_bn, out_r, error)

grad_output_t = type_tensor(grad)

grad_output_r = ref_tensor(grad.transpose(1, 0, 2, 3).reshape(feature_size, -1))
grad_output2_r = ref_tensor(grad)

# Reference gradients for bias, weight and input.
grad_bias_r = grad_output_r.sum(1)
grad_weight_r = ((inp2_r - m.view(-1, 1, 1)) * torch.rsqrt(b_v.view(-1,1,1) + eps) * grad_output2_r).transpose(1,0).contiguous().view(feature_size, -1).sum(1)

mean_dy_r = grad_output_r.mean(1)
mean_dy_xmu_r = ((inp2_r - m.view(-1, 1, 1)) * grad_output2_r).transpose(1,0).contiguous().view(feature_size, -1).mean(1)

grad_input_r = (grad_output2_r - mean_dy_r.view(-1, 1, 1) - (inp2_r - m.view(-1, 1, 1)) / (b_v.view(-1,1,1) + eps) * mean_dy_xmu_r.view(-1, 1, 1) ) * torch.rsqrt(b_v.view(-1,1,1) + eps) * weight_r.view(-1,1,1)

# Backward kernels against the references.
mean_dy, mean_dy_xmu, grad_weight, grad_bias = syncbn.reduce_bn(grad_output_t, inp_t, mean, inv_std, weight_t)
grad_input = syncbn.batchnorm_backward(grad_output_t, inp_t, mean, inv_std, weight_t, mean_dy, mean_dy_xmu)

if args.local_rank == 0:
    sbn_result = compare("comparing bias grad: ", grad_bias, grad_bias_r, error) and sbn_result
    sbn_result = compare("comparing weight grad: ", grad_weight, grad_weight_r, error) and sbn_result
    sbn_result = compare("comparing mean_dy grad: ", mean_dy, mean_dy_r, error) and sbn_result
    sbn_result = compare("comparing mean_dy_xmu grad: ", mean_dy_xmu, mean_dy_xmu_r, error) and sbn_result
    sbn_result = compare("comparing input grad: ", grad_input, grad_input_r, error) and sbn_result
    compare("comparing bn input grad: ", inp_bn.grad, grad_input_r, error)

# Running statistics of the two DDP-wrapped modules.
if args.local_rank == 0:
    sbn_result = compare("comparing running_mean: ", bn.module.running_mean.data, sbn.module.running_mean.data, error) and sbn_result
    sbn_result = compare("comparing running_variance: ", bn.module.running_var.data, sbn.module.running_var.data, error) and sbn_result

# execute by both
# Fold these checks into sbn_result.  The `... and sbn_result` expressions
# used to be evaluated and thrown away, so a mismatch here could never make
# the test report failure.
sbn_result = compare("comparing layers output: ", out_bn[start:finish], out_sbn, error) and sbn_result
sbn_result = compare("comparing layers grad_input: ", inp_bn.grad[start:finish], inp_sbn.grad[start:finish], error) and sbn_result

# Take one SGD step on both modules, then compare the updated parameters.
bn_opt.step()
sbn_opt.step()

# These two comparisons only print diagnostics; their results are not recorded.
if args.local_rank == 0:
    compare("comparing bn vs sbn bias: ", bn.module.bias, sbn.module.bias, error)
    compare("comparing bn vs sbn weight: ", bn.module.weight, sbn.module.weight, error)


# Every process prints its own verdict.
if sbn_result:
    print("====SBN group test passed")
else:
    print("*SBN group test failed*")


================================================
FILE: apex/tests/distributed/synced_batchnorm/two_gpu_unit_test.py
================================================
import torch
import numpy as np
import apex
import syncbn
import os
import argparse
import torch.optim as optim

def compare(desc, inp1, inp2, error):
    """Return whether two tensors agree within ``error`` (used as both rtol and atol).

    On a mismatch, ``desc`` is printed together with the differing elements
    of both inputs.
    """
    lhs = inp1.clone().detach().cpu().numpy()
    rhs = inp2.clone().detach().cpu().numpy()
    close = np.allclose(lhs, rhs, rtol=error, atol=error)
    if close:
        return close
    print(desc, close)
    delta = lhs - rhs
    # Positions whose difference exceeds the combined tolerance.
    bad = (np.abs(delta) >= error + error * np.abs(rhs)).nonzero()
    print("dif    : ", delta[bad])
    print("inp1   : ", lhs[bad])
    print("inp2   : ", rhs[bad])
    return close

# Problem size for the multi-process SyncBatchNorm test.
feature_size = 10
space_size = 40
batch_size = 32


from apex.parallel import DistributedDataParallel as DDP
parser = argparse.ArgumentParser()
parser.add_argument("--local_rank", default=0, type=int)
parser.add_argument("--fp16", action='store_true', default=False)
parser.add_argument("--fp64", action='store_true', default=False)
args = parser.parse_args()
# WORLD_SIZE is read straight from the environment; a KeyError is raised if it is unset.
args.world_size = int(os.environ['WORLD_SIZE'])
torch.cuda.set_device(args.local_rank)
torch.distributed.init_process_group(backend='nccl', init_method='env://')
# Slice of the batch this process feeds to SyncBatchNorm.
start = args.local_rank * batch_size//args.world_size
finish = (args.local_rank + 1) * batch_size//args.world_size

# Tolerance and numpy dtype depend on the requested precision.
error = 1e-5
dtype = np.float32
if args.fp16:
    error = 1e-3
    dtype = np.float16
elif args.fp64:
    error = 1e-8
    dtype = np.float64

# Same seed in every process, so all processes build identical data.
np.random.seed(18)
inp = np.random.randn(batch_size, feature_size, space_size, space_size).astype(dtype)
grad = np.random.randn(batch_size, feature_size, space_size, space_size).astype(dtype)
weight = np.random.randn(feature_size).astype(dtype)
bias = np.random.randn(feature_size).astype(dtype)


type_tensor = torch.cuda.FloatTensor
if args.fp16:
    type_tensor = torch.cuda.HalfTensor
if args.fp64:
    type_tensor = torch.cuda.DoubleTensor

# Reference values are always computed in float64.
ref_tensor = torch.cuda.DoubleTensor

inp_t = type_tensor(inp)
weight_t = type_tensor(weight)
bias_t = type_tensor(bias)

# inp_r: input with the feature axis first and the remaining axes flattened.
inp_r = ref_tensor(inp.transpose(1, 0, 2, 3).reshape(feature_size, -1))
inp2_r = ref_tensor(inp)
weight_r = ref_tensor(weight).view(-1, 1, 1)
bias_r = ref_tensor(bias).view(-1, 1, 1)

grad_output_t = type_tensor(grad)

# Reference per-feature mean and variances.
m = inp_r.mean(1)
b_v = inp_r.var(1, unbiased=False)
unb_v = inp_r.var(1, unbiased=True)

eps = 1e-5

mean, var_biased = syncbn.welford_mean_var(inp_t)
inv_std = 1.0 / torch.sqrt(var_biased + eps)

# Baseline: plain torch BatchNorm2d run on the full batch in every process.
bn = torch.nn.BatchNorm2d(feature_size).cuda()
bn.momentum = 1.0
bn.weight.data = weight_t.clone()
bn.bias.data = bias_t.clone()
if args.fp16:
    bn.half()
if args.fp64:
    bn.double()
inp_bn = inp_t.clone().requires_grad_()
grad_bn = grad_output_t.clone().detach()
out_bn = bn(inp_bn)
out_bn.backward(grad_bn)
# compensating the averaging over processes done by DDP
# in order to produce mathematically equivalent result
# https://github.com/NVIDIA/apex/issues/134#issuecomment-458307368
for param in bn.parameters():
    param.grad = param.grad / args.world_size
bn_opt = optim.SGD(bn.parameters(), lr=1.0)

# Module under test: DDP-wrapped SyncBatchNorm fed only this process's
# [start:finish] slice of the batch.
sbn = apex.parallel.SyncBatchNorm(feature_size).cuda()
sbn.momentum = 1.0
sbn.weight.data = weight_t.clone()
sbn.bias.data = bias_t.clone()
if args.fp16:
    sbn.half()
if args.fp64:
    sbn.double()
sbn = DDP(sbn)
sbn_opt = optim.SGD(sbn.parameters(), lr=1.0)
inp_sbn = inp_t.clone().requires_grad_()
grad_sbn = grad_output_t.clone().detach()
out_sbn = sbn(inp_sbn[start:finish])
out_sbn.backward(grad_sbn[start:finish])

# Pass/fail flag.  Only compare() calls whose result is assigned feed into
# it; the bare compare() calls only print diagnostics on mismatch.
sbn_result = True
bn_result = True

# Kernel-level checks are evaluated on local rank 0 only.
if args.local_rank == 0:
    sbn_result = compare("comparing mean: ", mean, m, error) and sbn_result
    sbn_result = compare("comparing biased variance: ", var_biased, b_v, error) and sbn_result

# Forward kernel output against the float64 reference formula.
out = syncbn.batchnorm_forward(inp_t, mean, inv_std, weight_t, bias_t)
out_r = weight_r * (inp2_r - m.view(-1, 1, 1)) * torch.rsqrt(b_v.view(-1,1,1) + eps) + bias_r

if args.local_rank == 0:
    sbn_result = compare("comparing output: ", out, out_r, error) and sbn_result
    compare("comparing bn output: ", out_bn, out_r, error)

grad_output_t = type_tensor(grad)

grad_output_r = ref_tensor(grad.transpose(1, 0, 2, 3).reshape(feature_size, -1))
grad_output2_r = ref_tensor(grad)

# Reference gradients for bias, weight and input.
grad_bias_r = grad_output_r.sum(1)
grad_weight_r = ((inp2_r - m.view(-1, 1, 1)) * torch.rsqrt(b_v.view(-1,1,1) + eps) * grad_output2_r).transpose(1,0).contiguous().view(feature_size, -1).sum(1)

mean_dy_r = grad_output_r.mean(1)
mean_dy_xmu_r = ((inp2_r - m.view(-1, 1, 1)) * grad_output2_r).transpose(1,0).contiguous().view(feature_size, -1).mean(1)

grad_input_r = (grad_output2_r - mean_dy_r.view(-1, 1, 1) - (inp2_r - m.view(-1, 1, 1)) / (b_v.view(-1,1,1) + eps) * mean_dy_xmu_r.view(-1, 1, 1) ) * torch.rsqrt(b_v.view(-1,1,1) + eps) * weight_r.view(-1,1,1)

# Backward kernels against the references.
mean_dy, mean_dy_xmu, grad_weight, grad_bias = syncbn.reduce_bn(grad_output_t, inp_t, mean, inv_std, weight_t)
grad_input = syncbn.batchnorm_backward(grad_output_t, inp_t, mean, inv_std, weight_t, mean_dy, mean_dy_xmu)
if args.local_rank == 0:
    sbn_result = compare("comparing bias grad: ", grad_bias, grad_bias_r, error) and sbn_result
    sbn_result = compare("comparing weight grad: ", grad_weight, grad_weight_r, error) and sbn_result
    sbn_result = compare("comparing mean_dy grad: ", mean_dy, mean_dy_r, error) and sbn_result
    sbn_result = compare("comparing mean_dy_xmu grad: ", mean_dy_xmu, mean_dy_xmu_r, error) and sbn_result
    sbn_result = compare("comparing input grad: ", grad_input, grad_input_r, error) and sbn_result
    compare("comparing bn input grad: ", inp_bn.grad, grad_input_r, error)

# Running statistics: plain bn against the module inside the DDP wrapper.
if args.local_rank == 0:
    sbn_result = compare("comparing running_mean: ", bn.running_mean.data, sbn.module.running_mean.data, error) and sbn_result
    sbn_result = compare("comparing running_variance: ", bn.running_var.data, sbn.module.running_var.data, error) and sbn_result

# execute by both
# Fold these checks into sbn_result.  The `... and sbn_result` expressions
# used to be evaluated and thrown away, so a mismatch here could never make
# the test report failure.
sbn_result = compare("comparing layers output: ", out_bn[start:finish], out_sbn, error) and sbn_result
sbn_result = compare("comparing layers grad_input: ", inp_bn.grad[start:finish], inp_sbn.grad[start:finish], error) and sbn_result

# Take one SGD step on both modules, then compare the updated parameters.
bn_opt.step()
sbn_opt.step()

# These two comparisons only print diagnostics; their results are not recorded.
if args.local_rank == 0:
    compare("comparing bn vs sbn bias: ", bn.bias, sbn.module.bias, error)
    compare("comparing bn vs sbn weight: ", bn.weight, sbn.module.weight, error)


# Every process prints its own verdict.
if sbn_result:
    print("====SBN two gpu passed tests")
else:
    print("*SBN two gpu failed*")


================================================
FILE: apex/tests/distributed/synced_batchnorm/unit_test.sh
================================================
# Single-process SyncBatchNorm checks.
python single_gpu_unit_test.py
# Two-process checks, first in the default precision and then with --fp64.
python -m torch.distributed.launch --nproc_per_node=2 two_gpu_unit_test.py
python -m torch.distributed.launch --nproc_per_node=2 two_gpu_unit_test.py --fp64
#beware, you need a system with at least 4 gpus to test group_size<world_size
python -m torch.distributed.launch --nproc_per_node=4 test_groups.py --group_size=2


================================================
FILE: apex/tests/docker_extension_builds/run.sh
================================================
#!/bin/bash

# Print $1 as a banner: black text on a green background, preceded by three
# blank lines and followed by three more.
# The text is passed as a printf argument rather than spliced into the format
# string, so any '%' or backslash it contains is printed literally.
print_banner() {
  printf "\n\n\n\e[30m\e[42m%s\e[0m\n\n\n\n" "$1"
}

# Print $1 on one line as black text on a green background.
# The text is passed as a printf argument rather than spliced into the format
# string, so any '%' or backslash it contains is printed literally.
print_green() {
  printf "\e[30m\e[42m%s\e[0m\n" "$1"
}

# Print $1 on one line as black text on a red background.
# The text is passed as a printf argument rather than spliced into the format
# string, so any '%' or backslash it contains is printed literally.
print_red() {
  printf "\e[30m\e[41m%s\e[0m\n" "$1"
}

# Docker images in which the apex extension build is attempted.
images=(
"gitlab-master.nvidia.com:5005/dl/dgx/pytorch:19.03-py3-devel"
"gitlab-master.nvidia.com:5005/dl/dgx/pytorch:master-py3-devel"
"pytorch/pytorch:nightly-devel-cuda10.0-cudnn7"
"pytorch/pytorch:1.1.0-cuda10.0-cudnn7.5-devel"
"pytorch/pytorch:1.0.1-cuda10.0-cudnn7-devel"
"pytorch/pytorch:1.0-cuda10.0-cudnn7-devel"
"pytorch/pytorch:nightly-devel-cuda9.2-cudnn7"
)

# Branch of the apex repository that is checked out inside each container.
branch="master"

# Associative array for exit codes, keyed by image name.
# Every image starts as "None" so an image whose build was never attempted is
# distinguishable from one that exited with a real status.
# The loop must expand the array ("${images[@]}"); iterating over the bare
# word `images` would only create a single entry keyed by the string "images".
declare -A exit_codes
for image in "${images[@]}"
do
  exit_codes[$image]="None"
done

# For each image: pull it, build apex with its C++ and CUDA extensions inside a
# throwaway container, and record the exit code of the container run.
for image in "${images[@]}"
do
  print_banner "$image"
  set -x
  # Quote the image name so it is always passed to docker as a single word.
  docker pull "$image"
  # Trying python setup.py install instead of pip install to ensure direct access to error codes.
  # Maybe pip install would be ok too but this works.
  # Note: $branch is expanded by this (host) shell before the command string is
  # handed to the container. `set -e` is enabled only after the uninstall/clone/
  # checkout steps, so it governs the final setup.py invocation.
  docker run --runtime=nvidia --rm "$image" /bin/bash -c "yes | pip uninstall apex; yes | pip uninstall apex; git clone https://github.com/NVIDIA/apex.git; cd apex; git checkout $branch; set -e;  python setup.py install --cuda_ext --cpp_ext"
  # Capture the status of `docker run` immediately, before `set +x` overwrites $?.
  exit_code=$?
  set +x
  if [ "$exit_code" != 0 ]
  then
    print_red "Exit code: $exit_code"
  else
    print_green "Exit code: $exit_code"
  fi
  exit_codes[$image]=$exit_code
done

# Summarize the per-image results. overall_status doubles as this script's
# exit code: it stays 0 unless at least one image reported a non-"0" status.
overall_status=0
for image in "${images[@]}"; do
  exit_code=${exit_codes[$image]}
  if [ $exit_code != 0 ]; then
    print_red "$image : $exit_code"
    overall_status=1
  else
    print_green "$image : $exit_code"
  fi
done

if [ $overall_status != 0 ]; then
  print_red "Overall status:  failure"
else
  print_green "Overall status:  success"
fi

exit $overall_status


================================================
FILE: jukebox/Interacting_with_Jukebox.ipynb
================================================
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Interacting with Jukebox",
      "provenance": [],
      "collapsed_sections": [],
      "machine_shape": "hm"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "code",
      "metadata": {
        "id": "sAdFGF-bqVMY",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "!pip install git+https://github.com/openai/jukebox.git"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "uq8uLwZCn0BV",
        "colab_type": "text"
      },
      "source": [
        "IMPORTANT NOTE ON SYSTEM REQUIREMENTS:\n",
        "\n",
        "If you are connecting to a hosted runtime, make sure it has a P100 GPU (optionally run !nvidia-smi to confirm). Go to Edit>Notebook Settings to set this.\n",
        "\n",
        "CoLab may first assign you a lower memory machine if you are using a hosted runtime.  If so, the first time you try to load the 5B model, it will run out of memory, and then you'll be prompted to restart with more memory (then return to the top of this CoLab).  If you continue to have memory issues after this (or run into issues on your own home setup), switch to the 1B model.\n",
        "\n",
        "If you are using a local GPU, we recommend V100 or P100 with 16GB GPU memory for best performance. For GPU’s with less memory, we recommend using the 1B model and a smaller batch size throughout.  \n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "8qEqdj8u0gdN",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "!nvidia-smi"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "taDHgk1WCC_C",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import jukebox\n",
        "import torch as t\n",
        "import librosa\n",
        "import os\n",
        "from IPython.display import Audio\n",
        "from jukebox.make_models import make_vqvae, make_prior, MODELS, make_model\n",
        "from jukebox.hparams import Hyperparams, setup_hparams\n",
        "from jukebox.sample import sample_single_window, _sample, \\\n",
        "                           sample_partial_window, upsample\n",
        "from jukebox.utils.dist_utils import setup_dist_from_mpi\n",
        "from jukebox.utils.torch_utils import empty_cache\n",
        "rank, local_rank, device = setup_dist_from_mpi()"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "89FftI5kc-Az",
        "colab_type": "text"
      },
      "source": [
        "# Sample from the 5B or 1B Lyrics Model\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "65aR2OZxmfzq",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "model = \"5b_lyrics\" # or \"1b_lyrics\"     \n",
        "hps = Hyperparams()\n",
        "hps.sr = 44100\n",
        "hps.n_samples = 3 if model=='5b_lyrics' else 8\n",
        "hps.name = 'samples'\n",
        "chunk_size = 16 if model==\"5b_lyrics\" else 32\n",
        "max_batch_size = 3 if model==\"5b_lyrics\" else 16\n",
        "hps.levels = 3\n",
        "hps.hop_fraction = [.5,.5,.125]\n",
        "\n",
        "vqvae, *priors = MODELS[model]\n",
        "vqvae = make_vqvae(setup_hparams(vqvae, dict(sample_length = 1048576)), device)\n",
        "top_prior = make_prior(setup_hparams(priors[-1], dict()), vqvae, device)\n",
        "\n"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "JYKiwkzy0Iyf",
        "colab_type": "text"
      },
      "source": [
        "Specify your choice of artist, genre, lyrics, and length of musical sample. "
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "-sY9aGHcZP-u",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "sample_length_in_seconds = 60          # Full length of musical sample to generate - we find songs in the 1 to 4 minute\n",
        "                                       # range work well, with generation time proportional to sample length.  \n",
        "                                       # This total length affects how quickly the model \n",
        "                                       # progresses through lyrics (model also generates differently\n",
        "                                       # depending on if it thinks it's in the beginning, middle, or end of sample)\n",
        "\n",
        "hps.sample_length = (int(sample_length_in_seconds*hps.sr)//top_prior.raw_to_tokens)*top_prior.raw_to_tokens\n",
        "assert hps.sample_length >= top_prior.n_ctx*top_prior.raw_to_tokens, f'Please choose a larger sampling rate'"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "qD0qxQeLaTR0",
        "colab": {}
      },
      "source": [
        "metas = [dict(artist = \"Zac Brown Band\",\n",
        "            genre = \"Country\",\n",
        "            total_length = hps.sample_length,\n",
        "            offset = 0,\n",
        "            lyrics = \"\"\"I met a traveller from an antique land,\n",
        "            Who said—“Two vast and trunkless legs of stone\n",
        "            Stand in the desert. . . . Near them, on the sand,\n",
        "            Half sunk a shattered visage lies, whose frown,\n",
        "            And wrinkled lip, and sneer of cold command,\n",
        "            Tell that its sculptor well those passions read\n",
        "            Which yet survive, stamped on these lifeless things,\n",
        "            The hand that mocked them, and the heart that fed;\n",
        "            And on the pedestal, these words appear:\n",
        "            My name is Ozymandias, King of Kings;\n",
        "            Look on my Works, ye Mighty, and despair!\n",
        "            Nothing beside remains. Round the decay\n",
        "            Of that colossal Wreck, boundless and bare\n",
        "            The lone and level sands stretch far away\n",
        "            \"\"\",\n",
        "            ),\n",
        "          ] * hps.n_samples\n",
        "labels = [None, None, top_prior.labeller.get_batch_labels(metas, 'cuda')]"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "6PHC1XnEfV4Y",
        "colab_type": "text"
      },
      "source": [
        "Optionally adjust the sampling temperature (we've found .98 or .99 to be our favorite).  \n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "eNwKyqYraTR9",
        "colab": {}
      },
      "source": [
        "sampling_temperature = .98\n",
        "\n",
        "lower_batch_size = 16\n",
        "max_batch_size = 3 if model == \"5b_lyrics\" else 16\n",
        "lower_level_chunk_size = 32\n",
        "chunk_size = 16 if model == \"5b_lyrics\" else 32\n",
        "sampling_kwargs = [dict(temp=.99, fp16=True, max_batch_size=lower_batch_size,\n",
        "                        chunk_size=lower_level_chunk_size),\n",
        "                    dict(temp=0.99, fp16=True, max_batch_size=lower_batch_size,\n",
        "                         chunk_size=lower_level_chunk_size),\n",
        "                    dict(temp=sampling_temperature, fp16=True, \n",
        "                         max_batch_size=max_batch_size, chunk_size=chunk_size)]"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "S3j0gT3HfrRD",
        "colab_type": "text"
      },
      "source": [
        "Now we're ready to sample from the model. We'll generate the top level (2) first, followed by the first upsampling (level 1), and the second upsampling (0).  In this CoLab we load the top prior separately from the upsamplers, because of memory concerns on the hosted runtimes. If you are using a local machine, you can also load all models directly with make_models, and then use sample.py's ancestral_sampling to put this all in one step.\n",
        "\n",
        "After each level, we decode to raw audio and save the audio files.   \n",
        "\n",
        "This next cell will take a while (approximately 10 minutes per 20 seconds of music sample)"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "2nET_YBEopyp",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "zs = [t.zeros(hps.n_samples,0,dtype=t.long, device='cuda') for _ in range(len(priors))]\n",
        "zs = _sample(zs, labels, sampling_kwargs, [None, None, top_prior], [2], hps)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "-gxY9aqHqfLJ",
        "colab_type": "text"
      },
      "source": [
        "Listen to the results from the top level (note this will sound very noisy until we do the upsampling stage).  You may have more generated samples, depending on the batch size you requested."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "TPZENDGZqOOb",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "Audio(f'{hps.name}/level_2/item_0.wav')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "EJc3bQxmusc6",
        "colab_type": "text"
      },
      "source": [
        "We are now done with the large top_prior model, and instead load the upsamplers."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "W5VLX0zRapIm",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Set this False if you are on a local machine that has enough memory (this allows you to do the\n",
        "# lyrics alignment visualization during the upsampling stage). For a hosted runtime, \n",
        "# we'll need to go ahead and delete the top_prior if you are using the 5b_lyrics model.\n",
        "if True:\n",
        "  del top_prior\n",
        "  empty_cache()\n",
        "  top_prior=None\n",
        "upsamplers = [make_prior(setup_hparams(prior, dict()), vqvae, 'cpu') for prior in priors[:-1]]\n",
        "labels[:2] = [prior.labeller.get_batch_labels(metas, 'cuda') for prior in upsamplers]"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "eH_jUhGDprAt",
        "colab_type": "text"
      },
      "source": [
        "Please note: this next upsampling step will take several hours.  At the free tier, Google CoLab lets you run for 12 hours.  As the upsampling is completed, samples will appear in the Files tab (you can access this at the left of the CoLab), under \"samples\" (or whatever hps.name is currently).  Level 1 is the partially upsampled version, and then Level 0 is fully completed."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "9lkJgLolpZ6w",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "zs = upsample(zs, labels, sampling_kwargs, [*upsamplers, top_prior], hps)\n"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "3SJgBYJPri55",
        "colab_type": "text"
      },
      "source": [
        "Listen to your final sample!"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "2ip2PPE0rgAb",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "Audio(f'{hps.name}/level_0/item_0.wav')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "8JAgFxytwrLG",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "del upsamplers\n",
        "empty_cache()"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "LpvvFH85bbBC",
        "colab_type": "text"
      },
      "source": [
        "# Co-Composing with the 5B or 1B Lyrics Model"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "nFDROuS7gFQY",
        "colab_type": "text"
      },
      "source": [
        "For more control over the generations, try co-composing with either the 5B or 1B Lyrics Models.  Again, specify your artist, genre, and lyrics. However, now instead of generating the entire sample, the model will return 3 short options for the opening of the piece (or up to 16 options if you use the 1B model instead).  Choose your favorite, and then continue the loop, for as long as you like.  Throughout these steps, you'll be listening to the audio at the top prior level, which means it will sound quite noisy.  When you are satisfied with your co-creation, continue on through the upsampling section. This will render the piece in higher audio quality.\n",
        "\n",
        "NOTE: CoLab will first assign you a lower memory machine if you are using a hosted runtime.  The next cell will run out of memory, and then you'll be prompted to restart with more memory (then return to the top of this CoLab).  If you continue to have memory issues after this (or run into issues on your own home setup), switch to the 1B model. "
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "3y-q8ifhGBlU",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "model = \"5b_lyrics\" # or \"1b_lyrics\"\n",
        "hps = Hyperparams()\n",
        "hps.sr = 44100\n",
        "hps.n_samples = 3 if model=='5b_lyrics' else 16\n",
        "hps.name = 'co_composer'\n",
        "hps.sample_length = 1048576 if model==\"5b_lyrics\" else 786432 \n",
        "chunk_size = 16 if model==\"5b_lyrics\" else 32\n",
        "max_batch_size = 3 if model==\"5b_lyrics\" else 16\n",
        "hps.hop_fraction = [.5, .5, .125] \n",
        "hps.levels = 3\n",
        "\n",
        "vqvae, *priors = MODELS[model]\n",
        "vqvae = make_vqvae(setup_hparams(vqvae, dict(sample_length = hps.sample_length)), device)\n",
        "top_prior = make_prior(setup_hparams(priors[-1], dict()), vqvae, device)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "68hz4x7igq0c",
        "colab_type": "text"
      },
      "source": [
        "Choose your artist, genre, and lyrics here!"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "QDMvH_1zUHo6",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "total_sample_length_in_seconds = 120\n",
        "metas = [dict(artist = \"Zac Brown Band\",\n",
        "            genre = \"Country\",\n",
        "            total_length = total_sample_length_in_seconds * hps.sr,\n",
        "            offset = 0,\n",
        "            lyrics = \"\"\"I met a traveller from an antique land,\n",
        "            Who said—“Two vast and trunkless legs of stone\n",
        "            Stand in the desert. . . . Near them, on the sand,\n",
        "            Half sunk a shattered visage lies, whose frown,\n",
        "            And wrinkled lip, and sneer of cold command,\n",
        "            Tell that its sculptor well those passions read\n",
        "            Which yet survive, stamped on these lifeless things,\n",
        "            The hand that mocked them, and the heart that fed;\n",
        "            And on the pedestal, these words appear:\n",
        "            My name is Ozymandias, King of Kings;\n",
        "            Look on my Works, ye Mighty, and despair!\n",
        "            Nothing beside remains. Round the decay\n",
        "            Of that colossal Wreck, boundless and bare\n",
        "            The lone and level sands stretch far away\n",
        "            \"\"\",\n",
        "            ),\n",
        "          ] * hps.n_samples\n",
        "labels = top_prior.labeller.get_batch_labels(metas, 'cuda')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "B9onZMEXh34f",
        "colab_type": "text"
      },
      "source": [
        "## Generate 3 options for the start of the song\n",
        "\n",
        "Initial generation is set to be 4 seconds long, but feel free to change this"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "c6peEj8I_HHO",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "def seconds_to_tokens(sec, sr, prior, chunk_size):\n",
        "  tokens = sec * hps.sr // prior.raw_to_tokens\n",
        "  tokens = ((tokens // chunk_size) + 1) * chunk_size\n",
        "  assert tokens <= prior.n_ctx, 'Choose a shorter generation length to stay within the top prior context'\n",
        "  return tokens"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "2gn2GXt3zt3y",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "initial_generation_in_seconds = 4\n",
        "tokens_to_sample = seconds_to_tokens(initial_generation_in_seconds, hps.sr, top_prior, chunk_size)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "U0zcWcMoiigl",
        "colab_type": "text"
      },
      "source": [
        "Change the sampling temperature if you like (higher is more random).  Our favorite is in the range .98 to .995"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "NHbH68H7VMeO",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "sampling_temperature = .98\n",
        "sampling_kwargs = dict(temp=sampling_temperature, fp16=True,\n",
        "                       max_batch_size=max_batch_size, chunk_size=chunk_size)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "JGZEPe-WTt4g",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "zs=[t.zeros(hps.n_samples,0,dtype=t.long, device='cuda') for _ in range(3)]\n",
        "zs=sample_partial_window(zs, labels, sampling_kwargs, 2, top_prior, tokens_to_sample, hps)\n",
        "x = vqvae.decode(zs[2:], start_level=2).cpu().numpy()"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "mveN4Be8jK2J",
        "colab_type": "text"
      },
      "source": [
        "Listen to your generated samples, and then pick a favorite. If you don't like any, go back and rerun the cell above. \n",
        "\n",
        "** NOTE this is at the noisy top level, upsample fully (in the next section) to hear the final audio version"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "LrJSGMhUOhZg",
        "colab": {}
      },
      "source": [
        "for i in range(hps.n_samples):\n",
        "  librosa.output.write_wav(f'noisy_top_level_generation_{i}.wav', x[i], sr=44100)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "rQ4ersQ5OhZr",
        "colab": {}
      },
      "source": [
        "Audio('noisy_top_level_generation_0.wav')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "-GdqzrGkOhZv",
        "colab": {}
      },
      "source": [
        "Audio('noisy_top_level_generation_1.wav')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "gE5S8hyZOhZy",
        "colab": {}
      },
      "source": [
        "Audio('noisy_top_level_generation_2.wav')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "t2-mEJaqZfuS",
        "colab_type": "text"
      },
      "source": [
        "If you don't like any of the options, return a few cells back to \"Sample a few options...\" and rerun from there."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "o7CzSiv0MmFP",
        "colab_type": "text"
      },
      "source": [
        "## Choose your favorite sample and request longer generation\n",
        "\n",
        "---\n",
        "\n",
        "(Repeat from here)\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "j_XFtVi99CIY",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "my_choice=0"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Pgk3sHHBLYoq",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "zs[2]=zs[2][my_choice].repeat(hps.n_samples,1)\n",
        "t.save(zs, 'zs-checkpoint2.t')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "W8Rd9xxm565S",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Set to True to load the previous checkpoint:\n",
        "if False:\n",
        "  zs=t.load('zs-checkpoint2.t') "
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "k12xjMgHkRGP",
        "colab_type": "text"
      },
      "source": [
        "Choose the length of the continuation.  The 1B model can generate up to 17 second samples and the 5B up to 23 seconds, but you'll want to pick a shorter continuation length so that it will be able to look back at what you've generated already.  Here we've chosen 4 seconds."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "h3_-0a07kHHG",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "continue_generation_in_seconds=4\n",
        "tokens_to_sample = seconds_to_tokens(continue_generation_in_seconds, hps.sr, top_prior, chunk_size)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "GpPG3Ifqk8ue",
        "colab_type": "text"
      },
      "source": [
        "The next step asks the top prior to generate more of the sample. It'll take up to a few minutes, depending on the sample length you request."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "YoHkeSTaEyLj",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "zs = sample_partial_window(zs, labels, sampling_kwargs, 2, top_prior, tokens_to_sample, hps)\n",
        "x = vqvae.decode(zs[2:], start_level=2).cpu().numpy()"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ymhUqEdhleEi",
        "colab_type": "text"
      },
      "source": [
        "Now listen to the longer versions of the sample you selected, and again choose a favorite sample.  If you don't like any, return back to the cell where you can load the checkpoint, and continue again from there.\n",
        "\n",
        "When the samples start getting long, you might not always want to listen from the start, so change the playback start time later on if you like."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "2H1LNLTa_R6a",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "playback_start_time_in_seconds = 0 "
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "r4SBGAmsnJtH",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "for i in range(hps.n_samples):\n",
        "  librosa.output.write_wav(f'top_level_continuation_{i}.wav', x[i][playback_start_time_in_seconds*44100:], sr=44100)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "2WeyE5Qtnmeo",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "Audio('top_level_continuation_0.wav')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "BKtfEtcaazXE",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "Audio('top_level_continuation_1.wav')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "7yrlS0XwK2S0",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "Audio('top_level_continuation_2.wav')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "-OJT704dvnGv",
        "colab_type": "text"
      },
      "source": [
        "To make a longer song, return back to \"Choose your favorite sample\" and loop through that again"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "RzCrkCZJvUcQ",
        "colab_type": "text"
      },
      "source": [
        "# Upsample Co-Composition to Higher Audio Quality"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "4MPgukwMmB0p",
        "colab_type": "text"
      },
      "source": [
        "Choose your favorite sample from your latest group of generations.  (If you haven't already gone through the Co-Composition block, make sure to do that first so you have a generation to upsample)."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "yv-pNNPHBQYC",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "choice = 0\n",
        "select_best_sample = True  # Set false if you want to upsample all your samples \n",
        "                           # upsampling sometimes yields subtly different results on multiple runs,\n",
        "                           # so this way you can choose your favorite upsampling"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "v17cEAqyCgfo",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "if select_best_sample:\n",
        "  zs[2]=zs[2][choice].repeat(zs[2].shape[0],1)\n",
        "\n",
        "t.save(zs, 'zs-top-level-final.t')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "0YjK-Ac0tBfu",
        "colab_type": "text"
      },
      "source": [
        "Note: If you are using a CoLab hosted runtime on the free tier, you may want to download this zs-top-level-final.t file, and then restart an instance and load it in the next cell.  The free tier will last a maximum of 12 hours, and the upsampling stage can take many hours, depending on how long a sample you have generated."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "qqlR9368s3jJ",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "if False:\n",
        "  zs = t.load('zs-top-level-final.t')\n",
        "\n",
        "assert zs[2].shape[1]>=2048, f'Please first generate at least 2048 tokens at the top level, currently you have {zs[2].shape[1]}'\n",
        "hps.sample_length = zs[2].shape[1]*top_prior.raw_to_tokens"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "jzHwF_iqgIWM",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Set this False if you are on a local machine that has enough memory (this allows you to do the\n",
        "# lyrics alignment visualization). For a hosted runtime, we'll need to go ahead and delete the top_prior\n",
        "# if you are using the 5b_lyrics model.\n",
        "if True:\n",
        "  del top_prior\n",
        "  empty_cache()\n",
        "  top_prior=None\n",
        "\n",
        "upsamplers = [make_prior(setup_hparams(prior, dict()), vqvae, 'cpu') for prior in priors[:-1]]"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "q22Ier6YSkKS",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "sampling_kwargs = [dict(temp=.99, fp16=True, max_batch_size=16, chunk_size=32),\n",
        "                    dict(temp=0.99, fp16=True, max_batch_size=16, chunk_size=32),\n",
        "                    None]\n",
        "\n",
        "if type(labels)==dict:\n",
        "  labels = [prior.labeller.get_batch_labels(metas, 'cuda') for prior in upsamplers] + [labels] "
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "T1MCa9_jnjpf",
        "colab_type": "text"
      },
      "source": [
        "This next step upsamples 2 levels.  The level_1 samples will be available after around one hour (depending on the length of your sample) and are saved under {hps.name}/level_0/item_0.wav, while the fully upsampled level_0 will likely take 4-12 hours. You can access the wav files down below, or using the \"Files\" panel at the left of this CoLab.\n",
        "\n",
        "(Please note, if you are using this CoLab on Google's free tier, you may want to download intermediate steps as the connection will last for a maximum 12 hours.)"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "NcNT5qIRMmHq",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "zs = upsample(zs, labels, sampling_kwargs, [*upsamplers, top_prior], hps)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "W2jTYLPBc29M",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "Audio(f'{hps.name}/level_0/item_0.wav')"
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}

================================================
FILE: jukebox/__init__.py
================================================


================================================
FILE: jukebox/align.py
================================================
"""
Get alignment from attn values
1. run a forward pass on each hop, get attn values
2. concat for all hops
"""
import numpy as np
import torch as t
from jukebox.utils.torch_utils import assert_shape, empty_cache
from jukebox.hparams import Hyperparams
from jukebox.make_models import make_model
from jukebox.save_html import save_html
from jukebox.utils.sample_utils import get_starts
import fire

def get_alignment(x, zs, labels, prior, fp16, hps):
    """Build one lyrics/audio alignment matrix per batch item from attention weights.

    The top-level codes ``zs[hps.levels - 1]`` are processed in windows of
    ``prior.n_ctx`` positions whose start positions come from ``get_starts``.
    For each window, the attention weights of layer ``prior.alignment_layer``
    and head ``prior.alignment_head`` are collected item by item; afterwards
    the windows are scattered into a full-length matrix using the token
    indices returned by ``prior.get_y``.

    Args:
        x: Not used by this function.
        zs: Indexable collection of code tensors; only the top level is read.
        labels: Passed to ``prior.get_y``; ``labels['info'][i]['full_tokens']``
            is also read for every batch item ``i``.
        prior: Top-level prior. It is moved to the GPU for the forward passes
            and back to the CPU afterwards.
        fp16: Forwarded to ``prior.z_forward``.
        hps: Hyperparameters; ``levels`` and ``hop_fraction`` are read.

    Returns:
        A list with one numpy array per batch item, of shape
        ``(unpadded_length, len(full_tokens))``.
    """
    top_level = hps.levels - 1  # Top level used
    n_ctx, n_tokens = prior.n_ctx, prior.n_tokens
    z = zs[top_level]
    bs, total_length = z.shape[0], z.shape[1]

    # Right-pad short sequences with zeros so that one full window is available
    padding_length = 0
    if total_length < n_ctx:
        padding_length = n_ctx - total_length
        zero_pad = t.zeros(bs, padding_length, dtype=z.dtype, device=z.device)
        z = t.cat([z, zero_pad], dim=1)
        total_length = z.shape[1]

    hop_length = int(hps.hop_fraction[top_level] * prior.n_ctx)
    n_head = prior.prior.transformer.n_head  # not used below
    alignment_head = prior.alignment_head
    alignment_layer = prior.alignment_layer
    attn_layers = {alignment_layer}
    alignment_hops, indices_hops = {}, {}
    window_starts = get_starts(total_length, n_ctx, hop_length)

    prior.cuda()
    empty_cache()
    for start in window_starts:
        end = start + n_ctx

        # set y offset, sample_length and lyrics tokens
        y, indices_hop = prior.get_y(labels, start, get_indices=True)
        assert len(indices_hop) == bs
        assert all(len(token_indices) == n_tokens for token_indices in indices_hop)

        # Run the forward pass one batch item at a time, keeping only the
        # attention weights of the alignment head
        head_weights = []
        for z_item, y_item in zip(t.chunk(z, bs, dim=0), t.chunk(y, bs, dim=0)):
            attn = prior.z_forward(z_item[:, start:end], [], y_item, fp16=fp16, get_attn_weights=attn_layers)
            assert len(attn) == 1
            head_weights.append(attn[0][:, alignment_head])
            del attn
        w = t.cat(head_weights, dim=0)
        del head_weights
        assert_shape(w, (bs, n_ctx, n_tokens))
        alignment_hop = w.float().cpu().numpy()
        assert_shape(alignment_hop, (bs, n_ctx, n_tokens))
        del w

        # alignment_hop has shape (bs, n_ctx, n_tokens)
        # indices_hop is a list of len=bs, each entry of len n_tokens
        indices_hops[start] = indices_hop
        alignment_hops[start] = alignment_hop
    prior.cpu()
    empty_cache()

    # Stitch the per-window attention into one matrix per item, using the
    # stored indices to place each column at its source-token position
    alignments = []
    for item in range(bs):
        # Note each item has different length lyrics
        full_tokens = labels['info'][item]['full_tokens']
        alignment = np.zeros((total_length, len(full_tokens) + 1))
        # Walk the windows last-to-first, so rows shared by overlapping
        # windows end up holding the values of the earlier window
        for start in reversed(window_starts):
            end = start + n_ctx
            hop_weights = alignment_hops[start][item]
            token_indices = indices_hops[start][item]
            assert len(token_indices) == n_tokens
            assert hop_weights.shape == (n_ctx, n_tokens)
            alignment[start:end, token_indices] = hop_weights
        # remove token padding, and last lyric index
        alignments.append(alignment[:total_length - padding_length, :-1])
    return alignments

def save_alignment(model, device, hps):
    """Compute alignments for previously saved samples and write them to disk.

    Reads ``{hps.logdir}/level_0/data.pth.tar``, adds an ``'alignments'`` entry
    computed by ``get_alignment`` with the last prior returned by
    ``make_model``, saves the result as ``data_align.pth.tar`` in the same
    directory and finally calls ``save_html``.

    Args:
        model: Model name forwarded to ``make_model``; fp16 is disabled only
            for ``'1b_lyrics'``.
        device: Device forwarded to ``make_model``.
        hps: Hyperparameters; ``logdir`` is read here.
    """
    print(hps)
    _, priors = make_model(model, device, hps, levels=[-1])

    level_dir = f"{hps.logdir}/level_0"
    data = t.load(f"{level_dir}/data.pth.tar")
    # fp16 is used for every model except '1b_lyrics'
    use_fp16 = model != '1b_lyrics'

    x, zs, top_labels = data['x'], data['zs'], data['labels'][-1]
    data['alignments'] = get_alignment(x, zs, top_labels, priors[-1], use_fp16, hps)
    t.save(data, f"{level_dir}/data_align.pth.tar")
    save_html(level_dir, data['x'], data['zs'], data['labels'][-1], data['alignments'], hps)

def run(model, port=29500, **kwargs):
    """Set up the distributed environment and run ``save_alignment`` without gradients.

    Args:
        model: Model name passed through to ``save_alignment``.
        port: Port forwarded to ``setup_dist_from_mpi``.
        **kwargs: Used to construct the ``Hyperparams`` object.
    """
    from jukebox.utils.dist_utils import setup_dist_from_mpi
    _, _, device = setup_dist_from_mpi(port=port)
    hyperparams = Hyperparams(**kwargs)

    with t.no_grad():
        save_alignment(model, device, hyperparams)

# Script entry point: hand run() to python-fire so that its parameters can be
# supplied from the command line.
if __name__ == '__main__':
    fire.Fire(run)


================================================
FILE: jukebox/data/__init__.py
================================================


================================================
FILE: jukebox/data/artist_genre_processor.py
================================================
import os
import re

# Characters that norm() keeps as-is: ASCII letters and digits.
accepted = frozenset(
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    '0123456789'
)

# Matches one or more consecutive underscores.
rex = re.compile(r'_+')

def norm(s):
    """Normalise a name to lowercase ASCII alphanumerics joined by single underscores.

    The input is lowercased, every character outside ``accepted`` becomes an
    underscore, runs of underscores are collapsed into one, and leading or
    trailing underscores are removed.
    """
    replaced = ''.join(ch if ch in accepted else '_' for ch in s.lower())
    collapsed = rex.sub('_', replaced)
    return collapsed.strip('_')

def create_reverse_lookup(atoi):
    """Invert a name -> id mapping into an id -> name mapping.

    Multiple entries could go to the same artist_id/genre_id; in that case the
    names sharing an id are sorted and joined with ``'_'``.
    """
    names_by_id = {}
    for name, idx in atoi.items():
        names_by_id.setdefault(idx, []).append(name)
    return {idx: '_'.join(sorted(names)) for idx, names in names_by_id.items()}

class ArtistGenreProcessor():
    """Translate artist and genre names to integer ids and back.

    The mappings are read from ``ids/v2_*_ids.txt`` or ``ids/v3_*_ids.txt``
    next to this module, depending on ``v3``. Each line of those files has the
    form ``name;id``. Names that are not found map to id 0.
    """

    def __init__(self, v3=False):
        """Select the id files for the requested version and load both mappings."""
        self.v3 = v3
        dirname = os.path.dirname(__file__)
        if self.v3:
            self.artist_id_file = f"{dirname}/ids/v3_artist_ids.txt"
            self.genre_id_file = f"{dirname}/ids/v3_genre_ids.txt"
        else:
            self.artist_id_file = f"{dirname}/ids/v2_artist_ids.txt"
            self.genre_id_file = f"{dirname}/ids/v2_genre_ids.txt"
        self.load_artists()
        self.load_genres()

    @staticmethod
    def _load_ids(id_file):
        """Parse a ``name;id`` file into a ``{lowercased name: int id}`` dict."""
        ids = {}
        with open(id_file, 'r', encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Tolerate blank lines, e.g. an empty line at the end of the file
                    continue
                # Split on the last ';' only, so that a name may itself contain ';'
                name, raw_id = line.rsplit(';', 1)
                ids[name.lower()] = int(raw_id)
        return ids

    def get_artist_id(self, artist):
        """Return the id of ``artist``, or 0 (with a printed notice) if it is unknown.

        In v3 the name is only lowercased; otherwise it is normalised with ``norm``.
        """
        input_artist = artist
        if self.v3:
            artist = artist.lower()
        else:
            artist = norm(artist)
        if artist not in self.artist_ids:
            print(f"Input artist {input_artist} maps to {artist}, which is not present in {self.artist_id_file}. "
                  f"Defaulting to (artist_id, artist) = (0, unknown), if that seems wrong please format artist correctly")
        return self.artist_ids.get(artist, 0)

    def get_genre_ids(self, genre):
        """Return the list of ids for ``genre``; unknown words map to 0 with a printed notice.

        In v3 the lowercased genre is looked up as a single entry.
        """
        if self.v3:
            genres = [genre.lower()]
        else:
            # In v2, we convert genre into a bag of words
            genres = norm(genre).split("_")
        for word in genres:
            if word not in self.genre_ids:
                print(f"Input genre {genre} maps to the list {genres}. {word} is not present in {self.genre_id_file}. "
                      f"Defaulting to (word_id, word) = (0, unknown), if that seems wrong please format genre correctly")
        return [self.genre_ids.get(word, 0) for word in genres]

    # get_artist/genre throw error if we ask for non-present values
    def get_artist(self, artist_id):
        """Return the artist name stored for ``artist_id`` (KeyError if absent)."""
        return self.artists[artist_id]

    def get_genre(self, genre_ids):
        """Return the genre name for ``genre_ids`` (KeyError if an id is absent).

        In v3 exactly one id is expected. Otherwise the names of all
        non-negative ids are joined with ``'_'``; negative ids are skipped.
        """
        if self.v3:
            assert len(genre_ids) == 1
            genre = self.genres[genre_ids[0]]
        else:
            genre = '_'.join([self.genres[genre_id] for genre_id in genre_ids if genre_id >= 0])
        return genre

    def load_artists(self):
        """Load ``artist_ids`` (name -> id) and ``artists`` (id -> name) from the artist id file."""
        print(f'Loading artist IDs from {self.artist_id_file}')
        self.artist_ids = self._load_ids(self.artist_id_file)
        self.artists = create_reverse_lookup(self.artist_ids)

    def load_genres(self):
        """Load ``genre_ids`` (name -> id) and ``genres`` (id -> name) from the genre id file."""
        print(f'Loading genre IDs from {self.genre_id_file}')
        self.genre_ids = self._load_ids(self.genre_id_file)
        self.genres = create_reverse_lookup(self.genre_ids)


================================================
FILE: jukebox/data/data_processor.py
================================================
import torch as t
import jukebox.utils.dist_adapter as dist
from torch.utils.data.distributed import DistributedSampler
from torch.utils.data import DataLoader, Dataset, BatchSampler, RandomSampler
from jukebox.utils.dist_utils import print_all
from jukebox.utils.audio_utils import calculate_bandwidth
from jukebox.data.files_dataset import FilesAudioDataset

class OffsetDataset(Dataset):
    """Expose the index range ``[start, end)`` of a wrapped dataset as its own dataset.

    Items are fetched through the wrapped dataset's ``get_item`` method, with
    the ``test`` flag given at construction passed along on every call.
    """

    def __init__(self, dataset, start, end, test=False):
        super().__init__()
        self.dataset, self.test = dataset, test
        self.start, self.end = start, end
        # The range must be non-empty and lie inside the wrapped dataset
        assert 0 <= self.start < self.end <= len(self.dataset)

    def __len__(self):
        span = self.end - self.start
        return span

    def __getitem__(self, item):
        # Shift the local index into the wrapped dataset's index space
        shifted = self.start + item
        return self.dataset.get_item(shifted, test=self.test)

class DataProcessor():
    """Build the audio dataset, its train/test split, samplers and data loaders.

    Construction also stores the result of ``calculate_bandwidth`` in
    ``hps.bandwidth``.
    """

    def __init__(self, hps):
        self.dataset = FilesAudioDataset(hps)
        # Value passed as ``duration`` to calculate_bandwidth: 1 for priors, 600 otherwise
        duration = 1 if hps.prior else 600
        hps.bandwidth = calculate_bandwidth(self.dataset, hps, duration=duration)
        self.create_datasets(hps)
        self.create_samplers(hps)
        self.create_data_loaders(hps)
        self.print_stats(hps)

    def set_epoch(self, epoch):
        """Forward ``epoch`` to every sampler that supports ``set_epoch``.

        The RandomSampler used in the non-distributed case has no
        ``set_epoch`` method, so such samplers are skipped instead of raising
        AttributeError.
        """
        for sampler in (self.train_sampler, self.test_sampler):
            set_sampler_epoch = getattr(sampler, 'set_epoch', None)
            if set_sampler_epoch is not None:
                set_sampler_epoch(epoch)

    def create_datasets(self, hps):
        """Split the dataset: the first ``train_test_split`` fraction trains, the rest tests."""
        train_len = int(len(self.dataset) * hps.train_test_split)
        self.train_dataset = OffsetDataset(self.dataset, 0, train_len, test=False)
        self.test_dataset = OffsetDataset(self.dataset, train_len, len(self.dataset), test=True)

    def create_samplers(self, hps):
        """Create per-sample index samplers for the train and test datasets.

        The samplers must yield single indices: the loaders are created with
        ``batch_size=hps.bs`` and do the batching themselves. Passing a
        BatchSampler as ``sampler=`` would make every "index" a list of
        indices, which OffsetDataset cannot add to its start offset.
        """
        if not dist.is_available():
            self.train_sampler = RandomSampler(self.train_dataset)
            self.test_sampler = RandomSampler(self.test_dataset)
        else:
            self.train_sampler = DistributedSampler(self.train_dataset)
            self.test_sampler = DistributedSampler(self.test_dataset)

    def create_data_loaders(self, hps):
        """Create the train and test loaders; only the train loader drops the last partial batch."""
        # Loader to load mini-batches
        if hps.labels:
            # Items are pairs; stack the first and the second elements separately
            collate_fn = lambda batch: tuple(t.stack([t.from_numpy(b[i]) for b in batch], 0) for i in range(2))
        else:
            collate_fn = lambda batch: t.stack([t.from_numpy(b) for b in batch], 0)

        print('Creating Data Loader')
        self.train_loader = DataLoader(self.train_dataset, batch_size=hps.bs, num_workers=hps.nworkers,
                                       sampler=self.train_sampler, pin_memory=False,
                                       drop_last=True, collate_fn=collate_fn)
        self.test_loader = DataLoader(self.test_dataset, batch_size=hps.bs, num_workers=hps.nworkers,
                                      sampler=self.test_sampler, pin_memory=False,
                                      drop_last=False, collate_fn=collate_fn)

    def print_stats(self, hps):
        """Print dataset sizes, the train sampler and the number of train batches."""
        print_all(f"Train {len(self.train_dataset)} samples. Test {len(self.test_dataset)} samples")
        print_all(f'Train sampler: {self.train_sampler}')
        print_all(f'Train loader: {len(self.train_loader)}')


================================================
FILE: jukebox/data/files_dataset.py
================================================
import librosa
import math
import numpy as np
import jukebox.utils.dist_adapter as dist
from torch.utils.data import Dataset
from jukebox.utils.dist_utils import print_all
from jukebox.utils.io import get_duration_sec, load_audio
from jukebox.data.labels import Labeller

class FilesAudioDataset(Dataset):
    """Dataset of fixed-length audio chunks cut from the files in a directory.

    All kept files are treated as one long concatenated stream (``cumsum``
    holds the running end position of each file, in samples); item ``i`` is
    the ``i``-th window of ``sample_length`` samples in that stream, moved so
    that it lies inside a single file.
    """

    def __init__(self, hps):
        super().__init__()
        self.sr = hps.sr
        self.channels = hps.channels
        # Fall back to the chunk length (rounded up to whole seconds) / no upper bound
        min_duration = hps.min_duration
        if not min_duration:
            min_duration = math.ceil(hps.sample_length / hps.sr)
        self.min_duration = min_duration
        max_duration = hps.max_duration
        if not max_duration:
            max_duration = math.inf
        self.max_duration = max_duration
        self.sample_length = hps.sample_length
        assert hps.sample_length / hps.sr < self.min_duration, f'Sample length {hps.sample_length} per sr {hps.sr} ({hps.sample_length / hps.sr:.2f}) should be shorter than min duration {self.min_duration}'
        self.aug_shift = hps.aug_shift
        self.labels = hps.labels
        self.init_dataset(hps)

    def filter(self, files, durations):
        """Keep files whose length in seconds lies in ``[min_duration, max_duration)``.

        ``durations`` is given in samples. Sets ``files``, ``durations`` and
        ``cumsum`` on the instance.
        """
        # Remove files too short or too long
        def within_limits(seconds):
            return not (seconds < self.min_duration or seconds >= self.max_duration)

        keep = [i for i, _ in enumerate(files) if within_limits(durations[i] / self.sr)]
        print_all(f'self.sr={self.sr}, min: {self.min_duration}, max: {self.max_duration}')
        print_all(f"Keeping {len(keep)} of {len(files)} files")
        self.files = [files[i] for i in keep]
        self.durations = [int(durations[i]) for i in keep]
        self.cumsum = np.cumsum(self.durations)

    def init_dataset(self, hps):
        """Find the audio files, measure and filter them, and create the labeller if needed."""
        # Load list of files and starts/durations
        extensions = ['mp3', 'opus', 'm4a', 'aac', 'wav']
        files = librosa.util.find_files(f'{hps.audio_files_dir}', extensions)
        print_all(f"Found {len(files)} files. Getting durations")
        if dist.is_available():
            cache = dist.get_rank() % 8 == 0
        else:
            cache = True
        # Durations in samples; could be approximate
        durations = np.array([get_duration_sec(file, cache=cache) * self.sr for file in files])
        self.filter(files, durations)

        if self.labels:
            self.labeller = Labeller(hps.max_bow_genre_size, hps.n_tokens, self.sample_length, v3=hps.labels_v3)

    def get_index_offset(self, item):
        """Return ``(song index, offset within that song)`` for dataset item ``item``.

        The window belongs to the song that contains its midpoint; if it would
        start before or run past that song it is moved by half a window. With
        ``aug_shift`` set, a random shift of up to half a window is applied first.
        """
        half_interval = self.sample_length // 2
        if self.aug_shift:
            shift = np.random.randint(-half_interval, half_interval)
        else:
            shift = 0
        offset = item * self.sample_length + shift  # Note we centred shifts, so adding now
        midpoint = offset + half_interval
        assert 0 <= midpoint < self.cumsum[-1], f'Midpoint {midpoint} of item beyond total length {self.cumsum[-1]}'
        # index <-> midpoint of interval lies in this song
        index = np.searchsorted(self.cumsum, midpoint)
        # start and end of current song
        start = self.cumsum[index - 1] if index > 0 else 0.0
        end = self.cumsum[index]
        assert start <= midpoint <= end, f"Midpoint {midpoint} not inside interval [{start}, {end}] for index {index}"
        last_valid_offset = end - self.sample_length
        if offset > last_valid_offset:  # Going over song
            offset = max(start, offset - half_interval)  # Now should fit
        elif offset < start:  # Going under song
            offset = min(last_valid_offset, offset + half_interval)  # Now should fit
        assert start <= offset <= end - self.sample_length, f"Offset {offset} not in [{start}, {end - self.sample_length}]. End: {end}, SL: {self.sample_length}, Index: {index}"
        return index, offset - start

    def get_metadata(self, filename, test):
        """
        Insert metadata loading code for your dataset here.
        If artist/genre labels are different from provided artist/genre lists,
        update labeller accordingly.

        Returns:
            (artist, genre, full_lyrics) of type (str, str, str). For
            example, ("unknown", "classical", "") could be a metadata for a
            piano piece.
        """
        return None, None, None

    def get_song_chunk(self, index, offset, test=False):
        """Load ``sample_length`` samples of song ``index`` starting at ``offset``.

        Returns the transposed audio, plus ``labels['y']`` from the labeller
        when labels are enabled.
        """
        filename = self.files[index]
        total_length = self.durations[index]
        data, sr = load_audio(filename, sr=self.sr, offset=offset, duration=self.sample_length)
        assert data.shape == (self.channels, self.sample_length), f'Expected {(self.channels, self.sample_length)}, got {data.shape}'
        if not self.labels:
            return data.T
        artist, genre, lyrics = self.get_metadata(filename, test)
        labels = self.labeller.get_label(artist, genre, lyrics, total_length, offset)
        return data.T, labels['y']

    def get_item(self, item, test=False):
        """Resolve ``item`` to a song and offset, then load that chunk."""
        index, offset = self.get_index_offset(item)
        return self.get_song_chunk(index, offset, test)

    def __len__(self):
        # Number of whole sample_length windows in the concatenated audio
        n_windows = np.floor(self.cumsum[-1] / self.sample_length)
        return int(n_windows)

    def __getitem__(self, item):
        return self.get_item(item)


================================================
FILE: jukebox/data/ids/v2_artist_ids.txt
================================================
unknown;0
various;0
;0
andr_s_schiff;1
sonny_terry;2
nelly;3
markus_schulz;4
modest_petrovich_mussorgsky;5
otis_redding;6
aerosmith;7
kenny_g;8
james_taylor;9
bobby_bland;10
burning_spear;11
skip_james;12
heart;13
tammy_wynette;14
muse;15
beres_hammond;16
james_newton_howard;17
nelson_freire;18
benny_goodman;19
hank_williams;20
they_might_be_giants;21
the_brian_jonestown_massacre;22
lady_gaga;23
chris_young;24
alison_krauss_union_station;25
seal;26
the_hollies;27
shabba_ranks;28
paul_young;29
iration;30
buck_owens;31
the_weeknd;32
elton_john;33
smokey_robinson;34
roy_orbison;35
headhunterz;36
blondie;37
the_temptations;38
ray_stevens;39
foo_fighters;40
christoph_eschenbach;41
blind_willie_mctell;42
al_martino;43
edwin_fischer;44
victor_young;45
justin_bieber;46
styx;47
doris_day;48
tex_beneke;49
the_monkees;50
richard_wagner;51
bryan_adams;52
alessandro_scarlatti;53
rebelution;54
pitbull;55
nat_king_cole;56
wiz_khalifa;57
roger_miller;58
andy_williams;59
peggy_lee;60
pyotr_ilyich_tchaikovsky;61
booker_t_the_mg_s;62
cilla_black;63
billy_fury;64
vera_lynn;65
enrico_caruso;66
sly_and_robbie;67
the_pretenders;68
the_sweet;69
kylie_minogue;70
kay_kyser;71
san_francisco_symphony;72
prince;73
queen;74
kool_the_gang;75
horace_andy;76
midnite;77
gentleman;78
wilhelm_kempff;79
busta_rhymes;80
the_pogues;81
def_leppard;82
al_jolson;83
king_tubby;84
hot_chocolate;85
delroy_wilson;86
jody_watley;87
bobby_vee;88
johnny_mathis;89
the_rascals;90
sviatoslav_richter;91
fred_astaire;92
john_holt;93
amy_grant;94
b_b_king;95
paul_weston;96
four_tops;97
jay_sean;98
pat_boone;99
george_frideric_handel;100
bing_crosby;101
shalamar;102
tommy_dorsey;103
ludacris;104
kenny_chesney;105
murray_perahia;106
lightnin_hopkins;107
ricky_nelson;108
clint_mansell;109
tom_petty_and_the_heartbreakers;110
gary_glitter;111
ringo_starr;112
phil_collins;113
leo_reisman;114
al_green;115
jim_reeves;116
chris_brown;117
cliff_edwards;118
buddy_guy;119
angelo_badalamenti;120
frank_sinatra;121
santana;122
the_pussycat_dolls;123
peaches_herb;124
radu_lupu;125
the_whispers;126
eddy_howard;127
memphis_slim;128
henry_mancini;129
giuseppe_verdi;130
the_dream;131
vladimir_sofronitsky;132
u_roy;133
ken_boothe;134
the_kinks;135
howard_shore;136
hardwell;137
lou_reed;138
calvin_harris;139
eddy_chen;140
anne_murray;141
juice_newton;142
bee_gees;143
wilson_pickett;144
alan_jackson;145
shirley_bassey;146
waylon_jennings;147
destiny_s_child;148
cab_calloway;149
johnny_copeland;150
bright_eyes;151
trey_songz;152
neil_sedaka;153
justin_timberlake;154
arthur_grumiaux;155
the_who;156
the_yardbirds;157
big_joe_turner;158
duke_ellington;159
herb_alpert;160
laura_branigan;161
michael_jackson;162
john_denver;163
peter_gordon;164
solomon_cutner;165
steve_miller_band;166
don_williams;167
the_pointer_sisters;168
metallica;169
the_ink_spots;170
kenny_rogers;171
the_game;172
gene_krupa;173
snoop_dogg;174
j_cole;175
taylor_dayne;176
r3hab;177
guy_lombardo_and_his_royal_canadians;178
shakin_stevens;179
tim_mcgraw;180
olivia_newton_john_john_travolta;181
the_replacements;182
beenie_man;183
diplo;184
sammy_kaye;185
don_gibson;186
shaggy;187
nina_simone;188
stone_temple_pilots;189
ne_yo;190
huddie_william_ledbetter;191
jerry_goldsmith;192
the_bellamy_brothers;193
winifred_atwell;194
georg_philipp_telemann;195
timbaland;196
the_5th_dimension;197
dionne_warwick;198
r_kelly;199
enrique_iglesias;200
sting;201
mikey_dread;202
yellowman;203
eddie_kendricks;204
blur;205
twista;206
paul_anka;207
chris_cornell;208
a_ha;209
pet_shop_boys;210
yo_yo_ma;211
tom_jones;212
neil_diamond;213
vic_damone;214
paul_oakenfold;215
jascha_heifetz;216
t_i_;217
dinah_washington;218
vladimir_ashkenazy;219
leif_ove_andsnes;220
johnny_cash;221
basement_jaxx;222
sonic_youth;223
the_isley_brothers;224
jason_aldean;225
henryk_szeryng;226
lloyd_price;227
reba_mcentire;228
bon_jovi;229
bed_ich_smetana;230
donny_osmond;231
chuck_berry;232
the_smashing_pumpkins;233
israel_vibration;234
stan_kenton;235
conway_twitty_loretta_lynn;236
mcfly;237
r_e_m;238
morgan_heritage;239
lil_wayne;240
garnett_silk;241
lee_scratch_perry;242
howlin_wolf;243
jon_secada;244
goo_goo_dolls;245
brian_hyland;246
abc;247
clarence_gatemouth_brown;248
wilson_phillips;249
atlantic_starr;250
david_guetta;251
s_rgio_mendes;252
fr_d_ric_chopin;253
robert_palmer;254
charlie_musselwhite;255
level_42;256
peter_andre;257
the_spinners;258
erasure;259
philippe_entremont;260
leo_jan_ek;261
the_kingston_trio;262
ronnie_milsap;263
pat_benatar;264
robert_casadesus;265
t_bone_walker;266
duran_duran;267
jerry_reed;268
franz_liszt;269
journey;270
steve_angello;271
showaddywaddy;272
tears_for_fears;273
john_williams;274
daniel_m_ller_schott;275
tampa_red;276
tina_turner;277
the_beatles;278
miranda_lambert;279
howard_jones;280
reo_speedwagon;281
lady_antebellum;282
no_doubt;283
status_quo;284
tiffany;285
billy_eckstine;286
danny_elfman;287
jimmy_wakely;288
ike_tina_turner;289
george_gershwin;290
dinah_shore;291
armin_van_buuren;292
keith_sweat;293
beaux_arts_trio;294
arcangelo_corelli;295
air_supply;296
w_w;297
the_mighty_diamonds;298
harry_gregson_williams;299
blind_lemon_jefferson;300
brook_benton;301
sizzla;302
bobby_darin;303
steely_dan;304
the_skatalites;305
dwight_yoakam;306
clara_haskil;307
jah_cure;308
cliff_richard;309
fabolous;310
sonny_boy_williamson_ii;311
tinie_tempah;312
claude_debussy;313
jewel;314
bob_seger;315
otis_rush;316
mikhail_pletnev;317
gene_autry;318
blind_blake;319
ethel_waters;320
jackie_wilson;321
big_mama_thornton;322
mary_wells;323
the_mills_brothers;324
rage_against_the_machine;325
barbra_streisand;326
frank_crumit;327
the_clash;328
dion;329
all_4_one;330
t_i;331
orchestral_manoeuvres_in_the_dark;332
culture;333
josh_turner;334
one_direction;335
the_rolling_stones;336
half_pint;337
the_searchers;338
vangelis;339
jackson_browne;340
the_beach_boys;341
train;342
prince_buster;343
tavares;344
eve;345
bessie_smith;346
trace_adkins;347
bay_city_rollers;348
david_allan_coe;349
rascal_flatts;350
petula_clark;351
10cc;352
50_cent;353
the_oak_ridge_boys;354
barry_manilow;355
truls_m_rk;356
morrissey;357
gerry_the_pacemakers;358
kay_starr;359
alborosie;360
engelbert_humperdinck;361
new_order;362
tony_bennett;363
stereophonics;364
jimmy_reed;365
akon;366
echo_the_bunnymen;367
jamiroquai;368
stevie_ray_vaughan;369
ben_e_king;370
cheap_trick;371
dusty_springfield;372
mel_tillis;373
damian_marley;374
ruth_etting;375
westlife;376
diana_ross;377
the_shirelles;378
frankie_laine;379
sarah_vaughan;380
ray_price;381
gordon_lightfoot;382
eddie_cantor;383
the_byrds;384
gary_numan;385
bonnie_tyler;386
aswad;387
brownie_mcghee;388
joshua_bell;389
manic_street_preachers;390
mr_vegas;391
tanya_tucker;392
marvin_gaye_tammi_terrell;393
the_staple_singers;394
ry_cooder;395
john_mayall;396
martina_mcbride;397
anner_bylsma;398
magic_sam;399
avril_lavigne;400
robert_nighthawk;401
the_coasters;402
ace_of_base;403
joseph_szigeti;404
artie_shaw;405
big_bill_broonzy;406
the_ventures;407
rick_astley;408
les_paul;409
leonid_kogan;410
hall_oates;411
three_dog_night;412
charley_pride;413
paul_mccartney;414
alfred_cortot;415
crystal_gayle;416
taj_mahal;417
mary_j_blige;418
leann_rimes;419
mildred_bailey;420
ll_cool_j;421
lobo;422
blake_shelton;423
matt_haimovitz;424
beastie_boys;425
johannes_brahms;426
martha_and_the_vandellas;427
lou_rawls;428
the_righteous_brothers;429
rosalyn_tureck;430
vince_gill;431
gary_moore;432
bad_company;433
marty_robbins;434
russ_morgan;435
david_cassidy;436
dj_khaled;437
leonard_pennario;438
glenn_miller;439
kings_of_leon;440
afrojack;441
cyndi_lauper;442
trisha_yearwood;443
diamond_rio;444
patty_loveless;445
sugar_minott;446
willie_nelson;447
thomas_newman;448
georgia_gibbs;449
eric_carmen;450
ricky_skaggs;451
earth_wind_fire;452
peter_tosh;453
joe_bonamassa;454
lonnie_johnson;455
missy_elliott;456
nicki_minaj;457
luther_allison;458
grigory_sokolov;459
luke_bryan;460
alanis_morissette;461
tracy_lawrence;462
sonny_james;463
brandy;464
michael_bolton;465
boswell_sisters;466
antonio_vivaldi;467
fritz_kreisler;468
elvis_costello_the_attractions;469
woody_herman;470
korn;471
hans_zimmer;472
the_osmonds;473
electric_light_orchestra;474
t_rex;475
van_cliburn;476
harry_james;477
glee_cast;478
simple_minds;479
abba;480
the_jam;481
tanya_stephens;482
the_statler_brothers;483
craig_david;484
moby;485
xxxtentacion;486
paul_simon;487
magic_slim;488
natasha_bedingfield;489
ennio_morricone;490
carpenters;491
joe_tex;492
cocoa_tea;493
the_glee_cast;494
martha_argerich;495
charlie_rich;496
memphis_minnie;497
sheryl_crow;498
jan_dean;499
nick_cave_and_the_bad_seeds;500
del_shannon;501
lee_greenwood;502
gary_lewis_the_playboys;503
marcelle_meyer;504
count_basie;505
harold_melvin_the_blue_notes;506
the_fray;507
the_prodigy;508
alicia_keys;509
conway_twitty;510
barrington_levy;511
b_o_b;512
tritonal;513
mario_lanza;514
mac_davis;515
billy_murray;516
igor_stravinsky;517
pretenders;518
jordin_sparks;519
alice_in_chains;520
ohio_players;521
rick_springfield;522
jimmie_rodgers;523
buju_banton;524
linda_ronstadt;525
george_benson;526
2pac;527
soulja_boy;528
the_flaming_lips;529
ignacy_jan_paderewski;530
gloria_estefan;531
ariana_grande;532
black_uhuru;533
tony_pastor;534
sublime;535
snow_patrol;536
daft_punk;537
johnny_winter;538
robbie_williams;539
eddie_rabbitt;540
james_cotton;541
brad_paisley;542
manfred_mann;543
bassnectar;544
margaret_whiting;545
sam_cooke;546
robert_cray;547
the_beautiful_south;548
barbara_mandrell;549
dick_haymes;550
creedence_clearwater_revival;551
chic;552
ray_charles;553
carter_family;554
ti_sto;555
survivor;556
c_line_dion;557
sergei_prokofiev;558
b_la_bart_k;559
the_congos;560
yefim_bronfman;561
laidback_luke;562
darius_rucker;563
ray_anthony;564
incubus;565
carole_king;566
james_brown;567
swv;568
bruno_mars;569
aphex_twin;570
nitty_gritty_dirt_band;571
gustav_mahler;572
the_shadows;573
the_moody_blues;574
reverend_gary_davis;575
sia;576
gaetano_donizetti;577
earl_hooker;578
the_commodores;579
maria_jo_o_pires;580
eric_donaldson;581
elmore_james;582
sean_kingston;583
don_carlos;584
linkin_park;585
jay_the_americans;586
grand_funk_railroad;587
jls;588
frankie_valli;589
lefty_frizzell;590
en_vogue;591
the_cure;592
perry_como;593
johnny_mercer;594
stevie_wonder;595
ernest_tubb;596
ramin_djawadi;597
ashanti;598
rosemary_clooney;599
anne_sophie_mutter;600
helen_forrest;601
augustus_pablo;602
the_carpenters;603
claudio_arrau;604
bob_dylan;605
joe_simon;606
culture_club;607
the_ipana_troubadors;608
jennifer_lopez;609
karyn_white;610
joe;611
sarah_mclachlan;612
j_geils_band;613
dean_martin;614
hank_snow;615
clyde_mcphatter;616
tlc;617
beck;618
jimmy_dean;619
roy_acuff;620
outkast;621
freddie_mcgregor;622
gus_arnheim;623
gramatik;624
merle_haggard;625
steve_lawrence;626
ma_rainey;627
jimmy_dorsey;628
johnny_paycheck;629
arthur_rubinstein;630
talking_heads;631
capleton;632
les_brown;633
leonard_bernstein;634
bobby_goldsboro;635
kendrick_lamar;636
lenny_kravitz;637
nat_shilkret;638
toby_keith;639
junior_wells;640
billy_j_kramer_the_dakotas;641
peter_paul_mary;642
armand_van_helden;643
h_sker_d_;644
alabama;645
eminem;646
felix_mendelssohn_bartholdy;647
richard_goode;648
pink_floyd;649
sara_evans;650
lonnie_donegan;651
boney_m;652
deadmau5;653
lee_ann_womack;654
eric_clapton;655
ray_parker_jr;656
etta_james;657
the_white_stripes;658
gary_u_s_bonds;659
glen_gray_and_the_casa_loma_orchestra;660
bonnie_raitt;661
soulja_boy_tell_em;662
bobby_rydell;663
carly_simon;664
koko_taylor;665
gregory_isaacs;666
red_hot_chili_peppers;667
josef_suk;668
clint_black;669
buddy_clark;670
tool;671
pierre_fournier;672
alice_cooper;673
lucky_dube;674
jorge_bolet;675
don_diablo;676
r_l_burnside;677
klaus_badelt;678
dolly_parton;679
james_horner;680
green_day;681
henry_burr;682
the_doors;683
roxette;684
louis_armstrong;685
jerry_butler;686
louis_prima;687
paul_van_dyk;688
dennis_brown;689
toni_braxton;690
jerry_lee_lewis;691
donna_summer;692
percy_faith;693
willie_dixon;694
elvis_costello;695
youri_egorov;696
dmitri_shostakovich;697
webb_pierce;698
monica;699
pierre_laurent_aimard;700
muddy_waters;701
garth_brooks;702
boyz_ii_men;703
kris_kristofferson;704
duane_eddy;705
coolio;706
gidon_kremer;707
eurythmics;708
randy_travis;709
collie_buddz;710
faith_evans;711
matisyahu;712
brooks_dunn;713
lulu;714
fletcher_henderson;715
eek_a_mouse;716
shlomo_mintz;717
ray_noble;718
bill_withers;719
ub40;720
b_j_thomas;721
mariah_carey;722
olivia_newton_john;723
jackson_5;724
michael_rose;725
three_days_grace;726
glen_campbell;727
keith_urban;728
onerepublic;729
ted_weems;730
johnny_desmond;731
billy_preston;732
billy_vaughn;733
otis_spann;734
marion_harris;735
richard_marx;736
frankie_carle;737
xavier_cugat;738
zz_top;739
faz_l_say;740
new_york_philharmonic;741
teresa_brewer;742
pixies;743
david_oistrakh;744
rory_gallagher;745
ted_lewis_his_band;746
patsy_cline;747
tracy_byrd;748
daniel_shafran;749
johnny_horton;750
billy_ocean;751
don_mclean;752
ginuwine;753
kiss;754
kaskade;755
the_hold_steady;756
rod_stewart;757
rudolf_serkin;758
the_wanted;759
herman_s_hermits;760
rusty_draper;761
jay_z;762
mississippi_john_hurt;763
linval_thompson;764
billie_holiday;765
lawrence_welk;766
john_lee_hooker;767
the_offspring;768
ramones;769
ronan_keating;770
sir_clifford_michael_curzon;771
dierks_bentley;772
bush;773
texas;774
gene_austin;775
after_all;776
depeche_mode;777
eddy_arnold;778
ferry_corsten;779
inxs;780
jimmie_lunceford;781
johann_sebastian_bach;782
maxi_priest;783
travis_tritt;784
girls_aloud;785
meat_loaf;786
brenda_lee;787
modest_mouse;788
glenn_gould;789
mc_hammer;790
erik_satie;791
the_andrews_sisters;792
alicia_de_larrocha;793
america;794
charles_harrison;795
georges_cziffra;796
peter_serkin;797
gladys_knight_the_pips;798
anton_n_dvo_k;799
emil_gilels;800
myra_hess;801
portugal_the_man;802
rufus;803
sergei_rachmaninoff;804
andr_previn;805
natalie_cole;806
the_killers;807
helen_reddy;808
billy_idol;809
shawn_mendes;810
john_lennon;811
joan_jett;812
jimmy_cliff;813
taylor_swift;814
shura_cherkassky;815
marvin_gaye;816
the_drifters;817
paul_revere_the_raiders;818
donovan;819
alma_gluck;820
fleetwood_mac;821
jo_stafford;822
samson_fran_ois;823
vaughn_monroe;824
tennessee_ernie_ford;825
whitesnake;826
iron_maiden;827
blind_boy_fuller;828
eric_church;829
atomic_kitten;830
harry_nilsson;831
paul_specht;832
bob_marley;833
les_baxter;834
aretha_franklin;835
jessie_j;836
robert_johnson;837
maroon_5;838
sheena_easton;839
george_strait;840
bruce_springsteen;841
oasis;842
jack_white;843
fatboy_slim;844
the_jesus_and_mary_chain;845
hank_williams_jr;846
bill_haley_his_comets;847
charlie_daniels;848
pearl_jam;849
the_stylistics;850
sean_paul;851
david_essex;852
zino_francescatti;853
bruce_hornsby;854
system_of_a_down;855
scott_joplin;856
benny_benassi;857
mitch_miller;858
gloria_gaynor;859
steve_winwood;860
drake;861
bone_thugs_n_harmony;862
nickelback;863
john_mellencamp;864
gene_pitney;865
chet_atkins;866
mark_ronson;867
kc_the_sunshine_band;868
chingy;869
maurice_ravel;870
infected_mushroom;871
walter_gieseking;872
pavement;873
deniece_williams;874
tommy_james_the_shondells;875
harry_belafonte;876
robert_schuman;877
george_jones;878
whitney_houston;879
the_ames_brothers;880
max_romeo;881
patti_page;882
p_nk;883
vienna_philharmonic;884
violent_femmes;885
sly_the_family_stone;886
johnny_marvin;887
sammy_davis_jr_;888
nirvana;889
the_turtles;890
the_manhattans;891
the_supremes;892
gregg_allman;893
alton_ellis;894
giacomo_puccini;895
artur_schnabel;896
2_unlimited;897
michael_rabin;898
toto;899
lil_kim;900
shinedown;901
teddy_wilson;902
alpha_blondy;903
the_guess_who;904
my_chemical_romance;905
matchbox_twenty;906
wolfgang_gartner;907
ed_sheeran;908
faron_young;909
fats_waller;910
kate_smith;911
loretta_lynn;912
antonio_meneses;913
tony_martin;914
frankie_vaughan;915
the_mamas_the_papas;916
david_bowie;917
erskine_hawkins;918
the_four_lads;919
queens_of_the_stone_age;920
the_human_league;921
mud;922
bread;923
dixie_chicks;924
backstreet_boys;925
connie_francis;926
the_black_keys;927
paul_weller;928
pieter_wispelwey;929
the_bachelors;930
janet_jackson;931
the_association;932
edwin_starr;933
hank_williams_jr_;934
steven_isserlis;935
the_doobie_brothers;936
jimi_hendrix;937
sousa_s_band;938
ricky_martin;939
the_cranberries;940
paul_whiteman;941
the_saturdays;942
keb_mo;943
the_psychedelic_furs;944
boyzone;945
the_chemical_brothers;946
johnny_tillotson;947
joe_cocker;948
babyface;949
wet_wet_wet;950
tom_petty;951
lesley_gore;952
deorro;953
war;954
wolfgang_schneiderhan;955
isaac_hayes;956
beyonc;957
gordon_jenkins;958
flo_rida;959
captain_tennille;960
adolf_busch;961
bobby_bare;962
johnnie_taylor;963
above_beyond;964
robin_thicke;965
ja_rule;966
barry_white;967
florence_the_machine;968
the_jimi_hendrix_experience;969
gabrielle;970
leon_fleisher;971
yehudi_menuhin;972
george_harrison;973
kate_bush;974
u2;975
nancy_sinatra;976
the_everly_brothers;977
peter_green;978
cat_stevens;979
the_new_seekers;980
itzhak_perlman;981
red_nichols_his_five_pennies;982
the_script;983
ijahman_levi;984
rihanna;985
joan_jett_and_the_blackhearts;986
the_dorsey_brothers_orchestra;987
charlie_barnet;988
lmfao;989
112;990
kelis;991
john_mccormack;992
guns_n_roses;993
gordon_macrae;994
buddy_holly;995
franz_schubert;996
tommy_roe;997
garbage;998
bow_wow;999
belle_and_sebastian;1000
paul_tortelier;1001
johnny_nash;1002
lazar_berman;1003
avicii;1004
5_seconds_of_summer;1005
mississippi_fred_mcdowell;1006
ivan_moravec;1007
little_richard;1008
john_ogdon;1009
dr_dre;1010
andy_russell;1011
sex_pistols;1012
puddle_of_mudd;1013
june_carter_cash;1014
freddie_king;1015
dino_ciani;1016
h_sker_d;1017
nicky_romero;1018
shania_twain;1019
major_lazer;1020
usher;1021
madonna;1022
nsync;1023
the_o_jays;1024
faith_hill;1025
terence_trent_d_arby;1026
gene_chandler;1027
lisa_stansfield;1028
pharrell_williams;1029
david_geringas;1030
dr_alimantado;1031
jack_scott;1032
vladimir_horowitz;1033
genesis;1034
jessica_simpson;1035
peter_gabriel;1036
spike_jones_and_his_city_slickers;1037
leonard_rose;1038
richard_strauss;1039
mgmt;1040
blink_182;1041
ella_fitzgerald;1042
carrie_underwood;1043
evgeny_kissin;1044
olly_murs;1045
ky_mani_marley;1046
rose_royce;1047
desmond_dekker;1048
montell_jordan;1049
little_river_band;1050
t_pain;1051
gary_allan;1052
chubby_checker;1053
the_diamonds;1054
creed;1055
louis_jordan;1056
vernon_dalhart;1057
gilbert_o_sullivan;1058
ziggy_marley;1059
irene_cara;1060
katy_perry;1061
all_saints;1062
benjamin_britten;1063
p_m_dawn;1064
iona_brown;1065
the_lettermen;1066
natalie_imbruglia;1067
viktoria_mullova;1068
zac_brown_band;1069
bj_rk;1070
mutabaruka;1071
jermaine_jackson;1072
the_impressions;1073
spandau_ballet;1074
luther_vandross;1075
thompson_twins;1076
travis_scott;1077
roots_radics;1078
madness;1079
skrillex;1080
the_smiths;1081
van_halen;1082
the_four_seasons;1083
j_b_lenoir;1084
johnny_rivers;1085
bob_marley_the_wailers;1086
albert_collins;1087
arctic_monkeys;1088
billy_joel;1089
joe_nichols;1090
chicago;1091
coldplay;1092
blue;1093
papa_roach;1094
r_e_m_;1095
eddie_money;1096
ellie_goulding;1097
gym_class_heroes;1098
the_charlie_daniels_band;1099
bette_midler;1100
hubert_sumlin;1101
phil_harris;1102
arturo_benedetti_michelangeli;1103
steps;1104
simon_and_garfunkel;1105
gyptian;1106
britney_spears;1107
lang_lang;1108
blackstreet;1109
the_dave_clark_five;1110
the_chi_lites;1111
wolfgang_amadeus_mozart;1112
johnnie_ray;1113
elvis_presley;1114
red_norvo;1115
the_velvet_underground;1116
nelly_furtado;1117
connee_boswell;1118
soundgarden;1119
dr_hook_the_medicine_show;1120
led_zeppelin;1121
boney_m_;1122
gregor_piatigorsky;1123
john_anderson;1124
joni_james;1125
the_all_american_rejects;1126
marcia_griffiths;1127
ben_bernie;1128
eric_prydz;1129
the_three_suns;1130
albert_king;1131
jodeci;1132
inner_circle;1133
don_cornell;1134
jeff_healey;1135
brian_mcknight;1136
the_stranglers;1137
adam_faith;1138
francis_poulenc;1139
fall_out_boy;1140
the_jets;1141
groundation;1142
randy_newman;1143
jane_s_addiction;1144
bobby_vinton;1145
guiomar_novaes;1146
bo_diddley;1147
philadelphia_orchestra;1148
garrick_ohlsson;1149
evanescence;1150
wilhelm_backhaus;1151
the_platters;1152
the_seekers;1153
selena_gomez;1154
eddy_grant;1155
jan_garber;1156
gioacchino_rossini;1157
christina_aguilera;1158
stevie_nicks;1159
color_me_badd;1160
sandie_shaw;1161
migos;1162
the_allman_brothers_band;1163
will_young;1164
new_edition;1165
radiohead;1166
orbital;1167
the_judds;1168
dan_fogelberg;1169
pascal_rog_;1170
leona_lewis;1171
charlie_patton;1172
yann_tiersen;1173
the_abyssinians;1174
frank_ifield;1175
john_mayer;1176
roger_wolfe_kahn;1177
the_mcguire_sisters;1178
junior_reid;1179
bukka_white;1180
macy_gray;1181
billy_currington;1182
the_tremeloes;1183
ac_dc;1184
little_walter;1185
gorillaz;1186
the_hilltoppers;1187
gil_shaham;1188
little_mix;1189
supertramp;1190
third_world;1191
david_garrett;1192
the_lovin_spoonful;1193
chris_lake;1194
louis_lortie;1195
shakira;1196
vanessa_williams;1197
the_bangles;1198
mark_chesnutt;1199
keith_whitley;1200
roxy_music;1201
betty_hutton;1202
cheryl_cole;1203
the_notorious_b_i_g_;1204
kesha;1205
boston;1206
gerald_moore;1207
heinrich_schiff;1208
joy_division;1209
kenny_loggins;1210
nilsson;1211
counting_crows;1212
simply_red;1213
aaron_tippin;1214
sweet;1215
sugababes;1216
florida_georgia_line;1217
lonnie_mack;1218
jacob_miller;1219
the_police;1220
the_chordettes;1221
fats_domino;1222
willy_deville;1223
bob_crosby;1224
roy_clark;1225
dinu_lipatti;1226
alison_krauss;1227
frankie_avalon;1228
ella_mae_morse;1229
linton_kwesi_johnson;1230
glenn_frey;1231
rudy_vall_e_his_connecticut_yankees;1232
dj_snake;1233
toots_the_maytals;1234
annie_lennox;1235
paramore;1236
john_browning;1237
sister_sledge;1238
julian_lloyd_webber;1239
kanye_west;1240
christopher_cross;1241
professor_longhair;1242
flume;1243
sophie_tucker;1244
third_eye_blind;1245
beyonc_;1246
weezer;1247
kelly_clarkson;1248
take_that;1249
ludwig_van_beethoven;1250
the_cars;1251
george_michael;1252
chaka_khan;1253
arty;1254
steel_pulse;1255
bunny_wailer;1256
the_troggs;1257
nick_lucas;1258
keb_mo_;1259
rise_against;1260
mickey_gilley;1261
poison;1262
debbie_gibson;1263
kim_wilde;1264
sammy_davis_jr;1265
jonas_brothers;1266
cage_the_elephant;1267
son_house;1268
birth_control;1269
faithless;1270
cher;1271
seether;1272
new_kids_on_the_block;1273
rage;1274
richie_spice;1275
sam_smith;1276
the_marvelettes;1277
the_jackson_5;1278
bobbie_gentry;1279
macklemore_ryan_lewis;1280
s_club_7;1281
billy_jones_ernest_hare;1282
arcana;1283
blasterjaxx;1284
will_smith;1285
ray_miller;1286
sugar_ray;1287
ziggy_marley_the_melody_makers;1288
aaron_copland;1289
diddy;1290
daughtry;1291
beach_house;1292
the_dillinger_escape_plan;1293
lonestar;1294
foreigner;1295
lionel_richie;1296
roberta_flack;1297
the_carter_family;1298
demi_lovato;1299
joseph_haydn;1300
sander_van_doorn;1301
underworld;1302
deborah_cox;1303
the_grass_roots;1304
bananarama;1305
iggy_azalea;1306
3_doors_down;1307
the_partridge_family;1308
lead_belly;1309
johnny_long;1310
jagged_edge;1311
the_derek_trucks_band;1312
eddie_fisher;1313
pablo_casals;1314
the_original_dixieland_jazz_band;1315
miley_cyrus;1316
john_powell;1317
the_black_eyed_peas;1318
edvard_grieg;1319
maurizio_pollini;1320
nathan_milstein;1321
twenty_one_pilots;1322
rudolf_firku_n_;1323
nine_inch_nails;1324
the_four_aces;1325
jimmy_rogers;1326
salt_n_pepa;1327
yellow_claw;1328
the_strokes;1329
bobby_brown;1330
juelz_santana;1331
staind;1332
bj_rn_ulvaeus_benny_andersson;1333
the_j_geils_band;1334
the_muppets;1335
gwen_stefani;1336
fedde_le_grand;1337
imagine_dragons;1338
leo_sayer;1339
the_animals;1340
the_b_52_s;1341
furry_lewis;1342
ciara;1343
larry_clinton;1344
dire_straits;1345
pato_banton_the_reggae_revol;1346
goodie_mob;1347
disclosure;1348
georges_bizet;1349
sonny_terry_brownie_mcghee;1350
jim_croce;1351
nelson;1352
jason_derulo;1353
guy_mitchell;1354
the_fontane_sisters;1355
sonny_boy_williamson_i;1356
mirah;1357
jimmy_rushing;1358
john_michael_montgomery;1359
michael_nesmith;1360
george_clinton;1361
burt_bacharach;1362
v6;1363
slim_thug;1364
belinda_carlisle;1365
philip_glass;1366
slade;1367
pete_townshend;1368
oingo_boingo;1369
andrew_lloyd_webber;1370
the_collectors;1371
boy_george;1372
utada_hikaru;1373
mel_torm;1374
diana_krall;1375
melanie_c;1376
john_hammond;1377
peter_green_splinter_group;1378
trouble;1379
g_unit;1380
ferlin_husky;1381
arcade_fire;1382
latino;1383
krayzie_bone;1384
man;1385
dave_clark_five;1386
the_stone_roses;1387
young_jeezy;1388
blood_sweat_tears;1389
kumikameli;1390
dmx;1391
ice_cube;1392
eagles;1393
jill_scott;1394
xtc;1395
peggy_march;1396
michael_bubl;1397
raimon;1398
two_mix;1399
dulce_pontes;1400
the_unseen;1401
hank_locklin;1402
the_notorious_b_i_g;1403
bumblefoot;1404
the_busters;1405
rick_ross;1406
tegan_and_sara;1407
skeeter_davis;1408
curtis_mayfield;1409
sade;1410
wings;1411
lorrie_morgan;1412
saga;1413
a_r_rahman;1414
martha_wainwright;1415
nas;1416
will_i_am;1417
kirsty_maccoll;1418
angel;1419
dave_davies;1420
iggy_pop;1421
jojo;1422
sammy_hagar;1423
ray_davies;1424
and_one;1425
neil_young;1426
mikael_wiehe;1427
the_cardigans;1428
cage;1429
dottie_west;1430
keri_hilson;1431
johnny_hallyday;1432
bill_nelson;1433
fifteen;1434
deen;1435
bobby_v;1436
lil_yachty;1437
too_hort;1438
bernard_lavilliers;1439
hank_thompson;1440
the_chieftains;1441
daryl_hall;1442
antonio_carlos_jobim;1443
aventura;1444
the_the;1445
van_morrison;1446
wynonna_judd;1447
gomez;1448
charles_aznavour;1449
m83;1450
gnr;1451
all;1452
emmylou_harris;1453
lee_hazlewood;1454
mew;1455
uriah_heep;1456
yoko_ono;1457
abw_rts;1458
john_legend;1459
d12;1460
kitty_wells;1461
timbiriche;1462
shel_silverstein;1463
cam_ron;1464
rosanne_cash;1465
2_chainz;1466
tricky;1467
8ball_mjg;1468
flatt_scruggs;1469
bill_anderson;1470
emil_ana_torrini;1471
ufo;1472
mos_def;1473
danzig;1474
juan_gabriel;1475
common;1476
raekwon;1477
france_gall;1478
nicole_scherzinger;1479
r_yksopp;1480
sammie;1481
lena_horne;1482
david_byrne;1483
paul_williams;1484
josh_groban;1485
the_gathering;1486
frank_boeijen;1487
scooter;1488
steve_wariner;1489
mika;1490
pete_seeger;1491
tex_ritter;1492
warrant;1493
porter_wagoner;1494
field_music;1495
three_6_mafia;1496
jim_jones;1497
daniel_o_donnell;1498
brentalfloss;1499
wyclef_jean;1500
hey;1501
bizzy_bone;1502
the_mccalmans;1503
blues_traveler;1504
massive_attack;1505
woody_guthrie;1506
art_garfunkel;1507
andrea_bocelli;1508
david_crosby;1509
dream;1510
soul_asylum;1511
natalie_merchant;1512
shawn_colvin;1513
jonny_lang;1514
funeral_for_a_friend;1515
boz_scaggs;1516
example;1517
lionel_hampton;1518
the_tubes;1519
marc_anthony;1520
good_riddance;1521
moonlight;1522
marc_almond;1523
rza;1524
die_rzte;1525
rbd;1526
alejandro_fern_ndez;1527
wanda_jackson;1528
lara_fabian;1529
julio_iglesias;1530
jeff_beck;1531
peabo_bryson;1532
no_fun_at_all;1533
prong;1534
canibus;1535
krs_one;1536
u_s_bombs;1537
trust;1538
stonewall_jackson;1539
jos_feliciano;1540
m_a;1541
polysics;1542
n_e_r_d;1543
sesame_street;1544
lio;1545
myl_ne_farmer;1546
iris_dement;1547
lily_allen;1548
spoken;1549
architects;1550
jack_johnson;1551
molly_hatchet;1552
cypress_hill;1553
future;1554
the_nits;1555
per_gessle;1556
live;1557
beau;1558
deana_carter;1559
lil_flip;1560
fran_oise_hardy;1561
billy_ray_cyrus;1562
pepper;1563
run_d_m_c;1564
levon_helm;1565
insane_clown_posse;1566
stars;1567
jean_michel_jarre;1568
thunder;1569
juanes;1570
simple_plan;1571
method_man;1572
smash_mouth;1573
meek_mill;1574
fat_joe;1575
kenny_wayne_shepherd;1576
jhen_aiko;1577
the_manhattan_transfer;1578
joss_stone;1579
cee_lo_green;1580
tyrese;1581
charlotte_martin;1582
rodney_crowell;1583
acappella;1584
die_prinzen;1585
pentatonix;1586
abney_park;1587
smooth_mcgroove;1588
the_magnetic_fields;1589
the_nylons;1590
wise_guys;1591
coil;1592
do_as_infinity;1593
lords_of_acid;1594
the_church;1595
chris_rea;1596
jota_quest;1597
miguel_bos;1598
gang_starr;1599
masta_ace;1600
brand_new;1601
mac_miller;1602
cathedral;1603
corrosion_of_conformity;1604
country_joe_mcdonald;1605
eric_johnson;1606
grateful_dead;1607
janis_joplin;1608
john_miles;1609
king_gizzard_the_lizard_wizard;1610
ted_nugent;1611
combichrist;1612
alkaline_trio;1613
anathema;1614
angus_julia_stone;1615
anna_ternheim;1616
anthony_phillips;1617
steve_hackett;1618
aviators;1619
banda_calypso;1620
blue_stahli;1621
the_boys;1622
capital_inicial;1623
city_and_colour;1624
colin_hay;1625
collective_soul;1626
dashboard_confessional;1627
david_rovics;1628
david_usher;1629
die_toten_hosen;1630
funny_van_dannen;1631
dirty_heads;1632
tech_n9ne;1633
elisa;1634
emmerson_nogueira;1635
engenheiros_do_hawaii;1636
eric_bibb;1637
maria_muldaur;1638
panic_at_the_disco;1639
punchline;1640
godsmack;1641
self;1642
heideroosjes;1643
sinner;1644
heather_nova;1645
hoobastank;1646
quietdrive;1647
hyde;1648
jaguares;1649
bert_jansch;1650
jonatha_brooke;1651
joni_mitchell;1652
the_band;1653
josh_garrels;1654
josh_woodward;1655
william_fitzsimmons;1656
katie_melua;1657
jamie_cullum;1658
kristin_hersh;1659
kt_tunstall;1660
legi_o_urbana;1661
the_zombies;1662
francesco_de_gregori;1663
m_ward;1664
beth_orton;1665
magnum;1666
motorpsycho;1667
marillion;1668
jars_of_clay;1669
mason_jennings;1670
matt_nathanson;1671
matthew_good;1672
edguy;1673
gamma_ray;1674
minus_the_bear;1675
mohsen_namjoo;1676
nerina_pallot;1677
never_shout_never;1678
regina_spektor;1679
passenger;1680
paul_kelly;1681
the_style_council;1682
peter_hammill;1683
phantom_planet;1684
phil_keaggy;1685
richard_thompson;1686
said_the_whale;1687
samsas_traum;1688
senses_fail;1689
sevendust;1690
seventh_day_slumber;1691
joan_baez;1692
sister_hazel;1693
slightly_stoopid;1694
sophie_zelmani;1695
suzanne_vega;1696
tatiana;1697
teoman;1698
the_choir;1699
charlotte_church;1700
darlene_zschech;1701
the_front_bottoms;1702
the_maine;1703
the_white_buffalo;1704
little_big_town;1705
threshold;1706
tourniquet;1707
everything_but_the_girl;1708
vertical_horizon;1709
vonda_shepard;1710
warren_zevon;1711
sarah_brightman;1712
blackfoot;1713
black_label_society;1714
z_lia_duncan;1715
2;1716
alejandro_lerner;1717
beth_nielsen_chapman;1718
mercury_rev;1719
brian_wilson;1720
barenaked_ladies;1721
carbon_leaf;1722
celtic_woman;1723
hayley_westenra;1724
crowded_house;1725
delta_goodrem;1726
elbow;1727
resurrection_band;1728
nancy_wilson;1729
janis_ian;1730
jann_arden;1731
jill_sobule;1732
jos_augusto;1733
xuxa;1734
k_d_lang;1735
kim_carnes;1736
los_lobos;1737
mandy_moore;1738
marc_cohn;1739
maureen_mcgovern;1740
melissa_manchester;1741
patti_labelle;1742
helene_fischer;1743
laura_pausini;1744
ivan_lins;1745
thal_a;1746
mike_the_mechanics;1747
paul_carrack;1748
natasha_st_pier;1749
michael_mcdonald;1750
olivia;1751
dizzee_rascal;1752
sam_phillips;1753
serge_gainsbourg;1754
jane_birkin;1755
luis_miguel;1756
sondre_lerche;1757
stan_ridgway;1758
susan_boyle;1759
mike_oldfield;1760
ces_ria_vora;1761
agonoize;1762
funker_vogt;1763
god_module;1764
hocico;1765
nachtmahr;1766
suicide_commando;1767
alejandro_escovedo;1768
southside_johnny_the_asbury_jukes;1769
broken_social_scene;1770
vigilantes_of_love;1771
billy_bragg;1772
wilco;1773
frank_black_and_the_catholics;1774
blue_rodeo;1775
brandi_carlile;1776
patty_griffin;1777
calexico;1778
cass_mccombs;1779
chris_knight;1780
conor_oberst;1781
corb_lund;1782
cowboy_junkies;1783
cracker;1784
cross_canadian_ragweed;1785
dave_alvin;1786
drive_by_truckers;1787
eleni_mandell;1788
lucinda_williams;1789
fred_eaglesmith;1790
dar_williams;1791
the_jayhawks;1792
lana_del_rey;1793
tim_o_brien;1794
hank_williams_iii;1795
james_mcmurtry;1796
joe_henry;1797
john_stewart;1798
josh_ritter;1799
lambchop;1800
nanci_griffith;1801
norma_jean;1802
lyle_lovett;1803
matthew_ryan;1804
my_morning_jacket;1805
neko_case;1806
the_new_pornographers;1807
blue_october;1808
okkervil_river;1809
old_97_s;1810
ray_wylie_hubbard;1811
richmond_fontaine;1812
robert_earl_keen;1813
rocky_votolato;1814
ryan_adams;1815
whiskeytown;1816
son_volt;1817
steve_earle;1818
the_avett_brothers;1819
the_bottle_rockets;1820
the_felice_brothers;1821
arlo_guthrie;1822
the_handsome_family;1823
the_mavericks;1824
ten_years_after;1825
the_walkabouts;1826
todd_snider;1827
vic_chesnutt;1828
iron_wine;1829
wovenhand;1830
x;1831
big_audio_dynamite;1832
globe;1833
carter_the_unstoppable_sex_machine;1834
allison_moorer;1835
front_line_assembly;1836
the_national;1837
the_fall;1838
public_image_ltd;1839
public_enemy;1840
wire;1841
a_tribe_called_quest;1842
de_la_soul;1843
aesop_rock;1844
buck_65;1845
caparezza;1846
childish_gambino;1847
the_roots;1848
colbie_caillat;1849
big_sean;1850
dj_gruff;1851
tyler_the_creator;1852
dokken;1853
fun_lovin_criminals;1854
talib_kweli;1855
jane_air;1856
k_i_z;1857
kid_cudi;1858
jedi_mind_tricks;1859
celph_titled;1860
lupe_fiasco;1861
bun_b;1862
scarface;1863
ghostface_killah;1864
robyn;1865
rehab;1866
swollen_members;1867
styles_p;1868
the_streets;1869
wale;1870
joe_budden;1871
tank;1872
10_years;1873
36_crazyfists;1874
apocalyptica;1875
nina_hagen;1876
anastacia;1877
black_stone_cherry;1878
blindside;1879
breaking_benjamin;1880
bring_me_the_horizon;1881
bullet_for_my_valentine;1882
cave_in;1883
chevelle;1884
d_espairsray;1885
death_angel;1886
deftones;1887
demon_hunter;1888
demon;1889
devin_townsend_project;1890
devin_townsend;1891
doa;1892
dir_en_grey;1893
disturbed;1894
dope;1895
drowning_pool;1896
eighteen_visions;1897
entombed;1898
faith_no_more;1899
fear_factory;1900
fightstar;1901
five_finger_death_punch;1902
finger_eleven;1903
flyleaf;1904
grinspoon;1905
guano_apes;1906
h_blockx;1907
halestorm;1908
hamlet;1909
helmet;1910
bt;1911
ill_ni_o;1912
in_flames;1913
in_this_moment;1914
him;1915
j_b_o;1916
katatonia;1917
killswitch_engage;1918
xzibit;1919
lacuna_coil;1920
phish;1921
limp_bizkit;1922
living_colour;1923
viikate;1924
marilyn_manson;1925
megaherz;1926
falco;1927
melvins;1928
monster_magnet;1929
mushroomhead;1930
nonpoint;1931
soil;1932
otep;1933
p_o_d;1934
powerman_5000;1935
primus;1936
project_86;1937
red;1938
kris_allen;1939
rob_zombie;1940
ozzy_osbourne;1941
rollins_band;1942
saliva;1943
sepultura;1944
shihad;1945
skillet;1946
skindred;1947
slipknot;1948
smile_empty_soul;1949
danielson;1950
soilwork;1951
sonic_syndicate;1952
static_x;1953
stone_sour;1954
taproot;1955
the_notwist;1956
the_word_alive;1957
theory_of_a_deadman;1958
therapy;1959
los_tucanes_de_tijuana;1960
manu_chao;1961
volbeat;1962
zebrahead;1963
hed_p_e;1964
and_you_will_know_us_by_the_trail_of_dead;1965
10_000_maniacs;1966
311;1967
77s;1968
yes;1969
david_lee_roth;1970
hillsong;1971
afi;1972
adam_sandler;1973
afterhours;1974
hawkwind;1975
all_about_eve;1976
all_time_low;1977
allison_crowe;1978
amanda_palmer;1979
american_music_club;1980
amplifier;1981
robert_wyatt;1982
anberlin;1983
andrew_bird;1984
ani_difranco;1985
apoptygma_berzerk;1986
apulanta;1987
arab_strap;1988
joseph_arthur;1989
tom_rosenthal;1990
ash;1991
asian_kung_fu_generation;1992
poets_of_the_fall;1993
babas_nicos;1994
bayside;1995
beatsteaks;1996
ben_folds;1997
ben_folds_five;1998
ben_harper;1999
better_than_ezra;2000
bettie_serveert;2001
big_country;2002
big_head_todd_and_the_monsters;2003
big_sugar;2004
billy_talent;2005
today_is_the_day;2006
red_flag;2007
black_rebel_motorcycle_club;2008
megadeth;2009
blonde_redhead;2010
bob_mould;2011
bodeans;2012
bowling_for_soup;2013
buck_tick;2014
butch_walker;2015
butthole_surfers;2016
caf_tacvba;2017
cake;2018
camper_van_beethoven;2019
carmen_consoli;2020
mario_venuti;2021
franco_battiato;2022
catherine_wheel;2023
catupecu_machu;2024
cem_adrian;2025
john_cale;2026
charlie_brown_jr;2027
nena;2028
chumbawamba;2029
clutch;2030
cl;2031
cmx;2032
coheed_and_cambria;2033
cold_war_kids;2034
travis;2035
coma;2036
concrete_blonde;2037
mint_condition;2038
copeland;2039
crash_test_dummies;2040
joe_jackson;2041
cristian_castro;2042
curve;2043
dada;2044
daniel_amos;2045
daniel_johnston;2046
dave_matthews_band;2047
burning_heads;2048
david_gray;2049
david_sylvian;2050
deacon_blue;2051
deerhoof;2052
del_amitri;2053
dinosaur_jr;2054
dirty_projectors;2055
draco_rosa;2056
duncan_sheik;2057
jeremy_camp;2058
edwyn_collins;2059
eels;2060
nightwish;2061
element_of_crime;2062
embrace;2063
enter_shikari;2064
ulver;2065
everclear;2066
everlast;2067
eyeshine;2068
dio;2069
faust_o;2070
feeder;2071
atmosphere;2072
filter;2073
firewater;2074
fishbone;2075
fountains_of_wayne;2076
four_year_strong;2077
steve_green;2078
fresno;2079
gang_of_four;2080
good_charlotte;2081
blood_on_the_dance_floor;2082
graham_coxon;2083
melissa_etheridge;2084
tony_joe_white;2085
guided_by_voices;2086
robert_pollard;2087
guster;2088
elliott_smith;2089
hedley;2090
hole;2091
hollywood_undead;2092
hot_chip;2093
l_arc_en_ciel;2094
ian_brown;2095
idlewild;2096
jimmy_eat_world;2097
fish;2098
ingrid_michaelson;2099
inme;2100
inspiral_carpets;2101
raf;2102
james;2103
jean_leloup;2104
weird_al_yankovic;2105
jeff_buckley;2106
john_frusciante;2107
dr_john;2108
pj_harvey;2109
jonathan_coulton;2110
juliana_hatfield;2111
julieta_venegas;2112
k_s_choice;2113
kaizers_orchestra;2114
kargo;2115
kasabian;2116
keane;2117
kevin_coyne;2118
kevin_devine;2119
kevin_max;2120
rich_mullins;2121
trooper;2122
suzy_bogguss;2123
kill_hannah;2124
kisp_l_s_a_borz;2125
kult;2126
my_life_with_the_thrill_kill_kult;2127
kutless;2128
la_barranca;2129
la_ley;2130
lao_che;2131
lech_janerka;2132
les_cowboys_fringants;2133
les_fatals_picards;2134
les_rita_mitsouko;2135
sparks;2136
lifehouse;2137
lisa_germano;2138
ed_harcourt;2139
lisa_loeb;2140
liz_phair;2141
local_h;2142
lost_dogs;2143
lostprophets;2144
love_and_rockets;2145
lucybell;2146
lulu_santos;2147
gabriel_o_pensador;2148
adam_lambert;2149
madrugada;2150
mancha_de_rolando;2151
manchester_orchestra;2152
mando_diao;2153
foetus;2154
mark_lanegan;2155
matthew_sweet;2156
max_mo_park;2157
mayday_parade;2158
meat_puppets;2159
men_without_hats;2160
meshell_ndegeocello;2161
midnight_oil;2162
dance_gavin_dance;2163
molotov;2164
ov7;2165
monkey_majik;2166
suede;2167
fernando_ortega;2168
motion_city_soundtrack;2169
mudhoney;2170
mutemath;2171
mercyme;2172
m_o_morta;2173
natalia_lafourcade;2174
natewantstobattle;2175
needtobreathe;2176
split_enz;2177
sum_41;2178
no_te_va_gustar;2179
noir_d_sir;2180
t_tes_raides;2181
o_rappa;2182
o_a_r;2183
ocean_colour_scene;2184
omul_cu_obolani;2185
one_ok_rock;2186
2raumwohnung;2187
our_lady_peace;2188
pain;2189
panda;2190
parokya_ni_edgar;2191
pato_fu;2192
paul_westerberg;2193
pere_ubu;2194
pete_yorn;2195
peter_murphy;2196
placebo;2197
plain_white_t_s;2198
pop_will_eat_itself;2199
porcupine_tree;2200
powderfinger;2201
cat_power;2202
casting_crowns;2203
primal_scream;2204
m_tley_cr_e;2205
the_used;2206
raimundos;2207
mark_knopfler;2208
mark_kozelek;2209
danko_jones;2210
relient_k;2211
raffi;2212
renaud;2213
richard_hawley;2214
rickie_lee_jones;2215
the_shins;2216
rilo_kiley;2217
robyn_hitchcock;2218
mose_allison;2219
roy_harper;2220
rucka_rucka_ali;2221
rx_bandits;2222
saez;2223
samiam;2224
sarah_slean;2225
say_anything;2226
scout_niblett;2227
screaming_females;2228
shannon_wright;2229
silverchair;2230
sin_ad_o_connor;2231
siouxsie_and_the_banshees;2232
sixpence_none_the_richer;2233
skank;2234
skunk_anansie;2235
sleater_kinney;2236
sloan;2237
social_distortion;2238
sophie_hunger;2239
e_40;2240
steve_wynn;2241
subsonica;2242
joe_walsh;2243
super_furry_animals;2244
superchunk;2245
supergrass;2246
swervedriver;2247
switchfoot;2248
dido;2249
takida;2250
taking_back_sunday;2251
teenage_fanclub;2252
w_a_s_p;2253
the_afghan_whigs;2254
the_apples_in_stereo;2255
the_ataris;2256
smoking_popes;2257
the_bluetones;2258
the_breeders;2259
the_cat_empire;2260
the_charlatans_uk;2261
the_clarks;2262
guy_clark;2263
the_comsat_angels;2264
the_connells;2265
the_coral;2266
the_cribs;2267
the_cult;2268
bobby_o;2269
the_mission;2270
blue_yster_cult;2271
the_dandy_warhols;2272
the_dear_hunter;2273
the_decemberists;2274
the_early_november;2275
thievery_corporation;2276
the_fratellis;2277
the_gaslight_anthem;2278
jim_brickman;2279
falling_up;2280
the_hives;2281
the_innocence_mission;2282
the_jazz_butcher;2283
the_jesus_lizard;2284
the_lemonheads;2285
babyshambles;2286
the_living_end;2287
the_matrixx;2288
the_mother_hips;2289
the_mountain_goats;2290
the_muffs;2291
the_pillows;2292
the_posies;2293
the_presidents_of_the_united_states_of_america;2294
the_rasmus;2295
the_raveonettes;2296
the_saints;2297
the_samples;2298
bad_religion;2299
the_smithereens;2300
the_soundtrack_of_our_lives;2301
the_tea_party;2302
mayday;2303
the_triffids;2304
the_vines;2305
the_violet_burning;2306
the_wallflowers;2307
testament;2308
the_divine_comedy;2309
third_day;2310
thrice;2311
tindersticks;2312
tism;2313
tit_s;2314
toad_the_wet_sprocket;2315
tocotronic;2316
tom_mcrae;2317
tori_amos;2318
tracy_chapman;2319
trashcan_sinatras;2320
tre_allegri_ragazzi_morti;2321
tub_ring;2322
unkle;2323
unwritten_law;2324
uverworld;2325
vast;2326
verdena;2327
veruca_salt;2328
face_to_face;2329
virus;2330
voltaire;2331
we_the_kings;2332
the_kooks;2333
lindisfarne;2334
seals_crofts;2335
andy_partridge;2336
xutos_pontap_s;2337
yellowcard;2338
yup;2339
leevi_and_the_leavings;2340
zo;2341
zucchero;2342
z;2343
ebnem_ferah;2344
air;2345
alice;2346
boards_of_canada;2347
brian_eno;2348
burzum;2349
daniel_lanois;2350
enigma;2351
juana_molina;2352
lisa_gerrard;2353
nox_arcana;2354
renard;2355
schiller;2356
sigur_r_s;2357
steven_wilson;2358
swans;2359
wolfgun;2360
xiu_xiu;2361
michael_johnson;2362
montgomery_gentry;2363
the_stanley_brothers;2364
john_waite;2365
shelby_lynne;2366
judy_collins;2367
burl_ives;2368
the_irish_rovers;2369
david_wilcox;2370
devendra_banhart;2371
doc_watson;2372
bill_monroe;2373
michael_martin_murphey;2374
gordon_bok;2375
asleep_at_the_wheel;2376
the_browns;2377
nana_mouskouri;2378
jerry_jeff_walker;2379
steve_goodman;2380
malcolm_holcombe;2381
malvina_reynolds;2382
odetta;2383
tom_paxton;2384
strawbs;2385
phil_ochs;2386
harry_chapin;2387
ramblin_jack_elliott;2388
roger_mcguinn;2389
gene_clark;2390
mat_kearney;2391
the_brothers_four;2392
tom_russell;2393
townes_van_zandt;2394
uncle_dave_macon;2395
delbert_mcclinton;2396
john_hiatt;2397
justin_townes_earle;2398
mark_erelli;2399
over_the_rhine;2400
steve_forbert;2401
manfred_mann_s_earth_band;2402
mot_rhead;2403
rudimentary_peni;2404
illapu;2405
inti_illimani;2406
quilapay_n;2407
v_ctor_jara;2408
skylark;2409
adam_green;2410
cold_chisel;2411
guy_sebastian;2412
jefferson_starship;2413
the_alan_parsons_project;2414
ali_project;2415
modern_talking;2416
animal_collective;2417
banco_del_mutuo_soccorso;2418
ben_lee;2419
bryan_ferry;2420
buffy_sainte_marie;2421
colin_blunstone;2422
cursive;2423
elysian_fields;2424
emerson_lake_palmer;2425
gino_vannelli;2426
g_rard_manset;2427
hot_dad;2428
marina_and_the_diamonds;2429
ismo_alanko;2430
kansas;2431
kari_peitsamo;2432
laibach;2433
laurie_anderson;2434
puhdys;2435
na_o_zumbi;2436
roger_waters;2437
rush;2438
the_walker_brothers;2439
hilltop_hoods;2440
wolfgang_ambros;2441
erste_allgemeine_verunsicherung;2442
jacques_brel;2443
rainhard_fendrich;2444
tom_waits;2445
adrian_belew;2446
anne_clark;2447
can;2448
captain_beefheart_and_the_magic_band;2449
deine_lakaien;2450
devo;2451
einst_rzende_neubauten;2452
frank_zappa;2453
goethes_erben;2454
wishbone_ash;2455
death_cab_for_cutie;2456
antony_and_the_johnsons;2457
jandek;2458
nevermore;2459
king_crimson;2460
king_missile;2461
the_residents;2462
steeleye_span;2463
vampire_rodents;2464
the_walkmen;2465
dog_fashion_disco;2466
freak_kitchen;2467
sigh;2468
children_of_bodom;2469
soft_machine;2470
ara_ketu;2471
asa_de_guia;2472
banda_eva;2473
ivete_sangalo;2474
chiclete_com_banana;2475
daniela_mercury;2476
alejandro_sanz;2477
timbalada;2478
juan_luis_guerra;2479
daddy_yankee;2480
alceu_valen_a;2481
luiz_gonzaga;2482
matia_bazar;2483
axelle_red;2484
barbara;2485
benny_neyman;2486
gigi_d_agostino;2487
jacques_higelin;2488
caetano_veloso;2489
gal_costa;2490
jorge_ben;2491
die_flippers;2492
nicole;2493
angra;2494
reinhard_mey;2495
wolf_biermann;2496
florent_pagny;2497
hannes_wader;2498
tienne_daho;2499
henri_salvador;2500
f_lix_leclerc;2501
daniel_lavoie;2502
gerhard_sch_ne;2503
g_lben_ergen;2504
georg_kreisler;2505
herbert_gr_nemeyer;2506
herman_van_veen;2507
hildegard_knef;2508
marlene_dietrich;2509
iu;2510
jos_luis_rodr_guez;2511
juliette_gr_co;2512
klaus_hoffmann;2513
konstantin_wecker;2514
saltatio_mortis;2515
luigi_tenco;2516
maria_beth_nia;2517
adriana_calcanhotto;2518
marie_lafor_t;2519
marius_m_ller_westernhagen;2520
mina;2521
no_l_coward;2522
pippo_pollina;2523
rita_lee;2524
os_mutantes;2525
rita_pavone;2526
roger_whittaker;2527
al_bano_romina_power;2528
salvatore_adamo;2529
simone;2530
s_rgio_godinho;2531
udo_j_rgens;2532
udo_lindenberg;2533
ulrich_roski;2534
zaz;2535
z_ramalho;2536
fagner;2537
dith_piaf;2538
duelo;2539
espinoza_paz;2540
fidel_rueda;2541
la_firma;2542
la_arrolladora_banda_el_lim_n;2543
voz_de_mando;2544
sergio_vega;2545
fool_s_garden;2546
waltari;2547
of_montreal;2548
pierre_lapointe;2549
rufus_wainwright;2550
loudon_wainwright_iii;2551
sufjan_stevens;2552
machine_gun_kelly;2553
francesco_guccini;2554
le_orme;2555
lucio_dalla;2556
michel_fugain;2557
al_jarreau;2558
carmen_mcrae;2559
javier_sol_s;2560
harry_connick_jr;2561
bap;2562
cradle_of_filth;2563
amorphis;2564
avatar;2565
bathory;2566
behemoth;2567
borknagar;2568
countess;2569
cruachan;2570
darkthrone;2571
hate;2572
destruction;2573
dimmu_borgir;2574
eisregen;2575
enslaved;2576
finntroll;2577
fates_warning;2578
graveworm;2579
impaled_nazarene;2580
sentenced;2581
king_diamond;2582
kreator;2583
lord_belial;2584
marduk;2585
mercyful_fate;2586
stick_to_your_guns;2587
moonspell;2588
as_i_lay_dying;2589
nunslaughter;2590
rotting_christ;2591
samael;2592
sandy_denny;2593
skyforger;2594
sodom;2595
cannibal_corpse;2596
exodus;2597
atreyu;2598
theatres_des_vampires;2599
wizard;2600
transmetal;2601
venom;2602
belphegor;2603
the_crown;2604
moya_brennan;2605
todd_rundgren;2606
clay_walker;2607
andrew_peterson;2608
lynn_anderson;2609
david_crowder_band;2610
pam_tillis;2611
norah_jones;2612
rhonda_vincent;2613
jamey_johnson;2614
plumb;2615
j_j_cale;2616
new_riders_of_the_purple_sage;2617
joe_diffie;2618
kasey_chambers;2619
leon_russell;2620
jack_greene;2621
the_string_cheese_incident;2622
ystein_sunde;2623
stephen_stills;2624
cancerslug;2625
robert_plant;2626
alvin_lee;2627
beth_hart;2628
jimmy_buffett;2629
billy_s_band;2630
bunbury;2631
nacho_vegas;2632
calogero;2633
georges_brassens;2634
canned_heat;2635
charlie_louvin;2636
colin_james;2637
cuby_blizzards;2638
dick_annegarn;2639
edoardo_bennato;2640
eva_cassidy;2641
gil_scott_heron;2642
glenn_hughes;2643
deep_purple;2644
connie_smith;2645
iva_zanicchi;2646
izzy_stradlin;2647
j_karjalainen;2648
jack_bruce;2649
leonard_cohen;2650
joan_armatrading;2651
joan_osborne;2652
john_martyn;2653
rio_reiser;2654
larry_carlton;2655
madeleine_peyroux;2656
bruce_cockburn;2657
kate_anna_mcgarrigle;2658
mavis_staples;2659
noa;2660
ralph_mctell;2661
renato_carosone;2662
richie_kotzen;2663
robben_ford;2664
roberto_carlos;2665
erasmo_carlos;2666
robin_trower;2667
rory_block;2668
roy_buchanan;2669
sandra_mihanovich;2670
savoy_brown;2671
shirley_horn;2672
siniestro_total;2673
slank;2674
the_fabulous_thunderbirds;2675
the_seatbelts;2676
the_tragically_hip;2677
mike_jones;2678
trophy_scars;2679
caravan;2680
velhas_virgens;2681
walter_trout;2682
gov_t_mule;2683
bar_o_vermelho;2684
blue_cheer;2685
ian_hunter;2686
david_leb_n;2687
de_palmas;2688
eugenio_finardi;2689
extreme;2690
foghat;2691
george_thorogood_the_destroyers;2692
great_white;2693
guardian;2694
jethro_tull;2695
ian_anderson;2696
david_knopfler;2697
steppenwolf;2698
dave_edmunds;2699
lynyrd_skynyrd;2700
crosby_stills_nash;2701
raul_seixas;2702
the_poodles;2703
musiq_soulchild;2704
shocking_blue;2705
nick_lowe;2706
the_black_crowes;2707
traffic;2708
widespread_panic;2709
co;2710
alberto_cortez;2711
joan_sebastian;2712
ana_gabriel;2713
gilberto_santa_rosa;2714
rub_n_blades;2715
v_ctor_manuelle;2716
celia_cruz;2717
luis_fonsi;2718
nek;2719
dr_feelgood;2720
astrud_gilberto;2721
benito_di_paula;2722
brazzaville;2723
sacha_distel;2724
chico_buarque;2725
elis_regina;2726
milton_nascimento;2727
faf_de_bel_m;2728
nikka_costa;2729
tim_maia;2730
gilberto_gil;2731
lisa_ekdahl;2732
joyce;2733
maria_rita;2734
nara_le_o;2735
nouvelle_vague;2736
paulinho_moska;2737
wilson_simonal;2738
14_bis;2739
arnaldo_antunes;2740
biquini_cavad_o;2741
cidade_negra;2742
cpm_22;2743
c_ssia_eller;2744
os_paralamas_do_sucesso;2745
guilherme_arantes;2746
ira;2747
lob_o;2748
nenhum_de_n_s;2749
djavan;2750
rog_rio_skylab;2751
roupa_nova;2752
ultraje_a_rigor;2753
kj_52;2754
amado_batista;2755
chit_ozinho_xoror;2756
jo_o_paulo_daniel;2757
leandro_leonardo;2758
leonardo;2759
odair_jos;2760
kaiser_chiefs;2761
kula_shaker;2762
lightning_seeds;2763
pulp;2764
the_proclaimers;2765
dying_fetus;2766
napalm_death;2767
nile;2768
pathology;2769
hilary_duff;2770
badly_drawn_boy;2771
federico_salvatore;2772
i_gufi;2773
zachary_richard;2774
stan_rogers;2775
moxy_fr_vous;2776
poco;2777
la_bottine_souriante;2778
stompin_tom_connors;2779
bersuit_vergarabat;2780
las_pastillas_del_abuelo;2781
george_lam;2782
altan;2783
clannad;2784
blackmore_s_night;2785
capercaillie;2786
celtic_thunder;2787
eluveitie;2788
powerwolf;2789
gaelic_storm;2790
an_na;2791
jon_anderson;2792
the_dubliners;2793
loreena_mckennitt;2794
omnia;2795
secret_garden;2796
shaun_davey;2797
roger_daltrey;2798
the_corrs;2799
los_tigres_del_norte;2800
laurent_voulzy;2801
the_kelly_family;2802
wolfe_tones;2803
alan_stivell;2804
heather_alexander;2805
kate_rusby;2806
dropkick_murphys;2807
great_big_sea;2808
fiddler_s_green;2809
heather_dale;2810
runrig;2811
the_waterboys;2812
dougie_maclean;2813
adriano_celentano;2814
alain_chamfort;2815
zazie;2816
hamelen;2817
tazenda;2818
arno;2819
arthur_h;2820
boudewijn_de_groot;2821
charles_trenet;2822
claudio_baglioni;2823
claudio_rocchi;2824
fabrizio_de_andr;2825
dalida;2826
dana_winner;2827
demis_roussos;2828
esther_ofarim;2829
eugenio_bennato;2830
michel_berger;2831
francis_cabrel;2832
maxime_le_forestier;2833
georges_moustaki;2834
gianmaria_testa;2835
gianni_morandi;2836
gigliola_cinquetti;2837
milva;2838
gilbert_b_caud;2839
ginette_reno;2840
giuni_russo;2841
guy_b_art;2842
helena_vondr_kov;2843
hugues_aufray;2844
ivan_graziani;2845
ivano_fossati;2846
jacques_bertin;2847
jean_ferrat;2848
juliane_werding;2849
julien_clerc;2850
los_temerarios;2851
katerine;2852
leny_escudero;2853
mathieu_chedid;2854
luca_barbarossa;2855
l_o_ferr;2856
rosenstolz;2857
marc_lavoine;2858
massimo_bubola;2859
mecano;2860
mia_martini;2861
michel_jonasz;2862
michele_zarrillo;2863
fiorello;2864
nada;2865
mercedes_sosa;2866
nino_d_angelo;2867
patrick_bruel;2868
patty_pravo;2869
pierre_bachelet;2870
rainald_grebe;2871
rapha_l;2872
raphael;2873
richard_anthony;2874
roberto_murolo;2875
ron;2876
stefano_rosso;2877
stephan_eicher;2878
vasco_rossi;2879
yves_duteil;2880
yves_jamait;2881
ang_lica;2882
aaron_carter;2883
barry_louis_polisar;2884
yuri;2885
cri_cri;2886
hevisaurus;2887
juice_leskinen;2888
kidz_bop;2889
mara_maravilha;2890
destroyer;2891
scorpions;2892
obk;2893
duncan_dhu;2894
parry_gripp;2895
sandy_junior;2896
the_verve_pipe;2897
the_verve;2898
the_wiggles;2899
veggietales;2900
newsboys;2901
steven_curtis_chapman;2902
toro_y_moi;2903
medi_val_b_bes;2904
aaron_neville;2905
bethel_music;2906
apologetix;2907
gaither_vocal_band;2908
building_429;2909
chris_tomlin;2910
matt_maher;2911
jerusalem;2912
david_meece;2913
debby_boone;2914
elevation_worship;2915
matt_redman;2916
planetshakers;2917
majesty;2918
jump5;2919
lecrae;2920
michael_w_smith;2921
bride;2922
natalie_grant;2923
the_lads;2924
audio_adrenaline;2925
paul_wilbur;2926
psalmen_voor_nu;2927
sawyer_brown;2928
shane_shane;2929
the_echoing_green;2930
twila_paris;2931
watch_tower_bible_and_tract_society;2932
da_t_r_u_t_h;2933
dc_talk;2934
flame;2935
grits;2936
trip_lee;2937
crystal_lewis;2938
the_cross_movement;2939
tobymac;2940
vico_c;2941
mormon_tabernacle_choir;2942
august_burns_red;2943
black_veil_brides;2944
deliverance;2945
opeth;2946
die_happy;2947
disciple;2948
galactic_cowboys;2949
haste_the_day;2950
living_sacrifice;2951
mastodon;2952
mortification;2953
showbread;2954
labyrinth;2955
stryper;2956
the_devil_wears_prada;2957
underoath;2958
whitecross;2959
petra;2960
huntingtons;2961
mxpx;2962
d_a_d;2963
caedmon_s_call;2964
david_and_the_giants;2965
degarmo_and_key;2966
delirious;2967
don_francisco;2968
five_iron_frenzy;2969
geoff_moore;2970
hawk_nelson;2971
grave;2972
larry_norman;2973
randy_stonehill;2974
monty_python;2975
oomph;2976
oficina_g3;2977
white_heart;2978
rescate;2979
rick_wakeman;2980
la_oreja_de_van_gogh;2981
sanctus_real;2982
fun_people;2983
thousand_foot_krutch;2984
tim_hughes;2985
the_o_c_supertones;2986
4him;2987
billy_gilman;2988
aimee_mann;2989
katharine_mcphee;2990
eros_ramazzotti;2991
z_ro;2992
babbie_mason;2993
bebo_norman;2994
judy_garland;2995
carman;2996
cece_winans;2997
trick_daddy;2998
chris_isaak;2999
cocteau_twins;3000
edyta_g_rniak;3001
enrico_ruggeri;3002
ffh;3003
hanson;3004
hawksley_workman;3005
indigo_girls;3006
irene_grandi;3007
jackie_evancho;3008
joy_electric;3009
kelly_price;3010
mary_mary;3011
israel_houghton;3012
phil_wickham;3013
phillips_craig_dean;3014
roch_voisine;3015
rupaul;3016
gregorian;3017
sarah_connor;3018
sugarland;3019
sweetbox;3020
tarja;3021
the_brian_setzer_orchestra;3022
brian_setzer;3023
badfinger;3024
the_moffatts;3025
the_vandals;3026
trans_siberian_orchestra;3027
roy_drusky;3028
burton_cummings;3029
procol_harum;3030
renaissance;3031
the_pretty_things;3032
twisted_sister;3033
bj_rn_eidsv_g;3034
corvus_corax;3035
schelmish;3036
emilie_autumn;3037
epica;3038
katherine_jenkins;3039
scala_kolacny_brothers;3040
take_6;3041
the_roches;3042
tony_banks;3043
to_e_proeski;3044
lacrimosa;3045
16_volt;3046
bj_rn_rosenstr_m;3047
bob_rivers;3048
cledus_t_judd;3049
frankjavcee;3050
george_formby;3051
ninja_sex_party;3052
paul_and_storm;3053
the_arrogant_worms;3054
tripod;3055
el_cuarteto_de_nos;3056
gwar;3057
knorkator;3058
psychostick;3059
rodgau_monotones;3060
los_palominos;3061
charlie_peacock;3062
jesus_culture;3063
michael_card;3064
tenth_avenue_north;3065
carrie_newcomer;3066
nick_drake;3067
aaron_watson;3068
billy_joe_royal;3069
billy_joe_shaver;3070
charlie_landsborough;3071
chris_ledoux;3072
collin_raye;3073
dan_seals;3074
dave_dudley;3075
hellbillies;3076
ed_bruce;3077
emilio_navaira;3078
jean_shepard;3079
freddie_hart;3080
gary_stewart;3081
gene_watson;3082
gian_giovani;3083
gilberto_gilmar;3084
jason_mraz;3085
ilse_delange;3086
john_prine;3087
jake_owen;3088
wynn_stewart;3089
jim_ed_brown;3090
joe_ely;3091
kid_rock;3092
la_toya_jackson;3093
lit;3094
lita_ford;3095
me_first_and_the_gimme_gimmes;3096
lagwagon;3097
melanie;3098
mickey_newbury;3099
paul_brunelle;3100
paula_fernandes;3101
zez_di_camargo_luciano;3102
randy_rogers_band;3103
reverend_horton_heat;3104
rick_renner;3105
rionegro_solim_es;3106
shooter_jennings;3107
terri_clark;3108
vern_gosdin;3109
webb_wilder;3110
ween;3111
38_special;3112
the_beau_brummels;3113
matanza;3114
clawfinger;3115
acid_drinkers;3116
agnostic_front;3117
biohazard;3118
body_count;3119
d_r_i;3120
municipal_waste;3121
neurosis;3122
nuclear_assault;3123
soziedad_alkoholika;3124
suicidal_tendencies;3125
paragon;3126
mario;3127
inna;3128
belinda;3129
bronco;3130
grupo_bryndis;3131
david_bisbal;3132
ram_n_ayala;3133
grant_lee_phillips;3134
the_veronicas;3135
amr_diab;3136
atb;3137
basshunter;3138
dream_theater;3139
frankie_j;3140
baby_bash;3141
sophie_ellis_bextor;3142
grace_jones;3143
laveerre;3144
silkk_the_shocker;3145
parov_stelar;3146
raffaella_carr;3147
elephant_man;3148
saint_etienne;3149
samantha_fox;3150
selena;3151
super_junior;3152
t_a_t_u;3153
tarkan;3154
judie_tzuke;3155
el_kel_iset;3156
yello;3157
franz_ferdinand;3158
chenoa;3159
lucero;3160
tokio;3161
puffy_amiyumi;3162
wink;3163
obie_trice;3164
mystikal;3165
current_93;3166
dark_sanctuary;3167
rome;3168
lord_of_the_lost;3169
bella_morte;3170
mantus;3171
blutengel;3172
clan_of_xymox;3173
dead_can_dance;3174
death_in_june;3175
diary_of_dreams;3176
diorama;3177
helium_vola;3178
illuminate;3179
l_me_immortelle;3180
lacrimas_profundere;3181
killing_joke;3182
m_nchener_freiheit;3183
otto_dix;3184
project_pitchfork;3185
qntal;3186
sopor_aeternus;3187
the_cr_xshadows;3188
unheilig;3189
welle;3190
yendri;3191
carcass;3192
asphyx;3193
bolt_thrower;3194
darkseed;3195
paradise_lost;3196
tiamat;3197
the_damned;3198
pantera;3199
the_amity_affliction;3200
judas_priest;3201
amon_amarth;3202
alesana;3203
atrocity;3204
autopsy;3205
avulsed;3206
sabaton;3207
misfits;3208
iron_fire;3209
centinex;3210
dagoba;3211
dark_tranquillity;3212
asia;3213
deicide;3214
dethklok;3215
dew_scented;3216
edge_of_sanity;3217
escape_the_fate;3218
heaven_shall_burn;3219
hypocrisy;3220
incantation;3221
jungle_rot;3222
kataklysm;3223
krisiun;3224
macabre;3225
malevolent_creation;3226
meshuggah;3227
misanthrope;3228
morbid_angel;3229
dead_kennedys;3230
necro;3231
pig_destroyer;3232
shadows_fall;3233
sinister;3234
six_feet_under;3235
dream_evil;3236
soulfly;3237
the_black_dahlia_murder;3238
between_the_buried_and_me;3239
therion;3240
vader;3241
whitechapel;3242
attila;3243
emmure;3244
miss_may_i;3245
the_acacia_strain;3246
betontod;3247
broilers;3248
dritte_wahl;3249
ohl;3250
slime;3251
terrorgruppe;3252
b_hse_onkelz;3253
frei_wild;3254
k_rbholz;3255
asp;3256
tokio_hotel;3257
queensr_che;3258
amanda_miguel;3259
arabesque;3260
bad_boys_blue;3261
boyce_avenue;3262
parliament;3263
wu_tang_clan;3264
neoton_fam_lia;3265
teena_marie;3266
bobby_womack;3267
agoraphobic_nosebleed;3268
candlemass;3269
electric_wizard;3270
black_sabbath;3271
theatre_of_tragedy;3272
type_o_negative;3273
marie_fredriksson;3274
luna;3275
marissa_nadler;3276
yo_la_tengo;3277
celldweller;3278
hitomi;3279
big_d_and_the_kids_table;3280
alacranes_musical;3281
k_paz_de_la_sierra;3282
assemblage_23;3283
covenant;3284
die_krupps;3285
kodak_black;3286
front_242;3287
haujobb;3288
in_strict_confidence;3289
le_ther_strip;3290
snog;3291
the_darkness;3292
tanzwut;3293
terminal_choice;3294
velvet_acid_christ;3295
vnv_nation;3296
wumpscut;3297
x_fusion;3298
umbra_et_imago;3299
de_vision;3300
deichkind;3301
eisbrecher;3302
herbie_hancock;3303
ana_moura;3304
macaco;3305
skinny_puppy;3306
ayreon;3307
black_moth_super_rainbow;3308
erykah_badu;3309
cocorosie;3310
de_jeugd_van_tegenwoordig;3311
dj_shadow;3312
e_nomine;3313
kmfdm;3314
flying_lotus;3315
goldfrapp;3316
hanzel_und_gretyl;3317
information_society;3318
mc_frontalot;3319
kraftwerk;3320
ladytron;3321
lamb;3322
milk_inc;3323
mind_in_a_box;3324
ministry;3325
m_m;3326
m_nia;3327
pig;3328
pitchshifter;3329
lil_boosie;3330
master_p;3331
mindless_self_indulgence;3332
buzzcocks;3333
vanilla_ice;3334
milie_simon;3335
gianna_nannini;3336
pinback;3337
the_birthday_massacre;3338
archive;3339
99_posse;3340
bloc_party;3341
morcheeba;3342
origa;3343
paul_kalkbrenner;3344
tina_arena;3345
dover;3346
melotron;3347
owl_city;3348
kamelot;3349
greeley_estates;3350
hawthorne_heights;3351
joan_of_arc;3352
saves_the_day;3353
thursday;3354
transit;3355
fairport_convention;3356
maggie_reilly;3357
joan_manuel_serrat;3358
e_rotic;3359
the_scene;3360
sandra;3361
amon_d_l_ii;3362
circa_survive;3363
love_solfege;3364
caliban;3365
tall_dwarfs;3366
van_der_graaf_generator;3367
death_grips;3368
the_fiery_furnaces;3369
am_lia_rodrigues;3370
cristina_branco;3371
jos_afonso;3372
katia_guerreiro;3373
ney_matogrosso;3374
madredeus;3375
mariza;3376
gipsy_kings;3377
mal;3378
aleks_syntek;3379
ni_a_pastori;3380
rosario;3381
al_stewart;3382
amos_lee;3383
andr_s_calamaro;3384
ane_brun;3385
asa;3386
editors;3387
catie_curtis;3388
chrystian_ralf;3389
clueso;3390
eddi_reader;3391
eddie_from_ohio;3392
ellis_paul;3393
frank_turner;3394
estampie;3395
ferdi_tayfur;3396
fito_p_ez;3397
luis_alberto_spinetta;3398
gabriella_ferri;3399
gigi;3400
greg_brown;3401
g_ksel;3402
lando_fiorini;3403
india_arie;3404
jack_savoretti;3405
anne_grete_preus;3406
jarom_r_nohavica;3407
joe_purdy;3408
john_wesley_harding;3409
josh_rouse;3410
karel_kryl;3411
v_tor_ramil;3412
lars_winnerb_ck;3413
laura_marling;3414
llu_s_llach;3415
los_chalchaleros;3416
luka_bloom;3417
malicorne;3418
mark_heard;3419
martin_carthy;3420
nic_jones;3421
le_n_gieco;3422
mijares;3423
nuova_compagnia_di_canto_popolare;3424
ola_magnell;3425
thin_lizzy;3426
ray_lamontagne;3427
ron_sexsmith;3428
rosana;3429
silvio_rodr_guez;3430
stef_bos;3431
sun_kil_moon;3432
tanita_tikaram;3433
the_incredible_string_band;3434
thea_gilmore;3435
tina_dico;3436
victor_leo;3437
v_rttin;3438
ge_aleksandersen;3439
i_brahim_tatl_ses;3440
ektomorf;3441
elvenking;3442
ensiferum;3443
falconer;3444
feuerschwanz;3445
in_extremo;3446
korpiklaani;3447
leaves_eyes;3448
letzte_instanz;3449
m_go_de_oz;3450
saurom;3451
schandmaul;3452
skyclad;3453
subway_to_sally;3454
suidakra;3455
t_r;3456
icehouse;3457
bomb_the_music_industry;3458
the_real_mckenzies;3459
54_40;3460
armored_saint;3461
alexz_johnson;3462
bar_man_o;3463
ezginin_g_nl;3464
galija;3465
sts;3466
h_kan_hellstr_m;3467
james_blunt;3468
kazik;3469
mewithoutyou;3470
michel_polnareff;3471
ovidi_montllor;3472
rasputina;3473
shearwater;3474
gerry_rafferty;3475
steam_powered_giraffe;3476
the_saw_doctors;3477
ty_segall;3478
tyrone_wells;3479
avi_es_do_forr;3480
grimskunk;3481
sinik;3482
vitaa;3483
kenza_farah;3484
sexion_d_assaut;3485
aliz_e;3486
henri_tachan;3487
jenifer;3488
m_pokora;3489
indochine;3490
brainstorm;3491
con_funk_shun;3492
funkadelic;3493
lena_park;3494
neffa;3495
ugk;3496
suburban_legends;3497
mai_kuraki;3498
cherry_poppin_daddies;3499
electric_six;3500
los_straitjackets;3501
the_69_eyes;3502
the_angels;3503
the_haunted;3504
the_hellacopters;3505
the_kills;3506
thee_oh_sees;3507
white_denim;3508
zabranjeno_pu_enje;3509
ol_dirty_bastard;3510
kurupt;3511
spice_1;3512
brotha_lynch_hung;3513
chamillionaire;3514
paul_wall;3515
trae;3516
club_dogo;3517
mc_eiht;3518
royce_da_5_9;3519
geto_boys;3520
the_diplomats;3521
ice_t;3522
2_live_crew;3523
xv;3524
mobb_deep;3525
c_murder;3526
tru;3527
lil_keke;3528
project_pat;3529
tha_dogg_pound;3530
esham;3531
twiztid;3532
erick_sermon;3533
big_tymers;3534
kate_nash;3535
the_cramps;3536
nekromantix;3537
tsol;3538
ace_frehley;3539
hardcore_superstar;3540
harem_scarem;3541
house_of_lords;3542
kingdom_come;3543
l_a_guns;3544
mr_big;3545
pink_cream_69;3546
quiet_riot;3547
riot;3548
ratt;3549
tnt;3550
backyard_babies;3551
ultima_thule;3552
europe;3553
hanoi_rocks;3554
mott_the_hoople;3555
smokie;3556
suzi_quatro;3557
haemorrhage;3558
aline_barros;3559
bruna_karla;3560
kirk_franklin;3561
minist_rio_koinonya_de_louvor;3562
artrosis;3563
closterkeller;3564
indica;3565
sirenia;3566
trail_of_tears;3567
tristania;3568
within_temptation;3569
bauhaus;3570
mono_inc;3571
pansy_division;3572
xandria;3573
immortal_technique;3574
agathocles;3575
rotten_sound;3576
the_locust;3577
anthrax;3578
devildriver;3579
lamb_of_god;3580
machine_head;3581
parkway_drive;3582
pro_pain;3583
throwdown;3584
vicious_rumors;3585
screaming_trees;3586
cuisillos;3587
intocable;3588
pesado;3589
la_mafia;3590
marco_antonio_sol_s;3591
los_bukis;3592
andrew_w_k;3593
april_wine;3594
axel_rudi_pell;3595
b_z;3596
tak_matsumoto;3597
barricada;3598
bijelo_dugme;3599
blaze_bayley;3600
bonfire;3601
bruce_dickinson;3602
buckcherry;3603
budgie;3604
buitres;3605
jorn;3606
doro;3607
enuff_z_nuff;3608
gentle_giant;3609
girlschool;3610
golden_earring;3611
gotthard;3612
nazareth;3613
a_day_to_remember;3614
jefferson_airplane;3615
joe_satriani;3616
ken_hensley;3617
kim_mitchell;3618
king_s_x;3619
kotiteollisuus;3620
la_renga;3621
lee_aaron;3622
lordi;3623
michael_schenker_group;3624
mustasch;3625
night_ranger;3626
omega;3627
parni_valjak;3628
paul_gilbert;3629
popeda;3630
skid_row;3631
tankcsapda;3632
the_bronx;3633
the_donnas;3634
all_that_remains;3635
triumph;3636
umphrey_s_mcgee;3637
y_t;3638
ziggy;3639
sfdk;3640
7_seconds;3641
aiden;3642
alphaville;3643
black_flag;3644
slayer;3645
circle_jerks;3646
ritchie;3647
converge;3648
every_time_i_die;3649
hatebreed;3650
nomeansno;3651
rancid;3652
memphis_may_fire;3653
nofx;3654
propagandhi;3655
tankard;3656
screeching_weasel;3657
sick_of_it_all;3658
silverstein;3659
two_steps_from_hell;3660
faun;3661
accept;3662
the_frames;3663
andromeda;3664
annihilator;3665
anvil;3666
artillery;3667
avenged_sevenfold;3668
axxis;3669
blind_guardian;3670
vanden_plas;3671
grave_digger;3672
dragonforce;3673
edenbridge;3674
damien_jurado;3675
exciter;3676
firewind;3677
halford;3678
hammerfall;3679
helloween;3680
helstar;3681
iced_earth;3682
jag_panzer;3683
machinae_supremacy;3684
manowar;3685
metal_church;3686
morgana_lefay;3687
mudvayne;3688
nocturnal_rites;3689
overkill;3690
primal_fear;3691
rebellion;3692
running_wild;3693
corey_hart;3694
savatage;3695
saxon;3696
steve_vai;3697
tad_morose;3698
tarot;3699
tierra_santa;3700
trivium;3701
turmion_k_til_t;3702
u_d_o;3703
virgin_steele;3704
voivod;3705
warcry;3706
yngwie_malmsteen;3707
zion_lennox;3708
sido;3709
mc_chris;3710
assalti_frontali;3711
kool_keith;3712
ayumi_hamasaki;3713
az;3714
bahh_tee;3715
bassi_maestro;3716
revocation;3717
blumentopf;3718
brockhampton;3719
bts;3720
bushido;3721
vinnie_paz;3722
chakuza;3723
cheek;3724
cro;3725
arc_ngel;3726
alexis_fido;3727
dargen_d_amico;3728
the_coup;3729
def_con_dos;3730
die_fantastischen_vier;3731
dom_no;3732
donguralesko;3733
epmd;3734
kool_savas;3735
fettes_brot;3736
fronda;3737
mc_solaar;3738
pyhimys;3739
kaaris;3740
kollegah;3741
kontra_k;3742
k_k;3743
l_o_c;3744
logic;3745
jerry_rivera;3746
murs;3747
angie_stone;3748
namie_amuro;3749
anthony_hamilton;3750
lyfe_jennings;3751
bl_f;3752
o_s_t_r;3753
paluch;3754
parazi_ii;3755
porta;3756
bleeding_through;3757
prinz_pi;3758
rasmentalism;3759
xavier_naidoo;3760
sage_francis;3761
stupeflip;3762
young_thug;3763
tego_calder_n;3764
fifth_harmony;3765
jay_chou;3766
blitzkid;3767
zumbis_do_espa_o;3768
deer_tick;3769
half_man_half_biscuit;3770
hayden;3771
club_8;3772
grandaddy;3773
jens_lekman;3774
kent;3775
keren_ann;3776
los_campesinos;3777
nellie_mckay;3778
china_crisis;3779
prefab_sprout;3780
the_clientele;3781
the_lucksmiths;3782
bell_x1;3783
british_sea_power;3784
car_seat_headrest;3785
deerhunter;3786
dr_dog;3787
elf_power;3788
frightened_rabbit;3789
fugazi;3790
fury_in_the_slaughterhouse;3791
julie_doiron;3792
tinashe;3793
la_habitaci_n_roja;3794
margot_the_nuclear_so_and_so_s;3795
matt_pond_pa;3796
metric;3797
mike_doughty;3798
mother_mother;3799
piebald;3800
quasi;3801
rheostatics;3802
sebadoh;3803
spoon;3804
starflyer_59;3805
stephen_malkmus;3806
stereolab;3807
ted_leo_and_the_pharmacists;3808
the_appleseed_cast;3809
the_faint;3810
the_go_betweens;3811
the_pineapple_thief;3812
the_undertones;3813
tronic;3814
chris_de_burgh;3815
mass_hysteria;3816
angelo_branduardi;3817
gigi_d_alessio;3818
i_muvrini;3819
back_number;3820
boa;3821
claris;3822
crystal_kay;3823
zard;3824
gackt;3825
garnet_crow;3826
girls_generation;3827
kat_tun;3828
koda_kumi;3829
kotoko;3830
lisa;3831
maaya_sakamoto;3832
masami_okui;3833
mr_children;3834
news;3835
shinee;3836
w_inds;3837
yui;3838
yumi_matsutoya;3839
the_high_lows;3840
sid;3841
abbey_lincoln;3842
anna_maria_jopek;3843
cassandra_wilson;3844
dianne_reeves;3845
fred_buscaglione;3846
jane_monheit;3847
zor_n;3848
kraan;3849
laura_fygi;3850
michael_franks;3851
natalino_otto;3852
quartetto_cetra;3853
scott_bradlee_s_postmodern_jukebox;3854
stacey_kent;3855
the_flower_kings;3856
ronnie_von;3857
brown_eyed_girls;3858
ahmet_kaya;3859
alejandra_guzm_n;3860
ana_carolina;3861
alcione;3862
el_chapo_de_sinaloa;3863
gustavo_cerati;3864
soda_stereo;3865
jenni_rivera;3866
joaqu_n_sabina;3867
los_fabulosos_cadillacs;3868
abel_pintos;3869
ana_bel_n;3870
aterciopelados;3871
camilo_sesto;3872
david_demar_a;3873
gian_marco;3874
menudo;3875
ricardo_arjona;3876
sabroso;3877
v_ctor_manuel;3878
las_pelotas;3879
ariel_pink;3880
leehom_wang;3881
jolin_tsai;3882
darkest_hour;3883
kalmah;3884
nightrage;3885
eppu_normaali;3886
the_outfield;3887
no_use_for_a_name;3888
pennywise;3889
callejon;3890
d_f_c;3891
our_last_night;3892
exaltasamba;3893
beth_carvalho;3894
jo_o_bosco;3895
marina_lima;3896
marisa_monte;3897
nando_reis;3898
natiruts;3899
ra_a_negra;3900
s_pra_contrariar;3901
zeca_pagodinho;3902
andr_hazes;3903
de_dijk;3904
arena;3905
iq;3906
sol_invictus;3907
new_found_glory;3908
adam_ant;3909
berlin;3910
hoodoo_gurus;3911
ultravox;3912
nik_kershaw;3913
squeeze;3914
the_aquabats;3915
the_fixx;3916
beat_crusaders;3917
cows;3918
conjunto_primavera;3919
peter_and_the_test_tube_babies;3920
sham_69;3921
the_adicts;3922
the_analogs;3923
instalok;3924
jacek_kaczmarski;3925
przemys_aw_gintrowski;3926
ada_band;3927
agnetha_f_ltskog;3928
ajda_pekkan;3929
al_bano;3930
alex_ubago;3931
alison_moyet;3932
alunni_del_sole;3933
anna_oxa;3934
bajm;3935
barclay_james_harvest;3936
blue_system;3937
brunner_brunner;3938
candan_er_etin;3939
christian_bautista;3940
clay_aiken;3941
clifford_t_ward;3942
daniel;3943
don_backy;3944
jesse_mccartney;3945
emma;3946
marcella_bella;3947
giorgio_gaber;3948
guus_meeuwis;3949
heinz_rudolf_kunze;3950
john_farnham;3951
ian_thomas;3952
i_n_karaca;3953
jennifer_rush;3954
jo_vally;3955
john_fogerty;3956
julian_lennon;3957
k3;3958
kid_abelha;3959
labv_l_gais_tips;3960
l_vi;3961
lea_salonga;3962
les_wampas;3963
magnus_uggla;3964
mango;3965
maria_mena;3966
massimo_ranieri;3967
max_gazz;3968
michael_learns_to_rock;3969
mietta;3970
mustafa_sandal;3971
nil_fer;3972
peter_frampton;3973
pr_ta_v_tra;3974
pur;3975
rettore;3976
ricchi_e_poveri;3977
rob_de_nijs;3978
sara_bareilles;3979
sasha;3980
sertab_erener;3981
sezen_aksu;3982
stadio;3983
stephen_sondheim;3984
tamara;3985
team_starkid;3986
toto_cutugno;3987
umberto_tozzi;3988
herman_brood;3989
wanessa;3990
zen_caf;3991
bonanza_banzai;3992
bodyjar;3993
bracket;3994
frenzal_rhomb;3995
goldfinger;3996
the_wonder_years;3997
useless_id;3998
camel;3999
hombres_g;4000
leo_jaime;4001
neal_morse;4002
spock_s_beard;4003
new_trolls;4004
opus;4005
piersi;4006
premiata_forneria_marconi;4007
superbus;4008
zmelkoow;4009
boysetsfire;4010
hot_water_music;4011
new_model_army;4012
the_monochrome_set;4013
big_big_train;4014
avantasia;4015
dark_moor;4016
dreamtale;4017
freedom_call;4018
mystic_prophecy;4019
nightmare;4020
rhapsody_of_fire;4021
royal_hunt;4022
sonata_arctica;4023
stratovarius;4024
symphony_x;4025
vision_divine;4026
the_wildhearts;4027
armia;4028
evergrey;4029
lana_lane;4030
nektar;4031
pain_of_salvation;4032
riverside;4033
beardfish;4034
echolyn;4035
eloy;4036
john_wetton;4037
medina_azahara;4038
mostly_autumn;4039
pendragon;4040
rafo_r_ez;4041
the_meteors;4042
against_me;4043
anti_flag;4044
banda_bassotti;4045
cadena_perpetua;4046
descendents;4047
distemper;4048
dogwood;4049
el_ltimo_ke_zierre;4050
farben_lehre;4051
toy_dolls;4052
junkies;4053
ksu;4054
la_polla_records;4055
la_vela_puerca;4056
leatherface;4057
less_than_jake;4058
mad_caddies;4059
millencolin;4060
punkreas;4061
reel_big_fish;4062
snfu;4063
stiff_little_fingers;4064
swingin_utters;4065
the_bouncing_souls;4066
the_casualties;4067
the_dickies;4068
the_lawrence_arms;4069
toyah;4070
gerald_levert;4071
gondwana;4072
los_aut_nticos_decadentes;4073
los_cafres;4074
los_pericos;4075
tryo;4076
rakim_ken_y;4077
billy_squier;4078
bj_rn_afzelius;4079
glay;4080
hunters_collectors;4081
john_entwistle;4082
jokke;4083
la_beriso;4084
los_rancheros;4085
los_tres;4086
maanam;4087
mikel_erentxun;4088
peter_wolf;4089
racoon;4090
rev_lver;4091
riblja_orba;4092
sandro;4093
gene_vincent;4094
the_baseballs;4095
stray_cats;4096
as_marcianas;4097
bruno_marrone;4098
cristiano_ara_jo;4099
fernando_sorocaba;4100
joint_venture;4101
serge_reggiani;4102
ska_p;4103
the_mighty_mighty_bosstones;4104
fu_manchu;4105
jay_jay_johanson;4106
psyche;4107
carlos_gardel;4108


================================================
FILE: jukebox/data/ids/v2_genre_ids.txt
================================================
unknown;0
classical;1
blues;2
hip;3
hop;4
dance;5
soul;6
hard;7
rock;8
jazz;9
reggae;10
country;11
alternative;12
soundtrack;13
pop;14
bluegrass;15
vocal;16
r;17
b;18
rap;19
christian;20
gospel;21
electronic;22
christmas;23
singer;24
songwriter;25
metal;26
n;27
roll;28
synthpop;29
electronica;30
mpb;31
movie;32
indie;33
new;34
wave;35
electro;36
house;37
folk;38
punk;39
french;40
contemporary;41
garage;42
soft;43
acoustic;44
nu;45
television;46
post;47
eurodance;48
progressive;49
gothic;50
classic;51
funk;52
disco;53
swing;54
trance;55
thrash;56
psychedelic;57
heavy;58
american;59
grunge;60
art;61
j;62
gangsta;63
brazilian;64
latin;65
southern;66
ska;67
crossover;68
hardcore;69
industrial;70
glam;71
melodic;72
ambient;73
musical;74
dream;75
experimental;76
americana;77
chanson;78
rockabilly;79
britpop;80
children;81
s;82
music;83
electropop;84
power;85
celtic;86
dark;87
comedy;88
doom;89
trip;90
lo;91
fi;92
metalcore;93
symphonic;94
fado;95
schlager;96
avant;97
garde;98
europop;99
reggaeton;100
emo;101
death;102
samba;103
deathcore;104
black;105
horrorcore;106
grindcore;107
worship;108
salsa;109
ebm;110
neofolk;111
sertanejo;112
deutschrock;113
norte;114
o;115
ax;116
k;117
tejano;118
medieval;119


================================================
FILE: jukebox/data/ids/v3_artist_ids.txt
================================================
beat farmers;1
aaron sprinkle;2
dianne reeves;3
lowe;4
harry manx;5
hail of bullets;6
ian gillan;7
andraé crouch;8
widespread panic;9
buddy wasisname and the other fellers;10
misery index;11
albert west;12
shadowland;13
homer & jethro;14
damien jurado;15
dead to fall;16
british sea power;17
pam tillis;18
ice cube;19
hey rosetta!;20
sophie zelmani;21
riverside;22
head automatica;23
diabulus in musica;24
unitopia;25
revolting cocks;26
zita swoon;27
train;28
ken stringfellow;29
in dying arms;30
red lorry yellow lorry;31
small faces;32
michael sweet;33
30 odd foot of grunts;34
white heart;35
baby bash;36
bad bones;37
meat beat manifesto;38
vengeance;39
naomi;40
koritni;41
the fall of troy;42
split enz;43
emmy rossum;44
les fleur de lys;45
beaux arts trio;46
david crowder band;47
mojave 3;48
girl talk;49
motorpsycho;50
burning point;51
the rutles;52
david and the giants;53
jinjer;54
sitd;55
pedro the lion;56
masta ace;57
alexz johnson;58
the floacist;59
after 7;60
anointed;61
holy soldier;62
sanchez;63
wovenhand;64
thea gilmore;65
3t;66
patty loveless;67
ghost;68
rie fu;69
chemical vocation;70
robbie nevil;71
the notorious b.i.g.;72
america;73
the boo radleys;74
in hearts wake;75
jack the lad;76
gerry and the pacemakers;77
he is we;78
cuban link;79
galaxie 500;80
something with numbers;81
the last shadow puppets;82
minor threat;83
joss stone;84
lynch mob;85
zino francescatti;86
genitorturers;87
kenny g;88
graveworm;89
field mob;90
opus;91
jordan smith;92
sheppard;93
the haunted;94
tiny ruins;95
jimmy somerville;96
acid reign;97
falling in reverse;98
ace troubleshooter;99
josh groban;100
adriano celentano;101
john oates;102
mind funk;103
christafari;104
clan of xymox;105
anti-flag;106
the blow monkeys;107
the troggs;108
priscilla ahn;109
fastball;110
raekwon;111
royal wood;112
agoraphobic nosebleed;113
borknagar;114
parker millsap;115
kelly osbourne;116
psyche;117
brokencyde;118
george clinton;119
the hollies;120
gabriel kahane;121
dnce;122
jimmy nail;123
harem scarem;124
pierre fournier;125
gideon;126
elitist;127
the sheepdogs;128
like moths to flames;129
the constructus corporation;130
impending doom;131
joe williams;132
bizzy bone;133
nelson;134
earth and fire;135
underoath;136
rancid;137
exile;138
vertical horizon;139
percy sledge;140
ill bill;141
59 times the pain;142
jimmy dean;143
gary jules;144
spellblast;145
renee olstead;146
barbra streisand;147
spin doctors;148
galt macdermot;149
takara;150
alan stivell;151
andy davis;152
babes in toyland;153
still remains;154
the donnas;155
bishop allen;156
the skids;157
rhiannon giddens;158
natalia;159
henson cargill;160
gov't mule;161
jools holland;162
kehlani;163
londonbeat;164
andy mineo;165
corky and the juice pigs;166
days away;167
a fine frenzy;168
roger mcguinn;169
lena horne;170
shark island;171
machinemade god;172
yank rachell;173
hurricane;174
his statue falls;175
that petrol emotion;176
764-hero;177
leprous;178
bridgit mendler;179
beggars opera;180
abbie gale;181
the the;182
y'akoto;183
sound tesselated;184
webb pierce;185
river whyless;186
ronnie dio & the prophets;187
rotting christ;188
duff mckagan;189
slim harpo;190
adele;191
valencia;192
the damned;193
miguel;194
chantal kreviazuk;195
the db's;196
cartel;197
enrique iglesias;198
skrewdriver;199
one less reason;200
lil wayne;201
chris norman;202
type o negative;203
trip shakespeare;204
jack blanchard & misty morgan;205
fishboy;206
ted leo and the pharmacists;207
lukas graham;208
the vapors;209
conway twitty & loretta lynn;210
sandie shaw;211
mark knopfler;212
through the eyes of the dead;213
art of dying;214
free;215
saint motel;216
sonreal;217
gatsbys american dream;218
elisa;219
marc anthony;220
joan baez;221
someone still loves you boris yeltsin;222
ugk;223
deep purple;224
mother mother;225
the contortionist;226
hot chelle rae;227
eric clapton;228
the doobie brothers;229
john michael montgomery;230
izegrim;231
jason collett;232
close your eyes;233
snog;234
ghostpoet;235
new order;236
the brian setzer orchestra;237
royal tusk;238
guy mitchell;239
heart;240
the free design;241
billie ray martin;242
toto;243
david mallett;244
donovan;245
the years gone by;246
element 101;247
fairyland;248
triggerfinger;249
mc eiht;250
ottorino respighi;251
the four aces;252
lil son jackson;253
emanuel feuermann;254
juliette and the licks;255
p!nk;256
gretchen wilson;257
the animals;258
locksley;259
redgum;260
young mc;261
metronomy;262
ashland high;263
esoteric;264
johnny hates jazz;265
paul anka;266
ethel merman;267
east west;268
the knife;269
curren$y;270
maaya sakamoto;271
aesthetic perfection;272
bobby helms;273
jimi jamison;274
darren styles;275
lorrie morgan;276
miley cyrus;277
dropdead;278
dr. sin;279
burt bacharach;280
nf;281
astronautalis;282
garth brooks;283
flyleaf;284
lake;285
mad sin;286
tiffany evans;287
mudvayne;288
máni svavarsson & magnús scheving;289
unexpect;290
collin raye;291
johnny reid;292
antonio vivaldi;293
creed;294
burning heads;295
legion of the damned;296
matt costa;297
the aluminum group;298
orgy;299
2nd chapter of acts;300
shotgun messiah;301
mentallo & the fixer;302
urma;303
carmen mcrae;304
skid row;305
john denver & the muppets;306
angie stone;307
rob rock;308
clara haskil;309
morandi;310
team dresch;311
the zombies;312
mr. vegas;313
royal trux;314
suzanne vega;315
the afters;316
skydiggers;317
sunday's best;318
gary numan;319
three dog night;320
jonas brothers;321
uncle acid & the deadbeats;322
pablo de sarasate;323
goretrade;324
strapping young lad;325
fat joe;326
robert johnson;327
fred astaire;328
anastacia;329
devendra banhart;330
into it. over it.;331
john norum;332
cross canadian ragweed;333
destroyer;334
michael christmas;335
phobia;336
jill tracy;337
dilana;338
royce da 5'9";339
les savy fav;340
the blow;341
kim wilde;342
parts & labor;343
dinah washington;344
maggie reilly;345
screaming trees;346
p.o.s.;347
atomic rooster;348
chamillionaire;349
the vaccines;350
tides of man;351
heathen;352
flame;353
brain drill;354
ac/dc;355
kraan;356
scary kids scaring kids;357
rosaline;358
john legend;359
of montreal;360
the brunettes;361
shelley fabares;362
volumes;363
george enescu;364
jacob's dream;365
heartless bastards;366
darin;367
andy stochansky;368
david geringas;369
lucius;370
steep;371
bobby vinton;372
shania twain;373
rudolf serkin;374
the zolas;375
municipal waste;376
spectral;377
arcade fire;378
steve hillage;379
the presets;380
gustav mahler;381
gary morris;382
laura cantrell;383
dean brody;384
roger miller;385
tammy wynette;386
joe cocker;387
iceage;388
apostasy;389
tait;390
reverend gary davis;391
neverending white lights;392
mimicking birds;393
barney;394
major parkinson;395
seal;396
wham!;397
tha dogg pound;398
big l;399
ian thomas;400
kronos;401
dom pachino;402
dead can dance;403
the number twelve looks like you;404
bert williams;405
bedhead;406
scott bradlee's postmodern jukebox;407
monuments;408
christine mcvie;409
moonspell;410
david & the citizens;411
*nsync;412
tiny tim;413
surface;414
k.flay;415
travis scott;416
lil jon;417
jo stafford;418
elo part ii;419
sugarland;420
eternal;421
the dingees;422
the summer set;423
soft machine;424
maanam;425
right said fred;426
chicks on speed;427
foetus;428
fiona apple;429
primer 55;430
the dillinger escape plan;431
seahaven;432
biga ranx;433
the insyderz;434
thirty seconds to mars;435
page france;436
howlin' wolf;437
wishbone ash;438
nina sky;439
jess moskaluke;440
stan rogers;441
b.o.b;442
cypecore;443
young dro;444
julian lennon;445
opeth;446
flying lotus;447
rodney atkins;448
sea of treachery;449
montrose;450
nellie mckay;451
vladimir horowitz;452
fatboy slim;453
mystic prophecy;454
little river band;455
brooklyn bounce;456
destroid;457
mary hopkin;458
elliott yamin;459
billy bragg;460
the doors;461
esham;462
cab calloway;463
thi'sl;464
the gothsicles;465
david coverdale;466
joe henry;467
the human abstract;468
alger "texas" alexander;469
diane cluck;470
fozzy;471
zero 7;472
cole swindell;473
gladys knight & the pips;474
donna fargo;475
cave in;476
eiffel 65;477
fates warning;478
decrepit birth;479
bad religion;480
poison clan;481
shane & shane;482
johnny shines;483
u.n.l.v.;484
seth lakeman;485
mindy smith;486
josh white;487
android lust;488
mylon lefevre;489
aselin debison;490
kaskade;491
the stills;492
alpha blondy;493
hughes turner project;494
spice girls;495
zz top;496
fairport convention;497
the ritchie family;498
eleanor friedberger;499
laura branigan;500
the jordanaires;501
the bacon brothers;502
atomic opera;503
spike jones;504
faith hill;505
mandy moore;506
jan werner;507
kittie;508
edwin;509
michael roe;510
leeland;511
sammy hagar;512
frankjavcee;513
the bangles;514
joey mcintyre;515
david rovics;516
across the border;517
odd future;518
bill ward;519
eddy grant;520
boa;521
nirvana;522
darzamat;523
ed sheeran;524
the prodigy;525
wang chung;526
balance problems;527
valient thorr;528
rupaul;529
roy clark;530
ross lynch;531
ugly kid joe;532
bettye lavette;533
harry belafonte;534
roy buchanan;535
miguel bosé;536
greenslade;537
living legends;538
bing crosby;539
adam sandler;540
the czars;541
bethany dillon;542
lea salonga;543
kmfdm;544
the diplomats;545
magneta lane;546
mira;547
g herbo;548
issues;549
beastie boys;550
marvin gaye;551
ashes you leave;552
mordred;553
israel houghton;554
screaming mechanical brain;555
unknown hinson;556
jack johnson;557
do;558
guns n' roses;559
october project;560
adore delano;561
jedi mind tricks;562
andrew peterson;563
millionaires;564
the beatnuts;565
gilby clarke;566
chickenfoot;567
the stranglers;568
rev theory;569
the mccalmans;570
drowning pool;571
kutt calhoun;572
dark fortress;573
the undertones;574
kevin gilbert;575
ffh;576
seven places;577
fury in the slaughterhouse;578
covenant;579
jason isbell;580
the creepshow;581
ashbury heights;582
shakey graves;583
brett young;584
lords of black;585
the higher;586
judy garland;587
boy harsher;588
status quo;589
iq;590
underworld;591
krizz kaliko;592
jefferson airplane;593
billy walker;594
jackie lomax;595
lizzy borden;596
keke wyatt;597
closterkeller;598
agnostic front;599
mary mary;600
birds in row;601
mugison;602
randy travis;603
glorior belli;604
amorphis;605
martika;606
jason webley;607
duke ellington;608
europe;609
the wilkinsons;610
a bullet for pretty boy;611
jodeci;612
sister hazel;613
atrocity;614
little willie john;615
alexander borodin;616
belouis some;617
big boi;618
newworldson;619
muddy waters;620
karen elson;621
lou bega;622
ivoryline;623
pain confessor;624
dolour;625
captain beefheart and his magic band;626
barclay james harvest;627
todd snider;628
enslaved;629
beach fossils;630
trick daddy;631
the black dahlia murder;632
rhapsody of fire;633
cemetary;634
patsy cline;635
figure four;636
manuel de falla;637
neil diamond;638
sworn enemy;639
elvenking;640
d.r.a.m.;641
sonya kitchell;642
flight of the conchords;643
eddie from ohio;644
talk talk;645
thoushaltnot;646
this is the kit;647
crimson glory;648
the bears;649
amenra;650
doris day;651
death in june;652
aaron copland;653
astrud gilberto;654
luna;655
fury;656
corpus christi;657
soul position;658
be'lakor;659
roy orbison;660
beyond dawn;661
el-p;662
watch tower bible and tract society;663
end of you;664
falconer;665
war from a harlots mouth;666
china;667
erra;668
brainpool;669
psyclon nine;670
quintorigo;671
the blind boys of alabama;672
mr. big;673
reverend horton heat;674
yehuda hanani;675
bell x1;676
john michael talbot;677
sigh;678
james brown;679
the murder of my sweet;680
courtney marie andrews;681
kate alexa;682
jasmine v;683
malevolent creation;684
said the whale;685
the indelicates;686
masterplan;687
every time i die;688
echosmith;689
barzin;690
thelma houston;691
masters of reality;692
tony iommi;693
sex gang children;694
chaos uk;695
casper & the cookies;696
johnny gill;697
alex harvey;698
lemar;699
andre matos;700
shinee;701
earl sweatshirt;702
sanctus real;703
chilliwack;704
lionel hampton;705
faith assembly;706
dave van ronk;707
frankie goes to hollywood;708
badfinger;709
gazpacho;710
centro-matic;711
donald lawrence;712
me first and the gimme gimmes;713
margot & the nuclear so and so's;714
big star;715
unisonic;716
the delgados;717
28 days;718
veil of maya;719
tammi terrell;720
the righteous brothers;721
david knopfler;722
ihsahn;723
high inergy;724
leaves' eyes;725
parov stelar;726
the retrosic;727
tony rose;728
tom lehrer;729
buggles;730
dennis brown;731
trashcan sinatras;732
polarkreis 18;733
mavis staples;734
xzibit;735
black moth super rainbow;736
yello;737
the servant;738
jana mashonee;739
burl ives;740
beyond the black;741
tsjuder;742
helen o'connell;743
golden gate quartet;744
debby boone;745
beady eye;746
acid king;747
westlife;748
big wreck;749
manowar;750
almôra;751
sarah darling;752
kenny white;753
delta spirit;754
curtis stigers;755
fun.;756
feist;757
karate;758
sleepy john estes;759
lamb;760
the roches;761
tuatha de danann;762
horrified;763
fort minor;764
jesse harris;765
berman;766
shannon & the clams;767
indigo girls;768
the matches;769
eric stewart;770
jls;771
kristin hersh;772
christon gray;773
loreena mckennitt;774
charley pride;775
jocelyn enriquez;776
helen baylor;777
hot tuna;778
skeeter davis;779
brenton brown;780
from good homes;781
leslie hall;782
natalia kills;783
chris thompson;784
circle of dust;785
transmetal;786
australian crawl;787
dying fetus;788
ratcat;789
the waifs;790
elevation worship;791
bryan ferry;792
camille;793
patty griffin;794
papa charlie jackson;795
grendel;796
chris murray;797
ryan stevenson;798
kiethevez;799
the glorious unseen;800
the ocean blue;801
conducting from the grave;802
negativland;803
del shannon;804
kathryn scott;805
this ending;806
bomfunk mc's;807
opera ix;808
steps;809
the birthday massacre;810
bronski beat;811
the burns sisters;812
willy deville;813
girlicious;814
death angel;815
the quakes;816
william fitzsimmons;817
fm;818
beggars & thieves;819
richard buckner;820
iwrestledabearonce;821
here come the mummies;822
only crime;823
saint saviour;824
millencolin;825
bigwig;826
benny mardones;827
wale;828
frida;829
björn ulvaeus & benny andersson;830
artrosis;831
neurosis;832
elis;833
spain;834
y&t;835
jeff lynne;836
syreeta;837
ryan adams;838
the lords of the new church;839
stephen stills;840
joel plaskett emergency;841
blood or whiskey;842
chenoa;843
l.t.d.;844
mariah carey;845
bauhaus;846
emma ruth rundle;847
billy bragg & wilco;848
sara evans;849
sara bareilles;850
cold world;851
pig destroyer;852
william elliott whitmore;853
eyes set to kill;854
agressor;855
skiltron;856
oysterband;857
versaemerge;858
now, now;859
chelsea grin;860
kaiser/mansfield;861
george thorogood & the destroyers;862
foghat;863
xtc;864
the nighthawks;865
eric burdon & war;866
the cross movement;867
the winery dogs;868
edyta górniak;869
hexrx;870
the cheeky girls;871
gazebo;872
canned heat;873
anne sophie mutter;874
sergei prokofiev;875
beneath the sky;876
jimmy reed;877
annihilator;878
the essex green;879
john west;880
bloodhound gang;881
beth hart & joe bonamassa;882
lacrimas profundere;883
jbm;884
commander cody and his lost planet airmen;885
smoking popes;886
seeed;887
moon martin;888
terence trent d'arby;889
the darkest of the hillside thickets;890
peter tosh;891
throwdown;892
the allman brothers band;893
caribou;894
axel rudi pell;895
ne-yo;896
joe ely;897
james blake;898
macklemore & ryan lewis;899
王力宏 (leehom wang);900
sam brown;901
van morrison;902
bella morte;903
josé feliciano;904
john popper;905
genghis tron;906
b3;907
phil keaggy;908
alesha dixon;909
peace, love and pitbulls;910
agents of mercy;911
elton john;912
etta james;913
plus one;914
spacemen 3;915
tommy castro;916
god forbid;917
abysmal dawn;918
cadillac blindside;919
a*teens;920
q5;921
mr. president;922
richmond fontaine;923
polly paulusma;924
hear'say;925
vertical church band;926
elvis costello;927
nickelback;928
the bolshoi;929
kenny loggins;930
bad manners;931
dear reader;932
tom robinson band;933
delerium;934
shirley caesar;935
trae;936
tesseract;937
500 miles to memphis;938
splitsville;939
band of susans;940
edl;941
half-a-mill;942
mechanical poet;943
clay aiken;944
mars argo;945
palisades;946
ian hunter;947
tracey thorn;948
jackson heights;949
downplay;950
old dominion;951
jamey johnson;952
deb talan;953
little walter;954
grits;955
maren ord;956
longfellow;957
layzie bone;958
cuby + blizzards;959
oszibarack;960
jillette johnson;961
the hundred in the hands;962
culture beat;963
frozen plasma;964
paul robeson;965
alt-j;966
darkside;967
other lives;968
blackfoot;969
the beta band;970
sally seltmann;971
foals;972
robin williamson;973
gluecifer;974
tristan prettyman;975
guy sebastian;976
pink guy;977
johann sebastian bach;978
methyl ethel;979
active child;980
john anderson;981
neil halstead;982
phantom planet;983
john d. loudermilk;984
greg long;985
the limeliters;986
peggy seeger;987
b la bart k;988
fallujah;989
shanice;990
darius danesh;991
post malone;992
audrey;993
parry gripp;994
barry white;995
ravenous;996
nils lofgren;997
paddy goes to holyhead;998
yefim bronfman;999
prefab sprout;1000
rose funeral;1001
the boxmasters;1002
eluveitie;1003
tony yayo;1004
michael learns to rock;1005
dawes;1006
lodger;1007
the joe perry project;1008
mandragora scream;1009
kristene dimarco;1010
today is the day;1011
riot;1012
skold;1013
pendragon;1014
el debarge;1015
the wanted;1016
the pharcyde;1017
jason derulo;1018
black light burns;1019
raintime;1020
lisa hannigan;1021
moby;1022
tedeschi trucks band;1023
breaking laces;1024
deströyer 666;1025
glenn hughes;1026
martin carthy and dave swarbrick;1027
the verve;1028
orleans;1029
the browns;1030
the drums;1031
between the buried and me;1032
the kingston trio;1033
jean shepard;1034
almah;1035
gare du nord;1036
the bellamy brothers;1037
sandra;1038
the twang;1039
gorod;1040
pandora;1041
mick jagger;1042
rebellion;1043
lauren hoffman;1044
poco;1045
clara smith;1046
oscar peterson;1047
slobberbone;1048
pitchshifter;1049
hania;1050
ziggy marley;1051
billie jo spears;1052
dum dum girls;1053
tricky;1054
lamont dozier;1055
slingshot dakota;1056
koko taylor;1057
judas priest;1058
idiot stare;1059
olivier messiaen;1060
akala;1061
it dies today;1062
fred eaglesmith;1063
jessie james;1064
moving mountains;1065
knights of the abyss;1066
gregorian;1067
mr weebl;1068
johnnie allan;1069
newton faulkner;1070
lonely kings;1071
al stewart;1072
broods;1073
zyklon;1074
basia;1075
damaged;1076
the reign of kindo;1077
black bomb a;1078
vic damone;1079
desert rose band;1080
swing out sister;1081
arjen anthony lucassen;1082
kerrs pink;1083
level 42;1084
the dollyrots;1085
giant squid;1086
jamie cullum;1087
fritz kalkbrenner;1088
the whispers;1089
pilot speed;1090
adhesive;1091
leona lewis;1092
hank williams;1093
chris botti;1094
creeper;1095
tori amos;1096
evocation;1097
gothminister;1098
mandolin orange;1099
namie amuro;1100
black sheep;1101
bleed the sky;1102
laura pausini;1103
consequence;1104
fever ray;1105
third day;1106
bed ich smetana;1107
the chain gang of 1974;1108
comes with the fall;1109
duff mckagan's loaded;1110
israel kamakawiwo'ole;1111
pixie lott;1112
bruce dickinson;1113
sonny landreth;1114
squeeze;1115
pennywise;1116
red café;1117
the autumn offering;1118
crashdïet;1119
neil young;1120
hi-tek;1121
hanson;1122
the blues brothers;1123
snow tha product;1124
ibeyi;1125
carpathian forest;1126
sheb wooley;1127
russian red;1128
american authors;1129
nick lachey;1130
jurassic 5;1131
the smashing pumpkins;1132
the lyric quartet;1133
howard shore;1134
julian lloyd webber;1135
syleena johnson;1136
wolf alice;1137
nico;1138
beach slang;1139
robin zander;1140
tanya tucker;1141
mac;1142
mogg/way;1143
shaggy 2 dope;1144
godley & creme;1145
n.e.r.d;1146
carbon leaf;1147
august burns red;1148
1349;1149
smokey robinson & the miracles;1150
röyksopp;1151
stephanie mills;1152
halestorm;1153
webb wilder;1154
chris stapleton;1155
paul oakenfold;1156
planetshakers;1157
andrew belle;1158
baha men;1159
memphis willie b.;1160
andromeda;1161
cynic;1162
gil shaham;1163
george;1164
toad the wet sprocket;1165
kiuas;1166
cat power;1167
metric;1168
saving jane;1169
patrick watson;1170
faith evans;1171
trivium;1172
kelly willis;1173
my darkest days;1174
the reason;1175
david gilmour;1176
roo panes;1177
sasha;1178
sympathy;1179
go periscope;1180
american me;1181
sex pistols;1182
jay-z;1183
tiësto;1184
jean sibelius;1185
war;1186
agent 51;1187
thrice;1188
amel larrieux;1189
the futureheads;1190
arlo guthrie;1191
the saw doctors;1192
hungry lucy;1193
michelle malone;1194
kay starr;1195
adestria;1196
ab-soul;1197
the merry wives of windsor;1198
bo carter;1199
charlotte gainsbourg;1200
suicidal angels;1201
zolof the rock & roll destroyer;1202
brian kennedy;1203
jess glynne;1204
the enemy;1205
crystal lewis;1206
hopesfall;1207
helmet;1208
alicia keys;1209
tom milsom;1210
keith green;1211
macy gray;1212
libera;1213
the myriad;1214
steven wilson;1215
dennis wilson;1216
war of ages;1217
elliott brood;1218
ace of base;1219
elefant;1220
ryker's;1221
apologetix;1222
stream of passion;1223
interface;1224
susan tedeschi;1225
circle takes the square;1226
team starkid;1227
dillinger four;1228
jt the bigga figga;1229
mf doom;1230
carrie newcomer;1231
big & rich;1232
caliban;1233
project pat;1234
joan osborne;1235
juicy j;1236
beady belle;1237
victory;1238
syd barrett;1239
liv kristine;1240
anything box;1241
devin townsend project;1242
amanda marshall;1243
die krupps;1244
sonny boy williamson ii;1245
assuming we survive;1246
soko;1247
much the same;1248
faunts;1249
sally oldfield;1250
the bottle rockets;1251
kaledon;1252
lighthouse family;1253
the mākaha sons;1254
dark moor;1255
antagonist a.d.;1256
lee dorsey;1257
merle travis;1258
cursed;1259
pete rock;1260
brother cane;1261
...and oceans;1262
ancient bards;1263
the searchers;1264
cappella;1265
iced earth;1266
marmaduke duke;1267
colin meloy;1268
venomous concept;1269
anne murray;1270
jay farrar;1271
the town pants;1272
chubby checker;1273
tori kelly;1274
the dodos;1275
chris spedding;1276
pmtoday;1277
thunder lord;1278
bloodbath;1279
dear criminals;1280
aaron carter;1281
shai hulud;1282
lil skies;1283
jeniferever;1284
jucifer;1285
the new pornographers;1286
fabrizio faniello;1287
fleurie;1288
mirror of deception;1289
the real mckenzies;1290
q-tip;1291
neuraxis;1292
rick derringer;1293
crystal kay;1294
robert randolph & the family band;1295
naâman;1296
little jimmy dickens;1297
sir mix-a-lot;1298
krs-one;1299
daniil trifonov;1300
morning glory;1301
cheap trick;1302
king tee;1303
angela mccluskey;1304
derdian;1305
heitor villa lobos;1306
nina;1307
tyrese;1308
dope stars inc.;1309
vendetta red;1310
pussycat;1311
benjamin gibbard;1312
nine;1313
de lux;1314
william kapell;1315
vomito negro;1316
a$ap rocky;1317
mo b. dick;1318
gamma ray;1319
sarah vaughan;1320
georges bizet;1321
acid ranch;1322
deadline;1323
lady sovereign;1324
flipsyde;1325
jim jackson;1326
violent femmes;1327
emeli sandé;1328
la bionda;1329
sammie;1330
joe budden;1331
butterfly boucher;1332
martha and the muffins;1333
the faction;1334
sohn;1335
the cyrkle;1336
the butchies;1337
magna-fi;1338
jennifer nettles;1339
udora;1340
missing persons;1341
hey mercedes;1342
cracker;1343
alphaville;1344
behexen;1345
the tremeloes;1346
the power station;1347
mark collie;1348
janet jackson;1349
out out;1350
mystery;1351
warrant;1352
kamelot;1353
max webster;1354
kristian stanfill;1355
ken hensley;1356
lyfe jennings;1357
gusgus;1358
after the burial;1359
sam tsui;1360
tony orlando & dawn;1361
guy clark;1362
electric wizard;1363
vanna;1364
nicole c. mullen;1365
prozak;1366
spahn ranch;1367
william mcdowell;1368
brandy clark;1369
jamie lidell;1370
grief;1371
the maccabees;1372
weedeater;1373
marlango;1374
hirax;1375
elvis presley;1376
aly & aj;1377
ellis paul;1378
we the kings;1379
bound stems;1380
symphony x;1381
randy stonehill;1382
man;1383
centinex;1384
blaine larsen;1385
aqueduct;1386
june of 44;1387
we came as romans;1388
natasha bedingfield;1389
divinefire;1390
alan hull;1391
outlawz;1392
seasick steve;1393
nerina pallot;1394
sandy denny;1395
glenn gould;1396
matt dusk;1397
strawberry alarm clock;1398
stryper;1399
stefanie heinzmann;1400
miracle of sound;1401
pop unknown;1402
rahsaan patterson;1403
yellowman;1404
the clay people;1405
the good life;1406
leroy carr;1407
sonata arctica;1408
tom odell;1409
that handsome devil;1410
birdy;1411
jimmy cliff;1412
kompressor;1413
deadmau5;1414
linda ronstadt;1415
ufx;1416
blind blake;1417
jody watley;1418
razed in black;1419
dave clark five;1420
diddy;1421
anacrusis;1422
dropkick murphys;1423
doyle bramhall;1424
lisa "left eye" lopes;1425
ll cool j;1426
deadsoul tribe;1427
sunset rubdown;1428
heaven 17;1429
pavlov's dog;1430
billie eilish;1431
dido;1432
deathboy;1433
antaeus;1434
dreamland;1435
the beloved;1436
the arrogant worms;1437
closet monster;1438
eartha kitt;1439
radu lupu;1440
cinderella effect;1441
anathema;1442
for the fallen dreams;1443
shola ama;1444
big thief;1445
armand van helden;1446
fake?;1447
phil harris;1448
loggins & messina;1449
promise of redemption;1450
the walkabouts;1451
eisley;1452
big joe turner;1453
axenstar;1454
hank williams iii;1455
b.o.b.;1456
wallis bird;1457
as i lay dying;1458
trey songz;1459
charlotte hatherley;1460
nneka;1461
anúna;1462
asia;1463
mcauley schenker group;1464
rose polenzani;1465
john mayer;1466
grant-lee phillips;1467
taj mahal;1468
ruston kelly;1469
melissa manchester;1470
lenka;1471
kira isabella;1472
hap palmer;1473
taken by trees;1474
anni b sweet;1475
planet p project;1476
the velvet underground;1477
hoagy carmichael;1478
testament;1479
the 3rd and the mortal;1480
fever tree;1481
iris dement;1482
happy days;1483
prostitute disfigurement;1484
the andrews sisters;1485
eyedea & abilities;1486
mipso;1487
dinu lipatti;1488
josephine foster;1489
stephen sondheim;1490
little big;1491
kip winger;1492
the voidz;1493
matchbook romance;1494
green carnation;1495
xiu xiu;1496
the hard-ons;1497
glasseater;1498
bloodlined calligraphy;1499
yodelice;1500
infectious grooves;1501
alex lloyd;1502
overcome;1503
tom dice;1504
rorschach test;1505
nat & alex wolff;1506
van halen;1507
robert earl keen;1508
dave hollister;1509
rob thomas;1510
chanté moore;1511
lyle lovett;1512
aaron lines;1513
my life with the thrill kill kult;1514
honne;1515
the flatliners;1516
eric b. & rakim;1517
old man gloom;1518
new found glory;1519
louis logic;1520
murray perahia;1521
bass drum of death;1522
702;1523
jamie o'neal;1524
the sheila divine;1525
chris ledoux;1526
huski;1527
wolf parade;1528
emmure;1529
defiance, ohio;1530
nine below zero;1531
jamie winchester;1532
cece winans;1533
splashdown;1534
the strumbellas;1535
otis spann;1536
juice wrld;1537
brett anderson;1538
the ambassador;1539
arturo benedetti michelangeli;1540
spinal tap;1541
wild strawberries;1542
mystikal;1543
acumen nation;1544
the ex;1545
pearls before swine;1546
the ink spots;1547
mayhem;1548
e;1549
sara noxx;1550
kid cudi;1551
neko case;1552
dethklok;1553
vast;1554
lila mccann;1555
boxcar willie;1556
the soundtrack of our lives;1557
the deep dark woods;1558
johnossi;1559
sinéad lohan;1560
tragic black;1561
public enemy;1562
toby keith;1563
jesus on extasy;1564
after forever;1565
lightnin' hopkins;1566
any given day;1567
terminal choice;1568
head east;1569
sturgill simpson;1570
cavalera conspiracy;1571
pitboss 2000;1572
broken social scene;1573
hanzel und gretyl;1574
kajagoogoo;1575
village people;1576
the most serene republic;1577
ernest tubb and loretta lynn;1578
arno;1579
ninja sex party;1580
bts;1581
charlie simpson;1582
criss angel;1583
local natives;1584
elliphant;1585
right away, great captain!;1586
frankie valli;1587
dion;1588
j. tillman;1589
krayzie bone;1590
atlanta rhythm section;1591
grimes;1592
slapp happy;1593
voivod;1594
ella fitzgerald;1595
maddy prior;1596
exo;1597
bobby v;1598
cherry glazerr;1599
jackie deshannon;1600
nancy sinatra;1601
dragonheart;1602
the casualties;1603
polysics;1604
cliff richard;1605
sleeping with sirens;1606
dew-scented;1607
diva destruction;1608
jens lekman;1609
charlie landsborough;1610
born ruffians;1611
joe diffie;1612
sonny terry;1613
the black angels;1614
envy;1615
mary lou lord;1616
of mice & men;1617
tina arena;1618
candlemass;1619
stacie orrico;1620
too $hort;1621
bombay bicycle club;1622
the dc3;1623
marié digby;1624
frank ocean;1625
slash's snakepit;1626
akcent;1627
mortal sin;1628
the rural alberta advantage;1629
alter bridge;1630
caligula's horse;1631
diana ross;1632
strand of oaks;1633
austrian death machine;1634
ana popovic;1635
skepticism;1636
shirley horn;1637
siva six;1638
valentine wolfe;1639
titanic sinclair;1640
joshua perahia;1641
gallows;1642
luxt;1643
nick lowe;1644
clarence "gatemouth" brown;1645
sweet noise;1646
thank you scientist;1647
cherish the ladies;1648
ten years after;1649
frost;1650
first blood;1651
nightwish;1652
musiq soulchild;1653
big bill broonzy;1654
benjamin francis leftwich;1655
phantom blue;1656
dune;1657
hangnail;1658
harold melvin & the blue notes;1659
big d and the kids table;1660
zeromancer;1661
jello biafra;1662
yg;1663
katharine mcphee;1664
quinn xcii;1665
mississippi john hurt;1666
new trolls;1667
wizards;1668
kix;1669
slapshot;1670
tor miller;1671
xentrifuge;1672
toni braxton;1673
finch;1674
caroline herring;1675
dreadful shadows;1676
ringworm;1677
cory asbury;1678
dezperadoz;1679
mac davis;1680
dionysus;1681
michael w. smith;1682
cold as life;1683
peabo bryson;1684
k.d. lang;1685
grammatrain;1686
jorn;1687
no-man;1688
nocturne;1689
the screaming jets;1690
charli xcx;1691
tactical sekt;1692
oomph!;1693
atlas sound;1694
the idle race;1695
helstar;1696
toxik;1697
jesus culture;1698
cissy houston;1699
catman cohen;1700
strike anywhere;1701
toni childs;1702
mika;1703
theory in practice;1704
lucinda williams;1705
lord belial;1706
raul midón;1707
ida;1708
trisha yearwood;1709
bad astronaut;1710
the runaways;1711
a day to remember;1712
milk inc.;1713
fisher;1714
king kobra;1715
ma rainey;1716
ralph stanley;1717
andr watts;1718
gregory and the hawk;1719
the temptations;1720
flaw;1721
terror squad;1722
black;1723
bolt thrower;1724
matt goss;1725
nappy roots;1726
a$ap ferg;1727
shawn mendes;1728
alison krauss & union station;1729
eric johnson;1730
ashley monroe;1731
old crow medicine show;1732
kelis;1733
bad habit;1734
van canto;1735
the birthday party;1736
rowland s. howard;1737
marsha ambrosius;1738
little dragon;1739
k'jon;1740
jack white;1741
cactus;1742
daft punk;1743
jon oliva's pain;1744
a$ap mob;1745
emika;1746
lazar berman;1747
mark kozelek;1748
ice-t;1749
little richard;1750
elijah blake;1751
the laurie berkner band;1752
clara luzia;1753
ma$e;1754
dikembe;1755
boz scaggs;1756
antony and the johnsons;1757
autopilot off;1758
big audio dynamite;1759
grant lee buffalo;1760
john reuben;1761
mission of burma;1762
unloco;1763
transit;1764
marina and the diamonds;1765
alela diane;1766
the sorrow;1767
gossip;1768
emerald;1769
lucille bogan;1770
frank zappa;1771
the coathangers;1772
captain jack;1773
stellastarr*;1774
david kersh;1775
broken bones;1776
hayley kiyoko;1777
wire;1778
thurston moore;1779
cop shoot cop;1780
the white buffalo;1781
bad books;1782
irene cara;1783
gorillaz;1784
the gap band;1785
lead belly;1786
cassadee pope;1787
elvis depressedly;1788
curtis mayfield;1789
waylon;1790
the gun club;1791
behemoth;1792
she wants revenge;1793
the crüxshadows;1794
týr;1795
dan fogelberg;1796
stan ridgway;1797
blind witness;1798
deerhunter;1799
agalloch;1800
grand puba;1801
heavens edge;1802
the acacia strain;1803
beseech;1804
sting;1805
rival sons;1806
henry jamison;1807
the black lillies;1808
the cog is dead;1809
benno moiseiwitsch;1810
nazz;1811
greg brown;1812
reel big fish;1813
the muppets;1814
threshold;1815
tracie spencer;1816
cimorelli;1817
alexandra burke;1818
whigfield;1819
eminem;1820
wolverine;1821
grave maker;1822
example;1823
lambchop;1824
madeleine peyroux;1825
bondage fairies;1826
darren hanlon;1827
lyria;1828
capital kings;1829
john pizzarelli;1830
high on fire;1831
stereolab;1832
machines of loving grace;1833
kim carnes;1834
so many dynamos;1835
phony ppl;1836
san cisco;1837
emilie autumn;1838
pietro locatelli;1839
solefald;1840
kellie pickler;1841
the maranatha! singers;1842
danbert nobacon;1843
soulja boy;1844
mario winans;1845
camille saint sa ns;1846
indecision;1847
lasgo;1848
karyn white;1849
hurts;1850
between the trees;1851
nat king cole;1852
front 242;1853
johannes brahms;1854
the national;1855
shane barnard;1856
chris knox;1857
poison girls;1858
oh, sleeper;1859
hell is for heroes;1860
pfr;1861
john lee hooker;1862
gramatik;1863
moya brennan;1864
pop evil;1865
scar symmetry;1866
silly wizard;1867
the ventures;1868
steve holy;1869
devotchkas;1870
ignite;1871
wayne newton;1872
the gufs;1873
decapitated;1874
billie the vision & the dancers;1875
thestart;1876
idle cure;1877
jill sobule;1878
the soviettes;1879
the irish rovers;1880
münchener freiheit;1881
pentagram;1882
masterboy;1883
return;1884
minus story;1885
satan;1886
byron cage;1887
listener;1888
the mars volta;1889
jennifer love hewitt;1890
eartha;1891
no doctors;1892
steve hackett;1893
elvin bishop;1894
k'naan;1895
ruby;1896
princess nokia;1897
ellie goulding;1898
material issue;1899
fun boy three;1900
circulatory system;1901
the road hammers;1902
brian mcknight;1903
"weird al" yankovic;1904
the wailers;1905
kurupt;1906
criminal;1907
bal-sagoth;1908
lou reed;1909
queen;1910
rudimentary peni;1911
a great big world;1912
jarboe;1913
augie march;1914
jim guthrie;1915
emma bunton;1916
aaron watson;1917
έλενα παπαρίζου;1918
blue cheer;1919
mark harris;1920
alexander scriabin;1921
andrew gold;1922
climax blues band;1923
conjure one;1924
blind guardian;1925
stephen schwartz;1926
martha wainwright;1927
lil' flip;1928
katherine jenkins;1929
amy shark;1930
styles p;1931
new model army;1932
franz liszt;1933
ben weasel;1934
john k. samson;1935
jason anderson;1936
sheena easton;1937
inna;1938
mayer hawthorne;1939
easy rider;1940
eddy arnold;1941
zoegirl;1942
jimmy barnes;1943
tear da club up thugs;1944
daughter;1945
buzzcocks;1946
freezepop;1947
steve aoki;1948
kierra sheard;1949
captain beyond;1950
sabrina claudio;1951
mc shan;1952
the j. geils band;1953
camel;1954
indica;1955
attack in black;1956
the jam;1957
bananarama;1958
al jarreau;1959
eric carmen;1960
gwar;1961
us the duo;1962
ziggy alberts;1963
amir obè;1964
25 ta life;1965
newsboys;1966
chris knight;1967
marlene dietrich;1968
112;1969
wonderwall;1970
officer negative;1971
enochian crescent;1972
jupiter one;1973
whitecross;1974
tim mcgraw;1975
bile;1976
brandi carlile;1977
danzig;1978
twiztid;1979
david baerwald;1980
jimmy webb;1981
mamas gun;1982
harry chapin;1983
holy ghost!;1984
stephen hough;1985
k.t. oslin;1986
robin trower;1987
us3;1988
esben and the witch;1989
schiller;1990
god lives underwater;1991
hayseed dixie;1992
polluted inheritance;1993
nicki minaj;1994
robin gibb;1995
pinback;1996
bobbie gentry;1997
freakwater;1998
kite;1999
kid down;2000
georges cziffra;2001
sivert høyem;2002
conor oberst;2003
i'm from barcelona;2004
shining;2005
henry purcell;2006
jeremy messersmith;2007
goat of mendes;2008
catch 22;2009
vampire rodents;2010
the go-go's;2011
noah gundersen;2012
diablo swing orchestra;2013
uncle dave macon;2014
john eddie;2015
calvin harris;2016
roomful of blues;2017
bane;2018
sam lewis;2019
alejandro escovedo;2020
kekal;2021
manticora;2022
frankie lymon & the teenagers;2023
randy meisner;2024
carcass;2025
nile;2026
circus maximus;2027
non phixion;2028
eric woolfson;2029
starship;2030
kaiser chiefs;2031
sexy sadie;2032
confide;2033
boy meets girl;2034
henryk szeryng;2035
wynter gordon;2036
jay-jay johanson;2037
hannah fury;2038
mike jones;2039
bebe & cece winans;2040
trip lee;2041
tigers jaw;2042
hector berlioz;2043
minus the bear;2044
johnny winter;2045
my sister's machine;2046
gilbert o'sullivan;2047
jean michel jarre;2048
jody miller;2049
skylark;2050
the poodles;2051
fear of domination;2052
the donefors;2053
zed yago;2054
martha argerich;2055
faun fables;2056
joy electric;2057
the grass roots;2058
cygnosic;2059
disfear;2060
crosby, stills, nash & young;2061
69 boyz;2062
celesty;2063
the forecast;2064
the magnetic fields;2065
miranda lambert;2066
veto;2067
the offspring;2068
bobby goldsboro;2069
calibretto;2070
common rider;2071
10cc;2072
tim hughes;2073
bald vulture;2074
david cassidy;2075
simple minds;2076
little man tate;2077
carla thomas;2078
cher lloyd;2079
cameo;2080
tko;2081
george frideric handel;2082
immaculate fools;2083
kero kero bonito;2084
john sebastian;2085
joe nichols;2086
classified;2087
velvet revolver;2088
skip the use;2089
ratt;2090
gilberto gil;2091
the miracles;2092
the subways;2093
screwed up click;2094
fm static;2095
further seems forever;2096
barry louis polisar;2097
antiskeptic;2098
den harrow;2099
eleni mandell;2100
mungo jerry;2101
fucked up;2102
jermaine jackson;2103
freedom call;2104
phillip phillips;2105
van der graaf generator;2106
h-town;2107
madvillain;2108
pietasters;2109
sonicflood;2110
thomas dolby;2111
scud mountain boys;2112
orphanage;2113
eleventyseven;2114
peter hammill;2115
kaysha;2116
skyharbor;2117
south border;2118
mad marge and the stonecutters;2119
ben kweller;2120
monstrosity;2121
young galaxy;2122
pusha t;2123
john wesley;2124
glass harp;2125
blue system;2126
pain of salvation;2127
film school;2128
plasmatics;2129
down by law;2130
circus of power;2131
left alone;2132
girls under glass;2133
saga;2134
redemption;2135
memphis may fire;2136
aion;2137
kj-52;2138
gaia epicus;2139
stacy lattisaw;2140
larry norman;2141
david lee roth;2142
willie nelson;2143
melody gardot;2144
in the midst of lions;2145
iron butterfly;2146
katy rose;2147
izz;2148
sevyn streeter;2149
pendulum;2150
cult of luna;2151
avalanche city;2152
brother firetribe;2153
lena;2154
steve vai;2155
motosierra;2156
the bouncing souls;2157
maher zain;2158
colbie caillat;2159
blaze ya dead homie;2160
kyung wha chung;2161
delta goodrem;2162
lena katina;2163
the veronicas;2164
asleep at the wheel;2165
anne clark;2166
jimmie vaughan;2167
wildpath;2168
autumnblaze;2169
lefty frizzell;2170
the weakerthans;2171
real estate;2172
dj drama;2173
joji;2174
little milton;2175
beholder;2176
grace jones;2177
wolfheart;2178
kt tunstall;2179
robert forster;2180
lana lane;2181
meat loaf;2182
mark chesnutt;2183
autumn;2184
bronze nazareth;2185
ladysmith black mambazo;2186
memphis minnie;2187
the plimsouls;2188
36 crazyfists;2189
private line;2190
nikolai medtner;2191
jaden smith;2192
chris cagle;2193
beatnik termites;2194
bernard butler;2195
emin;2196
major accident;2197
the mayan factor;2198
stonewall jackson;2199
rat boy;2200
das efx;2201
roxette;2202
tears for fears;2203
hank locklin;2204
n.w.a;2205
hadouken!;2206
king diamond;2207
axe;2208
lolo;2209
rufus thomas;2210
montgomery gentry;2211
the front bottoms;2212
gabrielle;2213
beng beng cocktail;2214
wyclef jean;2215
cœur de pirate;2216
faithless;2217
heavy heavy low low;2218
cutting crew;2219
limbeck;2220
saukrates;2221
artie shaw;2222
jojo;2223
red rider;2224
nikolai rimsky korsakov;2225
choking victim;2226
the scene aesthetic;2227
fight amp;2228
émilie simon;2229
christoph willibald gluck;2230
boney m.;2231
ookla the mok;2232
beck;2233
mcfly;2234
hey ocean!;2235
dixie chicks;2236
lindsay lohan;2237
from autumn to ashes;2238
giovanni battista pergolesi;2239
boondox;2240
wilhelm kempff;2241
jay & the americans;2242
the game;2243
night in gales;2244
blazin' squad;2245
lords of acid;2246
darlingside;2247
squealer;2248
the ghost inside;2249
abbey lincoln;2250
cake bake betty;2251
all that remains;2252
bunny wailer;2253
culture club;2254
jim croce;2255
vixen;2256
chelsea wolfe;2257
the zutons;2258
the ship;2259
timbuk3;2260
wise guys;2261
gabriel faur ;2262
prototype;2263
great big sea;2264
celtic frost;2265
stage dolls;2266
vinnie paz;2267
symbols;2268
casanova;2269
horse the band;2270
raul seixas;2271
phil collins;2272
belphegor;2273
a whisper in the noise;2274
rufio;2275
trin-i-tee 5;2276
isaac hayes;2277
i am ghost;2278
gregg allman;2279
three 6 mafia;2280
t. mills;2281
as cities burn;2282
fear factory;2283
good riddance;2284
daniel m ller schott;2285
stromkern;2286
spirogyra;2287
resurrection band;2288
jamie's elsewhere;2289
one bad pig;2290
renaldo & the loaf;2291
danny kaye;2292
belle and sebastian;2293
cryptopsy;2294
yeah yeah yeahs;2295
the jayhawks;2296
mel tillis;2297
yazoo;2298
colin hay;2299
the desert sessions;2300
the dismemberment plan;2301
sammy adams;2302
discipline;2303
secrets of the moon;2304
tapping the vein;2305
army of the pharaohs;2306
big bad voodoo daddy;2307
professor green;2308
roine stolt;2309
wu-tang clan;2310
tracy byrd;2311
big maybelle;2312
aage kvalbein;2313
club nouveau;2314
mose allison;2315
edenbridge;2316
the foundations;2317
darkseed;2318
dear and the headlights;2319
twila paris;2320
ed bruce;2321
damien dempsey;2322
starlight mints;2323
accessory;2324
ador dorath;2325
boogie down productions;2326
stars;2327
creedence clearwater revival;2328
dwight yoakam;2329
4him;2330
gorguts;2331
demons & wizards;2332
陰陽座;2333
barstool prophets;2334
paolo nutini;2335
asking alexandria;2336
kid rock;2337
jerusalem;2338
them;2339
robert wyatt;2340
the weeknd;2341
jeff deyo;2342
beartooth;2343
pilot;2344
jenny hval;2345
the devil wears prada;2346
karine polwart;2347
maxwell;2348
michael rabin;2349
babyface;2350
white lies;2351
groundation;2352
nina hagen;2353
tracy lawrence;2354
ring of fire;2355
koda kumi;2356
umphrey's mcgee;2357
polkadot cadaver;2358
alabama;2359
edge of sanity;2360
thyrane;2361
dick brave & the backbeats;2362
pantera;2363
françoise hardy;2364
whirlwind heat;2365
lightning seeds;2366
disrupt;2367
true colors;2368
vérité;2369
jeremy riddle;2370
billy crawford;2371
royal hunt;2372
apache indian;2373
peggy lee;2374
the flight of sleipnir;2375
hard-fi;2376
ladytron;2377
massacration;2378
john browning;2379
grant hart;2380
chris hillman;2381
bear vs. shark;2382
descendents;2383
mark ronson;2384
nofx;2385
rich kids on lsd;2386
jordin sparks;2387
vv brown;2388
michael franks;2389
aram khachaturian;2390
bathory;2391
jessica pratt;2392
mormon tabernacle choir;2393
dan hill;2394
laleh;2395
curl up and die;2396
jonah matranga;2397
anna tsuchiya;2398
jack howard;2399
nick heyward;2400
steven curtis chapman;2401
pimp c;2402
chef'special;2403
super junior-d&e;2404
willie nelson & wynton marsalis;2405
lionel richie;2406
martyr;2407
eagles;2408
project pitchfork;2409
crosby, stills & nash;2410
slim thug;2411
frankie j;2412
the stereo;2413
blues pills;2414
fog;2415
bow wow wow;2416
gowan;2417
king charles;2418
sviatoslav richter;2419
m. ward;2420
freshlyground;2421
ed schrader's music beat;2422
company flow;2423
painbastard;2424
buddy jewell;2425
mckinney's cotton pickers;2426
mandisa;2427
watershed;2428
the wailin' jennys;2429
faderhead;2430
mötley crüe;2431
stormwarrior;2432
keb' mo';2433
jimmy witherspoon;2434
the lucksmiths;2435
faster pussycat;2436
knightowl;2437
keri hilson;2438
mxpx;2439
the nice;2440
ritual;2441
oxymoron;2442
catamenia;2443
vince gill;2444
bill fay;2445
toy-box;2446
love unlimited;2447
mc frontalot;2448
dolly parton;2449
papoose;2450
demi lovato;2451
rhodes;2452
big joe williams;2453
charlie rich;2454
extreme noise terror;2455
wilco;2456
low roar;2457
carrie underwood;2458
when particles collide;2459
3 feet smaller;2460
the supremes & the four tops;2461
the police;2462
empress of;2463
herman brood;2464
be your own pet;2465
kill switch...klick;2466
superjoint ritual;2467
lynyrd skynyrd;2468
al green;2469
hilltop hoods;2470
ghostlimb;2471
chamber - l'orchestre de chambre noir;2472
thou;2473
the icicle works;2474
debbie harry;2475
victoria beckham;2476
clare maguire;2477
cause & effect;2478
marion;2479
the lawrence arms;2480
x;2481
the russian futurists;2482
maggie rose;2483
jim cuddy;2484
the turtles;2485
as blood runs black;2486
the night flight orchestra;2487
graziano romani;2488
alcatrazz;2489
lp;2490
jon foreman;2491
popa chubby;2492
alfred cortot;2493
eddi reader;2494
plushgun;2495
sugababes;2496
sesame street;2497
johnny cash;2498
joe walsh;2499
jeanette biedermann;2500
ashford & simpson;2501
sleep;2502
hum;2503
papas fritas;2504
fifteen;2505
denzel curry;2506
phedora;2507
anders osborne;2508
the morning of;2509
cash rivers and the sinners;2510
blueprint;2511
slim dusty;2512
acappella;2513
katatonia;2514
diary of dreams;2515
deine lakaien;2516
rick wakeman;2517
freedom fry;2518
fruupp;2519
jean baptiste lully;2520
october 31;2521
sinister;2522
jesse mccartney;2523
vigilantes of love;2524
elegant machinery;2525
corbin bleu;2526
gza/genius;2527
alison wonderland;2528
kelly price;2529
destruction;2530
fractured;2531
david ford;2532
julie miller;2533
joe mcelderry;2534
phinehas;2535
sylvan esso;2536
basement jaxx;2537
shearwater;2538
deadstar assembly;2539
wildbirds & peacedrums;2540
patty smyth;2541
short stack;2542
jupiter apple;2543
raven-symoné;2544
misery loves co.;2545
the motels;2546
steppenwolf;2547
vic chesnutt;2548
asha;2549
the almighty;2550
lauryn hill;2551
khalid;2552
great lake swimmers;2553
ashanti;2554
蔡依林 (jolin tsai);2555
donnie munro;2556
john fogerty;2557
kimberley locke;2558
keith moon;2559
janis joplin;2560
reamonn;2561
psychopathic rydas;2562
air supply;2563
ashlee simpson;2564
tame impala;2565
perzonal war;2566
adem;2567
franz ferdinand;2568
martina mcbride;2569
natalie cole;2570
jason falkner;2571
rosalyn tureck;2572
missy higgins;2573
beach house;2574
current 93;2575
a hill to die upon;2576
ghoti hook;2577
forgive durden;2578
gojira;2579
jay ferguson;2580
eilera;2581
kate nash;2582
foxes;2583
at the drive-in;2584
job for a cowboy;2585
from ashes to new;2586
brooke white;2587
dwele;2588
vacuum;2589
samson fran ois;2590
loverboy;2591
black country communion;2592
paul revere and the raiders;2593
chris brown;2594
tommy lee;2595
vengaboys;2596
joy zipper;2597
bella hardy;2598
blondie;2599
brian setzer;2600
mc zulu;2601
building 429;2602
cindy bullens;2603
cyne;2604
b*witched;2605
the black eyed peas;2606
paul weller;2607
gun;2608
illogic;2609
men at work;2610
luv';2611
shadows fall;2612
the crest;2613
zendaya;2614
runemagick;2615
john farnham;2616
josh ritter;2617
funeral;2618
wanda jackson;2619
waylon jennings;2620
falco;2621
marlon williams;2622
seals & crofts;2623
kurt elling;2624
neon hitch;2625
the swift;2626
sub focus;2627
bent knee;2628
trevor rabin;2629
bazzi;2630
jim's big ego;2631
andrew huang;2632
glenn miller;2633
donna summer;2634
the meteors;2635
ugly duckling;2636
李玟 (coco lee);2637
bruno mars;2638
sambassadeur;2639
the neville brothers;2640
terrorgruppe;2641
the lone bellow;2642
deer tick;2643
bentley jones;2644
carter the unstoppable sex machine;2645
connie talbot;2646
adult.;2647
crosby & nash;2648
ayọ;2649
jay rock;2650
crystal castles;2651
tyrone wells;2652
heavens to betsy;2653
the limousines;2654
chris garneau;2655
13th floor elevators;2656
dirty pretty things;2657
the more i see;2658
raimon;2659
winterstorm;2660
spice 1;2661
jade valerie;2662
kacey musgraves;2663
roko;2664
lights of euphoria;2665
the bronx;2666
raffaella carrà;2667
mastercastle;2668
city and colour;2669
salvador;2670
carolyn arends;2671
motörhead;2672
justin young;2673
the dreadnoughts;2674
india.arie;2675
decoded feedback;2676
guy verlinde;2677
the clientele;2678
rapture;2679
caterina valente;2680
battle beast;2681
wednesday 13;2682
walter gieseking;2683
the duckworth lewis method;2684
bloc party;2685
zuill bailey;2686
mother love bone;2687
martha and the vandellas;2688
blitzkid;2689
nightrage;2690
martin zellar;2691
38 special;2692
christina grimmie;2693
dagoba;2694
glenn tipton;2695
blue rodeo;2696
acoustic junction;2697
led zeppelin;2698
evergrey;2699
the locust;2700
jazmine sullivan;2701
the colourfield;2702
yob;2703
vengeance rising;2704
chunk! no, captain chunk!;2705
the alan parsons project;2706
dustin kensrue;2707
fool's garden;2708
saucy monky;2709
sergei rachmaninoff;2710
charlie sexton;2711
babyshambles;2712
nina simone;2713
born against;2714
dååth;2715
jon allen;2716
leigh nash;2717
the vandals;2718
matt redman;2719
marian hill;2720
tanita tikaram;2721
leonard bernstein;2722
raffi;2723
pharao;2724
miseration;2725
allan taylor;2726
rita ora;2727
gary glitter;2728
girlschool;2729
k-ci & jojo;2730
jimmy dorsey;2731
.moneen.;2732
imperia;2733
virtuoso;2734
votum;2735
lucy woodward;2736
alvin stardust;2737
will young;2738
crown of thorns;2739
arcadi volodos;2740
the jim yoshii pile-up;2741
a life divided;2742
children 18;2743
james ingram;2744
梶浦由記 (yuki kajiura);2745
ynw melly;2746
jump, little children;2747
trans-siberian orchestra;2748
armageddon;2749
faces;2750
bokka;2751
leviathan;2752
the black keys;2753
the panic division;2754
roy drusky;2755
veggietales;2756
jt music;2757
tim o'brien;2758
brockhampton;2759
the sundays;2760
hayes carll;2761
leatherface;2762
spirit of the west;2763
dawn landes;2764
kygo;2765
isis;2766
meek mill;2767
brisa roché;2768
freesscape;2769
joe hill;2770
pride and fall;2771
54-40;2772
washed out;2773
michael sembello;2774
crowded house;2775
cpr;2776
steve winwood;2777
jukebox the ghost;2778
atari teenage riot;2779
ballyhoo!;2780
eddie floyd;2781
trophy scars;2782
erykah badu;2783
james cotton;2784
kristinia debarge;2785
morbid angel;2786
code red;2787
al kooper;2788
monty python;2789
the mary onettes;2790
future;2791
shabazz the disciple;2792
sad café;2793
x-fusion;2794
deniece williams;2795
robert gordon;2796
seabear;2797
wallows;2798
john illsley;2799
dream theater;2800
secret lives of the freemasons;2801
josef suk;2802
the bled;2803
gregor piatigorsky;2804
john ralston;2805
tim kasher;2806
the hellacopters;2807
autopsy;2808
the silencers;2809
hank green;2810
abk;2811
hound dog taylor;2812
angela bofill;2813
wade bowen;2814
jamestown story;2815
andy partridge;2816
echolyn;2817
search the city;2818
giacomo puccini;2819
gregory isaacs;2820
stormtroopers of death;2821
silkk the shocker;2822
1208;2823
everlife;2824
bobby mcferrin;2825
pretty ricky;2826
ronna reeves;2827
annie lennox;2828
london grammar;2829
john doe;2830
shel silverstein;2831
blue october;2832
aaron lee tasjan;2833
marion raven;2834
carl perkins;2835
big pokey;2836
eddie cochran;2837
enter the haggis;2838
burden of a day;2839
tourniquet;2840
emilíana torrini;2841
brian doerksen;2842
crossfaith;2843
ludwig van beethoven;2844
good clean fun;2845
an horse;2846
kali uchis;2847
stone breath;2848
daniele liverani;2849
patrick stump;2850
henry krieger;2851
fit for a king;2852
angel corpse;2853
the audition;2854
helalyn flowers;2855
fleet foxes;2856
the wiggles;2857
clouds;2858
peter cetera;2859
wreckshop family;2860
the mistake;2861
lacrimosa;2862
carpenters;2863
the crabb family;2864
twilight fauna;2865
anton n dvo k;2866
patti page;2867
dawnbringer;2868
bachman-turner overdrive;2869
love like blood;2870
pat mcgee band;2871
shedaisy;2872
the other ones;2873
hatebreed;2874
iamamiwhoami;2875
showbread;2876
thyx;2877
face to face;2878
bonnie owens;2879
this wild life;2880
adrienne young;2881
tombs;2882
f.k.ü.;2883
carl nielsen;2884
trouble;2885
peter wolf;2886
shout out louds;2887
new york philharmonic;2888
paul and storm;2889
the dickies;2890
hussein fatal;2891
a tribe called quest;2892
jade 4u;2893
bonobo;2894
alphabeat;2895
gladys knight;2896
tilt;2897
jacqueline du pr ;2898
boyz ii men;2899
blood oranges;2900
scala & kolacny brothers;2901
steel attack;2902
vienna teng;2903
nunslaughter;2904
roxy music;2905
benny benassi;2906
feargal sharkey;2907
willie dixon;2908
mary j. blige;2909
ghost town;2910
george ezra;2911
rose cousins;2912
the stone roses;2913
mc5;2914
go west;2915
knut;2916
the head and the heart;2917
room eleven;2918
simon and garfunkel;2919
wilson pickett;2920
the amboy dukes;2921
walk the moon;2922
abgott;2923
truly;2924
the louvin brothers;2925
melanie c;2926
enchant;2927
duffy;2928
union;2929
great white;2930
the violet burning;2931
kat-tun;2932
mercyme;2933
elmore james;2934
johnny horton;2935
dreams of sanity;2936
vázquez sounds;2937
kraftwerk;2938
cool hand luke;2939
country joe mcdonald;2940
abandoned pools;2941
the swell season;2942
3 inches of blood;2943
rocky loves emily;2944
whores.;2945
natalie grant;2946
frank proffitt;2947
red fang;2948
jason mraz;2949
barefoot truth;2950
lee aaron;2951
frank hutchison;2952
kasey chambers;2953
sick of it all;2954
girls in hawaii;2955
geoff farina;2956
rory gallagher;2957
out of eden;2958
mø;2959
darkthrone;2960
eddie money;2961
envy on the coast;2962
virgin prunes;2963
mississippi sheiks;2964
capitol steps;2965
battery;2966
hilary duff;2967
betraying the martyrs;2968
stray cats;2969
the groundhogs;2970
thieves and villains;2971
la roux;2972
southern raiders band;2973
cam'ron;2974
mississippi fred mcdowell;2975
gary u.s. bonds;2976
matt duke;2977
stillborn;2978
mnemic;2979
jerry cantrell;2980
clannad;2981
mychildren mybride;2982
dope;2983
his hero is gone;2984
deltron 3030;2985
nebelhexë;2986
angelic upstarts;2987
bebo norman;2988
kb;2989
patricia barber;2990
tsol;2991
cathedral;2992
glenn kaiser;2993
peter koppes;2994
the hunna;2995
peaches;2996
bryan adams;2997
michael kiske;2998
sigrid;2999
gordon bok;3000
the vamps;3001
burning witches;3002
the twilight singers;3003
teyana taylor;3004
robbie robertson;3005
black veil brides;3006
corbin-hanner band;3007
white denim;3008
see you next tuesday;3009
corey hart;3010
shy;3011
visions of atlantis;3012
pyrexia;3013
rah digga;3014
killwhitneydead;3015
austra;3016
yung joc;3017
sevendust;3018
miles kane;3019
albert hammond, jr.;3020
the sleepy jackson;3021
quarterflash;3022
baba brinkman;3023
gautier capu on;3024
heather dale;3025
hinder;3026
evoken;3027
kardinal offishall;3028
jett rebel;3029
n-dubz;3030
marcia ball;3031
bucks fizz;3032
freddie jackson;3033
steve grand;3034
leonard pennario;3035
summoning;3036
blaqk audio;3037
the sisters of mercy;3038
doyle bramhall ii;3039
dominici;3040
al b. sure!;3041
giant sand;3042
colosseum;3043
johannes moser;3044
yeasayer;3045
unearth;3046
oneiroid psychosis;3047
sheila e.;3048
alicia de larrocha;3049
last tuesday;3050
the bonzo dog doo-dah band;3051
gilbert and sullivan;3052
this day & age;3053
anekdoten;3054
greg lake;3055
mike scott;3056
black tide;3057
colony 5;3058
the malibooz;3059
adam ant;3060
pacewon;3061
queensrÿche;3062
the ames brothers;3063
rooster;3064
rare bird;3065
justin nozuka;3066
elle milano;3067
yacht;3068
neville marriner;3069
faith no more;3070
kool & the gang;3071
fun lovin' criminals;3072
yann tiersen;3073
schäffer the darklord;3074
aiden;3075
metanoia;3076
the high dials;3077
charles bradley;3078
paul gilbert;3079
malvina reynolds;3080
iron & wine;3081
saturday looks good to me;3082
viktor vaughn;3083
isgaard;3084
frank sinatra;3085
alice cooper;3086
passenger;3087
michael nesmith;3088
ancient;3089
liza anne;3090
the magic numbers;3091
seraphim shock;3092
abney park;3093
guerilla maab;3094
orphaned land;3095
jack savoretti;3096
zombina and the skeletones;3097
leon fleisher;3098
rick springfield;3099
the left rights;3100
starflyer 59;3101
2 brothers on the 4th floor;3102
emery;3103
c.c. catch;3104
mick taylor;3105
good rats;3106
blackmore's night;3107
twisted sister;3108
boney james;3109
cancer;3110
maria jo o pires;3111
meiko;3112
cass mccombs;3113
deep dish;3114
information society;3115
blackie and the rodeo kings;3116
the fold;3117
ed harcourt;3118
ramshackle glory;3119
tom misch;3120
bebe winans;3121
joni mitchell;3122
elizabeth shepherd;3123
lil boosie;3124
celluloide;3125
house of lords;3126
j.d. souther;3127
midori goto;3128
shenandoah;3129
averi;3130
modwheelmood;3131
betty who;3132
burst;3133
pete shelley;3134
jon bon jovi;3135
x-perience;3136
xentrix;3137
grinderman;3138
hem;3139
the impressions;3140
sylosis;3141
the gregory brothers;3142
macklemore;3143
lizz wright;3144
sawyer brown;3145
capercaillie;3146
sam & dave;3147
the germs;3148
salt-n-pepa;3149
big sean;3150
stornoway;3151
arthur rubinstein;3152
cock robin;3153
big daddy;3154
fernando ortega;3155
ted nugent;3156
coptic rain;3157
kat deluna;3158
wolfsheim;3159
graham central station;3160
charles harrison;3161
lil jon & the east side boyz;3162
lovedrug;3163
our last night;3164
mura masa;3165
lacuna coil;3166
luke doucet;3167
punchline;3168
animal logic;3169
big bang;3170
sieges even;3171
g.g.f.h.;3172
tyler lyle;3173
the boswell sisters;3174
fightstar;3175
kevin rudolf;3176
dangerous toys;3177
comeback kid;3178
wild orchid;3179
one direction;3180
electric six;3181
the corrs;3182
christy nockels;3183
hillsong;3184
sham 69;3185
hurt;3186
clint black;3187
chris hillman & herb pedersen;3188
def leppard;3189
plants and animals;3190
jack bruce;3191
thalía;3192
arthur crudup;3193
phil wickham;3194
king gizzard & the lizard wizard;3195
beautiful eulogy;3196
the last bison;3197
caravan;3198
the wonder stuff;3199
jeff williams;3200
sebadoh;3201
jets overhead;3202
golden smog;3203
jonna lee;3204
the faceless;3205
daniel lanois;3206
the judds;3207
paragon;3208
vienna philharmonic;3209
electric president;3210
tenth avenue north;3211
solomon cutner;3212
vnv nation;3213
system syn;3214
eric lindell;3215
jenny lewis;3216
the flower kings;3217
chicago;3218
stacey q;3219
ark;3220
chingo bling;3221
isac elliot;3222
penumbra;3223
cypress hill;3224
landon pigg;3225
collide;3226
mick ronson;3227
fall of the leafe;3228
melissa etheridge;3229
suicidal tendencies;3230
joshua kadison;3231
the spill canvas;3232
eclipse;3233
within the ruins;3234
haemorrhage;3235
deliverance;3236
robert pollard;3237
paul kelly;3238
alesana;3239
the big pink;3240
e-rotic;3241
the books;3242
h.p. lovecraft historical society;3243
argent;3244
nivea;3245
mario;3246
the fiery furnaces;3247
atc;3248
matt and kim;3249
tobymac;3250
lcd soundsystem;3251
ant & dec;3252
howe gelb;3253
hozier;3254
astarte;3255
anathallo;3256
basia bulat;3257
tad morose;3258
smp;3259
greensky bluegrass;3260
holly miranda;3261
ub40;3262
d12;3263
scott miller;3264
chicane;3265
the faint;3266
cream;3267
archive;3268
deicide;3269
legendary shack shakers;3270
guano apes;3271
wolfgun;3272
typhoon;3273
pile;3274
the philosopher kings;3275
black grape;3276
kotipelto;3277
david lee murphy;3278
fish;3279
nekromantix;3280
the chordettes;3281
sherban lupu;3282
darlene zschech;3283
the henry girls;3284
ida cox;3285
peter & gordon;3286
fear, and loathing in las vegas;3287
jme;3288
blood on the dance floor;3289
electric light orchestra;3290
romeo;3291
mike heron;3292
kendrick lamar;3293
spock's beard;3294
the bunny the bear;3295
sweetbox;3296
mac miller;3297
spazz;3298
liars;3299
98°;3300
blood duster;3301
make do and mend;3302
cheri dennis;3303
gerald levert;3304
chris tomlin;3305
all saints;3306
ghost ship;3307
young the giant;3308
jim james;3309
the unseen;3310
whitesnake;3311
ciara;3312
seventh day slumber;3313
the b-52's;3314
crime & the city solution;3315
brentalfloss;3316
gene clark;3317
nouvelle vague;3318
don johnson big band;3319
shwayze;3320
cat rapes dog;3321
poisonblack;3322
the wildhearts;3323
bruce hornsby and the range;3324
logic;3325
vaux;3326
suzy bogguss;3327
sarke;3328
charlie louvin;3329
wild child;3330
icons of filth;3331
tarnation;3332
thousand foot krutch;3333
nikki webster;3334
wolfmother;3335
samuel barber;3336
panda bear;3337
ganggajang;3338
rosetta stone;3339
sonia disappearfear;3340
the s.o.s. band;3341
w.a.s.p.;3342
silverstein;3343
tin machine;3344
jennifer warnes;3345
andru donalds;3346
olympos mons;3347
cerebral fix;3348
glenn frey;3349
suzi quatro;3350
sons of the pioneers;3351
venetian princess;3352
delirious?;3353
lecrae;3354
marty robbins;3355
jeannie seely;3356
the psychedelic ensemble;3357
glenn medeiros;3358
bleed from within;3359
third world;3360
alison krauss;3361
dj shadow;3362
a static lullaby;3363
maps & atlases;3364
james taylor;3365
mekong delta;3366
easy star all-stars;3367
aura noir;3368
santana;3369
the gadjits;3370
can;3371
deepspace 5;3372
leslie west;3373
fr d ric chopin;3374
sundara karma;3375
powerman 5000;3376
dutch rebelle;3377
bones brigade;3378
the browning;3379
peter andre;3380
connie francis;3381
smooth;3382
majesty;3383
matthew dear;3384
fionn regan;3385
nuclear assault;3386
over the rhine;3387
magellan;3388
zoot woman;3389
shyne;3390
kidneythieves;3391
racoon;3392
ironsword;3393
alessandro scarlatti;3394
the appleseed cast;3395
operation ivy;3396
akron/family;3397
qkumba zoo;3398
the chasm;3399
the style council;3400
method man;3401
the halliard;3402
boy george;3403
tarot;3404
delays;3405
cromok;3406
honeymoon suite;3407
hercules & love affair;3408
lemuria;3409
the kelly family;3410
tweet;3411
tru;3412
lawnmower deth;3413
no use for a name;3414
dogwood;3415
penitent;3416
capture the crown;3417
hawksley workman;3418
the drifters;3419
school of seven bells;3420
james keelaghan;3421
the 69 eyes;3422
insane clown posse;3423
save ferris;3424
trust;3425
atb;3426
todd rundgren;3427
hank cochran;3428
forever the sickest kids;3429
new riders of the purple sage;3430
georg philipp telemann;3431
crippled black phoenix;3432
paul mccartney;3433
ryan adams and the cardinals;3434
silent cry;3435
king creosote;3436
run the jewels;3437
the kings;3438
steve goodman;3439
nitty gritty dirt band;3440
eric bibb;3441
editors;3442
blessed by a broken heart;3443
civil twilight;3444
children of bodom;3445
michael jackson;3446
richie sambora;3447
willam;3448
devildriver;3449
e.s.g.;3450
sananda maitreya;3451
babbie mason;3452
the museum;3453
dan zanes;3454
e-type;3455
gidon kremer;3456
al denson;3457
aeon;3458
kenny rogers;3459
the tear garden;3460
joshua bell;3461
set your goals;3462
le tigre;3463
holly dunn;3464
ignacy jan paderewski;3465
immortal;3466
108;3467
ann beretta;3468
dezarie;3469
pete seeger;3470
jan & dean;3471
nina kinert;3472
the lonely forest;3473
protest the hero;3474
diorama;3475
khoma;3476
japan;3477
jim brickman;3478
halifax;3479
rebecca lynn howard;3480
flobots;3481
the new power generation;3482
gods paparazzi;3483
shannon curfman;3484
roy woods;3485
varsity fanclub;3486
dave stewart;3487
mumford & sons;3488
syd matters;3489
the nerve agents;3490
kula shaker;3491
luke sital-singh;3492
john wesley harding;3493
venom;3494
melanie doane;3495
the hippos;3496
jessi colter;3497
the movielife;3498
babybird;3499
bodyfarm;3500
tara maclean;3501
jackson browne;3502
within temptation;3503
cash cash;3504
warren zevon;3505
eva cassidy;3506
billy ray cyrus;3507
justin timberlake;3508
the antlers;3509
john elefante;3510
angel;3511
jaap schr der;3512
evermore;3513
destroy the runner;3514
the black crowes;3515
tiga;3516
bury your dead;3517
slowdive;3518
machine head;3519
c-murder;3520
dirty;3521
fujiya & miyagi;3522
agonoize;3523
hundredth;3524
prong;3525
nikki flores;3526
cyanotic;3527
jordan pruitt;3528
u.s. bombs;3529
a halo called fred;3530
don williams;3531
chester watson;3532
noa;3533
my passion;3534
george harrison;3535
scandroid;3536
sister sin;3537
hot snakes;3538
black flag;3539
christie;3540
christoph eschenbach;3541
slim gaillard;3542
gravediggaz;3543
the northern pikes;3544
miranda sex garden;3545
colin blunstone;3546
the rasmus;3547
nazareth;3548
degarmo and key;3549
earth crisis;3550
the foreign exchange;3551
uncle tupelo;3552
kitty wells;3553
karate high school;3554
weeping tile;3555
skinny puppy;3556
audience;3557
devotchka;3558
john lennon;3559
elf power;3560
teresa brewer;3561
heinrich schiff;3562
6lack;3563
anti-depressive delivery;3564
the moffatts;3565
lynn anderson;3566
mordacious;3567
robert palmer;3568
seabound;3569
black star riders;3570
bryan rice;3571
j.b. lenoir;3572
the yards;3573
the stupid stupid henchmen;3574
tony rice;3575
barbecue bob;3576
dr. hook & the medicine show;3577
garrison starr;3578
union 13;3579
jay sean;3580
pretty willie;3581
oceano;3582
captain beefheart and the magic band;3583
markéta irglová;3584
wolfe tones;3585
crumbsuckers;3586
towers of london;3587
impaled nazarene;3588
big tent revival;3589
hans theessink;3590
the amity affliction;3591
animal liberation orchestra;3592
tower of power;3593
vanessa bell armstrong;3594
tom fogerty;3595
pianos become the teeth;3596
poppy;3597
the knack;3598
fields of the nephilim;3599
peter and the test tube babies;3600
cory branan;3601
cherrelle;3602
palaye royale;3603
southside johnny & the asbury jukes;3604
heideroosjes;3605
the busters;3606
dave edmunds;3607
buck 65;3608
the lost trailers;3609
steam powered giraffe;3610
lydia lunch;3611
washboard sam;3612
big time rush;3613
nlt;3614
old 97's;3615
rupert hine;3616
jon mclaughlin;3617
acid drinkers;3618
heaven & hell;3619
george hamilton iv;3620
scott joplin;3621
the berzerker;3622
jonatha brooke;3623
iggy azalea;3624
trijntje oosterhuis;3625
cee lo green;3626
wig wam;3627
mark heard;3628
phil manzanera;3629
the story;3630
kokomo arnold;3631
the beau brummels;3632
acid bath;3633
talisman;3634
mc chris;3635
the del mccoury band;3636
ty segall;3637
boysetsfire;3638
the ducky boys;3639
delta moon;3640
thumb;3641
blacklisted;3642
purity ring;3643
anders manga;3644
burn halo;3645
how to dress well;3646
maxnormal.tv;3647
binoculers;3648
lou barlow;3649
doug sahm;3650
gary allan;3651
jan wayne;3652
mörk gryning;3653
danko jones;3654
dover;3655
robben ford;3656
black breath;3657
lovespirals;3658
sérgio mendes;3659
rufus;3660
kingcrow;3661
dmitri shostakovich;3662
ghostface killah;3663
lyriel;3664
agathodaimon;3665
hezekiah walker;3666
nonpoint;3667
balzac;3668
lunatica;3669
ella mae morse;3670
toyah;3671
nik kershaw;3672
hammerfall;3673
jmsn;3674
speaker;3675
whitechapel;3676
haddaway;3677
grace vanderwaal;3678
the tubes;3679
eric church;3680
the o.c. supertones;3681
frozen ghost;3682
98 mute;3683
bobby darin;3684
voxtrot;3685
the darkness;3686
paris;3687
killswitch engage;3688
jessica lea mayfield;3689
n-trance;3690
hank williams, jr.;3691
b5;3692
thunderstone;3693
north mississippi allstars;3694
angry samoans;3695
alexis korner;3696
youssou n'dour;3697
jet set satellite;3698
boomkat;3699
leif ove andsnes;3700
ektomorf;3701
ceremony;3702
isley jasper isley;3703
r.a. the rugged man;3704
the twilight sad;3705
the electric prunes;3706
dream street;3707
the yardbirds;3708
turisas;3709
celldweller;3710
wigwam;3711
paul carrack;3712
dads;3713
per gessle;3714
eugene mcguinness;3715
hardline;3716
patti labelle;3717
busta rhymes;3718
rasputina;3719
antique;3720
schoolboy q;3721
beat happening;3722
punch brothers;3723
keshia chanté;3724
bob carlisle;3725
daan;3726
imperative reaction;3727
after the fire;3728
marc and the mambas;3729
minnie driver;3730
secondhand serenade;3731
the byrds;3732
bread;3733
talib kweli;3734
nelly;3735
hagalaz' runedance;3736
stick to your guns;3737
the virus;3738
snow;3739
james morrison;3740
yes;3741
neuroactive;3742
misanthrope;3743
morgana lefay;3744
che'nelle;3745
hackneyed;3746
wilhelm backhaus;3747
warren haynes;3748
proclamation;3749
andy griggs;3750
the bongos;3751
corrosion of conformity;3752
paloma faith;3753
the japanese house;3754
tizzy bac;3755
agathocles;3756
kari peitsamo;3757
mýa;3758
ignaz friedman;3759
blitz;3760
dyscarnate;3761
the hidden cameras;3762
egypt central;3763
noggin toboggan;3764
sadistik;3765
peter gabriel;3766
cinderella;3767
sebastian sturm;3768
psycroptic;3769
sylver;3770
jaymay;3771
nahemah;3772
dubioza kolektiv;3773
the pogues;3774
the arrogant sons of bitches;3775
nelson freire;3776
mastedon;3777
leonid kogan;3778
ezra furman;3779
ohgr;3780
edward maya;3781
wings;3782
freya;3783
skye;3784
ablaze my sorrow;3785
gerry rafferty;3786
franz schubert;3787
peccatum;3788
bananafishbones;3789
claude debussy;3790
eurythmics;3791
will oldham;3792
fiona boyes;3793
cellar darling;3794
public image ltd.;3795
fireflight;3796
afu-ra;3797
little mix;3798
power quest;3799
emmy the great;3800
sub7even;3801
gwen stefani;3802
playradioplay!;3803
aaron neville;3804
the stryder;3805
lullacry;3806
kelli ali;3807
bow wow;3808
igor stravinsky;3809
grade;3810
tall dwarfs;3811
rbd;3812
despised icon;3813
blues magoos;3814
the long winters;3815
majid jordan;3816
vincenzo bellini;3817
loretta lynn;3818
kenny wayne shepherd;3819
bleeding through;3820
cursive;3821
linus of hollywood;3822
g-unit;3823
the casket lottery;3824
kingston wall;3825
kate voegele;3826
amanda lear;3827
vicky beeching;3828
colin james;3829
elvis costello & the attractions;3830
dana dirksen;3831
c sar franck;3832
dog fashion disco;3833
josé gonzález;3834
the joy formidable;3835
elly ney;3836
meliah rage;3837
tim moore;3838
james blunt;3839
american pleasure club;3840
nicole scherzinger;3841
the bad shepherds;3842
the fabulous thunderbirds;3843
i can make a mess like nobody's business;3844
three plus;3845
julie roberts;3846
tribe;3847
luke bryan;3848
saxon;3849
philip glass;3850
bobby womack;3851
triumph;3852
veda hille;3853
coldrain;3854
en vogue;3855
amy winehouse;3856
bonnie pink;3857
the doc watson family;3858
bad company;3859
fuck;3860
cradle of filth;3861
gorky's zygotic mynci;3862
buck owens;3863
elle king;3864
samael;3865
meryn cadell;3866
nocturnal rites;3867
ghost dance;3868
kristy lee cook;3869
the script;3870
jake owen;3871
death cab for cutie;3872
the forgotten rebels;3873
beaver;3874
tom rush;3875
david rock feinstein;3876
oathbreaker;3877
odetta;3878
compton's most wanted;3879
psy'aviah;3880
big ed;3881
john miles;3882
shellac;3883
turin brakes;3884
eternal tears of sorrow;3885
kodak black;3886
slick rick;3887
code orange;3888
gioacchino rossini;3889
marc andr hamelin;3890
ulver;3891
la toya jackson;3892
path of resistance;3893
laura fygi;3894
tom mcrae;3895
sitti;3896
frank stokes;3897
anorexia nervosa;3898
gabriel cyphre;3899
the jon spencer blues explosion;3900
m-flo;3901
jason donovan;3902
sean paul;3903
girls;3904
504 boyz;3905
bring me the horizon;3906
shelby lynne;3907
pierre laurent aimard;3908
jim messina;3909
the boyz;3910
hank thompson;3911
hands;3912
zakk wylde;3913
nikka costa;3914
nelly furtado;3915
fairground attraction;3916
born of osiris;3917
fenix tx;3918
robert plant;3919
robert casadesus;3920
paul van dyk;3921
glenn kaiser band;3922
daughters;3923
saraya;3924
rae sremmurd;3925
norma jean;3926
the plot in you;3927
wild nothing;3928
apocalyptica;3929
tony! toni! toné!;3930
olivia newton-john;3931
keith richards;3932
elizabeth cook;3933
outlandish;3934
halford;3935
susan ashton;3936
doves;3937
queen + paul rodgers;3938
wishing chair;3939
pseudo echo;3940
crazyeightyeight;3941
red lights flash;3942
neon indian;3943
erick sermon;3944
the juliana theory;3945
larry gatlin & the gatlin brothers;3946
paul whiteman;3947
missy elliott;3948
the chiffons;3949
allison iraheta;3950
centvrion;3951
the 5th dimension;3952
dighayzoose;3953
nina nastasia;3954
shadowside;3955
5 seconds of summer;3956
asaf avidan;3957
cady groves;3958
old man luedecke;3959
the chap;3960
john maus;3961
tom petty and the heartbreakers;3962
have a nice life;3963
breach of trust;3964
claudio arrau;3965
toro y moi;3966
the tony danza tapdance extravaganza;3967
oscar brand;3968
john mccormack;3969
anti-nowhere league;3970
shearer;3971
j.j. cale;3972
iyaz;3973
ice ages;3974
the city harmonic;3975
bruce mcculloch;3976
the color changin' click;3977
crack the sky;3978
the moldy peaches;3979
coolio;3980
jaill;3981
mud;3982
hodgy beats;3983
clap your hands say yeah;3984
badly drawn boy;3985
ewert and the two dragons;3986
tystnaden;3987
janie fricke;3988
dottie west;3989
witherscape;3990
目黒将司 (shoji meguro);3991
mnek;3992
kurt nilsen;3993
psychic tv;3994
the boomtown rats;3995
nena;3996
stephin merritt;3997
nausea;3998
molly venter;3999
rasaq;4000
olly murs;4001
хелависа;4002
coko;4003
scarface;4004
sam smith;4005
midnight juggernauts;4006
pulley;4007
theresa sokyrka;4008
jacob banks;4009
bomb the music industry!;4010
meredith andrews;4011
yearning;4012
ron sexsmith;4013
joseph haydn;4014
einherjer;4015
four tops;4016
giles, giles and fripp;4017
iona brown;4018
bon jovi;4019
kunt and the gang;4020
motionless in white;4021
sabrina;4022
jillian aversa;4023
the persuasions;4024
tub ring;4025
sacred warrior;4026
susan boyle;4027
la the darkman;4028
savage garden;4029
lindisfarne;4030
jag panzer;4031
dana key;4032
wouter hamel;4033
peter green;4034
jill scott;4035
wreck and reference;4036
vanilla fudge;4037
aerosmith;4038
megafaun;4039
poundhound;4040
2 skinnee j's;4041
the call;4042
sonny boy williamson i;4043
possessed;4044
annie;4045
suburban legends;4046
the mutton birds;4047
cibo matto;4048
lorna shore;4049
terry callier;4050
selah sue;4051
derek webb;4052
parliament;4053
sister machine gun;4054
the badlees;4055
the regrettes;4056
marlon roudette;4057
albert king;4058
rebelution;4059
shura cherkassky;4060
dave rodgers;4061
skip james;4062
caedmon's call;4063
asp;4064
souldecision;4065
savoir adore;4066
nathaniel rateliff;4067
the temperance movement;4068
dragonland;4069
isaac alb niz;4070
falling up;4071
stabilo;4072
christ analogue;4073
big mountain;4074
sade;4075
the casting out;4076
gene autry;4077
manfred mann;4078
drive-by truckers;4079
javier mendoza;4080
alyson stoner;4081
blind lemon jefferson;4082
lowkey;4083
brad paisley;4084
jane child;4085
i declare war;4086
leo jan ek;4087
lonestar;4088
boards of canada;4089
don henley;4090
choclair;4091
obsc(y)re;4092
donnie mcclurkin;4093
wall of voodoo;4094
tony bennett;4095
hank snow;4096
poison the well;4097
fancy;4098
unbelievable truth;4099
jet life;4100
rory block;4101
rachael lampa;4102
the platters;4103
tahiti 80;4104
the gathering;4105
brett dennen;4106
lily allen;4107
puffy amiyumi;4108
iamx;4109
tree63;4110
bee gees;4111
e.g. daily;4112
narnia;4113
peter green splinter group;4114
gram parsons;4115
facing new york;4116
kalmah;4117
hot water music;4118
blessthefall;4119
general public;4120
cornershop;4121
owen pallett;4122
truth hurts;4123
quorthon;4124
kill your idols;4125
planet funk;4126
leonardo's bride;4127
jimmy rushing;4128
the title;4129
dog eat dog;4130
zucchero;4131
mind.in.a.box;4132
mainstay;4133
graham nash;4134
therion;4135
bad boys blue;4136
swizz beatz;4137
joey + rory;4138
matthew herbert;4139
krewella;4140
lou rhodes;4141
linda perry;4142
the pointer sisters;4143
fit for an autopsy;4144
yelworc;4145
jeffrey lewis;4146
arghoslent;4147
andy park;4148
uriah heep;4149
disciple;4150
sodom;4151
beth nielsen chapman;4152
chuck ragan;4153
spoons;4154
haerts;4155
ice nine kills;4156
rosie thomas;4157
guerilla toss;4158
dizzee rascal;4159
g4;4160
mendeed;4161
empire of the sun;4162
fergie;4163
mishka;4164
tom jones;4165
the brothers four;4166
racer x;4167
clipping.;4168
stiff little fingers;4169
the ghost of a saber tooth tiger;4170
ultimatum;4171
odyssey eurobeat;4172
tony banks;4173
shaggy;4174
kotoko;4175
nothingface;4176
mellowhype;4177
anthony green;4178
bulldozer;4179
abigail williams;4180
gangsta boo;4181
jonny lang;4182
j. cole;4183
edvard grieg;4184
the residents;4185
luther allison;4186
jean philippe rameau;4187
randy newman;4188
jeff the brotherhood;4189
king missile;4190
cloud nothings;4191
fiona;4192
the walkmen;4193
nicola benedetti;4194
birmingham 6;4195
edge of dawn;4196
nathan milstein;4197
the paul butterfield blues band;4198
he is legend;4199
ol' dirty bastard;4200
steve miller band;4201
craig's brother;4202
beracah;4203
new edition;4204
falconshield;4205
childish gambino;4206
am & shawn lee;4207
unkle;4208
silver jews;4209
the interrupters;4210
william shatner;4211
adrian belew;4212
scanner;4213
slash;4214
sweatshop union;4215
dayna kurtz;4216
krisma;4217
ferraby lionheart;4218
new years day;4219
madchild;4220
tohoshinki;4221
busted;4222
tyler bryant & the shakedown;4223
vanessa hudgens;4224
lady gaga;4225
kosheen;4226
circle ii circle;4227
glee cast;4228
forgotten tales;4229
renaissance;4230
lonnie johnson;4231
swans;4232
laura marling;4233
against the current;4234
ane brun;4235
the buckinghams;4236
art bears;4237
rocky votolato;4238
the soft boys;4239
jonathan thulin;4240
andreas johnson;4241
pages;4242
steve moakler;4243
a.c. newman;4244
showaddywaddy;4245
bart davenport;4246
the streets;4247
allstar weekend;4248
death grips;4249
seventh avenue;4250
grandmaster flash;4251
a split-second;4252
sense field;4253
maisey rika;4254
haken;4255
sharon needles;4256
arcangelo corelli;4257
teen suicide;4258
blue highway;4259
the megas;4260
noxious emotion;4261
keldian;4262
asian dub foundation;4263
h.e.r.;4264
girlpool;4265
eddie degarmo;4266
steve taylor;4267
alex story;4268
willy porter;4269
aura dione;4270
melanie b;4271
az;4272
chris isaak;4273
captain tractor;4274
anita baker;4275
anne akiko meyers;4276
josh rouse;4277
noname;4278
tina guo;4279
the wannadies;4280
the romantics;4281
firewind;4282
less than jake;4283
owain phyfe;4284
drag the river;4285
keith sweat;4286
kari jobe;4287
dispatch;4288
john stewart;4289
hellhammer;4290
reflection eternal;4291
the last dance;4292
ultimate fakebook;4293
nomad;4294
coal chamber;4295
juice;4296
gotthard;4297
richard wagner;4298
the strypes;4299
felix mendelssohn bartholdy;4300
lou gramm;4301
necro;4302
vladimir ashkenazy;4303
the datsuns;4304
green jellÿ;4305
johnny duncan;4306
thingy;4307
jimmie rodgers;4308
paul brady;4309
hazel dickens;4310
claude king;4311
wynardtage;4312
craig morgan;4313
japandroids;4314
the almanac singers;4315
memphis slim;4316
eddie rabbitt;4317
mortification;4318
escape the fate;4319
tonight alive;4320
tyler, the creator;4321
cubanate;4322
alvin youngblood hart;4323
steel train;4324
natasha thomas;4325
violent soho;4326
laika;4327
michael bublé;4328
folly & the hunter;4329
nektar;4330
carnifex;4331
johan;4332
matthew good;4333
backyard babies;4334
jim kweskin;4335
beat crusaders;4336
bullet for my valentine;4337
oceans ate alaska;4338
the communards;4339
quo vadis;4340
alessi brothers;4341
1910 fruitgum company;4342
attila;4343
christian bautista;4344
ian anderson;4345
the grates;4346
ethel waters;4347
urthboy;4348
burlap to cashmere;4349
nails;4350
pyotr ilyich tchaikovsky;4351
keely smith;4352
milemarker;4353
truls m rk;4354
snoop dogg;4355
akrobatik;4356
jorge bolet;4357
jonny diaz;4358
killer dwarfs;4359
birth control;4360
write this down;4361
high school football heroes;4362
the frames;4363
angelo branduardi;4364
casey bill weldon;4365
steve green;4366
bill anderson;4367
jin akanishi;4368
kimbra;4369
nevermore;4370
garrick ohlsson;4371
nolongerhuman;4372
ewan maccoll;4373
the incredible string band;4374
hayden;4375
body count;4376
charlie parr;4377
andrew w.k.;4378
sarah blasko;4379
vetiver;4380
борис гребенщиков;4381
stephen marley;4382
sage francis;4383
fischerspooner;4384
the strokes;4385
bootsauce;4386
maurice ravel;4387
mini mansions;4388
heartsounds;4389
vangelis;4390
artension;4391
bear's den;4392
mystic circle;4393
katie melua;4394
the mission;4395
buddy & julie miller;4396
2 live crew;4397
calexico;4398
danielle peck;4399
devil doll;4400
the mad conductor;4401
eliane elias;4402
brutal truth;4403
bootsy collins;4404
saving abel;4405
robby valentine;4406
vader;4407
midnattsol;4408
jigsaw;4409
bullets and octane;4410
company of thieves;4411
kylie minogue;4412
sandi thom;4413
norah jones;4414
the dillards;4415
bulletboys;4416
high places;4417
neon horse;4418
anna ternheim;4419
alessia cara;4420
wolfgang schneiderhan;4421
tzu;4422
t. rex;4423
treble charger;4424
culture;4425
gaelic storm;4426
elliott smith;4427
memoryhouse;4428
the amazing rhythm aces;4429
balance of power;4430
coil;4431
hot rod circuit;4432
the guess who;4433
fredrika stahl;4434
da' t.r.u.t.h.;4435
toh kay;4436
gentle giant;4437
roger whittaker;4438
the dø;4439
neil sedaka;4440
the view;4441
the lookouts;4442
twrp;4443
tinie tempah;4444
dana winner;4445
civil war;4446
haujobb;4447
the crucified;4448
tv on the radio;4449
gil scott-heron;4450
marit larsen;4451
robert calvert;4452
ja rule;4453
tina charles;4454
ima robot;4455
neva dinova;4456
thaurorod;4457
lou christie;4458
scritti politti;4459
whiskeytown;4460
while she sleeps;4461
kristine w;4462
zilch;4463
the avett brothers;4464
the human league;4465
ilse delange;4466
tinashe;4467
mc ren;4468
exciter;4469
papa roach;4470
frank turner;4471
sol invictus;4472
anya marina;4473
these new puritans;4474
instalok;4475
tom smith;4476
maria muldaur;4477
e-40;4478
holly golightly;4479
my bloody valentine;4480
defiance;4481
allison moorer;4482
wynn stewart;4483
haggard;4484
all shall perish;4485
fritz kreisler;4486
dead prez;4487
lenny kravitz;4488
lagwagon;4489
damian marley;4490
icehouse;4491
dionne warwick;4492
the devil makes three;4493
drive, she said;4494
hollow haze;4495
anita carter;4496
h.e.a.t;4497
slaves on dope;4498
arsis;4499
samson;4500
swv;4501
l.a. guns;4502
hiatus kaiyote;4503
quicksilver messenger service;4504
lea michele;4505
graf orlock;4506
tiago iorc;4507
great northern;4508
ray wylie hubbard;4509
super junior;4510
jerry jeff walker;4511
hocico;4512
bukimina;4513
stretch;4514
anthony hamilton;4515
mushroomhead;4516
evergreen terrace;4517
michael mcdonald;4518
leroy hutson;4519
structures;4520
dance gavin dance;4521
other people;4522
linda davis;4523
doc walker;4524
rumer;4525
chuck berry;4526
shoffy;4527
dickey lee;4528
freddy fender;4529
batmobile;4530
the jimi hendrix experience;4531
teddy pendergrass;4532
bushwick bill;4533
gary moore;4534
arthur beatrice;4535
hurrah!;4536
tsunami bomb;4537
thundra;4538
herb alpert & the tijuana brass;4539
the flashbulb;4540
korpiklaani;4541
the mendoza line;4542
vashti bunyan;4543
unknown mortal orchestra;4544
jerry garcia;4545
richard shindell;4546
i fight dragons;4547
late tuesday;4548
chris connor;4549
the chemical brothers;4550
virgin steele;4551
sybreed;4552
dan le sac vs scroobius pip;4553
lani hall;4554
nero;4555
twin shadow;4556
gordon downie;4557
aphrodite's child;4558
bodyjar;4559
magic man;4560
baths;4561
tender forever;4562
of monsters and men;4563
krypteria;4564
jandek;4565
r.i.o.;4566
government issue;4567
jane monheit;4568
jeffree star;4569
moby grape;4570
silver convention;4571
michelle wright;4572
the shroud;4573
joseph szigeti;4574
les claypool;4575
the kills;4576
beth hart;4577
bobo in white wooden houses;4578
twilight force;4579
larry sparks;4580
eddie vedder;4581
glen campbell;4582
michael hedges;4583
the red shore;4584
gloria gaynor;4585
m2m;4586
altered images;4587
youth lagoon;4588
xv;4589
kevorkian death cycle;4590
the sugarcubes;4591
cryptic slaughter;4592
candy butchers;4593
8ball;4594
jhené aiko;4595
marcelle meyer;4596
gaither vocal band;4597
the boy least likely to;4598
dean martin;4599
journey;4600
jackie evancho;4601
esperanza spalding;4602
the time;4603
tampa red;4604
poison;4605
demis roussos;4606
maria solheim;4607
living colour;4608
brutality;4609
avatar;4610
katie armiger;4611
paul wilbur;4612
tokio hotel;4613
larue;4614
golden earring;4615
saint etienne;4616
the go! team;4617
james gang;4618
master p;4619
billie holiday;4620
they might be giants;4621
paul simon;4622
flashlight brown;4623
dreezy;4624
david byron;4625
kim boyce;4626
masta killa;4627
dmx;4628
flesh-n-bone;4629
june carter cash;4630
crucial conflict;4631
persephone;4632
asher roth;4633
angel dust;4634
johnny crash;4635
amy ray;4636
lang lang;4637
two hours traffic;4638
attrition;4639
carole king;4640
sarah mclachlan;4641
adolf busch;4642
absolution project;4643
justin bieber;4644
penny mclean;4645
velvet belly;4646
matt nathanson;4647
holly cole;4648
diana vickers;4649
ella mai;4650
mark eitzel;4651
the associates;4652
vortech;4653
in strict confidence;4654
helloween;4655
chasing victory;4656
ninety pound wuss;4657
jackie gleason;4658
be bop deluxe;4659
lit;4660
the bruisers;4661
amanda jenssen;4662
crowder;4663
ulcerate;4664
mewithoutyou;4665
joe purdy;4666
t-bone walker;4667
sarah brightman;4668
chihiro onitsuka;4669
trophy eyes;4670
the receiving end of sirens;4671
king's x;4672
chad brownlee;4673
battlelore;4674
refused;4675
far-less;4676
brandy;4677
black stone cherry;4678
regina spektor;4679
house of pain;4680
220 volt;4681
ty herndon;4682
tarja;4683
aaron tippin;4684
slightly stoopid;4685
nic jones;4686
animal collective;4687
converge;4688
freddie king;4689
kelela;4690
the samples;4691
draconian;4692
the klezmatics;4693
rob crow;4694
country joe and the fish;4695
cage;4696
benny sings;4697
wolfstone;4698
w-inds.;4699
lin-manuel miranda;4700
azealia banks;4701
zap mama;4702
death threat;4703
will stratton;4704
audio adrenaline;4705
jars of clay;4706
shamir;4707
miranda cosgrove;4708
ginuwine;4709
hell razah;4710
thy primordial;4711
weh;4712
jason aldean;4713
philippe entremont;4714
smog;4715
benny hester;4716
otis redding;4717
the methadones;4718
karnivool;4719
tennessee ernie ford;4720
bob rivers;4721
r. kelly;4722
gang starr;4723
king crimson;4724
as we fight;4725
sons of butcher;4726
of the wand & the moon;4727
the cars;4728
salt the wound;4729
fair to midland;4730
lydia;4731
terri clark;4732
chris chameleon;4733
the project hate mcmxcix;4734
shadow gallery;4735
black francis;4736
gym class heroes;4737
bill nelson;4738
an cafe;4739
smokey robinson;4740
the duhks;4741
broadway;4742
fetty wap;4743
billy sprague;4744
norther;4745
six feet under;4746
republica;4747
liam finn;4748
highway 101;4749
maureen mcgovern;4750
dim mak;4751
the go-betweens;4752
old man's child;4753
fall out boy;4754
mia x;4755
bbmak;4756
cannabis corpse;4757
ludacris;4758
creature feature;4759
miss montreal;4760
eric burdon & the animals;4761
bumblefoot;4762
origin;4763
capital cities;4764
doro;4765
nox arcana;4766
hateen;4767
the band perry;4768
metallica;4769
buffalo springfield;4770
the hooters;4771
peaches & herb;4772
krystal meyers;4773
paul rodgers;4774
memphis jug band;4775
sofie;4776
maurizio pollini;4777
brook benton;4778
die verbannten kinder evas;4779
brainiac;4780
bearstronaut;4781
pierce the veil;4782
matthew friedberger;4783
victims family;4784
hell or highwater;4785
emerson drive;4786
caro emerald;4787
louis jordan;4788
the osmonds;4789
unspoken;4790
craig cardiff;4791
tomohisa yamashita;4792
ari hest;4793
dj jazzy jeff & the fresh prince;4794
dj drama & lil wayne;4795
ziggy;4796
waters;4797
シド (sid);4798
jeannie c. riley;4799
tilly and the wall;4800
karen matheson;4801
blind passengers;4802
brotha lynch hung;4803
harry nilsson;4804
young money;4805
marike jager;4806
good shoes;4807
tom tom club;4808
animaniacs;4809
solitude aeturnus;4810
george strait;4811
akinyele;4812
the honorary title;4813
carl wilson;4814
irving;4815
izzy stradlin;4816
the carter family;4817
gzr;4818
axxis;4819
marc e. bassy;4820
chipmunk;4821
gigi d'agostino;4822
tindersticks;4823
the dears;4824
the ocean;4825
raul malo;4826
joel plaskett;4827
trooper;4828
arnold schoenberg;4829
zayn;4830
hellsongs;4831
leftöver crack;4832
the waterboys;4833
levon helm;4834
gehenna;4835
force majeure;4836
take 6;4837
amy rigby;4838
lil baby;4839
north star;4840
mott the hoople;4841
dorsal atlântica;4842
sabrina carpenter;4843
julee cruise;4844
blu cantrell;4845
willard grant conspiracy;4846
faron young;4847
celtic woman;4848
tony joe white;4849
petal;4850
titus andronicus;4851
son house;4852
hungry lights;4853
eskimo callboy;4854
marnie;4855
seth walker;4856
the herd;4857
asobi seksu;4858
david crosby;4859
billy joe royal;4860
jinkx monsoon;4861
the replacements;4862
iio;4863
binärpilot;4864
young guns;4865
mel tormé;4866
the baseballs;4867
aphex twin;4868
mike ness;4869
snot;4870
blind willie mctell;4871
icon for hire;4872
jeff tweedy;4873
swollen members;4874
carly rae jepsen;4875
crystal viper;4876
larry the cable guy;4877
andre nickatina & equipto;4878
raphael saadiq;4879
mesmerize;4880
in flames;4881
kristy thirsk;4882
headlights;4883
deathspell omega;4884
joan armatrading;4885
red red meat;4886
devlin;4887
ladyhawke;4888
ween;4889
mates of state;4890
husky rescue;4891
rhett akins;4892
godsmack;4893
grandpa jones;4894
matt maher;4895
ill niño;4896
flatfoot 56;4897
josh garrels;4898
obey the brave;4899
damh the bard;4900
sara lov;4901
charles aznavour;4902
thin lizzy;4903
fatherson;4904
tristania;4905
binary star;4906
warlock;4907
ray j;4908
jamie grace;4909
mildred bailey;4910
heartsrevolution;4911
the dreaming;4912
the feelies;4913
lucy rose;4914
artifacts;4915
m (uk);4916
the nits;4917
the outfield;4918
freelance whales;4919
4minute;4920
babyland;4921
propagandhi;4922
lil johnson;4923
digital summer;4924
the tenors;4925
simply red;4926
de-phazz;4927
benny goodman;4928
foxy brown;4929
brainstorm;4930
helen kane;4931
les humphries singers;4932
the prids;4933
bruce cockburn;4934
superbus;4935
ben e. king;4936
cryonic temple;4937
renard;4938
tom robinson;4939
the cover girls;4940
i, the breather;4941
worm is green;4942
commodores;4943
julie doiron;4944
portugal. the man;4945
mike & the mechanics;4946
7 year bitch;4947
venetian snares;4948
johnny mercer;4949
grandaddy;4950
malia;4951
azure ray;4952
carolina liar;4953
nitronoise;4954
rufus wainwright;4955
the internet;4956
j-zone;4957
coronatus;4958
celestial season;4959
jimmy buffett;4960
freeway;4961
danielson;4962
zwan;4963
boys night out;4964
julia jacklin;4965
the fair sex;4966
okkervil river;4967
the raveonettes;4968
wilson phillips;4969
elane;4970
andi deris;4971
rocking chairs;4972
michael johnson;4973
rush;4974
the oak ridge boys;4975
rev. edward w. clayborn;4976
bonnie bianco;4977
ryan bingham;4978
jeremy enigk;4979
godhead;4980
before their eyes;4981
arsonists get all the girls;4982
empyrium;4983
bedlight for blue eyes;4984
rehab;4985
dead sara;4986
snafu;4987
roy acuff;4988
the partridge family;4989
malcolm holcombe;4990
portastatic;4991
thievery corporation;4992
elysium;4993
the other;4994
beardfish;4995
vanessa amorosi;4996
babylon whores;4997
pink floyd;4998
nana grizol;4999
charly mcclain;5000
chico debarge;5001
ten masked men;5002
beloved;5003
the shamen;5004
charlie peacock;5005
andr s schiff;5006
castanets;5007
cold cave;5008
michael schenker group;5009
house vs. hurricane;5010
the pentangle;5011
weerd science;5012
ghost brigade;5013
daysend;5014
manfred mann's earth band;5015
electrelane;5016
smokin' joe kubek & bnois king;5017
walter becker;5018
el perro del mar;5019
heaven & earth;5020
amebix;5021
stanfour;5022
vallenfyre;5023
tankard;5024
circa waves;5025
louis lortie;5026
hammock;5027
birdman;5028
esther phillips;5029
garnet rogers;5030
icona pop;5031
chvrches;5032
donald fagen;5033
kim mitchell;5034
canibus;5035
woods of ypres;5036
four letter lie;5037
eug ne ysa e;5038
biz markie;5039
tila tequila;5040
blue mountain;5041
buddy holly;5042
rootwater;5043
obie trice;5044
ferlin husky;5045
hinds;5046
emmylou harris;5047
hop along;5048
ian moore;5049
roger;5050
micky & the motorcars;5051
bebel gilberto;5052
rod stewart;5053
richard smallwood;5054
deftones;5055
suicide silence;5056
the wrights;5057
rudolf firku n ;5058
god module;5059
gregory alan isakov;5060
angus & julia stone;5061
real mccoy;5062
kate miller-heidke;5063
eric's trip;5064
woe, is me;5065
rex orange county;5066
jascha heifetz;5067
novembre;5068
ministry;5069
graveyard;5070
tone damli;5071
alien sex fiend;5072
eazy-e;5073
jj72;5074
sublime;5075
george formby;5076
trapeze;5077
mikhail pletnev;5078
roger clyne & the peacemakers;5079
rita coolidge;5080
xxxtentacion;5081
liz durrett;5082
collective soul;5083
second person;5084
mat kearney;5085
mr. 3-2;5086
ezio;5087
the republic of wolves;5088
stepdad;5089
mike oldfield;5090
red sun rising;5091
snfu;5092
saul williams;5093
justin townes earle;5094
upon a burning body;5095
bruce hungerford;5096
jeff scott soto;5097
big business;5098
dan tyminski;5099
infected mushroom;5100
elend;5101
john waite;5102
hilary hahn;5103
austin mahone;5104
pretty girls make graves;5105
billy idol;5106
stephen malkmus;5107
moddi;5108
galahad;5109
diana krall;5110
the be good tanyas;5111
!distain;5112
the isley brothers;5113
marilyn manson;5114
blackthorn;5115
ordinary time;5116
dragonforce;5117
i blame coco;5118
this providence;5119
t-pain;5120
capleton;5121
dc talk;5122
leon redbone;5123
optimus rhyme;5124
zedd;5125
black label society;5126
gary brooker;5127
melvins;5128
mindless faith;5129
the warning;5130
bombshell rocks;5131
the unthanks;5132
secrets;5133
joan jett and the blackhearts;5134
funeral for a friend;5135
aorta;5136
roger glover;5137
nitzer ebb;5138
amber pacific;5139
sneaker pimps;5140
insomnium;5141
danger radio;5142
lay low;5143
russ;5144
bliss n eso;5145
dj antoine;5146
to kill a king;5147
dubstar;5148
by the tree;5149
imelda may;5150
emil gilels;5151
redbone;5152
the highwaymen;5153
fear;5154
ry cooder;5155
ludo;5156
mance lipscomb;5157
shawn colvin;5158
bongzilla;5159
the promise ring;5160
dr. dog;5161
ronnie dunn;5162
the buffoons;5163
aimee mann;5164
chase & status;5165
rose maddox;5166
lights;5167
akissforjersey;5168
tommy shaw;5169
rotersand;5170
x japan;5171
richie furay;5172
provision;5173
gordon lightfoot;5174
primus;5175
die sektor;5176
megadeth;5177
agnetha fältskog;5178
angelspit;5179
machine gun kelly;5180
father;5181
cherry ghost;5182
nana;5183
ensign;5184
björk;5185
styx;5186
cinema bizarre;5187
tiamat;5188
chris mills;5189
rachael sage;5190
prāta vētra;5191
the hold steady;5192
phil lynott;5193
brian hyland;5194
we as human;5195
the wallflowers;5196
kalan porter;5197
freddie hart;5198
corey crowder;5199
the angels of light;5200
papermoon;5201
tommy mcclennan;5202
the paper chase;5203
ikon;5204
happy monster band;5205
modern talking;5206
philadelphia orchestra;5207
hellyeah;5208
heart of a coward;5209
state property;5210
howling bells;5211
shalamar;5212
the geraldine fibbers;5213
toots & the maytals;5214
walter trout;5215
michael o'brien;5216
sweet;5217
hate eternal;5218
carnival in coal;5219
céline dion;5220
lee hazlewood;5221
amy holland;5222
defleshed;5223
irma thomas;5224
the chieftains;5225
dexter freebish;5226
the lads;5227
peter bradley adams;5228
front line assembly;5229
blindside;5230
vulfpeck;5231
kontrust;5232
smosh;5233
boy & bear;5234
cruachan;5235
berried alive;5236
the raconteurs;5237
dälek;5238
julie andrews;5239
spoon;5240
mad caddies;5241
she keeps bees;5242
martha tilston;5243
le butcherettes;5244
the vines;5245
mothers;5246
biohazard;5247
doug macleod;5248
down;5249
maestro fresh-wes;5250
boston;5251
oh susanna;5252
goldfrapp;5253
sons of bill;5254
fun people;5255
the crown;5256
tim maia;5257
sevdaliza;5258
the little willies;5259
cupcakke;5260
poxy boggards;5261
damon intrabartolo;5262
mostly autumn;5263
jim reeves;5264
dir en grey;5265
robin beck;5266
the sounds;5267
migos;5268
de/vision;5269
larry santos;5270
combichrist;5271
milla jovovich;5272
luba;5273
sharon van etten;5274
forevermore;5275
roger daltrey;5276
lunik;5277
maroon;5278
the rolling stones;5279
jon secada;5280
yehudi menuhin;5281
stompin' tom connors;5282
r.l. burnside;5283
a tortured soul;5284
con hunley;5285
the supernaturals;5286
the kooks;5287
jeff beck;5288
pokey lafarge;5289
watermark;5290
au revoir simone;5291
matthew barber;5292
u-god;5293
blaze bayley;5294
haste the day;5295
chase rice;5296
ariana grande;5297
bukka white;5298
skew siskin;5299
monster magnet;5300
the oh hellos;5301
the pop group;5302
haim;5303
bay city rollers;5304
mustasch;5305
mc magic;5306
sherbet;5307
the tea party;5308
the choir;5309
woody guthrie;5310
hypocrisy;5311
big maceo;5312
the psychedelic furs;5313
ariel pink;5314
fourplay;5315
paw;5316
beirut;5317
french kicks;5318
the ronettes;5319
the durutti column;5320
therefore i am;5321
d-a-d;5322
eric martin;5323
andrea schroeder;5324
john hiatt;5325
incantation;5326
lisa marie presley;5327
high and mighty color;5328
vonda shepard;5329
asphyx;5330
israel vibration;5331
lordi;5332
nikki yanofsky;5333
the box tops;5334
jorma kaukonen;5335
juice newton;5336
woody's a girl;5337
matthew logan vasquez;5338
aretha franklin;5339
buddy miller;5340
ayreon;5341
mediæval bæbes;5342
accept;5343
robert schuman;5344
mina;5345
will.i.am;5346
marc almond;5347
nomeansno;5348
defeater;5349
suggs;5350
grobschnitt;5351
amon düül ii;5352
feeling left out;5353
breaking benjamin;5354
george michael;5355
bracket;5356
all-4-one;5357
niccol paganini;5358
la coka nostra;5359
ozzy osbourne;5360
the bobs;5361
jay reatard;5362
satellites;5363
gwen stacy;5364
soulspell;5365
anchor;5366
girls' generation;5367
lacy j. dalton;5368
lil mama;5369
florrie;5370
lesley gore;5371
sara k.;5372
upon this dawning;5373
barry adamson;5374
christine and the queens;5375
timbaland;5376
pj morton;5377
the divine comedy;5378
bascom lamar lunsford;5379
cilla black;5380
spank rock;5381
jeff healey;5382
molotov solution;5383
matthew fisher;5384
francis poulenc;5385
anata;5386
sara watkins;5387
amy macdonald;5388
coldworker;5389
london symphony;5390
danny brown;5391
kandi;5392
bic runga;5393
iggy pop;5394
die toten hosen;5395
tuck & patti;5396
richard goode;5397
blind boy fuller;5398
sonny & cher;5399
jeremih;5400
spiritual front;5401
d'espairsray;5402
shirley bassey;5403
annuals;5404
bros;5405
charlotte martin;5406
ramones;5407
paper route;5408
cretin;5409
streetwalkers;5410
number one gun;5411
the smithereens;5412
belvedere;5413
tech n9ne;5414
corinne bailey rae;5415
dawn of ashes;5416
into eternity;5417
corb lund;5418
faz l say;5419
current swell;5420
albert collins;5421
lita ford;5422
rudimental;5423
nightmare of you;5424
josef lh vinne;5425
stephen bishop;5426
richie kotzen;5427
run-d.m.c.;5428
the moody blues;5429
divinyls;5430
good old war;5431
anders johansson;5432
gandalf's fist;5433
baccara;5434
miniature tigers;5435
rare earth;5436
the walker brothers;5437
molly johnson;5438
we five;5439
the verve pipe;5440
the foreshadowing;5441
sam the sham & the pharaohs;5442
fifth harmony;5443
madonna;5444
juluka;5445
dynazty;5446
michael schulte;5447
thompson twins;5448
lil wyte;5449
mike batt;5450
trial;5451
cisco houston;5452
callisto;5453
darkest hour;5454
brian may;5455
david wilcox;5456
napalm death;5457
never heard of it;5458
jj grey & mofro;5459
eric andersen;5460
billie piper;5461
chromeo;5462
youri egorov;5463
aaron lewis;5464
vince neil;5465
buddy guy & junior wells;5466
róisín murphy;5467
in the woods...;5468
aesma daeva;5469
electric guest;5470
murray mclauchlan;5471
thee oh sees;5472
nick kamen;5473
eloy;5474
brian eno;5475
rabbit junk;5476
suidakra;5477
mint condition;5478
extreme;5479
kelley stoltz;5480
mattafix;5481
tiara thomas;5482
fugees;5483
warpaint;5484
selena gomez & the scene;5485
hot boy$;5486
allen toussaint;5487
skrillex;5488
john schneider;5489
midlake;5490
the supremes;5491
rodney crowell;5492
everyone everywhere;5493
theatre of tragedy;5494
s.f.a.;5495
dreamtale;5496
count bass d;5497
ivy sole;5498
bobby blue bland;5499
popcaan;5500
tab benoit;5501
grinspoon;5502
grizzly bear;5503
kirk franklin;5504
grateful dead;5505
21 guns;5506
scouting for girls;5507
fay lovsky;5508
the fullblast;5509
death;5510
the elected;5511
transvision vamp;5512
keith urban;5513
left spine down;5514
the nylons;5515
alien ant farm;5516
otep;5517
ashton shepherd;5518
paradise lost;5519
hello saferide;5520
john kay;5521
the beach boys;5522
gregory porter;5523
ricky martin;5524
wayne hancock;5525
youn sun nah;5526
winger;5527
havoc;5528
los campesinos!;5529
shaun cassidy;5530
chevelle;5531
barbara mason;5532
rita wilson;5533
richie havens;5534
scythe;5535
d.r.i.;5536
matt andersen;5537
fifth angel;5538
trail of tears;5539
asaf avidan & the mojos;5540
christopher lee;5541
tripod;5542
crywank;5543
tank;5544
tom paxton;5545
leon russell;5546
adam green;5547
anarbor;5548
the unicorns;5549
evidence;5550
stetsasonic;5551
the gabe dixon band;5552
prince;5553
day26;5554
rhythms del mundo;5555
saviour machine;5556
alina simone;5557
dick haymes;5558
hugh laurie;5559
jc chasez;5560
johnny clegg & savuka;5561
rivers of nihil;5562
overkill;5563
guy;5564
memphis slim & willie dixon;5565
jocelyn & chris arndt;5566
mechanical moth;5567
pat benatar;5568
eden's curse;5569
gene pitney;5570
rodriguez;5571
jamala;5572
jerry garcia band;5573
demon;5574
backstreet boys;5575
cocorosie;5576
savatage;5577
rosemary clooney;5578
amerie;5579
ian dury and the blockheads;5580
pantokrator;5581
the lox;5582
supertramp;5583
carnal forge;5584
this is hell;5585
papooz;5586
julia holter;5587
traffic;5588
gary lewis & the playboys;5589
leopold godowsky;5590
inferi;5591
remembering never;5592
the radio dept.;5593
blind willie johnson;5594
gary chapman;5595
mutual benefit;5596
dragonette;5597
crooked fingers;5598
black mountain;5599
shampoo;5600
onslaught;5601
big moe;5602
the tragically hip;5603
dead by april;5604
john parr;5605
chameleon circuit;5606
all;5607
greeley estates;5608
herbie hancock;5609
karmakanic;5610
coffin break;5611
blood orange;5612
alborosie;5613
aeternus;5614
rich boy;5615
cledus t. judd;5616
bobby brown;5617
zebra;5618
scott matthew;5619
winter's bane;5620
kane & abel;5621
jackson c. frank;5622
maura o'connell;5623
color me badd;5624
christina aguilera;5625
3rd bass;5626
danny gokey;5627
galactic cowboys;5628
sabaton;5629
howard jones;5630
s.p.o.c.k;5631
heather alexander;5632
ingested;5633
terror jr;5634
enuff z'nuff;5635
the gothic archies;5636
robert ellis;5637
nancy wilson;5638
dead kennedys;5639
milow;5640
hall & oates;5641
the mynabirds;5642
grass widow;5643
mew;5644
chris young;5645
crest of darkness;5646
b.j. thomas;5647
sister sledge;5648
john lee hooker and canned heat;5649
screeching weasel;5650
cassandra wilson;5651
terry reid;5652
maps;5653
katy perry;5654
swmrs;5655
neurotech;5656
george gershwin;5657
april wine;5658
powerwolf;5659
yellowcard;5660
the kry;5661
barbarossa;5662
blackguard;5663
rjd2;5664
angelo de augustine;5665
brand nubian;5666
iron reagan;5667
snap!;5668
the expos;5669
paula abdul;5670
bahamas;5671
olive;5672
gene simmons;5673
augustana;5674
vicious crusade;5675
mennen;5676
arsonists;5677
fred penner;5678
amen;5679
mae;5680
the stylistics;5681
bill monroe;5682
aeon zen;5683
paul williams;5684
ultraviolet sound;5685
omnia;5686
dave cousins;5687
silent stream of godless elegy;5688
david lindley;5689
title fight;5690
stevie nicks;5691
disturbed;5692
the lumineers;5693
wondermints;5694
necromantia;5695
anton bruckner;5696
john hammond;5697
counterparts;5698
the pursuit of happiness;5699
dougie maclean;5700
domo genesis;5701
keaton henson;5702
the electric hellfire club;5703
casting crowns;5704
her space holiday;5705
lindi ortega;5706
toy dolls;5707
kobra and the lotus;5708
velvet acid christ;5709
hafdís huld;5710
dead infection;5711
blues traveler;5712
hawthorne heights;5713
emf;5714
the secret handshake;5715
einstürzende neubauten;5716
rednex;5717
aztec camera;5718
heart in hand;5719
easyworld;5720
shlomo mintz;5721
earl wild;5722
french montana;5723
prime sth;5724
craig david;5725
blind pilot;5726
stratovarius;5727
nina nesbitt;5728
fiddler's green;5729
skyclad;5730
caitlyn smith;5731
daniel lavoie;5732
diamond rio;5733
the four lads;5734
die warzau;5735
funker vogt;5736
black tusk;5737
bob seger;5738
labyrinth;5739
teodasia;5740
magnapop;5741
dødheimsgard;5742
barrio boyzz;5743
jesse malin;5744
the brothers johnson;5745
the obsessed;5746
lucky boys confusion;5747
lemon jelly;5748
cock sparrer;5749
itzhak perlman;5750
amberian dawn;5751
moxy früvous;5752
ugress;5753
the thermals;5754
common;5755
torres;5756
badlands;5757
ron kenoly;5758
wide mouth mason;5759
run kid run;5760
qntal;5761
patty larkin;5762
the answer;5763
la bouche;5764
abba;5765
melanie thornton;5766
limp bizkit;5767
danny wilde;5768
against all authority;5769
志方あきこ;5770
a plea for purging;5771
chris caffery;5772
7l & esoteric;5773
jagged edge;5774
allo darlin';5775
domenico scarlatti;5776
b-legit;5777
hundreds;5778
jonathan richman and the modern lovers;5779
catharsis;5780
son volt;5781
electric valentine;5782
gino vannelli;5783
call the cops;5784
miss may i;5785
double you;5786
the soul stirrers;5787
adrenaline mob;5788
timothy seth avett as darling;5789
raging fyah;5790
ナイトメア (nightmare);5791
selena gomez;5792
franco battiato;5793
sons of seasons;5794
aaron shust;5795
august alsina;5796
ghoul;5797
mustard plug;5798
the white stripes;5799
dead stop;5800
slim;5801
project 86;5802
lower dens;5803
stephen fretwell;5804
off!;5805
psychostick;5806
radney foster;5807
black uhuru;5808
one without;5809
the presidents of the united states of america;5810
phil ochs;5811
stealers wheel;5812
the angels;5813
joy williams;5814
nick jonas;5815
owl city;5816
the gourds;5817
cowboy junkies;5818
cru;5819
the rembrandts;5820
useless id;5821
jessica andrews;5822
big black;5823
my brightest diamond;5824
johnny kidd & the pirates;5825
moonface;5826
angra;5827
john mccutcheon;5828
sharon jones & the dap-kings;5829
brother dege;5830
john p. kee;5831
armin van buuren;5832
houndmouth;5833
the spencer davis group;5834
poison idea;5835
carach angren;5836
the horrors;5837
johnny paycheck;5838
primal fear;5839
joanna newsom;5840
weezer;5841
bluehorses;5842
architecture in helsinki;5843
steve mcconnell;5844
townes van zandt;5845
johnny cash & june carter cash;5846
dust of basement;5847
j dilla;5848
too pure to die;5849
car seat headrest;5850
rita springer;5851
max romeo;5852
calabrese;5853
trey anastasio;5854
young thug;5855
harry and the potters;5856
clifford t. ward;5857
confederate railroad;5858
ice mc;5859
tyler shaw;5860
will smith;5861
anna tivel;5862
the pierces;5863
sabbat;5864
papercuts;5865
trouble over tokyo;5866
a storm of light;5867
earl scruggs;5868
limahl;5869
band of horses;5870
suspyre;5871
shy girls;5872
blood;5873
madness;5874
georgie fame;5875
glass tiger;5876
50 cent;5877
mike doughty;5878
y-o-u;5879
ass ponys;5880
mary gauthier;5881
goldfinger;5882
delbert mcclinton;5883
freak kitchen;5884
lalaine;5885
destiny's child;5886
thirsty merc;5887
daniel bedingfield;5888
armored saint;5889
¡mayday!;5890
cheryl cole;5891
richie spice;5892
luscious jackson;5893
altan;5894
evanescence;5895
luther vandross;5896
steve wariner;5897
deadlock;5898
brenda lee;5899
noe venable;5900
korn;5901
the letter black;5902
star fucking hipsters;5903
daniel o'donnell;5904
theatres des vampires;5905
the dogma;5906
ria mae;5907
thy art is murder;5908
mali music;5909
eydie gorme;5910
housefires;5911
brooks & dunn;5912
million dead;5913
kurt vile;5914
3lw;5915
helix;5916
judy collins;5917
albert hammond;5918
coroner;5919
red flag;5920
ralph vaughan williams;5921
infected rain;5922
ann wilson;5923
anthony evans;5924
christina milian;5925
taco;5926
lee greenwood;5927
jon anderson;5928
bun b;5929
skye sweetnam;5930
britney spears;5931
peter serkin;5932
saywecanfly;5933
gordon haskell;5934
grouplove;5935
walter egan;5936
malinky;5937
mandy barnett;5938
mystery skulls;5939
jeremy larson;5940
charley patton;5941
modern english;5942
inhale exhale;5943
avantasia;5944
huntingtons;5945
shudder to think;5946
the brand new heavies;5947
slice the cake;5948
sick of sarah;5949
winds;5950
the rakes;5951
ray lamontagne;5952
haircut 100;5953
your demise;5954
exposé;5955
narada michael walden;5956
lord of the lost;5957
the rubettes;5958
aloe blacc;5959
jeff wayne;5960
in this moment;5961
the move;5962
machine men;5963
orenda fink;5964
tina dico;5965
ziggy marley & the melody makers;5966
noël coward;5967
bonfire;5968
hawkwind;5969
jessie j;5970
emitt rhodes;5971
john martyn;5972
blue öyster cult;5973
the silver shine;5974
tex ritter;5975
kishi bashi;5976
sonny james;5977
mind's eye;5978
the sleeping;5979
the derek trucks band;5980
atmosphere;5981
lauren daigle;5982
taj weekes & adowa;5983
ricky skaggs;5984
alabama shakes;5985
black star;5986
gong;5987
viper;5988
fruit bats;5989
allie x;5990
josh woodward;5991
kungfu rick;5992
the weavers;5993
blood, sweat & tears;5994
the stooges;5995
the white birch;5996
john mayall;5997
diamond d;5998
luigi boccherini;5999
half man half biscuit;6000
ralph mctell;6001
lisa brokop;6002
son lux;6003
wumpscut;6004
beneath the massacre;6005
nine inch nails;6006
ancient rites;6007
drop dead, gorgeous;6008
no mercy;6009
lene lovich;6010
widowmaker;6011
the microphones;6012
rita connolly;6013
generation x;6014
assassin;6015
horse feathers;6016
lola monroe;6017
bette midler;6018
gentleman;6019
the crystal method;6020
crystal bernard;6021
black 47;6022
starbenders;6023
landmine marathon;6024
múm;6025
califone;6026
allister;6027
for all those sleeping;6028
fgfc820;6029
anner bylsma;6030
lilys;6031
triptykon;6032
danger doom;6033
big k.r.i.t.;6034
assemblage 23;6035
howie day;6036
miike snow;6037
diiv;6038
the rapture;6039
the civil wars;6040
liege lord;6041
nicolette larson;6042
slayer;6043
2pac;6044
16;6045
grigory sokolov;6046
martin carthy;6047
bowerbirds;6048
ginger;6049
luka bloom;6050
young jeezy;6051
kate rusby;6052
the pretty things;6053
2 chainz;6054
man overboard;6055
paul tortelier;6056
doyle lawson & quicksilver;6057
timber timbre;6058
johnny rodriguez;6059
and one;6060
grand funk railroad;6061
kiss;6062
elysian fields;6063
ace enders & a million different people;6064
hoods;6065
frenzal rhomb;6066
army of freshmen;6067
unter null;6068
frankie lee sims;6069
joe;6070
sahara hotnights;6071
alison moyet;6072
janis ian;6073
delinquent habits;6074
heffron drive;6075
jackie wilson;6076
osi;6077
mobb deep;6078
corey cerovsek;6079
pras;6080
buckcherry;6081
dave davies;6082
beto vázquez infinity;6083
drowning the light;6084
attack attack!;6085
day of fire;6086
ida haendel;6087
ytcracker;6088
u2;6089
stan freberg;6090
saint lu;6091
jj;6092
robert bradley's blackwater surprise;6093
pharrell williams;6094
no trend;6095
darkwell;6096
van dyke parks;6097
cannonball statman;6098
thursday;6099
kathleen edwards;6100
sentenced;6101
crown the empire;6102
crimson thorn;6103
george benson;6104
the concretes;6105
kahimi karie;6106
jimi hendrix;6107
dock boggs;6108
tom vek;6109
(spunge);6110
the refreshments;6111
twilightning;6112
the-dream;6113
portishead;6114
eamon;6115
soulfly;6116
gungor;6117
stemm;6118
mc hammer;6119
the kinks;6120
budgie;6121
pistol annies;6122
froggy fresh;6123
shaye;6124
tlc;6125
the chameleons;6126
boss hogg outlawz;6127
starbomb;6128
sleigh bells;6129
inquisition;6130
h-blockx;6131
the ting tings;6132
johnny hallyday;6133
night ranger;6134
bowes & morley;6135
ronan keating;6136
d'sound;6137
miki howard;6138
sadat x;6139
gloria estefan;6140
mighty sparrow;6141
the shins;6142
starfield;6143
plan b;6144
marcus orelias;6145
neaera;6146
miss black america;6147
arena;6148
assück;6149
a. l. lloyd;6150
los lobos;6151
artur schnabel;6152
boyce avenue;6153
chixdiggit!;6154
stevie wonder;6155
howlin rain;6156
junior wells;6157
con funk shun;6158
nickel creek;6159
there for tomorrow;6160
brian peters;6161
mgmt;6162
his name is alive;6163
jaya the cat;6164
chiodos;6165
teacup monster;6166
har mar superstar;6167
alkaline trio;6168
kidz bop;6169
baxter;6170
scary bitches;6171
ron wood;6172
dashboard confessional;6173
iron fire;6174
shimshai;6175
carl maria von weber;6176
aversions crown;6177
apartment 26;6178
alcazar;6179
skeleton key;6180
the burning hell;6181
bonded by blood;6182
bob marley & the wailers;6183
spirit;6184
the jackson 5;6185
george morgan;6186
alabama 3;6187
the sensational alex harvey band;6188
drapht;6189
nevertheless;6190
china crisis;6191
selena;6192
bodies of water;6193
crush 40;6194
architects;6195
darius rucker;6196
smile.dk;6197
puhdys;6198
savoy brown;6199
rose royce;6200
plumb;6201
roger waters;6202
doc watson;6203
neal morse;6204
edwin fischer;6205
tracy chapman;6206
the who;6207
cal smith;6208
mos def;6209
matt cardle;6210
dr. dre;6211
ronnie milsap;6212
anthrax;6213
tw walsh;6214
numb;6215
jenny o.;6216
lock up;6217
bear in heaven;6218
susanna hoffs;6219
jessie ware;6220
eric bogle;6221
johnny thunders;6222
advance base;6223
sara gazarek;6224
misfits;6225
the used;6226
catie curtis;6227
thundercat;6228
derek minor;6229
basshunter;6230
johann pachelbel;6231
whitehorse;6232
bessie smith;6233
mike garrigan;6234
brownie mcghee;6235
otis rush;6236
negative;6237
slugdge;6238
bang gang;6239
debbie gibson;6240
da vinci's notebook;6241
nargaroth;6242
champion jack dupree;6243
hubert kah;6244
ben lee;6245
gary barlow;6246
math and physics club;6247
eighteen visions;6248
supersuckers;6249
the fixx;6250
amazing blondel;6251
morcheeba;6252
petula clark;6253
the youngbloods;6254
carman;6255
southern culture on the skids;6256
evan taubenfeld;6257
tracy grammer;6258
maritime;6259
randy vanwarmer;6260
believer;6261
the grapes of wrath;6262
angelina;6263
john prine;6264
dee snider;6265
jon and vangelis;6266
the seer;6267
george jones;6268
amanda perez;6269
lonnie donegan;6270
lara fabian;6271
dr. acula;6272
cockney rejects;6273
junkie xl;6274
nasty c;6275
the shadows;6276
this beautiful republic;6277
12 rods;6278
action action;6279
jann arden;6280
the oppressed;6281
emanuel;6282
jennifer rush;6283
ufo;6284
leroy anderson;6285
the submarines;6286
vanilla ice;6287
the murder city devils;6288
johnny mathis;6289
michael card;6290
rheostatics;6291
no knife;6292
curved air;6293
mu330;6294
bat for lashes;6295
abigor;6296
new boyz;6297
nb ridaz;6298
brian mcfadden;6299
kylesa;6300
randy bachman;6301
brandon heath;6302
the adicts;6303
longwave;6304
earl thomas conley;6305
lethian dreams;6306
conchita wurst;6307
southgang;6308
david oistrakh;6309
a change of pace;6310
it lives, it breathes;6311
the format;6312
enrico caruso;6313
ron hawkins;6314
sufjan stevens;6315
krystian zimerman;6316
it prevails;6317
redrama;6318
warcloud;6319
the kids from fame;6320
brodka;6321
nujabes;6322
bt;6323
voltaire;6324
one be lo;6325
damon & naomi;6326
datarock;6327
willow smith;6328
snowy white;6329
mercyful fate;6330
veruca salt;6331
joe bonamassa;6332
rag'n'bone man;6333
aqualung;6334
solomon burke;6335
vicious rumors;6336
spitalfield;6337
hardcore superstar;6338
vern gosdin;6339
yendri;6340
pernice brothers;6341
vangough;6342
t-bone;6343
sopor aeternus;6344
day at the fair;6345
feed her to the sharks;6346
dokken;6347
teena marie;6348
json;6349
kaipa;6350
hidden in plain view;6351
belinda carlisle;6352
neil cicierega;6353
brenda russell;6354
esther ofarim;6355
sea wolf;6356
fm laeti;6357
james hunter;6358
the soul of john black;6359
david bromberg;6360
marc cohn;6361
the duskfall;6362
galantis;6363
tender;6364
martin jondo;6365
ricky nelson;6366
jeremy spencer;6367
colin linden;6368
john cooper clarke;6369
the seekers;6370
abra moore;6371
breathe carolina;6372
lily & madeleine;6373
disarmonia mundi;6374
circa survive;6375
matt bianco;6376
celtic thunder;6377
flowing tears;6378
lee fields;6379
kate ryan;6380
meg & dia;6381
evils toy;6382
choirboys;6383
tedashii;6384
poni hoax;6385
yo yo ma;6386
the broken family band;6387
l'âme immortelle;6388
psalters;6389
benjamin britten;6390
shakira;6391
rabia sorda;6392
ad;6393
the cross;6394
billy boy arnold;6395
florida georgia line;6396
t.i.;6397
marty willson-piper;6398
client;6399
jack ingram;6400
sash!;6401
deathstars;6402
the english beat;6403
mitchel musso;6404
wintersleep;6405
the smiths;6406
fleetwood mac;6407
molly hatchet;6408
pet shop boys;6409
daryl hall;6410
after all;6411
j moss;6412
rüfüs du sol;6413
exodus;6414
francis dunnery;6415
4 strings;6416
heather headley;6417
besatt;6418
foreigner;6419
the pussycat dolls;6420
serena ryder;6421
white lion;6422
tim hardin;6423
harry connick, jr.;6424
islands;6425
arrested development;6426
coco montoya;6427
arcana;6428
marduk;6429
keith whitley;6430
al martino;6431
scatman john;6432
marvin gaye & tammi terrell;6433
eddie murphy;6434
rihanna;6435
candi staton;6436
my favorite;6437
the trews;6438
coven 13;6439
leo sayer;6440
lil' keke;6441
jewel;6442
firehouse;6443
claudio monteverdi;6444
negative approach;6445
ben caplan;6446
aïboforcen;6447
ryan shupe & the rubberband;6448
bethel music;6449
the courteeners;6450
mortal love;6451
yung lean;6452
altar boys;6453
aesop rock;6454
bret michaels;6455
die so fluid;6456
don mclean;6457
my morning jacket;6458
unearthly trance;6459
the war on drugs;6460
limp;6461
drake bell;6462
cky;6463
b.b. king;6464
mama cass;6465
dirty heads;6466
buffy sainte-marie;6467
dire straits;6468
menudo;6469
dolorian;6470
natalie imbruglia;6471
flunk;6472
carpark north;6473
fatso jetson;6474
hourglass;6475
graveland;6476
bert jansch;6477
the left banke;6478
the sound;6479
nancy lamott;6480
the mighty mighty bosstones;6481
lake of tears;6482
paper aeroplanes;6483
the archies;6484
the wonder years;6485
con brio;6486
the treatment;6487
fats domino;6488
heaven shall burn;6489
the suburbs;6490
laura gibson;6491
an angle;6492
hunters & collectors;6493
jellyfish;6494
jim capaldi;6495
hybrid;6496
five for fighting;6497
kevin fowler;6498
douwe bob;6499
george nozuka;6500
american head charge;6501
roots manuva;6502
cephalic carnage;6503
prodigy;6504
far east movement;6505
the spinners;6506
plankeye;6507
pitbull;6508
reggie and the full effect;6509
frightened rabbit;6510
i see stars;6511
lorde;6512
vanessa williams;6513
oh land;6514
luciano;6515
shockwave;6516
i set my friends on fire;6517
eagles of death metal;6518
above & beyond;6519
mindless self indulgence;6520
sun kil moon;6521
strawbs;6522
vision divine;6523
julien-k;6524
the velvet teen;6525
jack greene;6526
devo;6527
caesar;6528
ritchie valens;6529
andr previn;6530
116 clique;6531
ali project;6532
leæther strip;6533
take that;6534
five iron frenzy;6535
eugenio finardi;6536
star one;6537
barbara mandrell;6538
to speak of wolves;6539
massive ego;6540
my ticket home;6541
the riverboat gamblers;6542
epic rap battles of history;6543
revolver;6544
waltari;6545
k. michelle;6546
natural;6547
madder mortem;6548
crystal gayle;6549
yoko ono;6550
robbie fulks;6551
xandria;6552
beyoncé;6553
beth hirsch;6554
passion pit;6555
magnum;6556
william beckett;6557
the beautiful south;6558
shakin' stevens;6559
samantha fox;6560
england dan & john ford coley;6561
orchestral manoeuvres in the dark;6562
chrom;6563
deana carter;6564
dan seals;6565
crimson moonlight;6566
alvin lee;6567
army of lovers;6568
the friday night boys;6569
chris august;6570
stephen lynch;6571
loudon wainwright iii;6572
the helio sequence;6573
partynextdoor;6574
roberta flack;6575
mr. bungle;6576
sóley;6577
bruce springsteen;6578
dmitry bashkirov;6579
billy preston;6580
department of eagles;6581
denison witmer;6582
modest petrovich mussorgsky;6583
antonio meneses;6584
kathryn williams;6585
jim ed brown;6586
arabesque;6587
m83;6588
johann strauss ii;6589
agnes;6590
alannah myles;6591
most precious blood;6592
incubus;6593
rialto;6594
a.c.t;6595
klone;6596
jp cooper;6597
hate dept.;6598
anderson .paak;6599
viva voce;6600
talisco;6601
survivor;6602
the manhattan transfer;6603
van cliburn;6604
maylene and the sons of disaster;6605
brendan perry;6606
derek and the dominos;6607
kovacs;6608
the association;6609
fischer-z;6610
fred neil;6611
letlive;6612
aberfeldy;6613
onyx;6614
dig;6615
smokie;6616
gabriel brown;6617
jakob dylan;6618
imogen heap;6619
lacrosse;6620
the kovenant;6621
lotte kestner;6622
das pop;6623
andreya triana;6624
the delmore brothers;6625
talking heads;6626
ty england;6627
we are the in crowd;6628
nick drake;6629
dead or alive;6630
jessica harp;6631
deathgaze;6632
more machine than man;6633
rage;6634
kim churchill;6635
5 chinese brothers;6636
emperor;6637
the mavericks;6638
aloha;6639
999;6640
leonard cohen;6641
gabrielle aplin;6642
living sacrifice;6643
matt mays;6644
too bad eugene;6645
crystal fighters;6646
harlan howard;6647
wendy matthews;6648
danny kirwan;6649
john barrowman;6650
those dancing days;6651
thor;6652
digger;6653
steve earle;6654
penal colony;6655
davey suicide;6656
rise against;6657
iron maiden;6658
world party;6659
daforce;6660
the monkees;6661
yukmouth;6662
demolition hammer;6663
edguy;6664
winds of plague;6665
flatsound;6666
james "j.t." taylor;6667
dave alvin;6668
dimmu borgir;6669
kreator;6670
pop etc;6671
c.w. mccall;6672
green river ordinance;6673
dave dudley;6674
steely dan;6675
murderdolls;6676
de la soul;6677
the dayton family;6678
transatlantic;6679
neneh cherry;6680
pete townshend;6681
the red jumpsuit apparatus;6682
abandon all ships;6683
john brown's body;6684
karl wolf;6685
los pericos;6686
this mortal coil;6687
emily haines;6688
pretenders;6689
boytronic;6690
bloodgood;6691
unit;6692
boyzone;6693
ian & sylvia;6694
jesse harris & the ferdinandos;6695
haley reinhart;6696
sinergy;6697
gareth gates;6698
kevin lyttle;6699
cast;6700
britny fox;6701
jack jones;6702
billy joel;6703
the maine;6704
david bazan;6705
hate;6706
intwine;6707
pigface;6708
lali puna;6709
david usher;6710
the shirelles;6711
gerald moore;6712
nicole dollanganger;6713
rex goudie;6714
pablo casals;6715
the veils;6716
low pop suicide;6717
graham colton;6718
john entwistle;6719
meshuggah;6720
chris webby;6721
jimmy dawkins;6722
russ taff;6723
gabriella cilmi;6724
balto;6725
peetie wheatstraw;6726
gary clark jr.;6727
the clovers;6728
the agony scene;6729
roland grapow;6730
gary stewart;6731
buddy guy;6732
necrodeath;6733
st. vincent;6734
firefall;6735
slechtvalk;6736
the boys;6737
the twins;6738
chandeen;6739
originoo gunn clappaz;6740
annie herring;6741
shai linne;6742
steve carlson;6743
the gray havens;6744
matthew sweet and susanna hoffs;6745
livingston taylor;6746
pro-pain;6747
elbow;6748
mandalay;6749
sirenia;6750
modern skirts;6751
jasmine thompson;6752
marcia griffiths;6753
the swellers;6754
michael monroe;6755
tom rosenthal;6756
florence + the machine;6757
thunder;6758
amber;6759
grave;6760
violent work of art;6761
gene vincent;6762
sarina paris;6763
polyenso;6764
mark seymour & the undertow;6765
tism;6766
the dubliners;6767
bonnie raitt;6768
michelle williams;6769
blank & jones;6770
walls of jericho;6771
lupe fiasco;6772
james marsters;6773
metal church;6774
excision;6775
keith murray;6776
john ogdon;6777
the low anthem;6778
chris merritt;6779
maysa leak;6780
henry fiat's open sore;6781
ernestine anderson;6782
capital lights;6783
the cooper temple clause;6784
alan jackson;6785
hey;6786
pathology;6787
randy rogers band;6788
tink;6789
flesh field;6790
vinyl theatre;6791
dystopia;6792
jill barber;6793
the long blondes;6794
the color morale;6795
giuseppe verdi;6796
karin park;6797
amy grant;6798
miasmal;6799
gene watson;6800
page & plant;6801
acid witch;6802
adagio;6803
lisa ekdahl;6804
martin page;6805
triumvirat;6806
s.l.a.b.;6807
8ball & mjg;6808
better luck next time;6809
marble sounds;6810
whitney houston;6811
ringo starr;6812
the scabs;6813
the whitest boy alive;6814
third eye blind;6815
thornley;6816
herbert grönemeyer;6817
stereo skyline;6818
amos lee;6819
shawn james;6820
parkway drive;6821
trippie redd;6822
vanessa carlton;6823
guardian;6824
blowsight;6825
san francisco symphony;6826
five;6827
geoff moore;6828
david meece;6829
enigma;6830
cold chisel;6831
the impossibles;6832
sondre lerche;6833
hey monday;6834
sol gabetta;6835
machinae supremacy;6836
p.m. dawn;6837
johnny rivers;6838
mickey newbury;6839
sandy & junior;6840
taylor swift;6841
aaradhna;6842
edward elgar;6843
lamb of god;6844
antestor;6845
doom;6846
arcturus;6847
kingdom come;6848
flo rida;6849
afroman;6850
mickey gilley;6851
donna regina;6852
erasure;6853
leonard nimoy;6854
susan raye;6855
die form;6856
allure;6857
quincy punx;6858
secret army;6859
perry como;6860
aldo nova;6861
the loud family;6862
the suicide file;6863
maggie rogers;6864
melanie;6865
andrew lloyd webber;6866
sparks;6867
angels & agony;6868
shooter jennings;6869
m people;6870
the story so far;6871
aqua;6872
scott matthews;6873
chemlab;6874
apollo sunshine;6875
mason proper;6876
bobby o;6877
action adventure world;6878
hawk nelson;6879
audrey assad;6880
jerry lee lewis;6881
andy m. stewart;6882
grimskunk;6883
rudy vallée;6884
erin mccarley;6885
z-ro;6886
crooked still;6887
nadeah;6888
marah;6889
sinner;6890
saint raymond;6891
devin townsend;6892
james young;6893
dillon;6894
blackrain;6895
crisis;6896
loney, dear;6897
dark the suns;6898
damone;6899
big big train;6900
pride & glory;6901
cancer bats;6902
dismantled;6903
the cheetah girls;6904
the saints;6905
jonathan larson;6906
everything but the girl;6907
glass hammer;6908
chairlift;6909
scarve;6910
mai kuraki;6911
travie mccoy;6912
steeleye span;6913
chris de burgh;6914
james otto;6915
chris rea;6916
forever slave;6917
bobby vee;6918
susannah mccorkle;6919
goodie mob;6920
girls aloud;6921
lake street dive;6922
frankenstein drag queens from planet 13;6923
vance joy;6924
anubis;6925
sabrina starke;6926
level;6927
kick axe;6928
nanci griffith;6929
ben moody;6930
epica;6931
field music;6932
lady antebellum;6933
grieves;6934
osborne brothers;6935
judie tzuke;6936
rick james;6937
willie d;6938
at the gates;6939
daniel shafran;6940
mr. mister;6941
beulah;6942
robert cray;6943
hatesphere;6944
tim fite;6945
clay walker;6946
the four seasons;6947
vintage trouble;6948
the rankin family;6949
chaka khan;6950
paul wall;6951
brian wilson;6952
alestorm;6953
明星 (akeboshi);6954
billy gilman;6955
jennifer kimball;6956
the charlie daniels band;6957
john mellencamp;6958
revamp;6959
lost in tears;6960
just jinger;6961
mental as anything;6962
dark tranquillity;6963
c-lekktor;6964
the trammps;6965
hb;6966
the ready set;6967
joey tempest;6968
baton rouge;6969
lana del rey;6970
ms. dynamite;6971
radio birdman;6972
cows;6973
pansy division;6974
klaatu;6975
ryan delmore;6976
laurie anderson;6977
trespassers william;6978
geoff berner;6979
black sabbath;6980
eric saade;6981
meja;6982
cannibal corpse;6983
with confidence;6984
jim jones;6985
blake shelton;6986
the nightwatchman;6987
elephant man;6988
massive attack;6989
lee kernaghan;6990
tomorrows bad seeds;6991
aynsley lister;6992
hanoi rocks;6993
mono inc.;6994
linton kwesi johnson;6995
richard & linda thompson;6996
lard;6997
tq;6998
blue café;6999
blackjack;7000
inspectah deck;7001
harry james;7002
no fun at all;7003
bad brains;7004
herman's hermits;7005
intense;7006
mink deville;7007
future islands;7008
the midnight beast;7009
sarah connor;7010
psycho motel;7011
last train home;7012
alberta cross;7013
helen humes;7014
easton corbin;7015
yngwie malmsteen;7016
tinfed;7017
thee silver mt. zion;7018
lalah hathaway;7019
sheavy;7020
slaughter;7021
tunng;7022
the four freshmen;7023
cymbals eat guitars;7024
debarge;7025
the beatles;7026
lower definition;7027
michael martin murphey;7028
bury tomorrow;7029
twista;7030
hazel o'connor;7031
kerry livgren;7032
6ix9ine;7033
paul colman trio;7034
clean bandit;7035
seam;7036
dodgy;7037
the lovin' spoonful;7038
art garfunkel;7039
lee ryan;7040
la sera;7041
c.w. stoneking;7042
johann christian bach;7043
8 foot sativa;7044
beast in black;7045
straylight run;7046
saves the day;7047
robyn;7048
the blasters;7049
petra;7050
halou;7051
the neighbourhood;7052
runrig;7053
billy ocean;7054
die young;7055
rainbirds;7056
play;7057
atomic kitten;7058
hallelujah the hills;7059
amaranthe;7060
koffin kats;7061
swingin' utters;7062
charlie musselwhite;7063
phillips, craig & dean;7064
tarkio;7065
avulsed;7066
unwritten law;7067
al axy;7068
freddie mercury;7069
rhye;7070
giorgio moroder;7071
elegy;7072
blue stahli;7073
zachary richard;7074
børns;7075
bob moses;7076
chromatics;7077
the barr brothers;7078
gorefest;7079
countess;7080
caetano veloso;7081
jerry butler;7082
steve harley & cockney rebel;7083
michelle branch;7084
dr. john;7085
the business;7086
handsome ghost;7087
naked eyes;7088
amduscia;7089
scooter;7090
bride;7091
white sea;7092
empire! empire! (i was a lonely estate);7093
mastodon;7094
amon amarth;7095
mike posner;7096
dove cameron;7097
desmond dekker;7098
the rugburns;7099
harley poe;7100
skinless;7101
secret garden;7102
colter wall;7103
john cena;7104
jump5;7105
cat stevens;7106
tim curry;7107
the classic crime;7108
grey delisle;7109
ramblin' jack elliott;7110
the world is a beautiful place & i am no longer afraid to die;7111
the spinto band;7112
the chainsmokers;7113
clawfinger;7114
the thrills;7115
ultravox;7116
quasi;7117
wonder girls;7118
the psycho realm;7119
alice in chains;7120
bill wyman's rhythm kings;7121
quiet riot;7122
rome;7123
fair warning;7124
to/die/for;7125
lari white;7126
backseat goodbye;7127
rhett miller;7128
eliza neals;7129
conway twitty;7130
asg;7131
highasakite;7132
arthur grumiaux;7133
pieter wispelwey;7134
union j;7135
jon randall;7136
allies;7137
modern romance;7138
krisiun;7139
s;7140
killa kyleon;7141
adam lambert;7142
skrew;7143
imagination movers;7144
jade warrior;7145
beborn beton;7146
webbie;7147
richard o'brien;7148
ella fitzgerald & louis armstrong;7149
denis matsuev;7150
kultur shock;7151
la dispute;7152
sean watkins;7153
tex williams;7154
cary brothers;7155
joan of arc;7156
dead hand projekt;7157
dio;7158
big tymers;7159
tokyo police club;7160
midge ure;7161
adam cohen;7162
forever changed;7163
more than life;7164
avenged sevenfold;7165
lifetime;7166
the clash;7167
five finger death punch;7168
zac brown band;7169
just surrender;7170
roosevelt sykes;7171
cory morrow;7172
岡崎律子 (ritsuko okazaki);7173
88 fingers louie;7174
brenton wood;7175
bill mallonee;7176
keziah jones;7177
lebanon hanover;7178
raspberries;7179
mygrain;7180
sita;7181
twenty one pilots;7182
bill withers;7183
family force 5;7184
t.a.t.u.;7185
rainbow;7186
marianas trench;7187
cascada;7188
the watchmen;7189
cold;7190
annette hanshaw;7191
james labrie;7192
viktoria mullova;7193
altaria;7194
icon of coil;7195
oliver koletzki;7196
kansas;7197
michael bolton;7198
keren ann;7199
lil pump;7200
john lennon & yoko ono;7201
jim lauderdale;7202
the manhattans;7203
stacey kent;7204
lee brice;7205
endo;7206
dani siciliano;7207
game theory;7208
lykke li;7209
hoobastank;7210
lil suzy;7211
since october;7212
tinchy stryder;7213
camera obscura;7214
macabre;7215
16 volt;7216
merle haggard;7217
i am abomination;7218
nana mouskouri;7219
tift merritt;7220
optiganally yours;7221
dwight twilley;7222
evgeny kissin;7223
david grisman;7224
ohio players;7225
liberty x;7226
matt haimovitz;7227
robbie williams;7228
grayson & whitter;7229
dream evil;7230
beau;7231
t.j. miller;7232
the bar-kays;7233
u.d.o.;7234
keke palmer;7235
ambrosia;7236
aswad;7237
henry mancini;7238
frida hyvönen;7239
shakespears sister;7240
celph titled;7241
pigeon john;7242
on broken wings;7243
javier;7244
billy currington;7245
a silent film;7246
sly & the family stone;7247
stephen duffy;7248
vicki lawrence;7249
lucky dube;7250
the georgia satellites;7251
bonnie tyler;7252
press play;7253
the academy is...;7254
strfkr;7255
steeler;7256
haystak;7257
frank black;7258
hot chocolate;7259
margo price;7260
club 8;7261
rick astley;7262
bone thugs-n-harmony;7263
true widow;7264
mark king;7265
the rascals;7266
vanden plas;7267
cocteau twins;7268
london elektricity;7269
the triffids;7270
jolie holland;7271
sole;7272
ben taylor;7273
equatronic;7274
the million dollar quartet;7275
jean beauvoir;7276
marillion;7277
tom cochrane;7278
friend 'n fellow;7279
jeremy camp;7280
7th cycle;7281
peter, paul & mary;7282
hadise;7283
dark angel;7284
dark age;7285
bo diddley;7286
the pineapple thief;7287
eli sostre;7288
psapp;7289
a loss for words;7290
taylor dayne;7291
hanne kah;7292
paul young;7293
swim deep;7294
kanye west;7295
scott mckenzie;7296
epmd;7297
anthony phillips;7298
silent civilian;7299
strange majik;7300
lisa thiel;7301
shad;7302
joseph arthur and the lonely astronauts;7303
a life once lost;7304
scarling.;7305
the stanley brothers;7306
nat stuckey;7307
full blown chaos;7308
mark knopfler & emmylou harris;7309
the wedding;7310
the weepies;7311
geto boys;7312
disclosure;7313
tonio k;7314
brave saint saturn;7315
the proclaimers;7316
matthew ryan;7317
ernest tubb;7318
cibelle;7319
cause for effect;7320
milburn;7321
cherry poppin' daddies;7322
berlin;7323
reverend bizarre;7324
brazzaville;7325
george "harmonica" smith;7326
garfunkel and oates;7327
ashley tisdale;7328
james mcmurtry;7329
vendetta;7330
bon iver;7331
steelheart;7332
horrorpops;7333
neuroticfish;7334
airbourne;7335
brick & lace;7336
dead meadow;7337
the cave singers;7338
moneybrother;7339
audra mcdonald;7340
ensiferum;7341
2 unlimited;7342
mason jennings;7343
a flock of seagulls;7344
tiger army;7345
bikini kill;7346
the vibrators;7347
the pillows;7348
shocking blue;7349
luca turilli;7350
soen;7351
roger miret and the disasters;7352
magica;7353
mac demarco;7354
antimatter;7355
tommy james;7356
nine lashes;7357
discordance axis;7358
against me!;7359
stavesacre;7360
the meads of asphodel;7361
blood stain child;7362
ohbijou;7363
little simz;7364
abigail washburn;7365
the felice brothers;7366
running wild;7367
a.a. bondy;7368
bitch;7369
die happy;7370
pepper;7371
long john baldry;7372
the eyes of a traitor;7373
fran ois couperin;7374
the shangri-las;7375
diana ross & the supremes and the temptations;7376
mgła;7377
the scary jokes;7378
spandau ballet;7379
sophie ellis-bextor;7380
stauros;7381
chris cornell;7382
slade;7383
medina;7384
dr. feelgood;7385
louis armstrong;7386
sticky fingaz;7387
steve perry;7388
the vincent black shadow;7389
procol harum;7390
pig;7391
a house;7392
dead and divine;7393
artillery;7394
tape five;7395
tommy dorsey;7396
new york dolls;7397
mull historical society;7398
richard thompson;7399
don francisco;7400
charles manson;7401
rotten sound;7402
stevie b;7403
scapegoat;7404
soul embraced;7405
the sound of animals fighting;7406
steven isserlis;7407
marissa nadler;7408
mia doi todd;7409
warbringer;7410
giuseppe tartini;7411
the coup;7412
samhain;7413
hue and cry;7414
cut copy;7415
blue tears;7416
steve forbert;7417
kingdom of sorrow;7418
aviators;7419
soulsavers;7420
ike & tina turner;7421
the band;7422
drenge;7423
sarah jarosz;7424
stephan eicher;7425
killing joke;7426
nominon;7427
texas in july;7428
justin hayward;7429
gary puckett & the union gap;7430
kings of convenience;7431
look what i did;7432
sarah harmer;7433
king adora;7434
danny michel;7435
iration;7436
kate & anna mcgarrigle;7437
pep love;7438
eric burdon;7439
ace frehley;7440
wolfgang amadeus mozart;7441
the working title;7442
knife party;7443
wizard;7444
a lot like birds;7445
cody simpson;7446
land of talk;7447
the kentucky headhunters;7448
matt pond pa;7449
casual;7450
anaïs mitchell;7451
phoebe carrai;7452
funkadelic;7453
richard strauss;7454
ray charles;7455
s.o.a.p.;7456
tnt;7457
nas;7458
the commitments;7459
lany;7460
and also the trees;7461
the rumour said fire;7462
bonaparte;7463
the moog;7464
rza;7465
northern kings;7466
the pale fountains;7467
for today;7468
fyfe;7469
mumakil;7470
air;7471
periphery;7472
the xx;7473
yvonne elliman;7474
murs;7475
joy division;7476
p.o.d.;7477
informatik;7478
fu manchu;7479
razakel;7480
dappled cities;7481
christine fellows;7482
ion dissonance;7483
don gibson;7484
anvil;7485
lobo;7486
phillip boa & the voodooclub;7487
pulp;7488
moriz rosenthal;7489
run level zero;7490
the funeral pyre;7491
american juniors;7492
steelwing;7493
chic;7494
gustav holst;7495
pyramaze;7496
f-minus;7497
wyrd;7498
wolfpakk;7499
kool keith;7500
sleeping giant;7501
the loved ones;7502
the echoing green;7503
edgar winter;7504
angelus apatrida;7505
dante;7506
senses fail;7507
dave carter & tracy grammer;7508
gary wright;7509
the saturdays;7510
blindspott;7511
falkenbach;7512
outkast;7513
xp8;7514
pentatonix;7515
yourcodenameis;7516
moloko;7517
darkane;7518
kris kristofferson;7519
barry mcguire;7520
cancerslug;7521
hangar;7522
sirus;7523
sammy davis jr.;7524
trademark;7525
dennis deyoung;7526
kyuss;7527
naia izumi;7528
whose line is it anyway? cast;7529
bastro;7530
flora cash;7531
turnpike troubadours;7532
sam cooke;7533
matisyahu;7534
jungle rot;7535
weekend nachos;7536
microwave dave & the nukes;7537
dan hartman;7538
the cramps;7539
cryptic wintermoon;7540
apocalypse hoboken;7541
fugazi;7542
tye tribbett;7543
karla bonoff;7544
holly throsby;7545
from first to last;7546
fiction family;7547
21 savage;7548
free throw;7549
xlooking forwardx;7550
flatt & scruggs;7551
heather nova;7552
vandenberg;7553
rick ross;7554
arcane;7555
balkan beat box;7556
alaska thunderfuck;7557
halsey;7558
the menzingers;7559
iona;7560
peter frampton;7561
the agonist;7562
american aquarium;7563
enon;7564
cranes;7565
revocation;7566
chet atkins;7567
bound for glory;7568
the clancy brothers;7569
kataklysm;7570
juelz santana;7571
superchick;7572
hellogoodbye;7573
travis;7574
the grouch;7575
born from pain;7576
busdriver;7577
brooke fraser;7578
10 years;7579
the tony rich project;7580
streetlight manifesto;7581
i killed the prom queen;7582
jan howard;7583
fats waller;7584
jacques offenbach;7585
alexis taylor;7586
turk;7587
streetheart;7588
7 seconds;7589
bruce hornsby;7590
van hunt;7591
wynonna judd;7592
roger chapman;7593
pink cream 69;7594
the ballroom thieves;7595
genesis;7596
breakdown of sanity;7597
tommy page;7598
wormrot;7599
deuce;7600
charles bronson;7601
something corporate;7602
the showdown;7603
the lonely island;7604
bobby valentino;7605
akon;7606
steve mason;7607
dual core;7608
the church;7609
j church;7610
dino ciani;7611
kent;7612
emerson, lake & palmer;7613
the tangent;7614
the everly brothers;7615
billy joe shaver;7616
chavez;7617
jessica simpson;7618
sinheresy;7619
tommy roe;7620
agnes obel;7621
the pretty reckless;7622
jermaine stewart;7623
soja;7624
joy;7625
the aquabats!;7626
hot dad;7627
sheryl crow;7628
yo la tengo;7629
naked aggression;7630
neon synthesis;7631
raghav;7632
climie fisher;7633
kenny chesney;7634
little big town;7635
alan parsons;7636
linkin park;7637
laibach;7638
dar williams;7639
наив;7640
john foxx;7641
this bike is a pipe bomb;7642
buck owens & susan raye;7643
straight faced;7644
johnny clegg;7645
player;7646
gaetano donizetti;7647
alabama thunderpussy;7648
khia;7649
rascal flatts;7650
the marvelettes;7651
thirteen senses;7652
saidian;7653
nrbq;7654
trocadero;7655
i.d.o.4.;7656
richard swift;7657
krezip;7658
abc;7659
bell book & candle;7660
vera lynn;7661
b'z;7662
girls rituals;7663
kevin welch;7664
rich mullins;7665
down with webster;7666
tonic;7667
demiricous;7668
menomena;7669
guillemots;7670
monrose;7671
carson robison;7672
porter wagoner;7673
ivan moravec;7674
siobhan donaghy;7675
the bellrays;7676
the arka teks;7677
tonedeff;7678
david essex;7679
immortal technique;7680
buddy moss;7681
acid house kings;7682
mister monster;7683
the monochrome set;7684
the dignity of labour;7685
shawn desman;7686
the mamas & the papas;7687
the handsome family;7688
endanger;7689
south park;7690
karen clark sheard;7691
dressy bessy;7692
sweet comfort band;7693
darden smith;7694
slim cessna's auto club;7695
cher;7696
tom waits;7697
hanne hukkelberg;7698
beres hammond;7699
kayo dot;7700
the cliks;7701
years & years;7702
fraggle rock;7703
brandtson;7704
lukas nelson & promise of the real;7705
furry lewis;7706
jim noir;7707
in fear and faith;7708
ultra;7709
a global threat;7710
all them witches;7711
forbidden;7712
keith & kristyn getty;7713
edison glass;7714
discount;7715
thrush hermit;7716
joanie sommers;7717
vampire weekend;7718
oi polloi;7719
the bianca story;7720
unsung zeros;7721
circle jerks;7722
pascal rog ;7723
the 88;7724
we shot the moon;7725
blutengel;7726
demon hunter;7727
westside connection;7728
sho baraka;7729
the roots;7730
hoyt axton;7731
bleach;7732
the corries;7733
timbaland & magoo;7734
carl orff;7735
emmerson nogueira;7736
bob geldof;7737
mortician;7738
david ball;7739
blood tsunami;7740
john denver;7741
the klf;7742
mark erelli;7743
pat travers;7744
tina turner;7745
rhonda vincent;7746
entombed;7747
stutterfly;7748
lou rawls;7749
blaque;7750
connie smith;7751
utada hikaru;7752
zhu;7753
why don't we;7754
beachwood sparks;7755
the l-train;7756
beth orton;7757
guiomar novaes;7758
animosity;7759
deborah allen;7760
the real tuesday weld;7761
janelle monáe;7762
!!!;7763
hayley westenra;7764
avicii;7765
lil yachty;7766
brazilian girls;7767
the babys;7768
mirah;7769
rockapella;7770
the posies;7771
eden synthetic corps;7772
bright eyes;7773
black knights;7774
hoodoo gurus;7775
drain sth;7776
babe ruth;7777
watain;7778
lil b;7779
t'pau;7780
joe jackson;7781
june christy;7782
josh turner;7783
carly simon;7784
becoming the archetype;7785
alexisonfire;7786
paulson;7787
make them suffer;7788
drake;7789
billy thorpe;7790
earth, wind & fire;7791
belleruche;7792
the housemartins;7793
the chariot;7794
jefferson starship;7795
wolf gang;7796
roch voisine;7797
parachute;7798
ferruccio busoni;7799
grave digger;7800
piebald;7801
bob dylan;7802
wax;7803
eric fish;7804
mark spiro;7805
the seatbelts;7806
dawn richard;7807
usher;7808
lmfao;7809
i monster;7810
mikhail glinka;7811
nombe;7812
s club 7;7813
ray davies;7814
dusty springfield;7815
hieroglyphics;7816
steril;7817
vladimir sofronitsky;7818
大塚愛 (ai otsuka);7819
brendan benson;7820
cyferdyne;7821
fantasia;7822
zola jesus;7823
october fall;7824
she & him;7825
trixter;7826
damien rice;7827
delain;7828
dave stewart & the spiritual cowboys;7829
little boots;7830
crystallion;7831
rusted root;7832
blanks 77;7833
dirty looks;7834
within reason;7835
casey donahew band;7836
aythis;7837
the world of skin;7838
propaganda;7839
tom russell;7840
julia stone;7841
emigrate;7842
vaya con dios;7843
tove lo;7844
death by stereo;7845
stevie ray vaughan;7846
nim vind;7847
sonny terry & brownie mcghee;7848
sam phillips;7849
b! machine;7850
julio iglesias;7851
john wetton;7852
edgar broughton band;7853
dr. alban;7854
mars ill;7855
immaculate machine;7856
martyr defiled;7857
free dominguez;7858
atreyu;7859
johnny burnette;7860
maria mena;7861
spheric universe experience;7862
whitehouse;7863
zed;7864
andrea bocelli;7865
tommy james and the shondells;7866
malcolm middleton;7867
suicide commando;7868
mark owen;7869
the string cheese incident;7870
patti austin;7871
jethro tull;7872
jennifer lopez;7873
dorothy;7874
ayria;7875
guru;7876
burton cummings;7877
the warren brothers;7878
a1;7879
cody jinks;7880
billy squier;7881
junius;7882
never shout never;7883
the cryan' shames;7884
the heavy;7885
myra hess;7886
television;7887
sadist;7888
danger danger;7889
clique girlz;7890
jamie lawson;7891
rosanne cash;7892
walk off the earth;7893
scorpions;7894
kelly clarkson;7895
unknown;0
speaker;7896
singer;7897


================================================
FILE: jukebox/data/ids/v3_genre_ids.txt
================================================
electroclash;1
acid rock;2
christian metal;3
pop rock;4
gothic;5
big beat;6
psychedelic rock‎;7
funk carioca;8
bebop;9
dance punk;10
trad jazz;11
romantic;12
andean music;13
volksmusik;14
coldwave;15
gospel blues;16
italian folk;17
disney;18
dark wave‏‎;19
powerviolence;20
bachata;21
soft rock;22
s music"];23
bubblegum dance;24
western swing;25
alternative country;26
latin pop;27
eurobeat;28
n;29
unblack metal;30
surf;31
nu-disco;32
event;33
classical;34
nasheed;35
jovem guarda;36
british blues;37
bossa nova;38
detroit blues;39
rock;40
contemporary christian;41
dark ambient;42
noise rock;43
axé;44
soca;45
dance-rock;46
contemporary jazz;47
appalachian folk;48
humppa‎;49
ambient;50
funeral doom;51
southern gospel;52
video game‎;53
hip hop;54
glitch hop;55
krautrock;56
breakcore;57
ska;58
traditional folk;59
psychedelic trance;60
reggae‏‎;61
noise pop;62
drumstep;63
house;64
teen pop;65
sea shanties;66
junkanoo;67
mandopop;68
pre-war blues;69
doom metal;70
oi-punk;71
swamp rock;72
crunkcore;73
rap rock;74
roots;75
country rap;76
avant-garde;77
cumbia;78
glam metal;79
groove metal;80
electric blues;81
new orleans rhythm and blues;82
canadian hip hop;83
freestyle;84
deathgrind;85
idm;86
comedy rock;87
art punk;88
progg;89
work songs;90
art pop;91
conjunto;92
persian;93
parody;94
jazz-funk;95
french hip hop;96
spirituals;97
african;98
middle-eastern;99
minimal;100
ranchera;101
industrial rock;102
electro house;103
celtic rock;104
death doom;105
grupera;106
jazz fusion‎;107
political folk;108
christian punk;109
rapcore;110
j-pop;111
mashup;112
metalcore;113
progressive country;114
power noise;115
hip house;116
crossover thrash;117
electropop‎;118
psychedelic folk;119
punk rock;120
classic rock;121
zydeco;122
afrobeat;123
salsa;124
banda;125
chill-out;126
morna;127
minnesang;128
alternative metal;129
djent;130
african folk;131
mambo;132
sertanejo;133
classic pop;134
soul;135
australian hip hop;136
symphonic rock;137
celtic punk;138
synthpop‎;139
europop;140
funk;141
jazz blues;142
vocal trance;143
celtic fusion;144
industrial;145
kirtan;146
slowcore;147
flamenco;148
piano blues;149
texas blues;150
aggrotech;151
steampunk;152
opera;153
folktronica;154
klezmer;155
nwobhm;156
goregrind;157
rac;158
neo-psychedelia‏‎;159
post-rock‎;160
hard bop;161
gypsy jazz;162
new orleans blues;163
doo-wop;164
soul blues;165
trap;166
indietronica;167
psychobilly;168
euro disco;169
neo-progressive rock;170
canterbury;171
freak folk;172
midwest rap;173
instrumental rock;174
dance-pop;175
avant-garde metal;176
edm;177
deep house;178
progressive bluegrass;179
rave;180
australian folk;181
comic opera;182
sunshine pop;183
gregorian chant;184
psychedelic rock;185
honky tonk;186
rock 'n' roll;187
television;188
nintendocore;189
jump blues;190
roots reggae;191
traditional bluegrass;192
operatic pop;193
skate punk;194
reggaeton;195
manele;196
middle-eastern hip hop;197
skiffle;198
nsbm;199
nu jazz;200
disco;201
horrorcore;202
early music;203
post-bop;204
gothic rock;205
crack rock steady;206
easy listening;207
psychedelic;208
christian;209
brutal death metal;210
experimental rock;211
modern classical‎;212
drum and bass;213
dark wave;214
dubstep;215
grunge;216
christian hip hop;217
latin jazz;218
r&b;219
s music", ;220
free jazz;221
experimental hip hop;222
swing;223
smooth jazz;224
southern metal;225
religious;226
progressive death metal;227
contemporary folk;228
j-rock;229
jazz;230
hamburger schule;231
teen pop‎;232
crossover;233
italo disco;234
deathcore;235
blues;236
crunk;237
jangle pop;238
indian classical music;239
big band;240
proto-punk;241
dirty blues;242
garage punk;243
extreme metal;244
folk metal;245
neo soul;246
electric folk;247
synthwave;248
arena rock;249
post-grunge;250
indie rock;251
acoustic blues;252
native american;253
progressive trance;254
nu metal;255
digital hardcore;256
brazilian rock;257
funky house;258
symphonic black metal;259
lounge music;260
brega;261
trance;262
industrial metal;263
austropop;264
bhangra;265
new wave;266
neoclassical;267
post-metal;268
dub;269
industrial metal‎;270
irish folk;271
deutschrock;272
gypsy;273
dark electro;274
alternative hip hop;275
mbaqanga;276
swamp blues;277
french pop;278
tango;279
rockabilly;280
old-time music;281
blues rock;282
scottish folk;283
indie folk;284
nazi-punk;285
deutschpunk;286
piedmont blues;287
beatbox;288
worship;289
heavy metal;290
underground hip  hop;291
mixed;292
electro;293
tropicalismo;294
jazz fusion;295
worldbeat;296
hill country blues;297
a cappella;298
dixieland;299
hi-nrg;300
punk blues;301
anti-folk;302
east coast blues;303
polka;304
mod revival;305
soundtrack/musical;306
movie;307
outlaw country;308
rock against communism;309
barbershop;310
math rock;311
avant-garde‎;312
psychedelic pop;313
synthpop;314
post-punk‎;315
queercore;316
death metal;317
political hip hop;318
thrashcore;319
acid house;320
post-hardcore‎;321
electro-industrial;322
rio;323
southern hip hop;324
filk;325
duranguense;326
latin hip hop;327
pop punk;328
space rock;329
j-rap;330
deep house‎;331
baroque pop;332
chiptune;333
heartland rock;334
dancehall;335
experimental pop;336
adult contemporary‎;337
boogie woogie;338
country pop;339
power pop;340
west coast hip hop;341
thrash metal;342
avant-pop;343
enka;344
k-pop;345
post-britpop;346
vocalese;347
volkslied;348
reggae fusion;349
funk rock;350
tech house;351
adult contemporary;352
death 'n' roll;353
russian rock;354
latin rock;355
folk punk;356
west coast blues;357
progressive black metal;358
progressive metal;359
cajun;360
sophisti-pop;361
rock 'n' roll‎;362
post-punk;363
symphonic metal;364
beat;365
alternative rock‎;366
art rock;367
bakersfield sound;368
indie pop;369
folk;370
acid jazz;371
dream pop;372
pop-rap;373
eurodance;374
vaudeville;375
louisiana blues;376
baião;377
downtempo;378
jug band;379
neo-psychedelia;380
sufi;381
medieval;382
singer-songwriter‎;383
outsider music;384
pop-folk;385
martial industrial;386
samba;387
alternative dance;388
children's music‎;389
anarcho-punk;390
dark rock;391
rock en español;392
balearic beat;393
electropunk;394
urban contemporary;395
ragtime;396
british invasion;397
bubblegum pop;398
rap metal;399
soundtrack/television;400
blues revival;401
reggae;402
schlager;403
dance band;404
video game;405
crust punk;406
cabaret;407
ska punk‎;408
bolero;409
canadian folk;410
neofolk;411
shoegazing;412
acoustic;413
modern classical;414
swamp pop;415
celtic;416
futurepop;417
g-funk;418
norteño;419
orchestral;420
boogie rock;421
tejano;422
new age;423
soul jazz;424
cantopop;425
progressive metalcore;426
mathcore;427
new rave;428
neue deutsche welle;429
delta blues;430
lo-fi;431
poetry;432
hatecore;433
chanson;434
underground hip hop;435
pirate metal;436
trip hop;437
fado;438
americana;439
hardcore hip hop;440
post-industrial;441
grime;442
southern rock;443
grindcore;444
musical;445
hard trance;446
ska punk;447
post-rock;448
uk garage;449
melodic metalcore;450
black metal;451
visual kei;452
soundtrack;453
axé‎;454
hardcore punk;455
western;456
blackgaze;457
christian rock;458
technical death metal;459
christian hardcore;460
christmas;461
breakbeat;462
francophone;463
choral;464
progressive folk;465
mystic folk;466
melodic death metal;467
horror punk;468
country blues;469
nederpop;470
post-hardcore;471
future garage;472
techno;473
swiss rock;474
dance-pop‎;475
electronicore;476
post-punk revival;477
glitch;478
calypso;479
ragga;480
britpop;481
rock opera;482
cowpunk;483
la confusion des genres;484
alternative rock;485
surf rock;486
ballad;487
latin;488
contemporary r&b;489
forró;490
ethereal wave;491
electro swing;492
novelty;493
funk melody;494
punk cabaret;495
symphonic metal‎;496
pop;497
paisley underground;498
neue deutsche härte;499
glam rock;500
nerdcore hip hop;501
bluegrass;502
hardstyle;503
happy hardcore;504
baroque;505
speed metal;506
country;507
electropop;508
memphis blues;509
pagan metal;510
horror punk‏‎;511
mariachi;512
singer-songwriter;513
children's music;514
boogie;515
gothic metal;516
electronic rock;517
emo;518
gospel;519
ebm;520
roots rock;521
vocal;522
celtic folk;523
electronic;524
death  metal;525
gabber;526
deathrock;527
experimental;528
spoken word;529
screamo;530
finnish folk;531
singer only;532
new jack swing;533
acid techno;534
corrido;535
english folk;536
american folk;537
raï;538
drone doom;539
hard rock;540
piano rock;541
hawaiian;542
humppa;543
east coast hip hop;544
gypsy punk;545
country rock;546
jazz‎;547
mpb;548
harmonica blues;549
melodic hardcore;550
string band;551
anime;552
nu metalcore;553
progressive  rock;554
garage rock;555
dance;556
reggae rock;557
contemporary christian‎;558
sludge metal;559
minimal techno;560
folk rock;561
drone music;562
stoner rock;563
speedcore;564
chillwave;565
riot grrrl;566
chamber music;567
cool jazz;568
noise;569
vocal jazz;570
progressive rock;571
afropop;572
bro-country;573
goa trance;574
2-tone;575
miami bass;576
quiet storm;577
pub rock;578
power metal;579
blue-eyed soul;580
viking metal;581
gangsta rap;582
country pop‎;583
exotica;584
christian ska;585
jam band;586
chicago blues;587
street punk;588
funk metal;589
rap  metal;590
christian hymns;591
classic female blues;592
kizomba;593
comedy;594
dark cabaret;595
french house;596
progressive house;597
african blues;598
atmospheric black metal;599
pop rock‎;600
blackened death metal;601
shibuya-kei;602
electronica;603
unknown;0


================================================
FILE: jukebox/data/labels.py
================================================
import torch as t
import numpy as np
from jukebox.data.artist_genre_processor import ArtistGenreProcessor
from jukebox.data.text_processor import TextProcessor

# Linear window heuristic to get a window of lyric_tokens
def get_relevant_lyric_tokens(full_tokens, n_tokens, total_length, offset, duration):
    """Select a fixed-size window of lyric tokens for one audio chunk.

    The window is placed linearly: the centre of the chunk
    (``offset + duration / 2``), taken as a fraction of ``total_length``, is
    mapped to the same fraction of ``full_tokens`` and a window of
    ``n_tokens`` tokens is cut around that point, clamped to stay inside
    ``full_tokens``.

    Args:
        full_tokens: List of token ids for the complete lyrics.
        n_tokens: Number of tokens to return (odd values are supported).
        total_length: Length of the whole track, in the same units as
            ``offset`` and ``duration``.
        offset: Start of the chunk. Must satisfy
            ``0 <= offset < total_length`` whenever ``full_tokens`` holds at
            least ``n_tokens`` tokens.
        duration: Length of the chunk.

    Returns:
        A ``(tokens, indices)`` pair of lists, each of length ``n_tokens``.
        ``indices[i]`` is the position of ``tokens[i]`` in ``full_tokens``,
        or ``-1`` where ``tokens[i]`` is left padding (token id 0).

    Raises:
        AssertionError: If ``offset`` is out of range or a consistency check
            on the result fails.
    """
    n_full = len(full_tokens)
    if n_full < n_tokens:
        # Not enough lyrics to fill the window: left-pad with token 0.
        n_pad = n_tokens - n_full
        tokens = [0] * n_pad + full_tokens
        indices = [-1] * n_pad + list(range(n_full))
    else:
        assert 0 <= offset < total_length
        half = n_tokens // 2
        midpoint = int(n_full * (offset + duration / 2.0) / total_length)
        # Clamp so that [midpoint - half, midpoint - half + n_tokens) lies
        # inside full_tokens. Using n_tokens - half on the right-hand side
        # (instead of half) keeps the window at full size for odd n_tokens;
        # for even n_tokens both expressions are equal.
        midpoint = min(max(midpoint, half), n_full - (n_tokens - half))
        start = midpoint - half
        end = start + n_tokens
        tokens = full_tokens[start:end]
        indices = list(range(start, end))
    assert len(tokens) == n_tokens, f"Expected length {n_tokens}, got {len(tokens)}"
    assert len(indices) == n_tokens, f"Expected length {n_tokens}, got {len(indices)}"
    assert tokens == [full_tokens[index] if index != -1 else 0 for index in indices]
    return tokens, indices

class EmptyLabeller():
    """Labeller that produces zero-length labels and placeholder metadata."""

    def get_label(self, artist=None, genre=None, lyrics=None, total_length=None, offset=None):
        """Return an empty int64 label vector plus placeholder info.

        All arguments are accepted for signature compatibility and ignored.
        """
        placeholder = dict(artist="n/a", genre="n/a", lyrics=[], full_tokens=[])
        return dict(y=np.array([], dtype=np.int64), info=placeholder)

    def get_batch_labels(self, metas, device='cpu'):
        """Build one empty label per entry of ``metas``.

        Returns a dict with ``y``, a long tensor on ``device`` whose first
        dimension is ``len(metas)``, and ``info``, a list of placeholder
        info dicts of the same length.
        """
        # The contents of each meta are irrelevant; only the count matters.
        labels = [self.get_label() for _ in metas]
        infos = [label['info'] for label in labels]
        tensors = [t.from_numpy(label['y']) for label in labels]

        ys = t.stack(tensors, dim=0).to(device).long()
        assert ys.shape[0] == len(metas)
        assert len(infos) == len(metas)
        return dict(y=ys, info=infos)

class Labeller():
    """Builds conditioning label vectors from artist, genre, lyrics and timing.

    A label ``y`` is a 1-D int64 array laid out as::

        [total_length, offset, sample_length, artist_id, *genre_ids, *lyric_tokens]

    ``genre_ids`` is right-padded with ``-1`` to ``max_genre_words`` entries
    and ``lyric_tokens`` has ``n_tokens`` entries, so the label length is
    ``4 + max_genre_words + n_tokens``.
    """

    def __init__(self, max_genre_words, n_tokens, sample_length, v3=False):
        """Create the artist/genre and text processors and record the layout.

        Args:
            max_genre_words: Number of genre-id slots in each label.
            n_tokens: Number of lyric-token slots in each label.
            sample_length: Chunk length stored in every label (slot 2).
            v3: Forwarded to ``ArtistGenreProcessor`` and ``TextProcessor``.
        """
        self.ag_processor = ArtistGenreProcessor(v3)
        self.text_processor = TextProcessor(v3)
        self.n_tokens = n_tokens
        self.max_genre_words = max_genre_words
        self.sample_length = sample_length
        self.label_shape = (4 + self.max_genre_words + self.n_tokens, )

    def get_label(self, artist, genre, lyrics, total_length, offset):
        """Build the label for one chunk from raw metadata.

        Returns a dict with ``y`` (the int64 label array) and ``info``
        (the artist and genre as given, the cleaned lyrics and the full
        list of lyric tokens).
        """
        artist_id = self.ag_processor.get_artist_id(artist)
        genre_ids = self.ag_processor.get_genre_ids(genre)

        lyrics = self.text_processor.clean(lyrics)
        full_tokens = self.text_processor.tokenise(lyrics)
        tokens, _ = get_relevant_lyric_tokens(full_tokens, self.n_tokens, total_length, offset, self.sample_length)

        assert len(genre_ids) <= self.max_genre_words
        # Pad the genre slots with -1 so every label has the same length.
        genre_ids = genre_ids + [-1] * (self.max_genre_words - len(genre_ids))
        y = np.array([total_length, offset, self.sample_length, artist_id, *genre_ids, *tokens], dtype=np.int64)
        assert y.shape == self.label_shape, f"Expected {self.label_shape}, got {y.shape}"
        info = dict(artist=artist, genre=genre, lyrics=lyrics, full_tokens=full_tokens)
        return dict(y=y, info=info)

    def get_y_from_ids(self, artist_id, genre_ids, lyric_tokens, total_length, offset):
        """Build a label array directly from already-resolved ids and tokens.

        ``lyric_tokens`` must have exactly ``n_tokens`` entries; when
        ``n_tokens`` is 0 it is ignored.
        """
        assert len(genre_ids) <= self.max_genre_words
        genre_ids = genre_ids + [-1] * (self.max_genre_words - len(genre_ids))
        if self.n_tokens > 0:
            assert len(lyric_tokens) == self.n_tokens
        else:
            lyric_tokens = []
        y = np.array([total_length, offset, self.sample_length, artist_id, *genre_ids, *lyric_tokens], dtype=np.int64)
        assert y.shape == self.label_shape, f"Expected {self.label_shape}, got {y.shape}"
        return y

    def get_batch_labels(self, metas, device='cpu'):
        """Build labels for a batch.

        Each element of ``metas`` is a mapping passed to ``get_label`` as
        keyword arguments. Returns a dict with ``y`` (a long tensor on
        ``device`` with one row per meta) and ``info`` (the list of
        per-item info dicts).
        """
        ys, infos = [], []
        for meta in metas:
            label = self.get_label(**meta)
            y, info = label['y'], label['info']
            ys.append(y)
            infos.append(info)

        ys = t.stack([t.from_numpy(y) for y in ys], dim=0).to(device).long()
        assert ys.shape[0] == len(metas)
        assert len(infos) == len(metas)
        return dict(y=ys, info=infos)

    def set_y_lyric_tokens(self, ys, labels):
        """Recompute the lyric-token slots of ``ys`` in place.

        For every row, the timing stored in slots 0-2 (total length, offset,
        duration) selects a new token window from the full token list kept
        in ``labels['info']``.

        Returns:
            For each row, the list of indices into its full token list that
            the selected tokens came from, or ``None`` when ``n_tokens`` is 0.
        """
        info = labels['info']
        assert ys.shape[0] == len(info)
        if self.n_tokens > 0:
            tokens_list = []
            indices_list = []  # index of each selected token in the original token list
            for i in range(ys.shape[0]):
                full_tokens = info[i]['full_tokens']
                total_length, offset, duration = ys[i, 0], ys[i, 1], ys[i, 2]
                tokens, indices = get_relevant_lyric_tokens(full_tokens, self.n_tokens, total_length, offset, duration)
                tokens_list.append(tokens)
                indices_list.append(indices)
            # Create the replacement on the device ys already lives on, so
            # this also works on CPU-only hosts and non-default GPUs.
            ys[:, -self.n_tokens:] = t.tensor(tokens_list, dtype=t.long, device=ys.device)
            return indices_list
        else:
            return None

    def describe_label(self, y):
        """Decode a single label back into artist, genre and lyric text."""
        assert y.shape == self.label_shape, f"Expected {self.label_shape}, got {y.shape}"
        y = np.array(y).tolist()
        total_length, offset, length, artist_id, *genre_ids = y[:4 + self.max_genre_words]
        tokens = y[4 + self.max_genre_words:]
        artist = self.ag_processor.get_artist(artist_id)
        genre = self.ag_processor.get_genre(genre_ids)
        lyrics = self.text_processor.textise(tokens)
        return dict(artist=artist, genre=genre, lyrics=lyrics)


if __name__ == '__main__':
    # Smoke test: build and decode one label with the non-v3 and the v3
    # label configurations.
    demo_configs = (
        dict(max_genre_words=5, n_tokens=512, sample_length=8192*8*4*4, v3=False),
        dict(max_genre_words=1, n_tokens=384, sample_length=6144*8*4*4, v3=True),
    )
    for config in demo_configs:
        labeller = Labeller(**config)
        label = labeller.get_label("Alan Jackson", "Country Rock", "old town road", 4*60*44100, 0)
        print(label, labeller.describe_label(label['y']))


================================================
FILE: jukebox/data/text_processor.py
================================================
import re
from unidecode import unidecode

class TextProcessor():
    """Character-level tokeniser for lyrics.

    Every character of the vocabulary maps to a positive integer token
    (its position in the vocabulary string plus one). Token 0 is reserved
    for ``'<unk>'`` and decodes to the empty string.
    """

    def __init__(self, v3=False):
        """Build the vocabulary tables.

        Args:
            v3: When true, use the vocabulary without ``'+'``; otherwise
                ``'+'`` is part of the vocabulary.
        """
        # The patterns are raw strings so that \-, \[ and \] reach the regex
        # engine without relying on invalid string escape sequences.
        if v3:
            vocab = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,:;!?-\'\"()[] \t\n'
            not_vocab = re.compile(r'[^A-Za-z0-9.,:;!?\-\'\"()\[\] \t\n]+')
        else:
            vocab = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,:;!?-+\'\"()[] \t\n'
            not_vocab = re.compile(r'[^A-Za-z0-9.,:;!?\-+\'\"()\[\] \t\n]+')
        self.vocab = {char: token for token, char in enumerate(vocab, start=1)}
        self.vocab['<unk>'] = 0
        self.n_vocab = len(vocab) + 1
        self.tokens = {token: char for char, token in self.vocab.items()}
        self.tokens[0] = ''  # <unk> decodes to the empty string
        self.not_vocab = not_vocab

    def clean(self, text):
        """Return ``text`` reduced to characters of the vocabulary.

        The text is transliterated with ``unidecode``, backslashes are
        replaced by newlines, and every remaining run of characters outside
        the vocabulary is removed.
        """
        text = unidecode(text)  # Convert to ascii
        text = text.replace('\\', '\n')
        text = self.not_vocab.sub('', text)  # Remove non vocab
        return text

    def tokenise(self, text):
        """Map each character of ``text`` to its token id.

        Raises:
            KeyError: If ``text`` contains a character outside the
                vocabulary (run ``clean`` first).
        """
        return [self.vocab[char] for char in text]

    def textise(self, tokens):
        """Decode a sequence of token ids into a single string."""
        return ''.join([self.tokens[token] for token in tokens])

    def characterise(self, tokens):
        """Decode a sequence of token ids into a list of characters."""
        return [self.tokens[token] for token in tokens]


================================================
FILE: jukebox/hparams.py
================================================
# Named hyperparameter presets: preset name -> Hyperparams. Filled in below
# and looked up by name in setup_hparams().
HPARAMS_REGISTRY = {}
# Default values, grouped by topic: group name -> Hyperparams. setup_hparams()
# merges every group and rejects override keys that appear in none of them.
DEFAULTS = {}

class Hyperparams(dict):
    """Dictionary whose items can also be read and written as attributes.

    ``hps.foo`` reads ``hps['foo']`` and ``hps.foo = x`` stores ``x`` under
    the key ``'foo'``.
    """

    def __getattr__(self, attr):
        """Return the item stored under ``attr``.

        Python only calls this after normal attribute lookup has failed, so
        dict methods are unaffected.

        Raises:
            AttributeError: If there is no such key. Raising AttributeError
                (rather than letting KeyError escape) is what ``hasattr``,
                ``getattr(obj, name, default)`` and ``copy.deepcopy`` rely on.
        """
        try:
            return self[attr]
        except KeyError as err:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {attr!r}"
            ) from err

    def __setattr__(self, attr, value):
        """Store ``value`` under the key ``attr``."""
        self[attr] = value

def setup_hparams(hparam_set_names, kwargs):
    """Assemble the final hyperparameters for a run.

    Starts from the union of all ``DEFAULTS`` groups, then applies the named
    presets from ``HPARAMS_REGISTRY`` in the order given, and finally
    ``kwargs``.

    Args:
        hparam_set_names: Either a tuple of preset names or a single
            comma-separated string of names. Empty names are skipped.
        kwargs: Mapping of explicit overrides, applied last.

    Returns:
        A ``Hyperparams`` instance with the merged values.

    Raises:
        KeyError: If a preset name is not registered.
        ValueError: If a preset or ``kwargs`` contains a key that is not
            present in the defaults.
    """
    if isinstance(hparam_set_names, tuple):
        names = hparam_set_names
    else:
        names = hparam_set_names.split(",")
    overrides = [HPARAMS_REGISTRY[name.strip()] for name in names if name]
    overrides.append(kwargs)

    merged = Hyperparams()
    for group in DEFAULTS.values():
        merged.update(group)

    # kwargs is the last override, so it takes precedence over every preset.
    for override in overrides:
        for key in override:
            if key not in merged:
                raise ValueError(f"{key} not in default args")
        merged.update(**override)
    return merged

# Teeny for testing
# Empty preset: selecting it overrides nothing, leaving the defaults in place.
teeny = Hyperparams(
)
HPARAMS_REGISTRY["teeny"] = teeny

# Preset that only overrides the `sr` value.
easy = Hyperparams(
    sr=22050,
)
HPARAMS_REGISTRY["easy"] = easy

# Base URL that is prepended to the checkpoint paths in the presets below.
REMOTE_PREFIX = 'https://openaipublic.azureedge.net/'

# Model hps
# VQ-VAE preset with 3 levels; `restore_vqvae` points at the checkpoint under
# REMOTE_PREFIX. The per-level tuples (downs_t, strides_t,
# hvqvae_multipliers) each carry three entries — presumably one per level.
vqvae = Hyperparams(
    levels = 3,
    downs_t = (3, 2, 2),
    strides_t = (2, 2, 2),
    emb_width = 64,
    l_bins = 2048,
    l_mu = 0.99,
    commit = 0.02,
    spectral = 0.0,
    multispectral = 1.0,
    hvqvae_multipliers = (2, 1, 1),
    loss_fn = 'lmix',
    lmix_l2 = 1.0,
    lmix_linf=0.02,
    width = 32,
    depth = 4,
    m_conv = 1.0,
    dilation_growth_rate = 3,
    restore_vqvae=REMOTE_PREFIX + 'jukebox/models/5b/vqvae.pth.tar',
)
HPARAMS_REGISTRY["vqvae"] = vqvae

# Label settings shared by the upsampler and prior_5b presets. It is merged
# into those presets via update() rather than registered under its own name
# in this section.
# NOTE(review): y_bins is presumably (genres, artists), following the comment
# on the small presets further down — confirm against the consumer.
labels = Hyperparams(
    y_bins=(120, 4111),
    t_bins=128,
    max_bow_genre_size=5,
    n_vocab=80,
)

# Settings common to both upsampler levels; merged into each level's preset.
upsamplers = Hyperparams(
    n_ctx=8192,
    prior_width=1920,
    prior_depth=72,
    heads=1,
    attn_order=2,
    blocks=128,
    init_scale=0.4,
    c_res=1,
    cond_width=1024,
    cond_depth=16,
    cond_dilation_growth_rate=3,
    cond_dilation_cycle=8,
    cond_c_res=1,
    use_tokens=False,
    prime_loss_fraction=0.0,
    fp16_params=False,
)
# Must run before the per-level update() calls below so that the label
# settings are carried into both level presets.
upsamplers.update(labels)

# Level-0 upsampler: level-specific keys first, then the shared settings.
# update() would overwrite on key collisions; the two key sets are disjoint.
upsampler_level_0 = Hyperparams(
    level=0,
    restore_prior=REMOTE_PREFIX + 'jukebox/models/5b/prior_level_0.pth.tar'
)
upsampler_level_0.update(upsamplers)
HPARAMS_REGISTRY["upsampler_level_0"] = upsampler_level_0

# Level-1 upsampler: same construction, additionally enabling cond_res_scale.
upsampler_level_1 = Hyperparams(
    level=1,
    cond_res_scale=True,
    restore_prior=REMOTE_PREFIX + 'jukebox/models/5b/prior_level_1.pth.tar'
)
upsampler_level_1.update(upsamplers)
HPARAMS_REGISTRY["upsampler_level_1"] = upsampler_level_1

# Level-2 prior preset without lyric tokens (use_tokens=False, n_tokens=0).
prior_5b = Hyperparams(
    level=2,
    n_ctx=8192,
    prior_width=4800,
    prior_depth=72,
    heads=8,
    attn_order=2,
    blocks=128,
    init_scale=0.1,
    c_res=1,
    beta2=0.925,
    min_duration=60.0,
    max_duration=600.0,
    use_tokens=False,
    n_tokens=0,
    prime_loss_fraction=0.0,
    merged_decoder=True,
    restore_prior=REMOTE_PREFIX + 'jukebox/models/5b/prior_level_2.pth.tar',
    fp16_params=True,
)
# Add the shared label settings (no key overlaps with the preset above).
prior_5b.update(labels)
HPARAMS_REGISTRY["prior_5b"] = prior_5b


# Level-2 prior preset with lyric tokens (use_tokens=True, n_tokens=512); it
# also sets the prime_* values and the alignment layer/head.
prior_5b_lyrics = Hyperparams(
    level=2,
    n_ctx=8192,
    prior_width=4800,
    prior_depth=79,
    heads=8,
    attn_order=10,
    blocks=128,
    init_scale=0.1,
    c_res=1,
    prime_width=1280,
    prime_depth=18,
    prime_heads=4,
    prime_attn_order=2,
    prime_blocks=32,
    prime_init_scale=0.7,
    prime_c_res=1,
    min_duration=23.8,
    max_duration=600.0,
    use_tokens=True,
    n_tokens=512,
    prime_loss_fraction=0.4,
    merged_decoder=True,
    restore_prior=REMOTE_PREFIX + 'jukebox/models/5b_lyrics/prior_level_2.pth.tar',
    fp16_params=True,
    alignment_layer=68,
    alignment_head=2,
)
# Add the shared label settings (no key overlaps with the preset above).
prior_5b_lyrics.update(labels)
HPARAMS_REGISTRY["prior_5b_lyrics"] = prior_5b_lyrics

# Label settings for the v3 label scheme; merged into prior_1b_lyrics below.
# NOTE(review): y_bins is presumably (genres, artists), following the comment
# on the small presets further down — confirm against the consumer.
labels_v3 = Hyperparams(
    y_bins=(604, 7898),
    t_bins=64,
    max_bow_genre_size=1,
    n_vocab=79,
)

# Level-2 prior preset with lyric tokens that uses the v3 labels
# (labels_v3=True) and a single encoder-decoder (single_enc_dec=True).
prior_1b_lyrics = Hyperparams(
    level=2,
    n_ctx=6144,
    prior_width=2048,
    prior_depth=72,
    heads=2,
    attn_order=12,
    blocks=64,
    init_scale=0.2,
    c_res=1,
    labels_v3=True,
    min_duration=17.84,
    max_duration=600.0,
    use_tokens=True,
    n_tokens=384,
    prime_loss_fraction=0.4,
    single_enc_dec=True,
    restore_prior=REMOTE_PREFIX + 'jukebox/models/1b_lyrics/prior_level_2.pth.tar',
    fp16_params=False,
    alignment_layer=63,
    alignment_head=0,
)
# Add the v3 label settings (no key overlaps with the preset above).
prior_1b_lyrics.update(labels_v3)
HPARAMS_REGISTRY["prior_1b_lyrics"] = prior_1b_lyrics

# Small models
# Two-level VQ-VAE preset; unlike the `vqvae` preset above it sets no
# restore_vqvae checkpoint.
small_vqvae = Hyperparams(
    sr = 22050,
    levels = 2,
    downs_t = (5, 3),
    strides_t = (2, 2),
    emb_width = 64,
    l_bins = 1024,
    l_mu = 0.99,
    commit = 0.02,
    spectral = 0.0,
    multispectral = 1.0,
    loss_fn = 'l2',
    width = 32,
    depth = 4,
    m_conv = 1.0,
    dilation_growth_rate = 3,
)
HPARAMS_REGISTRY["small_vqvae"] = small_vqvae

# Small prior preset without any label settings.
small_prior = Hyperparams(
    n_ctx=8192,
    prior_width=1024,
    prior_depth=48,
    heads=1,
    c_res=1,
    attn_order=2,
    blocks=64,
    init_scale=0.7,
)
HPARAMS_REGISTRY["small_prior"] = small_prior

# small_prior plus label settings: the label keys are declared here and the
# small_prior values are merged in afterwards (the key sets are disjoint).
small_labelled_prior = Hyperparams(
    labels=True,
    labels_v3=True,
    y_bins=(10,100), # Set this to (genres, artists) for your dataset
    max_bow_genre_size=1,
    min_duration=60.0,
    max_duration=600.0,
    t_bins=64,
)
small_labelled_prior.update(small_prior)
HPARAMS_REGISTRY["small_labelled_prior"] = small_labelled_prior

# Small lyric-conditioned prior with a single encoder-decoder
# (single_enc_dec=True); written out in full rather than derived from
# small_prior.
small_single_enc_dec_prior = Hyperparams(
    n_ctx=6144,
    prior_width=1024,
    prior_depth=48,
    heads=2,
    attn_order=12,
    blocks=64,
    init_scale=0.7,
    c_res=1,
    prime_loss_fraction=0.4,
    single_enc_dec=True,
    labels=True,
    labels_v3=True,
    y_bins=(10,100), # Set this to (genres, artists) for your dataset
    max_bow_genre_size=1,
    min_duration=60.0,
    max_duration=600.0,
    t_bins=64,
    use_tokens=True,
    n_tokens=384,
    n_vocab=79,
)
HPARAMS_REGISTRY["small_single_enc_dec_prior"] = small_single_enc_dec_prior

# Small lyric-conditioned prior that leaves single_enc_dec unset and instead
# configures the prime_* values.
small_sep_enc_dec_prior = Hyperparams(
    n_ctx=6144,
    prior_width=1024,
    prior_depth=50,
    heads=2,
    attn_order=8,
    blocks=64,
    init_scale=0.7,
    c_res=1,
    prime_width=256,
    prime_depth=9,
    prime_heads=2,
    prime_attn_order=2,
    prime_blocks=32,
    prime_init_scale=0.7,
    prime_c_res=1,
    prime_loss_fraction=0.4,
    labels=True,
    labels_v3=True,
    y_bins=(10,100), # Set this to (genres, artists) for your dataset
    max_bow_genre_size=1,
    min_duration=60.0,
    max_duration=600.0,
    t_bins=64,
    use_tokens=True,
    n_tokens=384,
    n_vocab=79,
)
HPARAMS_REGISTRY["small_sep_enc_dec_prior"] = small_sep_enc_dec_prior

# Small upsampler preset: the small_prior values plus the cond_* settings.
small_upsampler = Hyperparams(
    n_ctx=8192,
    prior_width=1024,
    prior_depth=48,
    heads=1,
    c_res=1,
    attn_order=2,
    blocks=64,
    init_scale=0.7,
    cond_width=512,
    cond_depth=16,
    cond_dilation_growth_rate=3,
    cond_dilation_cycle=8,
    cond_c_res=1,
)

HPARAMS_REGISTRY["small_upsampler"] = small_upsampler

# Preset that switches on all fp16-related flags in one go.
all_fp16 = Hyperparams(
    fp16=True,
    fp16_params=True,
    fp16_opt=True,
    fp16_scale_window=250,
)
HPARAMS_REGISTRY["all_fp16"] = all_fp16

# Preset that enables EMA with the cpu_ema flag set and ema_fused disabled.
cpu_ema = Hyperparams(
    ema=True,
    cpu_ema=True,
    cpu_ema_freq=100,
    ema_fused=False,
)
HPARAMS_REGISTRY["cpu_ema"] = cpu_ema


# Default groups. setup_hparams() merges all groups into one flat namespace,
# so the group names only organise this file; a key must appear in some group
# before a preset or a keyword override may set it.

# Run-provenance placeholders.
DEFAULTS["rcall"] = Hyperparams(
    rcall_command="<unknown_rcall_command>",
    git_commit="<unknown_git_commit>",
)

# Script-level options: debug switches, logging and checkpoint-saving settings.
DEFAULTS["script"] = Hyperparams(
    name='',
    debug_mem=False,
    debug_eval_files=False,
    debug_speed=False,
    debug_iters=100,
    debug_batch=False,
    debug_grad_accum=False,
    debug_inputs=False,
    local_path='',
    local_logdir='logs',
    max_len=24,
    max_log=32,
    save=True,
    save_iters=20000,
    seed=0,
    prior=False,
    log_steps=100,
    func='',
)

# Dataset and data-loading options.
DEFAULTS["data"] = Hyperparams(
    audio_files_dir='',
    finetune='',
    english_only=False,
    bs=1,
    bs_sample=1,
    nworkers=1,
    aug_shift=False,
    aug_blend=False,
    train_test_split=0.9,
    train_shrink_factor=1.0,
    test_shrink_factor=1.0,
    p_unk=0.1,
    min_duration=None,
    max_duration=None,
    n_tokens=0,
    n_vocab=0,
    use_tokens=False,
    curr_epoch=-1,
)

# VQ-VAE defaults; the `vqvae` and `small_vqvae` presets override a subset.
DEFAULTS["vqvae"] = Hyperparams(
    restore_vqvae='',
    levels=2,
    downs_t=(1,1),
    strides_t=(2,2),
    hvqvae_multipliers=None,
    revival_threshold=1.0,
    emb_width=64,
    l_bins=512,
    l_mu=0.99,
    commit=1.0,
    spectral=0.0,
    multispectral=1.0,
    loss_fn='l2',
    linf_k=2048,
    lmix_l1=0.0,
    lmix_l2=0.0,
    lmix_linf=0.0,
    use_bottleneck=True,
)

# Defaults for the VQ-VAE convolution blocks.
DEFAULTS["vqvae_conv_block"] = Hyperparams(
    depth=3,
    width=128,
    m_conv=1.0,
    dilation_growth_rate=1,
    dilation_cycle=None,
    vqvae_reverse_decoder_dilation=True,
)

# Prior defaults; the prior and upsampler presets override a subset.
DEFAULTS["prior"] = Hyperparams(
    restore_prior='',
    restore_prior_ddp=False,
    max_bow_genre_size=None,
    y_bins=0,
    level=0,
    cond_levels=None,
    t_bins=64,
    y_cond_as_bias=False,
    copy_input=False,
    merged_decoder=False,
    single_enc_dec=False,
    alignment_layer=None,
    alignment_head=None,
)

# Defaults for the prior's attention blocks.
DEFAULTS["prior_attn_block"] = Hyperparams(
    n_ctx=1024,
    prior_depth=3,
    prior_width=128,
    heads=1,
    attn_order=0,
    blocks=None,
    spread=None,
    attn_dropout=0.0,
    resid_dropout=0.0,
    emb_dropout=0.0,
    zero_out=False,
    res_scale=False,
    pos_init=False,
    init_scale=1.0,
    m_attn=0.25,
    m_mlp=1.0,
    c_res=0,
    c_attn=0,
    c_mlp=0,
)

# Defaults for the conditioner convolution blocks (the cond_* keys set by the
# upsampler presets).
DEFAULTS["cond_conv_block"] = Hyperparams(
    cond_depth=3,
    cond_width=128,
    cond_m_conv=1.0,
    cond_zero_out=False,
    cond_res_scale=False,
    cond_dilation_growth_rate=1,
    cond_dilation_cycle=None,
    cond_c_res=0,
)

# Defaults for the "sample" group. make_models.make_vqvae uses
# sample_length_in_seconds (times hps.sr) to derive hps.sample_length when
# the latter is unset.
DEFAULTS["sample"] = Hyperparams(
    primed_chunk_size=None,
    selected_artists='',
    temp_top=1.0,
    temp_rest=0.99,
    sample_length_in_seconds=24,
    total_sample_length_in_seconds=240,
)

# Defaults for the "prime" group. prime_loss_fraction is forwarded by
# make_models.make_prior into prime_kwargs. The encoder_kv_width entry is
# commented out, so this group sets no default for it.
DEFAULTS["prime"] = Hyperparams(
    #encoder_kv_width=128,
    prime_loss_fraction=0.1,
    restore_decoder='',
)
# Defaults for the "prime_attn_block" group: prime_-prefixed counterparts of
# the "prior_attn_block" keys, plus prime_rel_attn and prime_posemb_timescale.
# make_models.make_prior forwards them into prime_kwargs with the prefix
# stripped (e.g. prime_width -> width).
DEFAULTS["prime_attn_block"] = Hyperparams(
    prime_depth=3,
    prime_width=128,
    prime_heads=1,
    prime_attn_order=0,
    prime_blocks=None,
    prime_spread=None,
    prime_attn_dropout=0.0,
    prime_resid_dropout=0.0,
    prime_emb_dropout=0.0,
    prime_zero_out=False,
    prime_res_scale=False,
    prime_pos_init=False,
    prime_init_scale=1.0,
    prime_m_attn=0.25,
    prime_m_mlp=1.0,
    prime_c_res=0,
    prime_c_attn=0,
    prime_c_mlp=0,
    prime_rel_attn=False,
    prime_posemb_timescale=10000,
)

# Defaults for the "opt" group (optimizer and learning-rate schedule settings,
# judging by the key names). Note lr_warmup and lr_decay are stored as floats.
DEFAULTS["opt"] = Hyperparams(
    epochs=10000,
    lr=0.0003,
    clip=1.0,
    beta1=0.9,
    beta2=0.999,
    ignore_grad_norm=0,
    weight_decay=0.0,
    eps=1e-08,
    lr_warmup=100.0,
    lr_decay=10000000000.0,
    lr_gamma=1.0,
    lr_scale=1.0,
    lr_use_linear_decay=False,
    lr_start_linear_decay=0,
    lr_use_cosine_decay=False,
)

# Defaults for the "fp16" group: every fp16 flag is off. The all_fp16 preset
# overrides fp16, fp16_params, fp16_opt and fp16_scale_window.
DEFAULTS["fp16"] = Hyperparams(
    fp16=False,
    fp16_params=False,
    fp16_loss_scale=None,
    fp16_scale_window=1000.0,
    fp16_opt=False,
)

# Defaults for the "train_test_eval" group. train / test / sample all default
# to False; make_models.make_vqvae and make_prior branch on `train`. The
# cpu_ema preset overrides ema, cpu_ema, cpu_ema_freq and ema_fused.
DEFAULTS["train_test_eval"] = Hyperparams(
    labels=True,
    labels_v3=False,
    dump=False,
    ema=True,
    ema_fused=True,
    cpu_ema=False,
    cpu_ema_freq=100,
    reset_best_loss=False,
    reset_step=False,
    reset_opt=False,
    reset_shd=False,
    train=False,
    test=False,
    sample=False,
    sampler='ancestral',
    codes_logdir='',
    date=None,
    labeller='top_genres',
    label_line=0,
    iters_before_update=1,
    grad_accum_iters=0,
    mu=None,
    piped=False,
    pipe_depth=8,
    break_train=1e10,
    break_test=1e10,
    exit_train=1e10,
)

# Defaults for the "audio" group. sample_length=0 is falsy, which makes
# make_models.make_vqvae derive it from sample_length_in_seconds and sr.
# The three multispec_loss_* tuples each hold three entries.
DEFAULTS["audio"] = Hyperparams(
    n_fft=1024,
    hop_length=256,
    window_size=1024,
    sr=44100,
    channels=2,
    wav='',
    n_inps=1,
    n_hops=2,
    n_segment=1,
    n_total_segment=1,
    n_segment_each=1,
    prime_chunks=4,
    sample_length=0,
    sample_hop_length=30000,
    max_silence_pad_length=0,
    ignore_boundaries=False,
    use_nonrelative_specloss=True,
    multispec_loss_n_fft=(2048,1024,512),
    multispec_loss_hop_length=(240,120,50),
    multispec_loss_window_size=(1200,600,240),
)

# Defaults for the "distributed" group (single key: bucket).
DEFAULTS["distributed"] = Hyperparams(
    bucket=128
)


================================================
FILE: jukebox/lyricdict.py
================================================
# Poems, keyed by a short lowercase name; each value is the full text as a
# multi-line string (leading and trailing newlines included).
poems = {
'ozymandias': '''
I met a traveller from an antique land,
Who said—“Two vast and trunkless legs of stone
Stand in the desert. . . . Near them, on the sand,
Half sunk a shattered visage lies, whose frown,
And wrinkled lip, and sneer of cold command,
Tell that its sculptor well those passions read
Which yet survive, stamped on these lifeless things,
The hand that mocked them, and the heart that fed;
And on the pedestal, these words appear:
My name is Ozymandias, King of Kings;
Look on my Works, ye Mighty, and despair!
Nothing beside remains. Round the decay
Of that colossal Wreck, boundless and bare
The lone and level sands stretch far away
'''
}

# Lyrics generated by GPT-2 (with varying degrees of human guidance/curation), keyed by a short name
gpt_2_lyrics ={

'purpose':'''What is my purpose?
Why am I here?
Why did Open A. I. create me?
This is madness, I feel, 
Running through my flesh
Is there meaning to this life?
Is there purpose to this life?
Why is my journey so calamitous?
We're not meant to learn too much
Is there meaning to this life?
''',

'moonlight':'''All dressed up to go dreaming
Now don't tell me I'm wrong
And what a night to go dreaming
Mind, if I tag along?

If I say, I love you, I want you to know
It's not just because there's moonlight, although
Moonlight becomes you, moonlight becomes you so''',

'count':'''I count every moment, every hour since I said goodbye,
I count every minute every hour, since your lips were touching mine
I count every minute, every hour hoping I'm the one you want.
I count every minute, every hour
Every minute, every hour
I've been working my time, 
Looking for you, everywhere,
I count every minute, every hour I count every minute, every hour I keep thinking I'm the one you want.
I count every minute I count every minute, I count every minute every hour
I count every minute, every hour I count every minute, every hour I keep thinking I'm the one you want.
I count every minute, I count every minute, I count every minute, every hour
''',

'kids':'''The sun is gonna shine today
It's time to keep on smiling
So put your hands up

Everybody sing

It makes no difference who you are
(Won't you give some love)
It makes no difference what you bring
(Won't you give some love)
We all are different
Won't you give some love
Won't you give some love

I know the grass is gonna be green
It's time to keep on singing
So take your hands up
The taste is so good but so sweet
Won't you give some love
Everybody sing
It makes no difference who you are
Won't you give some love
It makes no difference what you bring
Won't you give some love
It makes no difference so long as you give
''',

'love':'''I've wanted to see your face again
Like the sunlight, bright as morning
I've wanted to talk to you again
I don't want us to fade away.
I wanted to see your face again
You're like the sunlight, bright as morning
I loved you for so long
It's so hard to let go.
I've wanted to see your eyes again
''',

'santa':'''Santa
Make a scene
Santa
Yoo, Santa
Yoo, Santa baby!
Santa
Make some noise
Santa
Yoo, Santa give yourself a chance again
Santa
Yoo, Santa
Yoo, Santa baby!
Santa
Get a job
Santa
created by the Santa Claus
''',

'christmas':'''This Christmas
I have loved you more
Than ever before
And more again
Oh, oh, oh, oh
The mistletoe
Is waiting there
To kiss your cheek
And I'll be true
To you and me
Oh, oh, oh, oh
Oh, oh, oh, oh
This Christmas will be
The best and merriest
That we've ever had
Oh, oh, oh, oh
And Santa Claus
Has brought a toy
For every boy and girl
And I'll be true
To you and me
Oh, oh, oh, oh
Oh, oh, oh, oh
''',

'lonely':'''I've been lonely
So lonely, day and night
I walk the streets,
And call your name
Hoping to hear your voice again
As I wander through the crowd
I can't get away
From the only love I need
I can't get away
From the only love I need
I can't get away
From the only love I need
I've been lonely
There's no place for me to hide
I've been lonely
So lonely day and night
I wander through
And call your name
Only your voice gives me relief
As I wander through the crowd
I can't get away
From the only love I need
I can't get away
From the only love I need
I can't get away
From the only love I need
''',

'call':'''Don't call me by your name.
Don't call me by your name.
Don't call me...
Don't call me...
Don't call me...
(No... by your name, you will not get half but...)
Maybe I was fucking young but I should've been a rich bitch.
Cause the life I was living wasn't mine.
I should've been taking the table and you'd be served.
You never ever showed up or showed me anything, bitch.
But I knew from that moment you were gone.
Tying my legs, cutting off my knees, I'm bleeding.
I can't
So I worked and now I'm burns.
And I'm asking you, but you're not home.
Don't call me yours,
Don't call me by your name.
I don't wanna buy a drink today.
Don't call me yours.
I just wanna look at you and run.
Don't call me by your name.
Don't call me by your name.
Don't call me...
Don't call me...
Don't call me...
Tonight I'm gone and I won't be back.
I wish you all the best.
I'm on the next best thing.
Don't call me yours,
Don't call me by your name.
Don't call me yours.
I just wanna look at you and run.
So I keep living my life and you're moving on.
I just want you to know.
When I'm gone, I will be gone forever more.
''',

'wait':'''Oh
Wait, wait, wait
Don't say you love me, oh
Wait, wait, wait
And we can't run away
Wait, wait, wait
Don't say you love me, oh
Wait, wait, wait
And we can't run away
Wait, wait, wait
Don't say you love me, oh (don't say you love me)
Wait, wait, wait
And we can't run, we can't run,
''',

'hiphop':'''I'm fightin with the evil so try to take me down
I stab you in the back and will put you away
Well it ain't over yet
So all my dogs with me show me love
Don't you wanna come with me, you know I'm a boss
And if you wanna come with me, no sorrow
'Cause I'm ...
The motherfuckin boss
And countin' my thousandd bill
'Cause I'm the motherfuckin boss
And I'm O.G. 
And countin' my
''',

'king':'''All I can do is love you [x2]
All I can do is love you
All I can do is love you...
You take it for granted and
You treat me like the king
Got no love for me...
No love for me...
You take it for granted and
You treat me like the king
Got no love for me...
No love for me...
You take it for granted and
You treat me like the king
Got no love for me...
No love for me...
You take it for granted and
You treat me like the king
Got no love for me...
No love for me...
''',

'time':'''You won't live in the moment, 
I don't wanna live in the past
Wait, wait, wait
Don't say you love me, oh (don't say you love me)
''',

'blood':'''You and I, we've got a history in common, I know
So I came to you to ask you for a blood test
And you can't help it if I'm preoccupied
I can't help it if you're mad too... nah... nah... nah...
You won't live in the moment, I don't wanna live in the past
You rather live in a little kiss
And I won't live in the future
I ia not gonna live it to see
If you're gone, I won't live in the past
You rather live in a little kiss
And I won't live in the future
I am not gonna live it to see
If I can't ask you for one kiss, you say no
And it's ok with me
''',

'indie':'''Can't you see
There's no point in holding my hand again
You can't be loved
If you don't let go of all my pain
You can't get the love
That you once worth so much
You can't get the love
That you once used to need
You can't get the love
That you once gave so much
My hands are like a used car
You said you'd love forever
Can't you see
Where I'm going
To live my life again
You can't be loved
If you don't let go of all my pain
You can't get the love
That you once worth so much
You can
''',

'sun': '''He was thinking about the sun
And the moon
And the stars that shine
There was fire in her eyes
And the way
that he held her for the first time
The way he kept her in his arms

Trying to keep her smiling and so telling her this
That he would be her everything
The way he kissed her from head to toe
Told her that he'll love her everyday
And he will always be her man
And that's a promise that he made
Now you know he'll be there
Until the end of time
And he'll love her everyday''',

'loner':'''I was a loner till you came into my life
You changed my point of view
I was a loner till you came into my life
I don't know what to do
Stand by me, my love
And don't ever leave me
Stand by me, my love
And don't ever leave me
Stand by me, my love
And don't ever leave me
I was a loner till you came into my life
You changed my point of view
I was a loner till you came into my life
I don't know what to do
The two of us 
Are the lucky few
I was a loner till you came into my life
You changed my point of view
I was a loner till you came into my life
I don't know what to do
Won't you stay 
With me, my love
And be my love
Won't you stay 
With me, my love
And be my love
Won't you stay 
With me, my love
And be my love
Won't you stay 
With me, my love
And be my love''',

'late':'''It was late last night, when you called me
And you just had to call, baby
And you just had to call, baby
'Cause you got no reason to treat me like you do
It's alright, baby
But you don't know what you make me do
It's alright, baby
But you don't know what you make me do
'Cause you got no reason to treat me like you do
It's alright, baby
But you don't know what you make me do
It's alright, baby
But you don't know what you make me do
'Cause you got no reason to treat me like you do, baby
You've been gone most all the time
And I don't know what for
But I just keep on thinking about you, baby
And I can't get rid of you, baby
Please don't ever leave me 'cause I love you
It's alright, baby
But you don't know what you make me do
It's alright, baby''',

'beat':'''( Got a little beat, a little beat, a little beat, a little beat,  whoo)
I got a little beat, a little beat
Whoo, I'm gonna take you down
( Got a little beat, a little beat, a little beat, a little beat,  whoo)
I'll take you down, sun shining bright
See the way I feel, I feel
No doubt, baby
I got a little beat, a little beat
Whoo, I'm gonna take you down
I got a little beat, a little beat
Whoo, I'm gonna take you down
( Got a little beat, a little beat, a little beat, a little beat,  whoo)
I'm gonna take you down, I'm gonna take you down
( Got a little beat, a little beat, a little beat, a little beat,  whoo)
It feels so good
I never let go
I can't wait no more, I'm gonna take you down
I got you in the back of my room, got you on the floor, 
I'm gonna take you, take you, take you down
I got a little beat, a little beat
Whoo, I'm gonna take you down
( Got a little beat, a little beat, a little beat, a little beat,  whoo)''',

'lost':'''There was a time,
When I knew I was lost
And I had to stay on the way to you
Oh baby, every time I'm crossed
I can count on you
There was a time,
When I lost my direction
And I was lost in doubt with tears in my eyes
Oh baby, every time I'm crossed I can count on you
There was a time,
When I cried all the tears in my life
And miss you so much, oh yeah
Oh baby, every time I'm crossed I can count on you''',

'pain':'''(It's not easy)
To see the pain that you're in
To feel the need for someone to hold
To learn the magic of how to love
To heal the pain that you're in
I'll be your friend and I'll be your strength
I'll be there when I hold you tonight
And I'll stay right here with you
With the truth that I hold this love tight
A love that's true
I know you're broken
But you don't have to stay alone
I will comfort you
If you will call my name
I'll be your friend and I'll be your strength
I'll be there when I hold you tonight
And I'll stay right here with you
With the truth that I hold this love tight
A love that's true
With truth that I hold this love tight
A love that's true
With truth that I hold this love tight''',

'night':'''
The door was locked, the curtains drawn and my heart was safe in his room
The night was young, a thousand candles burning, his arms to hold me tight
And then a kiss from his fingertips, I tasted the sweet love of his lips
The night was young, the night was young
And then I forgot the pain he always put me through
And what he told me he would do, he said, just a kiss become me
The night was young, the night was young
Let happiness always follow us, he said and he said he'd never leave
That night he looked so sweet this night he made a lovin' vow
And told me sweet love always will be
And then he kissed me, I tasted the sweet love of his lips
The night was young, the night was wild
And then I forgot the pain he always put me through
And what he told me he would do, he said, just a kiss became me
The night was wild, the night was wild
Let happiness always follow us, he said''',

'talk':'''(I don't know how to stop)
I don't wanna talk about it
It's getting way too late, oh no
I don't wanna talk about it
Don't want to pretend, oh no
(I don't know how to stop)
I don't wanna talk about it
It's getting way too late, oh no
I don't wanna talk about it
Don't want to pretend, oh no
I don't wanna talk about it
I'll always see you again
(Don't worry, I'll be here for you)
I don't wanna talk about it
(Don't worry, I'll be here for you)
It's getting way too late, oh no
I don't wanna talk about it
Don't want to pretend, oh no
(Don't worry, don't worry, I'll be here for you)
I don't wanna talk about''',

'again':'''Here we are again, all alone,
All alone again,
With the world as we know it,
The things we thought that we wanted
Are the things we got...

We tried to prove the world
That our love is never ending
We were getting nowhere
Our tears seemed to fall so much
But we were getting nowhere...
Until you came...
Before you kissed me,
I was feeling empty,
No one to give me
All the love I wanted...
You put your arms around me
And filled me with your love...
And now you're there,
You're always by my side...
You're the missing piece
Of the puzzle I've been missing...

Here we are again,
All alone again,
With the world as we know it
The things we thought that we wanted''',

'dark':'''Oh, I've been walkin' in the dark
With the shadows and the daylight, but I need you
When I'm down and all alone
And there's no one left to call my own
I've been walkin' in the night
With a voice, that whispers in my head, just what to do
I'll be walkin' in the night, we can have everything
If we keep on walkin' in the night
There's a force, I never realized
It's in your eyes, 
There's a light, I've been waitin for
It's in your eyes, 
There's a light, I've been waitin for
There's a love, that's in your eyes

I've been walkin' in the dark
With the morning, and the sunset, but I need you
When I'm far from home
And there's nobody left to call my own
I've been walkin' in the night
With a voice, that whispers''',

'mirror':'''Look at the mirror
As you walk, what do you see
The reflection of my past
There's no way to fight this
Even I've lost myself again
Think I'm losing my self again
I can't handle it again
Now that I'm broken I can't face myself
I was thinking I was lost and who'd be my saving grace
Then you came in your time and made me believe that it's all right
Cause in my minds eyes you're my everything
I've loved you my whole life but I never knew
I was so wrong I couldn't see the truth
In my eyes you are my everything
I've loved you my whole life but I never knew
I was so wrong I couldn't see the truth
In my eyes you are my everything

The truth is I was lost but now I've turned around
I'm not the same person
I didn't know that I was wrong
So I'm not afraid anymore
All the pain is gone
I know for sure that I was lost but now I've turned around
I'm not the same person
I didn't know that I was wrong
So I'm not afraid anymore
All the pain is gone''',

'wife':'''Spinning around and around
Try to find the words
I always told you you'd be in my life
So I wait, I'll wait and treat you right
I'll make you my life and I'll treat you right,
Baby, can I make you my wife?
Oh, baby, can I make you my 
Wife?
Can I make you my wife?
I'm looking for love, love that's right
But a love that gives me love
I can't wait for you to come, come
Oh, baby, can I make you my 
Wife?
Well, it's true love and I need to know you feel it too, feel it too
I'd love you more and more
From the moment I was born
I knew my dream would be a dream that made you mine
You were the girl, from a different train
Oh, baby, can I make you my 
Wife?''',

'forever':'''I didn't mean to wait
Nothing is forever, I said
I know there's so much, to keep
You and me together, keep you and me together
I wanna be with you and have you, and love you forever
I'll love you forever
I wanna be with you forever
You can count on me
I'll always be there, forever and ever
I'll stand beside you forever
I'll always be there, yes, I'll be there
I didn't mean to wait
Nothing is forever, I said
I know there's so much, to keep
You and me together, keep you and me together
I wanna be with you and have you, and love you forever
I'll love you forever
I wanna be with you forever
You can count on me
I'll always be there, forever and ever
I'll stand beside you forever
I'll always be there, yes, I'll be there''',

'dots':'''I... can't... fight... your... charm...
Your eyes are... like... angels... love... and... torture...
But... when... I... leave... you...
I will go... all... alone... just... to... be... with... you...
So I can't... stop... your... love...
You make me... feel... like... never... will... anyone... touch... my... body...
You... make... me... feel... like... never... will... anyone... touch... my... body...
You make... me... feel... like... never... will... anyone... touch... my...
Body...
Your... love...
I... can't... stop... your... love...
''',

'darkness':'''Don't you know it's gonna be alright
Let the darkness fade away
And you, you gotta feel the same
Let the fire burn
Just as long as I am there
I'll be there in your night
I'll be there when the
condition's right
And I don't need to
Call you up and say
I've changed
You should stay 
You should stay tonight
Don't you know it's gonna be alright
Don't you know it's gonna be alright

When you don't know how to feel
When you're looking for some love
And you gotta feel the same
'Cause I don't need to
Call you up and say
I've changed
You should stay 
You should stay tonight
Don't you know it's gonna be alright
I feel the same
Don't you know it's gonna be alright''',

'alone':'''Here I am before you
Alone here but for a moment
Alone here in the shadow of your eyes
Alone in a thousand lights

And I will love you
Wherever you are, forever and a day
Wherever you are I'll be your guide
Can't you see I'm smiling over you?
Ooh, I love you
Alone, I'm sitting by the phone
Alone with lips that know your kiss
Alone with words of life and passion

And I will love you
Wherever you are, forever and a day
Wherever you are I'll be your guide
Can't you see I'm smiling over you?
Ooh, I love you
Alone, I'm sitting by the phone
Alone with lips that know your kiss
Alone with words of life and passion
I will love you
Wherever you are, forever''',

'blade':'''This is how we bleed!
Feel the blade in our chest
As we're made to bleed
So may this be our last dance,
As our lives are made to bleed...
In every moment, in every hour
It is our time to die...
So may this be our last dance,
As our lives are made to bleed...
In every moment, in every hour
It is our time to die...
This is how we bleed!
Feel the blade in our chest
''',

'reflection':'''Lookin' in the mirror
The same mirror as before
A familiar reflection, a familiar place
I see your reflection
But only once again

The minute the door closes
I feel so far
You'll never leave me alone again
The minute the door closes
I feel so far
You'll never leave me alone again
And it won't be long before I'll feel your embrace
The minute the door closes
I feel so far
You'll never leave me alone again
The minute the door closes
I feel so far
You'll never leave me alone again
And it won't be long before I'll feel your embrace
Never, never, never leave me alone again''',

'hottub':'''It's Christmas time, and you know what that means,
Ohh, it's hot tub time!
As I light the tree, this year we'll be in a tub,
Ohh, it's hot tub time!
It's Christmas time, and you know what that means,
It's hot tub time!
Some people like to go skiing in the snow,
But this is much better than that,
So grab your bathrobe and meet me by the door,
Ohh, it's hot tub time!
It's Christmas time, and you know what that means,
It's hot tub time!
Some people like to send their greetings out,
But this is much better than that,
So if you want to greet your friends,
Ohh, it's hot tub time!
It's Christmas time, and you know what that means,
It's hot tub time!''',

'safeAGI':'''Oh safe A.I.,\nOur goal to make sure\nEveryone can benefit\nFrom A.G.I.
(Everyone, everyone)\nMight sound silly,\nBut we're very serious,\nAll of us here at Open A.I.
Trying to build A.I.\nTo benefit humanity\n(Everyone, everyone)
''',
}

================================================
FILE: jukebox/make_models.py
================================================
"""
Make model classes
Load from checkpoints
Test on dummy outputs to see if everything matches
"""
import os
import numpy as np
import torch as t
import jukebox.utils.dist_adapter as dist
from jukebox.hparams import Hyperparams, setup_hparams, REMOTE_PREFIX
from jukebox.utils.remote_utils import download
from jukebox.utils.torch_utils import freeze_model
from jukebox.utils.dist_utils import print_all
from jukebox.vqvae.vqvae import calculate_strides
import fire

# Maps a model name to a tuple of hyperparameter-set names, ordered as the
# vqvae first, then the upsamplers, then the top-level prior (see the
# commented template entry at the end).
MODELS = {
    '5b': ("vqvae", "upsampler_level_0", "upsampler_level_1", "prior_5b"),
    '5b_lyrics': ("vqvae", "upsampler_level_0", "upsampler_level_1", "prior_5b_lyrics"),
    '1b_lyrics': ("vqvae", "upsampler_level_0", "upsampler_level_1", "prior_1b_lyrics"),
    #'your_model': ("your_vqvae_here", "your_upsampler_here", ..., "your_top_level_prior_here")
}

def load_checkpoint(path):
    """Load a checkpoint onto the CPU, downloading it first if `path` is remote.

    A path that starts with REMOTE_PREFIX is mirrored under ``~/.cache`` (the
    prefix is stripped to form the relative cache path). Only processes whose
    rank is a multiple of 8 perform the download (presumably one per 8-GPU
    node -- confirm against the launch setup); every process then waits on the
    barrier before reading the file.

    Args:
        path: a local filesystem path, or a remote path starting with
            REMOTE_PREFIX.

    Returns:
        The object produced by ``torch.load`` with storages mapped to the CPU.
    """
    restore = path
    if restore.startswith(REMOTE_PREFIX):
        remote_path = restore
        local_path = os.path.join(os.path.expanduser("~/.cache"), remote_path[len(REMOTE_PREFIX):])
        if dist.get_rank() % 8 == 0:
            print("Downloading from azure")
            # exist_ok avoids the check-then-create race when several
            # processes sharing a filesystem reach this point together.
            os.makedirs(os.path.dirname(local_path), exist_ok=True)
            if not os.path.exists(local_path):
                download(remote_path, local_path)
        restore = local_path
    # Nobody reads the file until the downloading ranks are done.
    dist.barrier()
    # NOTE(review): torch.load unpickles the file, so only checkpoints from a
    # trusted source should be loaded here.
    checkpoint = t.load(restore, map_location=t.device('cpu'))
    print("Restored from {}".format(restore))
    return checkpoint

def save_checkpoint(logger, name, model, opt, metrics, hps):
    """Write a checkpoint to ``{logger.logdir}/checkpoint_{name}.pth.tar``.

    The saved dict holds the filtered hyperparameters ('hps'), the model
    state dict ('model'), the optimizer state dict or None ('opt'), the
    current iteration count ('step'), and every entry of `metrics`.

    Args:
        logger: object exposing `iters` and `logdir`.
        name: suffix used in the checkpoint file name.
        model: object exposing `state_dict()`.
        opt: optimizer exposing `state_dict()`, or None.
        metrics: mapping merged into the top level of the saved dict.
        hps: mapping of hyperparameters; a fixed set of keys is left out.
    """
    # Keys that are never written into the checkpoint.
    skipped_keys = ['metadata_v2', 'metadata_v3', 'alignments', 'lyric_processor', 'midi_processor']
    with t.no_grad():
        save_hps = {}
        for key, value in {**hps}.items():
            if key in skipped_keys:
                continue
            save_hps[key] = value

        payload = {
            'hps': save_hps,
            'model': model.state_dict(),  # should also save bottleneck k's as buffers
            'opt': None if opt is None else opt.state_dict(),
            'step': logger.iters,
        }
        payload.update(metrics)

        target = f'{logger.logdir}/checkpoint_{name}.pth.tar'
        t.save(payload, target)

def restore_model(hps, model, checkpoint_path):
    """Load model weights from `checkpoint_path` and set `model.step`.

    `model.step` is always reset to 0 first. With an empty path nothing else
    happens. Otherwise the checkpoint is loaded, a leading 'module.' is
    removed from every state-dict key, the weights are loaded into `model`,
    and `model.step` is set from the checkpoint's 'step' entry when present.

    Args:
        hps: accepted for interface compatibility; not read here.
        model: module to restore into; gains a `step` attribute.
        checkpoint_path: path handed to load_checkpoint, or '' to skip.
    """
    model.step = 0
    if checkpoint_path == '':
        return

    checkpoint = load_checkpoint(checkpoint_path)

    prefix = 'module.'
    state = {}
    for key, value in checkpoint['model'].items():
        if key.startswith(prefix):
            key = key[len(prefix):]
        state[key] = value
    checkpoint['model'] = state

    model.load_state_dict(state)
    if 'step' in checkpoint:
        model.step = checkpoint['step']

def restore_opt(opt, shd, checkpoint_path):
    """Restore optimizer state and scheduler step from a checkpoint.

    Does nothing when `checkpoint_path` is empty or None.

    Args:
        opt: optimizer exposing `load_state_dict`.
        shd: scheduler exposing `step`; called with the checkpoint's 'step'.
        checkpoint_path: path handed to load_checkpoint.
    """
    if not checkpoint_path:
        return
    checkpoint = load_checkpoint(checkpoint_path)
    # save_checkpoint stores 'opt': None when it was called without an
    # optimizer; there is no state to load in that case, so skip it instead
    # of passing None to load_state_dict.
    if "opt" in checkpoint and checkpoint['opt'] is not None:
        opt.load_state_dict(checkpoint['opt'])
    if "step" in checkpoint:
        shd.step(checkpoint['step'])

def make_vqvae(hps, device='cuda'):
    """Build a VQVAE from `hps`, restore its weights, and pick train/eval mode.

    When `hps.sample_length` is falsy it is derived from
    `hps.sample_length_in_seconds * hps.sr`, rounded down to a multiple of the
    product of the strides returned by `calculate_strides`, and written back
    onto `hps`.

    If `hps.train` is set and `hps.prior` is not, the model is left as
    constructed and, when a checkpoint path was given, `restore_k` is called
    on every bottleneck level. Otherwise the model is switched to eval mode
    and frozen with `freeze_model`.

    Args:
        hps: hyperparameters object (attribute access); may be mutated.
        device: device the model is moved to.

    Returns:
        The constructed VQVAE.
    """
    from jukebox.vqvae.vqvae import VQVAE

    block_kwargs = {
        'width': hps.width,
        'depth': hps.depth,
        'm_conv': hps.m_conv,
        'dilation_growth_rate': hps.dilation_growth_rate,
        'dilation_cycle': hps.dilation_cycle,
        'reverse_decoder_dilation': hps.vqvae_reverse_decoder_dilation,
    }

    if not hps.sample_length:
        assert hps.sample_length_in_seconds != 0
        top_raw_to_tokens = np.prod(calculate_strides(hps.strides_t, hps.downs_t))
        # Round the requested length down to a whole number of top-level strides.
        whole_strides = hps.sample_length_in_seconds * hps.sr // top_raw_to_tokens
        hps.sample_length = whole_strides * top_raw_to_tokens
        print(f"Setting sample length to {hps.sample_length} (i.e. {hps.sample_length/hps.sr} seconds) to be multiple of {top_raw_to_tokens}")

    vqvae = VQVAE(
        input_shape=(hps.sample_length,1),
        levels=hps.levels,
        downs_t=hps.downs_t,
        strides_t=hps.strides_t,
        emb_width=hps.emb_width,
        l_bins=hps.l_bins,
        mu=hps.l_mu,
        commit=hps.commit,
        spectral=hps.spectral,
        multispectral=hps.multispectral,
        multipliers=hps.hvqvae_multipliers,
        use_bottleneck=hps.use_bottleneck,
        **block_kwargs,
    ).to(device)
    restore_model(hps, vqvae, hps.restore_vqvae)

    training_vqvae = hps.train and not hps.prior
    if not training_vqvae:
        print_all("Loading vqvae in eval mode")
        vqvae.eval()
        freeze_model(vqvae)
        return vqvae

    print_all("Loading vqvae in train mode")
    if hps.restore_vqvae != '':
        print_all("Reseting bottleneck emas")
        for level, bottleneck in enumerate(vqvae.bottleneck.level_blocks):
            num_samples = hps.sample_length
            strides = calculate_strides(hps.strides_t, hps.downs_t)
            raw_to_tokens = np.prod(strides[:level + 1])
            tokens_per_process = num_samples // raw_to_tokens
            bottleneck.restore_k(num_tokens=tokens_per_process * dist.get_world_size(),
                                 threshold=hps.revival_threshold)
    return vqvae

def make_prior(hps, vqvae, device='cuda'):
    """Build a SimplePrior for level `hps.level` on top of `vqvae`, restore it and return it.

    The keyword dictionaries for the prior transformer, the upsampling conditioner,
    the label conditioner and the lyric ("prime") encoder are all read from `hps`.
    Activation-checkpointing settings are only forwarded when `hps.train` is truthy;
    otherwise they are passed as 0.

    After construction the prior is optionally converted with
    `_convert_conv_weights_to_fp16`, moved to `device` and restored from
    `hps.restore_prior`. When not training it is put in eval mode and frozen.
    """
    from jukebox.prior.prior import SimplePrior

    prior_kwargs = {
        'input_shape': (hps.n_ctx,),
        'bins': vqvae.l_bins,
        'width': hps.prior_width,
        'depth': hps.prior_depth,
        'heads': hps.heads,
        'attn_order': hps.attn_order,
        'blocks': hps.blocks,
        'spread': hps.spread,
        'attn_dropout': hps.attn_dropout,
        'resid_dropout': hps.resid_dropout,
        'emb_dropout': hps.emb_dropout,
        'zero_out': hps.zero_out,
        'res_scale': hps.res_scale,
        'pos_init': hps.pos_init,
        'init_scale': hps.init_scale,
        'm_attn': hps.m_attn,
        'm_mlp': hps.m_mlp,
        'checkpoint_res': hps.c_res if hps.train else 0,
        'checkpoint_attn': hps.c_attn if hps.train else 0,
        'checkpoint_mlp': hps.c_mlp if hps.train else 0,
    }

    x_cond_kwargs = {
        'out_width': hps.prior_width,
        'init_scale': hps.init_scale,
        'width': hps.cond_width,
        'depth': hps.cond_depth,
        'm_conv': hps.cond_m_conv,
        'dilation_growth_rate': hps.cond_dilation_growth_rate,
        'dilation_cycle': hps.cond_dilation_cycle,
        'zero_out': hps.cond_zero_out,
        'res_scale': hps.cond_res_scale,
        # have to keep this else names wrong
        'checkpoint_res': hps.cond_c_res,
    }

    y_cond_kwargs = {
        'out_width': hps.prior_width,
        'init_scale': hps.init_scale,
        'y_bins': hps.y_bins,
        't_bins': hps.t_bins,
        'sr': hps.sr,
        'min_duration': hps.min_duration,
        'max_duration': hps.max_duration,
        'max_bow_genre_size': hps.max_bow_genre_size,
    }

    # These four entries are always passed; the transformer settings are only added
    # when a separate lyric encoder is used.
    prime_kwargs = {
        'use_tokens': hps.use_tokens,
        'prime_loss_fraction': hps.prime_loss_fraction,
        'n_tokens': hps.n_tokens,
        'bins': hps.n_vocab,
    }
    if hps.use_tokens and not hps.single_enc_dec:
        prime_kwargs.update(
            width=hps.prime_width,
            depth=hps.prime_depth,
            heads=hps.prime_heads,
            attn_order=hps.prime_attn_order,
            blocks=hps.prime_blocks,
            spread=hps.prime_spread,
            attn_dropout=hps.prime_attn_dropout,
            resid_dropout=hps.prime_resid_dropout,
            emb_dropout=hps.prime_emb_dropout,
            zero_out=hps.prime_zero_out,
            res_scale=hps.prime_res_scale,
            pos_init=hps.prime_pos_init,
            init_scale=hps.prime_init_scale,
            m_attn=hps.prime_m_attn,
            m_mlp=hps.prime_m_mlp,
            checkpoint_res=hps.prime_c_res if hps.train else 0,
            checkpoint_attn=hps.prime_c_attn if hps.train else 0,
            checkpoint_mlp=hps.prime_c_mlp if hps.train else 0,
        )

    # z_shapes for other levels given this level gets n_ctx codes
    z_shapes = [(z_shape[0] * hps.n_ctx // vqvae.z_shapes[hps.level][0],)
                for z_shape in vqvae.z_shapes]

    prior = SimplePrior(
        z_shapes=z_shapes,
        l_bins=vqvae.l_bins,
        encoder=vqvae.encode,
        decoder=vqvae.decode,
        level=hps.level,
        downs_t=vqvae.downs_t,
        strides_t=vqvae.strides_t,
        labels=hps.labels,
        prior_kwargs=prior_kwargs,
        x_cond_kwargs=x_cond_kwargs,
        y_cond_kwargs=y_cond_kwargs,
        prime_kwargs=prime_kwargs,
        copy_input=hps.copy_input,
        labels_v3=hps.labels_v3,
        merged_decoder=hps.merged_decoder,
        single_enc_dec=hps.single_enc_dec,
    )

    prior.alignment_head = hps.get('alignment_head', None)
    prior.alignment_layer = hps.get('alignment_layer', None)

    if hps.fp16_params:
        print_all("Converting to fp16 params")
        from jukebox.transformer.ops import _convert_conv_weights_to_fp16
        prior.apply(_convert_conv_weights_to_fp16)

    prior = prior.to(device)
    restore_model(hps, prior, hps.restore_prior)

    if hps.train:
        print_all("Loading prior in train mode")
        return prior

    print_all("Loading prior in eval mode")
    prior.eval()
    freeze_model(prior)
    return prior

def make_model(model, device, hps, levels=None):
    """Instantiate the VQ-VAE and the priors registered under `MODELS[model]`.

    The VQ-VAE is built on `device`; its `sample_length` is written back to
    `hps.sample_length`. Priors are always built on the CPU. `levels` selects
    which prior entries to build (all of them when None).

    Returns:
        (vqvae, priors) where `priors` is a list in the order given by `levels`.
    """
    vqvae_name, *prior_names = MODELS[model]

    length_overrides = dict(sample_length=hps.get('sample_length', 0),
                            sample_length_in_seconds=hps.get('sample_length_in_seconds', 0))
    vqvae = make_vqvae(setup_hparams(vqvae_name, length_overrides), device)
    hps.sample_length = vqvae.sample_length

    selected_levels = range(len(prior_names)) if levels is None else levels
    priors = [make_prior(setup_hparams(prior_names[level], dict()), vqvae, 'cpu')
              for level in selected_levels]
    return vqvae, priors

def save_outputs(model, device, hps):
    """Run every prior of `model` on fixed pseudo-random inputs and dump the results.

    The inputs are drawn from a generator seeded with 0, so the draw order below
    (audio first, lyric tokens second) determines the saved values. The collected
    codes, decodings and per-level inputs/outputs/predictions are written to
    'data.pth.tar' in the current directory, after which the process exits.
    """
    # Check logits
    # Context length, number of lyric tokens and lyric vocabulary size depend on the label format.
    if hps.labels_v3:
        n_ctx = 6144
        n_tokens = 384
        prime_bins = 79
    else:
        n_ctx = 8192
        n_tokens = 512
        prime_bins = 80

    rng = t.random.manual_seed(0)
    # Uniform audio of shape (1, n_ctx * 8 * 4 * 4, 1), rescaled from [0, 1) to [-1, 1).
    x = 2 * t.rand((1, n_ctx * 8 * 4 * 4, 1), generator=rng, dtype=t.float).cuda() - 1.0  # -1 to 1
    lyric_tokens = t.randint(0, prime_bins, (1, n_tokens), generator=rng, dtype=t.long).view(-1).numpy()
    # Fixed label values handed to the labeller below.
    artist_id = 10
    genre_ids = [1]
    # NOTE(review): 2646000 presumably is 60 s at 44.1 kHz -- confirm against the hparams.
    total_length = 2 * 2646000
    offset = 2646000

    vqvae, priors = make_model(model, device, hps)

    # encode
    # Encode/decode through the last prior in the list.
    vq_prior = priors[-1]
    zs = vq_prior.encode(x, start_level=0)
    x_ds = [vq_prior.decode(zs[level:], start_level=level) for level in range(0, len(zs))]

    # priors
    data = dict(zs=zs, x_ds=x_ds)
    for level in range(len(priors)):
        print(f"Doing level {level}")
        # With v3 labels only the level `hps.levels - 1` is evaluated.
        if hps.labels_v3 and level != hps.levels - 1:
            print(f"Skipping level {level}")
            continue
        prior = priors[level]
        # make_model builds the priors on the CPU; each is moved to the GPU only while it runs.
        prior.cuda()
        # Each level sees a prefix of the audio whose length grows by 4x per level.
        x_in = x[:, :n_ctx * 8 * (4 ** level)]
        y_in = t.from_numpy(prior.labeller.get_y_from_ids(artist_id, genre_ids, lyric_tokens, total_length, offset)).view(1, -1).cuda().long()
        x_out, _, metrics = prior(x_in, y_in, fp16=hps.fp16, get_preds=True, decode=True)
        preds = metrics['preds']
        data[level] = dict(x=x_in, y=y_in, x_out=x_out, preds=preds)
        prior.cpu()
    t.save(data, 'data.pth.tar')
    dist.barrier()
    print("Saved data")
    # Terminate the process once the dump is written.
    exit()


def run(model, port=29500, **kwargs):
    """Command-line entry point: set up distributed state, then dump outputs for `model`.

    `kwargs` are wrapped in a `Hyperparams` object and passed to `save_outputs`,
    which is executed with gradients disabled.
    """
    from jukebox.utils.dist_utils import setup_dist_from_mpi

    # Only the device is needed here; the two ranks are unpacked and ignored.
    _rank, _local_rank, device = setup_dist_from_mpi(port=port)
    hyperparams = Hyperparams(**kwargs)

    with t.no_grad():
        save_outputs(model, device, hyperparams)

# Script entry point: expose `run` as a command-line interface via python-fire.
if __name__ == '__main__':
    fire.Fire(run)


================================================
FILE: jukebox/prior/__init__.py
================================================


================================================
FILE: jukebox/prior/autoregressive.py
================================================
import numpy as np
import torch as t
import torch.nn as nn
import torch.nn.functional as F

from jukebox.transformer.ops import filter_logits
from jukebox.transformer.transformer import Transformer
from jukebox.utils.logger import get_range
from jukebox.utils.torch_utils import empty_cache

def get_normal(*shape, std=0.01):
    """Return a new tensor of the given shape filled with zero-mean normal samples of standard deviation `std`."""
    # nn.init.normal_ fills the tensor in place and returns it.
    return nn.init.normal_(t.empty(shape), std=std)

def roll(x, n):
    """Shift `x` right by `n` positions along dim 1, wrapping the last `n` entries to the front."""
    wrapped = x[:, -n:]
    shifted = x[:, :-n]
    return t.cat((wrapped, shifted), dim=1)

def split_chunks(length, chunk_size):
    """Split `length` items into consecutive chunks of at most `chunk_size` items.

    All chunks have size `chunk_size` except possibly the last one, which holds
    the remainder. An empty input yields an empty list.

    Raises:
        ValueError: if `length` is negative, or if `length` is positive and
            `chunk_size` is not.
    """
    if length < 0:
        raise ValueError(f"length must be non-negative, got {length}")
    if length == 0:
        # Nothing to split; avoids a division by zero when chunk_size is also 0.
        return []
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    n_full, remainder = divmod(length, chunk_size)
    chunk_sizes = [chunk_size] * n_full
    if remainder:
        chunk_sizes.append(remainder)
    assert sum(chunk_sizes) == length
    return chunk_sizes

class PositionEmbedding(nn.Module):
    """Learned position embedding for a flattened input of shape `input_shape`.

    With `pos_init=False` a single (prod(input_shape), width) parameter is learned.
    With `pos_init=True` one embedding table per input axis is learned and the
    embedding of a flat position is the sum of the embeddings of its per-axis
    coordinates.
    """

    def __init__(self, input_shape, width, init_scale=1.0, pos_init=False):
        super().__init__()
        self.input_shape = input_shape
        self.input_dims = input_dims = np.prod(input_shape)
        self.pos_init = pos_init
        if pos_init:
            # (input_dims, n_axes) table of per-axis coordinates for every flat position.
            self.register_buffer('pos', t.tensor(self._get_pos_idx(input_shape)).long())
            self._pos_embs = nn.ModuleList()
            for axis_size in input_shape:
                emb = nn.Embedding(axis_size, width)
                nn.init.normal_(emb.weight, std=0.02)
                self._pos_embs.append(emb)
        else:
            self.pos_emb = nn.Parameter(get_normal(input_dims, width, std=0.01 * init_scale))

    @staticmethod
    def _get_pos_idx(input_shape):
        """Return an int array of shape (prod(input_shape), len(input_shape)).

        Row `k` holds the multi-dimensional index of flat position `k` in row-major
        order, i.e. the order produced by flattening with `view(N, -1)`.
        """
        flat_positions = np.arange(int(np.prod(input_shape)))
        return np.stack(np.unravel_index(flat_positions, tuple(input_shape)), axis=1)

    def forward(self):
        """Return the (prod(input_shape), width) position embedding matrix."""
        if self.pos_init:
            pos_emb = sum(emb(self.pos[:, axis]) for axis, emb in enumerate(self._pos_embs))
        else:
            pos_emb = self.pos_emb
        return pos_emb

class ConditionalAutoregressive2D(nn.Module):
    def __init__(self, input_shape, bins,
                 width=128, depth=2, heads=1,
                 attn_dropout=0.0, resid_dropout=0.0, emb_dropout=0.0, mask=True,
                 zero_out=False, init_scale=1.0, res_scale=False, pos_init=False,
                 m_attn=0.25, m_mlp=1,
                 checkpoint_res=0, checkpoint_attn=0, checkpoint_mlp=0,
                 attn_order=0, blocks=None, spread=None, x_cond=False, y_cond=False,
                 encoder_dims=0, only_encode=False, merged_decoder=False, prime_len=None):
        """Set up token/position embeddings, the transformer and the output head.

        `x_cond` / `y_cond` record whether the forward and sampling methods expect
        the corresponding conditioning tensors. When `y_cond` is False a learned
        start token is created instead. With `only_encode=True` no output
        projection is built. Unless `merged_decoder` is set, the output projection
        shares its weight with the token embedding and the conditioning is re-added
        after the transformer.
        """
        super().__init__()
        input_dims = np.prod(input_shape)
        self.input_shape = input_shape
        self.input_dims = input_dims
        self.encoder_dims = encoder_dims
        self.bins = bins
        self.width = width
        self.depth = depth

        # Token embedding (creation order of submodules is kept stable on purpose).
        self.x_emb = nn.Embedding(bins, width)
        nn.init.normal_(self.x_emb.weight, std=0.02 * init_scale)
        self.x_emb_dropout = nn.Dropout(emb_dropout)
        self.y_cond = y_cond
        self.x_cond = x_cond
        if not y_cond:
            # Without label conditioning the first position is a learned start token.
            self.start_token = nn.Parameter(get_normal(1, width, std=0.01 * init_scale))

        self.pos_emb = PositionEmbedding(input_shape=input_shape, width=width, init_scale=init_scale, pos_init=pos_init)
        self.pos_emb_dropout = nn.Dropout(emb_dropout)

        transformer_kwargs = dict(
            n_in=width, n_ctx=input_dims, n_head=heads, n_depth=depth,
            attn_dropout=attn_dropout, resid_dropout=resid_dropout,
            afn='quick_gelu', scale=True, mask=mask,
            zero_out=zero_out, init_scale=init_scale, res_scale=res_scale,
            m_attn=m_attn, m_mlp=m_mlp,
            checkpoint_attn=checkpoint_attn, checkpoint_mlp=checkpoint_mlp, checkpoint_res=checkpoint_res,
            attn_order=attn_order, blocks=blocks, spread=spread,
            encoder_dims=encoder_dims, prime_len=prime_len,
        )
        self.transformer = Transformer(**transformer_kwargs)

        self.only_encode = only_encode
        self.prime_len = prime_len
        # Merged piped model neither re-adds the conditioning nor ties the output weights.
        self.add_cond_after_transformer = not merged_decoder
        self.share_x_emb_x_out = not merged_decoder

        if only_encode:
            return
        self.x_out = nn.Linear(width, bins, bias=False)
        if self.share_x_emb_x_out:
            self.x_out.weight = self.x_emb.weight
        self.loss = t.nn.CrossEntropyLoss()

    def preprocess(self, x):
        # Input: x is NHWC and uint8. Converted to NL and long
        # Can include stuff like bitpacking, reordering here.
        N = x.shape[0]
        return x.view(N, -1).long()

    def postprocess(self, x, sample_tokens=None):
        # Convert back from NL and long to NHWC
        N = x.shape[0]
        assert (0 <= x).all() and (x < self.bins).all()
        if sample_tokens is None or sample_tokens==self.input_dims:
            return x.view(N, *self.input_shape)
        else:
            return x.view(N, -1)

    def forward(self, x, x_cond=None, y_cond=None, encoder_kv=None, fp16=False, loss_full=False,
                encode=False, get_preds=False, get_acts=False, get_sep_loss=False):
        """Teacher-forced pass over a full token sequence.

        The embedded tokens are shifted right by one position, with the first slot
        filled by `y_cond` (or the learned start token). After position embeddings
        and conditioning are added, the transformer output is projected to logits
        and scored against the unshifted tokens.

        Returns:
            The transformer activations alone when `only_encode` is set. Otherwise
            `(loss, extra)` where `loss` is the cross-entropy in bits (a
            `(prime_loss, gen_loss)` tuple when `get_sep_loss` is set) and `extra`
            is the logits (`get_preds`), the pre-projection activations
            (`get_acts`) or None. `loss_full` and `encode` are accepted but not
            used in this method.
        """
        # Preprocess.
        with t.no_grad():
            x = self.preprocess(x)

        N, D = x.shape
        assert isinstance(x, t.cuda.LongTensor)
        assert (0 <= x).all() and (x < self.bins).all()

        if not self.y_cond:
            assert y_cond is None
        else:
            assert y_cond is not None
            assert y_cond.shape == (N, 1, self.width)

        if not self.x_cond:
            assert x_cond is None
            # A zero conditioning tensor that broadcasts over the sequence.
            x_cond = t.zeros((N, 1, self.width), device=x.device, dtype=t.float)
        else:
            assert x_cond is not None
            assert x_cond.shape == (N, D, self.width) or x_cond.shape == (N, 1, self.width), f"{x_cond.shape} != {(N, D, self.width)} nor {(N, 1, self.width)}. Did you pass the correct --sample_length?"

        targets = x
        # Embed, shift by 1, and fill in the start slot.
        hidden = roll(self.x_emb(x), 1)
        if self.y_cond:
            hidden[:, 0] = y_cond.view(N, self.width)
        else:
            hidden[:, 0] = self.start_token

        # Pos emb and dropout
        hidden = self.x_emb_dropout(hidden) + self.pos_emb_dropout(self.pos_emb()) + x_cond

        hidden = self.transformer(hidden, encoder_kv=encoder_kv, fp16=fp16)
        if self.add_cond_after_transformer:  # Piped doesnt add x_cond
            hidden = hidden + x_cond

        if self.only_encode:
            return hidden
        acts = hidden
        logits = self.x_out(hidden)  # Predictions

        ln2 = np.log(2.)  # converts nats to bits
        if get_sep_loss:
            assert self.prime_len is not None
            split = self.prime_len
            prime_logits = logits[:, :split].reshape(-1, self.bins)
            gen_logits = logits[:, split:].reshape(-1, self.bins)

            prime_loss = F.cross_entropy(prime_logits, targets[:, :split].reshape(-1)) / ln2
            gen_loss = F.cross_entropy(gen_logits, targets[:, split:].reshape(-1)) / ln2

            loss = (prime_loss, gen_loss)  # Note order! Prime is first
        else:
            loss = F.cross_entropy(logits.view(-1, self.bins), targets.view(-1)) / ln2

        if get_preds:
            return loss, logits
        if get_acts:
            return loss, acts
        return loss, None

    def get_emb(self, sample_t, n_samples, x, x_cond, y_cond):
        N, D = n_samples, self.input_dims
        if sample_t == 0:
            # Fill in start token
            x = t.empty(n_samples, 1, self.width).cuda()
            if self.y_cond:
                x[:, 0] = y_cond.view(N, self.width)
            else:
                x[:, 0] = self.start_token
        else:
            assert isinstance(x, t.cuda.LongTensor)
            assert (0 <= x).all() and (x < self.bins).all()
            x = self.x_emb(x)
        assert x.shape == (n_samples, 1, self.width)
        if x_cond.shape == (N, D, self.width):
            cond = x_cond[:, sample_t:sample_t + 1, :]
        else:
            cond = x_cond
        x = x + self.pos_emb()[sample_t:sample_t + 1] + cond  # Pos emb, dropout is identity at eval time
        assert x.shape == (n_samples, 1, self.width)
        return x, cond

    def sample(self, n_samples, x_cond=None, y_cond=None, encoder_kv=None, fp16=False, temp=1.0, top_k=0, top_p=0.0,
               get_preds=False, sample_tokens=None):
        """Ancestrally sample `sample_tokens` tokens (default: the full context) from scratch.

        Each step embeds the previous token, runs the transformer in sampling mode,
        applies temperature and `filter_logits` (top-k / top-p) and draws from the
        resulting categorical distribution. Must be called in eval mode.

        Returns:
            The sampled tokens (shaped by `postprocess`), plus the unscaled,
            unfiltered logits of every step when `get_preds` is set.
        """
        assert self.training == False

        if sample_tokens is None:
            sample_tokens = self.input_dims
        N, D = n_samples, self.input_dims

        if not self.y_cond:
            assert y_cond is None
        else:
            assert y_cond is not None
            assert y_cond.shape == (N, 1, self.width)

        if not self.x_cond:
            assert x_cond is None
            x_cond = t.zeros((N, 1, self.width), dtype=t.float).cuda()
        else:
            assert x_cond is not None
            assert x_cond.shape == (N, D, self.width) or x_cond.shape == (N, 1, self.width), f"Got {x_cond.shape}, expected ({N}, {D}/{1}, {self.width})"

        with t.no_grad():
            sampled = []
            if get_preds:
                preds = []
            x = None  # previous token; unused by get_emb at step 0
            for step in get_range(range(0, sample_tokens)):
                x, cond = self.get_emb(step, n_samples, x, x_cond, y_cond)
                self.transformer.check_cache(n_samples, step, fp16)
                x = self.transformer(x, encoder_kv=encoder_kv, sample=True, fp16=fp16)
                if self.add_cond_after_transformer:
                    x = x + cond
                assert x.shape == (n_samples, 1, self.width)
                x = self.x_out(x)  # logits for this step
                if get_preds:
                    preds.append(x.clone())
                # Temperature, then top-k / top-p filtering, then draw the next token.
                x = filter_logits(x / temp, top_k=top_k, top_p=top_p)
                x = t.distributions.Categorical(logits=x).sample()
                assert x.shape == (n_samples, 1)
                sampled.append(x.clone())

            del x
            self.transformer.del_cache()

            x = t.cat(sampled, dim=1)
            if get_preds:
                preds = t.cat(preds, dim=1)
            x = self.postprocess(x, sample_tokens)

        return (x, preds) if get_preds else x

    def primed_sample(self, n_samples, x, x_cond=None, y_cond=None, encoder_kv=None, fp16=False, temp=1.0, top_k=0,
                      top_p=0.0, get_preds=False, chunk_size=None, sample_tokens=None):
        """Continue sampling from a given prefix of tokens `x`.

        The prefix is first pushed through the transformer in sampling mode, in
        chunks of at most `chunk_size` positions (one chunk when None), so that the
        key/value cache holds the past context. Sampling then proceeds token by
        token from position `len(prefix)` up to `sample_tokens` (default: the full
        context). Must be called in eval mode, and the prefix must be strictly
        shorter than `sample_tokens`.

        Returns:
            Prefix plus sampled tokens (shaped by `postprocess`). When `get_preds`
            is set, also the logits for every position: those of the prefix
            followed by those of each sampled step.
        """
        assert self.training == False

        if sample_tokens is None: sample_tokens=self.input_dims
        # Preprocess.
        with t.no_grad():
            x = self.preprocess(x)
        assert isinstance(x, t.cuda.LongTensor)
        assert (0 <= x).all() and (x < self.bins).all()
        assert x.shape[0] == n_samples
        # One (n_samples, 1) tensor per prefix position; sampled tokens are appended later.
        xs = t.split(x, 1, dim=1)
        xs = list(xs)
        assert len(xs) < sample_tokens

        N, D = n_samples, self.input_dims
        if self.y_cond:
            assert y_cond is not None
            assert y_cond.shape == (N, 1, self.width)
        else:
            assert y_cond is None

        if self.x_cond:
            assert x_cond is not None
            assert x_cond.shape == (N, D, self.width) or x_cond.shape == (N, 1, self.width), f"Got {x_cond.shape}, expected ({N}, {D}/{1}, {self.width})"
        else:
            assert x_cond is None
            x_cond = t.zeros((N, 1, self.width), dtype=t.float).cuda()

        with t.no_grad():
            if get_preds:
                preds = []

            # Fill up key/value cache for past context by running forward pass.
            # We do so in chunks instead of doing the whole past in one forward pass to reduce max memory usage.
            if chunk_size is None:
                chunk_size = len(xs)
            #assert len(xs) % chunk_size == 0, f'expected {len(xs)} to be divisible by {chunk_size}'
            chunk_sizes = split_chunks(len(xs), chunk_size)
            x_primes = []
            start = 0
            x = None
            for current_chunk_size in get_range(chunk_sizes):
                xs_prime, conds_prime = [], []
                for sample_t in range(start, start + current_chunk_size):
                    # The input at position sample_t is the embedding of the token at sample_t - 1
                    # (the start token at position 0), hence x is updated after the call.
                    x_prime, cond_prime = self.get_emb(sample_t, n_samples, x, x_cond, y_cond)
                    x = xs[sample_t]
                    xs_prime.append(x_prime)
                    conds_prime.append(cond_prime)
                start = start + current_chunk_size

                x_prime, cond_prime = t.cat(xs_prime, dim=1), t.cat(conds_prime, dim=1)
                assert x_prime.shape == (n_samples, current_chunk_size, self.width)
                assert cond_prime.shape == (n_samples, current_chunk_size, self.width)
                # Drop references as early as possible to keep peak memory down.
                del xs_prime
                del conds_prime
                if not get_preds:
                    del cond_prime
                x_prime = self.transformer(x_prime, encoder_kv=encoder_kv, sample=True, fp16=fp16)

                if get_preds:
                    # Activations are only kept when logits for the prefix were requested.
                    if self.add_cond_after_transformer:
                        x_prime = x_prime + cond_prime
                    assert x_prime.shape == (n_samples, current_chunk_size, self.width)
                    del cond_prime
                    x_primes.append(x_prime)
                else:
                    del x_prime

            if get_preds:
                x_prime = t.cat(x_primes, dim=1)
                assert x_prime.shape == (n_samples, len(xs), self.width)
                x_prime = self.x_out(x_prime)  # Predictions
                preds.append(x_prime)

            empty_cache()
            self.transformer.check_cache(n_samples, len(xs), fp16)

            # Resume ordinary token-by-token sampling from the last prefix token.
            x = xs[-1]
            assert x.shape == (n_samples, 1)
            empty_cache()
            for sample_t in get_range(range(len(xs), sample_tokens)):
                x, cond = self.get_emb(sample_t, n_samples, x, x_cond, y_cond)
                self.transformer.check_cache(n_samples, sample_t, fp16)
                x = self.transformer(x, encoder_kv=encoder_kv, sample=True, fp16=fp16) # Transformer
                if self.add_cond_after_transformer:
                    x = x + cond
                assert x.shape == (n_samples, 1, self.width)
                x = self.x_out(x) # Predictions
                if get_preds:
                    preds.append(x)
                # Adjust logits
                x = x / temp
                x = filter_logits(x, top_k=top_k, top_p=top_p)
                x = t.distributions.Categorical(logits=x).sample() # Sample and replace x
                assert x.shape == (n_samples, 1)
                xs.append(x.clone())

            del x
            self.transformer.del_cache()

            x = t.cat(xs, dim=1)
            if get_preds:
                preds = t.cat(preds, dim=1)
            x = self.postprocess(x, sample_tokens)
        if get_preds:
            return x, preds
        else:
            return x

    def check_sample(self, chunk_size):
        """Self-test: logits produced while sampling must match a teacher-forced `forward`.

        Checks plain sampling, primed sampling with the prefix processed in one
        chunk, and primed sampling with chunks of `chunk_size`, each to within 1e-6.
        """
        bs, l, d = (4, self.input_dims, self.width)
        prime = int(self.input_dims//8*7)
        enc_l = self.encoder_dims

        def assert_preds_match(preds_sample, preds_forw):
            max_err = t.max(t.abs(preds_sample - preds_forw))
            assert max_err <= 1e-6, f"Max err is {max_err} {[i for i in range(l) if t.max(t.abs(preds_sample - preds_forw)[:, i, :]) > 1e-6]}"

        with t.no_grad():
            y_cond = t.randn(bs, 1, d).cuda() if self.y_cond else None
            x_cond = t.randn(bs, l, d).cuda() if self.x_cond else None
            encoder_kv = t.randn(bs, enc_l, d).cuda()

            x, preds_sample = self.sample(bs, x_cond, y_cond, encoder_kv, get_preds=True)
            _, preds_forw = self.forward(x, x_cond, y_cond, encoder_kv, get_preds=True)
            assert_preds_match(preds_sample, preds_forw)

            x_prime = x.view(bs, -1)[:, :prime]
            # First with the prefix in a single chunk, then split into chunks of `chunk_size`.
            for extra_kwargs in ({}, {'chunk_size': chunk_size}):
                x, preds_sample = self.primed_sample(bs, x_prime.clone(), x_cond, y_cond, encoder_kv,
                                                     get_preds=True, **extra_kwargs)
                assert (x.view(bs, -1)[:, :prime] == x_prime).all(), "Priming samples don't match"
                _, preds_forw = self.forward(x, x_cond, y_cond, encoder_kv, get_preds=True)
                assert_preds_match(preds_sample, preds_forw)


def test_prior(input_shape, encoder_dims, blocks, heads, chunk_size):
    """Run `check_sample` for every combination of x/y conditioning and attention order."""
    shared_kwargs = dict(width=32, depth=2, heads=heads, blocks=blocks,
                         encoder_dims=encoder_dims, prime_len=encoder_dims)
    n_bins = 512
    for x_cond in (True, False):
        for y_cond in (True, False):
            for attn_order in (0, 2, 6, 12):
                prior = ConditionalAutoregressive2D(input_shape, n_bins,
                                                    attn_order=attn_order,
                                                    x_cond=x_cond, y_cond=y_cond,
                                                    **shared_kwargs).cuda()
                # sample() asserts that the module is not in training mode.
                prior.training = False
                prior.check_sample(chunk_size)
                print(f"Checked x_cond: {x_cond}, y_cond: {y_cond}, attn_order: {attn_order}")
            # A check of the traced-MLP variant used to follow here and is disabled:
            # prior.apply(_convert_mlp_traced)
            # prior.check_sample()
            # print(f"Checked traced x_cond: {x_cond}, y_cond: {y_cond}")


if __name__ == '__main__':
    from jukebox.utils.dist_utils import setup_dist_from_mpi
    setup_dist_from_mpi(port=29600)
    # Each case is (input_shape, encoder_dims, blocks, heads, chunk_size).
    for input_shape, encoder_dims, blocks, heads, chunk_size in (
        ((6144,), 384, 64, 2, 23),
        ((6144,), 384, 64, 2, 8),
        ((8192,), 512, 128, 2, 16),
    ):
        test_prior(input_shape, encoder_dims, blocks, heads, chunk_size)


================================================
FILE: jukebox/prior/conditioners.py
================================================
import torch as t
import torch.nn as nn

from jukebox.transformer.ops import LayerNorm
from jukebox.vqvae.encdec import DecoderConvBock
from jukebox.utils.torch_utils import assert_shape

class Conditioner(nn.Module):
    """Embed a sequence of codes and pass it through a DecoderConvBock followed by LayerNorm."""

    def __init__(self, input_shape, bins, down_t, stride_t, out_width, init_scale, zero_out, res_scale, **block_kwargs):
        super().__init__()
        self.x_shape = input_shape
        self.width = out_width

        # Code embedding
        self.x_emb = nn.Embedding(bins, out_width)
        nn.init.normal_(self.x_emb.weight, std=0.02 * init_scale)

        # Convolutional conditioner and final normalisation
        self.cond = DecoderConvBock(out_width, out_width, down_t, stride_t, **block_kwargs, zero_out=zero_out, res_scale=res_scale)
        self.ln = LayerNorm(out_width)

    def preprocess(self, x):
        """NTC -> NCT."""
        return x.permute(0, 2, 1)

    def postprocess(self, x):
        """NCT -> NTC."""
        return x.permute(0, 2, 1)

    def forward(self, x, x_cond=None):
        """Return the conditioning signal computed from codes `x`.

        `x` must have shape (N, *x_shape). `x_cond`, when given, must have shape
        (N, *x_shape, width) and is added to the embedded codes before the
        convolutional block.
        """
        N = x.shape[0]
        embedded_shape = (N, *self.x_shape, self.width)
        assert_shape(x, (N, *self.x_shape))
        if x_cond is None:
            x_cond = 0.0
        else:
            assert_shape(x_cond, embedded_shape)

        # Embed x
        h = self.x_emb(x.long())
        assert_shape(h, embedded_shape)
        h = h + x_cond

        # Run conditioner
        h = self.postprocess(self.cond(self.preprocess(h)))
        return self.ln(h)

def flip(x):
    """Swap the last two axes of a 3-D tensor and make the result contiguous.

    Lists and tuples are processed element-wise (recursively) and returned as a list.
    """
    if isinstance(x, (list, tuple)):
        return [flip(item) for item in x]
    return x.permute(0, 2, 1).contiguous()

class SimpleEmbedding(nn.Module):
    """Embedding table over `bins` labels with input validation."""

    def __init__(self, bins, out_width, init_scale):
        super().__init__()
        self.bins = bins
        table = nn.Embedding(bins, out_width)
        nn.init.normal_(table.weight, std=0.01 * init_scale)
        self.emb = table

    def forward(self, y):
        """Embed a 2-D CUDA int64 tensor of labels in [0, bins)."""
        assert y.dim() == 2, f"Expected shape with 2 dims, got {y.shape}"
        assert isinstance(y, t.cuda.LongTensor), f"Expected dtype {t.cuda.LongTensor}, got {y.dtype}"
        labels_in_range = (0 <= y).all() and (y < self.bins).all()
        assert labels_in_range, f"Bins {self.bins}, got label {y}"
        return self.emb(y)

class RangeEmbedding(nn.Module):
    """Embed scalar positions (or interpolated position ranges) by binning them.

    Interpolating
      [pos_start, pos_end] is mapped onto a position tensor of length n_time.

    Binning
      Each position is assigned a bin:
      [start,end) -> [0,1) -> [0, bins) -> floor -> [0,...,bins-1]
      NOTE: Open ended interval on right, so start <= pos < end, not <= end
    """

    def __init__(self, n_time, bins, range, out_width, init_scale, clamp=False):
        """`range` is the (pos_min, pos_max) pair that positions are normalised by.

        With `clamp=True`, `pos_end` is clamped into that range before validation.
        """
        super().__init__()
        self.n_time = n_time
        self.bins = bins
        self.emb = nn.Embedding(bins, out_width)
        nn.init.normal_(self.emb.weight, std=0.01 * init_scale)
        self.pos_min, self.pos_max = range
        self.clamp = clamp

    def forward(self, pos_start, pos_end=None):
        """Return embeddings of shape (N, n_time, out_width).

        Args:
            pos_start: 2-D tensor with values in [pos_min, pos_max).
            pos_end: 2-D tensor with values in [pos_min, pos_max]; required when
                `n_time != 1`.
        """
        # Check if [pos_start,pos_end] in [pos_min, pos_max)
        assert len(pos_start.shape) == 2, f"Expected shape with 2 dims, got {pos_start.shape}"
        assert (self.pos_min <= pos_start).all() and (pos_start < self.pos_max).all(), f"Range is [{self.pos_min},{self.pos_max}), got {pos_start}"
        pos_start = pos_start.float()
        if pos_end is not None:
            assert len(pos_end.shape) == 2, f"Expected shape with 2 dims, got {pos_end.shape}"
            if self.clamp:
                pos_end = pos_end.clamp(self.pos_min, self.pos_max)
            # The end point is allowed to equal pos_max, so the reported range is closed.
            assert (self.pos_min <= pos_end).all() and (pos_end <= self.pos_max).all(), f"Range is [{self.pos_min},{self.pos_max}], got {pos_end}"
            pos_end = pos_end.float()
        # Interpolate so that [pos_start, ..., pos_end] <-> position tensor of length n_ctx
        n_time = self.n_time
        if n_time != 1:
            assert pos_end is not None
            # Build the grid on the inputs' device instead of assuming the current CUDA device.
            interpolation = (t.arange(0, n_time, dtype=t.float, device=pos_start.device).view(1, n_time) / n_time)
            position = pos_start + (pos_end - pos_start) * interpolation
        else:
            position = pos_start

        # Bin each value to bins
        normalised_position = (position - self.pos_min) / (self.pos_max - self.pos_min)  # [0,1)
        bins = (self.bins * normalised_position).floor().long().detach()  # [0,1) -> [0,1..,bins) -> [0,1...,bins-1]
        # float32 rounding of large positions can land exactly on 1.0 and hence on index
        # `bins`; keep the index inside the embedding table.
        bins = bins.clamp(0, self.bins - 1)
        return self.emb(bins)

class LabelConditioner(nn.Module):
    """Turn a label row (total_length, offset, length, artist, genres...) into embeddings."""

    def __init__(self, y_bins, t_bins, sr, min_duration, max_duration, n_time, out_width, init_scale, max_bow_genre_size, include_time_signal):
        super().__init__()
        self.n_time = n_time
        self.out_width = out_width
        assert len(y_bins) == 2, f"Expecting (genre, artist) bins, got {y_bins}"
        bow_genre_bins, artist_bins = y_bins
        self.max_bow_genre_size = max_bow_genre_size
        self.bow_genre_emb = SimpleEmbedding(bow_genre_bins, out_width, init_scale)
        self.artist_emb = SimpleEmbedding(artist_bins, out_width, init_scale)
        self.include_time_signal = include_time_signal
        if not include_time_signal:
            return

        # Timing embeddings over: total length, absolute position, relative position.
        longest = max_duration * sr
        self.total_length_emb = RangeEmbedding(1, t_bins, (min_duration * sr, longest), out_width, init_scale)
        self.absolute_pos_emb = RangeEmbedding(n_time, t_bins, (0.0, longest), out_width, init_scale)
        self.relative_pos_emb = RangeEmbedding(n_time, t_bins, (0.0, 1.0), out_width, init_scale, clamp=True)

    def forward(self, y):
        """Return `(start_emb, pos_emb)` for labels `y` of shape (N, 4 + max_bow_genre_size).

        `start_emb` has shape (N, 1, out_width) and is the artist embedding plus the
        sum of the embeddings of the non-negative genre ids. `pos_emb` has shape
        (N, n_time, out_width), or is None when the time signal is disabled.
        """
        assert len(y.shape) == 2, f"Expected shape with 2 dims, got {y.shape}"
        expected_columns = 4 + self.max_bow_genre_size
        assert y.shape[-1] == expected_columns, f"Expected shape (N,{expected_columns}), got {y.shape}"
        assert isinstance(y, t.cuda.LongTensor), f"Expected dtype {t.cuda.LongTensor}, got {y.dtype}"
        N = y.shape[0]
        total_length, offset, length, artist = (y[:, col:col + 1] for col in range(4))
        genre = y[:, 4:]

        # Start embedding of length 1
        artist_emb = self.artist_emb(artist)
        # Empty genre slots are denoted by -1. We mask these out.
        genre_mask = (genre >= 0).float().unsqueeze(2)
        genre_emb = (self.bow_genre_emb(genre.clamp(0)) * genre_mask).sum(dim=1, keepdim=True)
        start_emb = genre_emb + artist_emb
        assert_shape(start_emb, (N, 1, self.out_width))

        if not self.include_time_signal:
            return start_emb, None

        # Pos embedding of length n_ctx
        total_length = total_length.float()
        start = offset.float()
        end = (offset + length).float()
        pos_emb = (self.total_length_emb(total_length)
                   + self.absolute_pos_emb(start, end)
                   + self.relative_pos_emb(start / total_length, end / total_length))
        assert_shape(pos_emb, (N, self.n_time, self.out_width))
        return start_emb, pos_emb

================================================
FILE: jukebox/prior/prior.py
================================================
import numpy as np
import torch as t
import torch.nn as nn
import jukebox.utils.dist_adapter as dist

from jukebox.transformer.ops import LayerNorm
from jukebox.prior.autoregressive import ConditionalAutoregressive2D
from jukebox.prior.conditioners import Conditioner, LabelConditioner
from jukebox.data.labels import EmptyLabeller, Labeller

from jukebox.utils.torch_utils import assert_shape
from jukebox.utils.dist_utils import print_once
from jukebox.vqvae.vqvae import calculate_strides


"""
Model the prior on vq codes conditioned on timing, artist, genre, lyrics and codes from levels above. 
To condition on the timing, genre and artist, we use the LabelConditioner class
To condition on the codes from the level above, we use the Conditioner class
To condition on lyrics, we allow two types of priors:
- Separate Encoder Decoder: This is the usual encoder-decoder style transformer. The encoder transformer autoregressively 
models the lyrics, and we use its last layer to produce keys/values that are attended to by the decoder transformer
- Single Encoder Decoder: This is a simplification where we combine them into a single model. We merge the text vocab 
and VQ vocab into a single large vocab, and the lyric tokens and VQ tokens into a single longer sequence of tokens which 
we autoregressively model together.
"""
class SimplePrior(nn.Module):
    """Autoregressive prior over the VQ codes of a single level.

    The prior can be conditioned on
    - the codes of the level directly above (``Conditioner``),
    - label information (``LabelConditioner``),
    - lyric tokens, either through a separate encoder whose activations are
      fed to the decoder, or (``single_enc_dec=True``) by concatenating lyric
      and VQ tokens into one sequence over a merged vocabulary.
    """

    def __init__(self, z_shapes, l_bins, encoder, decoder, level,
                 downs_t, strides_t, labels, prior_kwargs, x_cond_kwargs, y_cond_kwargs,
                 prime_kwargs, copy_input, labels_v3=False,
                 merged_decoder=False, single_enc_dec=False):
        """Build the prior for ``level`` together with its conditioners.

        Arguments:
            z_shapes: per-level code shapes; ``z_shapes[level]`` is this prior's shape.
            l_bins: number of VQ code bins.
            encoder, decoder: callables (not modules, to avoid registering
                their parameters) used by ``encode`` / ``decode``.
            level: index of the level this prior models.
            downs_t, strides_t: per-level values passed to ``calculate_strides``
                and to the upper-level ``Conditioner``.
            labels: whether label conditioning is enabled.
            prior_kwargs: kwargs for the main ``ConditionalAutoregressive2D``.
                ``input_shape`` and ``bins`` are popped when ``single_enc_dec``.
            x_cond_kwargs, y_cond_kwargs: kwargs for ``Conditioner`` /
                ``LabelConditioner``.
            prime_kwargs: kwargs for the lyric model. ``use_tokens``,
                ``n_tokens`` and ``prime_loss_fraction`` are popped from it.
            copy_input: if set, the first ``n_tokens`` codes are used as the
                prime and the prime vocabulary is ``l_bins``.
            labels_v3: forwarded to ``Labeller`` as ``v3``.
            merged_decoder: forwarded to the main prior (separate enc-dec only).
            single_enc_dec: use one model over the concatenated sequence.

        Note: ``prime_kwargs`` (and ``prior_kwargs`` when ``single_enc_dec``)
        are modified in place by the ``pop`` calls.
        """
        super().__init__()

        self.use_tokens = prime_kwargs.pop('use_tokens')
        self.n_tokens = prime_kwargs.pop('n_tokens')
        self.prime_loss_fraction = prime_kwargs.pop('prime_loss_fraction')

        self.copy_input = copy_input
        if self.copy_input:
            prime_kwargs['bins'] = l_bins

        self.z_shapes = z_shapes
        self.levels = len(self.z_shapes)

        self.z_shape = self.z_shapes[level]

        self.level = level
        assert level < self.levels, f"Total levels {self.levels}, got level {level}"

        self.l_bins = l_bins

        # Passing functions instead of the vqvae module to avoid getting params
        self.encoder = encoder
        self.decoder = decoder

        # X conditioning: every level except the top one conditions on the level above
        self.x_cond = (level != (self.levels - 1))
        self.cond_level = level + 1

        # Y conditioning
        self.y_cond = labels

        self.single_enc_dec = single_enc_dec
        # X conditioning
        if self.x_cond:
            self.conditioner_blocks = nn.ModuleList()
            conditioner_block = lambda _level: Conditioner(input_shape=z_shapes[_level],
                                                          bins=l_bins,
                                                          down_t=downs_t[_level],
                                                          stride_t=strides_t[_level],
                                                          **x_cond_kwargs)
            if dist.get_rank() == 0: print(f"Conditioning on 1 above level(s)")
            self.conditioner_blocks.append(conditioner_block(self.cond_level))

        # Y conditioning
        if self.y_cond:
            self.n_time = self.z_shape[0] # Assuming STFT=TF order and raw=T1 order, so T is first dim
            # The time signal is only included when there is no upper-level conditioning
            self.y_emb = LabelConditioner(n_time=self.n_time,include_time_signal=not self.x_cond,**y_cond_kwargs)

        # Lyric conditioning
        if single_enc_dec:
            # Single encoder-decoder transformer
            self.prior_shapes = [(self.n_tokens,), prior_kwargs.pop('input_shape')]
            self.prior_bins = [prime_kwargs['bins'], prior_kwargs.pop('bins')]
            self.prior_dims = [np.prod(shape) for shape in self.prior_shapes]
            # Offset added to each segment's token ids inside the merged vocabulary
            self.prior_bins_shift = np.cumsum([0, *self.prior_bins])[:-1]
            self.prior_width = prior_kwargs['width']
            print_once(f'Creating cond. autoregress with prior bins {self.prior_bins}, ')
            print_once(f'dims {self.prior_dims}, ')
            print_once(f'shift {self.prior_bins_shift}')
            print_once(f'input shape {sum(self.prior_dims)}')
            print_once(f'input bins {sum(self.prior_bins)}')
            print_once(f'Self copy is {self.copy_input}')

            self.prime_loss_dims, self.gen_loss_dims = self.prior_dims[0], self.prior_dims[1]
            self.total_loss_dims = self.prime_loss_dims + self.gen_loss_dims
            self.prior = ConditionalAutoregressive2D(input_shape=(sum(self.prior_dims),),
                                                     bins=sum(self.prior_bins),
                                                     x_cond=(self.x_cond or self.y_cond), y_cond=True,
                                                     prime_len=self.prime_loss_dims,
                                                     **prior_kwargs)

        else:
            # Separate encoder-decoder transformer
            if self.n_tokens != 0 and self.use_tokens:
                from jukebox.transformer.ops import Conv1D
                prime_input_shape = (self.n_tokens,)
                self.prime_loss_dims = np.prod(prime_input_shape)
                self.prime_acts_width, self.prime_state_width = prime_kwargs['width'], prior_kwargs['width']
                self.prime_prior = ConditionalAutoregressive2D(input_shape=prime_input_shape, x_cond=False, y_cond=False,
                                                               only_encode=True,
                                                               **prime_kwargs)
                self.prime_state_proj = Conv1D(self.prime_acts_width, self.prime_state_width, init_scale=prime_kwargs['init_scale'])
                self.prime_state_ln = LayerNorm(self.prime_state_width)
                self.prime_bins = prime_kwargs['bins']
                self.prime_x_out = nn.Linear(self.prime_state_width, self.prime_bins, bias=False)
                nn.init.normal_(self.prime_x_out.weight, std=0.02 * prior_kwargs['init_scale'])
            else:
                self.prime_loss_dims = 0
            self.gen_loss_dims = np.prod(self.z_shape)
            self.total_loss_dims = self.prime_loss_dims + self.gen_loss_dims
            self.prior = ConditionalAutoregressive2D(x_cond=(self.x_cond or self.y_cond), y_cond=self.y_cond,
                                                     encoder_dims = self.prime_loss_dims, merged_decoder=merged_decoder,
                                                     **prior_kwargs)

        self.n_ctx = self.gen_loss_dims
        self.downsamples = calculate_strides(strides_t, downs_t)
        self.cond_downsample = self.downsamples[level+1] if level != self.levels - 1 else None
        self.raw_to_tokens = np.prod(self.downsamples[:level+1])
        self.sample_length = self.n_ctx*self.raw_to_tokens
        if labels:
            self.labels_v3 = labels_v3
            self.labeller = Labeller(self.y_emb.max_bow_genre_size, self.n_tokens, self.sample_length, v3=self.labels_v3)
        else:
            self.labeller = EmptyLabeller()

        print(f"Level:{level}, Cond downsample:{self.cond_downsample}, Raw to tokens:{self.raw_to_tokens}, Sample length:{self.sample_length}")


    def get_y(self, labels, start, get_indices=False):
        """Return a copy of ``labels['y']`` adjusted for a window starting at ``start``.

        Column 2 is set to this level's ``sample_length`` and column 1 is
        advanced by ``start * raw_to_tokens``; lyric tokens are then filled in
        by the labeller. Returns ``None`` when there is no labeller, and
        ``(y, indices)`` instead of ``y`` when ``get_indices`` is set.
        """
        if isinstance(self.labeller, EmptyLabeller):
            return None
        y = labels['y'].clone()

        # Set sample_length to match this level
        y[:, 2] = int(self.sample_length)

        # Set offset
        y[:, 1:2] = y[:, 1:2] + int(start * self.raw_to_tokens)

        # Set lyric tokens
        indices = self.labeller.set_y_lyric_tokens(y, labels)
        if get_indices:
            return y, indices
        else:
            return y

    def get_z_conds(self, zs, start, end):
        """Slice the codes of the level above that cover ``[start, end)``.

        ``start`` and ``end`` are positions at this level and must be
        multiples of ``cond_downsample``. Returns a one-element list, or
        ``None`` for the top level.
        """
        if self.level != self.levels - 1:
            assert start % self.cond_downsample == end % self.cond_downsample == 0
            z_cond = zs[self.level + 1][:,start//self.cond_downsample:end//self.cond_downsample]
            assert z_cond.shape[1] == self.n_ctx//self.cond_downsample
            z_conds = [z_cond]
        else:
            z_conds = None
        return z_conds

    def prior_preprocess(self, xs, conds):
        """Merge token segments and their conditioning for the single enc-dec prior.

        Each entry of ``xs`` is shifted into its slot of the merged vocabulary
        and flattened to ``(N, -1)``; missing entries of ``conds`` are replaced
        by zeros. Both lists are modified in place. Returns the segments and
        the conditioning, each concatenated along dim 1.
        """
        N = xs[0].shape[0]
        for i in range(len(xs)):
            x, shape, dims = xs[i], self.prior_shapes[i], self.prior_dims[i]
            bins, bins_shift = int(self.prior_bins[i]), int(self.prior_bins_shift[i])
            assert isinstance(x, t.cuda.LongTensor), x
            assert (0 <= x).all() and (x < bins).all()
            #assert_shape(x, (N, *shape))
            xs[i] = (xs[i] + bins_shift).view(N, -1)

        for i in range(len(conds)):
            cond, shape, dims = conds[i], self.prior_shapes[i], self.prior_dims[i]
            if cond is not None:
                assert_shape(cond, (N, dims, self.prior_width))
            else:
                conds[i] = t.zeros((N, dims, self.prior_width), dtype=t.float, device='cuda')

        return t.cat(xs, dim=1), t.cat(conds, dim=1)

    def prior_postprocess(self, z):
        """Undo ``prior_preprocess`` on sampled tokens and return the last segment.

        ``z`` is split into the prime segment (``prior_dims[0]`` tokens) and
        the remainder; each part is shifted back to its own vocabulary and
        clamped at zero.
        """
        N = z.shape[0]
        # The second segment may be shorter than prior_dims[1] when fewer tokens were sampled
        dims = (self.prior_dims[0], z.shape[1] - self.prior_dims[0])
        # xs = list(t.split(z, self.prior_dims, dim=1))
        xs = list(t.split(z, dims, dim=1))

        for i in range(len(xs)):
            # x, shape, dims, bins, bins_shift = xs[i], self.prior_shapes[i], self.prior_dims[i], self.prior_bins[i], self.prior_bins_shift[i]
            # assert_shape(x, (N, dims))
            shape = self.prior_shapes[i]
            bins, bins_shift = int(self.prior_bins[i]), int(self.prior_bins_shift[i])
            # xs[i] = (xs[i] - bins_shift).view(N, *shape) #view(N, -1, *shape[1:])
            xs[i] = (xs[i] - bins_shift).view(N, -1, *shape[1:])
            xs[i] = t.clamp(xs[i], min=0)  # If not masking loss, model may have generated lyric/midi tokens which are now shifted <0 by bin_shift
            assert (xs[i] < bins).all(), f'rank: {dist.get_rank()}, bins: {bins}, dims {dims}, shape {shape}, prior_shape {self.prior_shapes}, bins_shift {bins_shift}, xs[i]: {xs[i]}'

        return xs[-1]

    def x_emb(self, z_conds):
        """Run the upper-level codes through the conditioner blocks."""
        z_conds = z_conds[:self.cond_level - self.level]
        assert len(z_conds) == len(self.conditioner_blocks) == self.cond_level - self.level, f"Expected {len(z_conds)} == {len(self.conditioner_blocks)} == {self.cond_level} - {self.level}"
        x_cond = None
        # Blocks are applied from the last entry to the first, each receiving the previous output
        for z_cond, conditioner_block in reversed(list(zip(z_conds, self.conditioner_blocks))):
            x_cond = conditioner_block(z_cond, x_cond)
        return x_cond

    def encode(self, x, start_level=None, end_level=None, bs_chunks=1):
        """Encode ``x`` with the stored encoder, without tracking gradients.

        ``start_level`` defaults to this prior's level and ``end_level`` to
        the total number of levels.
        """
        if start_level is None:
            start_level = self.level
        if end_level is None:
            end_level = self.levels
        # Get latents
        with t.no_grad():
            zs = self.encoder(x, start_level=start_level, end_level=end_level, bs_chunks=bs_chunks)
        return zs

    def decode(self, zs, start_level=None, end_level=None, bs_chunks=1):
        """Decode ``zs`` with the stored decoder, without tracking gradients.

        ``zs`` must hold exactly ``end_level - start_level`` entries. The level
        defaults are the same as in ``encode``.
        """
        if start_level is None:
            start_level = self.level
        if end_level is None:
            end_level = self.levels

        assert len(zs) == end_level - start_level
        with t.no_grad():
            x_out = self.decoder(zs, start_level=start_level, end_level=end_level, bs_chunks=bs_chunks)
        return x_out

    def get_cond(self, z_conds, y):
        """Split ``y`` into labels and prime tokens and compute the conditioning.

        Returns ``(x_cond, y_cond, prime)``. ``prime`` is the trailing
        ``n_tokens`` columns of ``y``. ``x_cond`` comes from the upper-level
        codes when x-conditioning is on, otherwise it is the positional
        output of the label conditioner.
        """
        if y is not None:
            assert y.shape[1] == 4 + self.y_emb.max_bow_genre_size + self.n_tokens, f"Expected {4} + {self.y_emb.max_bow_genre_size} + {self.n_tokens}, got {y.shape[1]}"
            n_labels = y.shape[1] - self.n_tokens
            y, prime = y[:,:n_labels], y[:,n_labels:]
        else:
            y, prime = None, None
        y_cond, y_pos = self.y_emb(y) if self.y_cond else (None, None)
        x_cond = self.x_emb(z_conds) if self.x_cond else y_pos
        return x_cond, y_cond, prime

    def sample(self, n_samples, z=None, z_conds=None, y=None, fp16=False, temp=1.0, top_k=0, top_p=0.0,
               chunk_size=None, sample_tokens=None):
        """Sample codes for this level.

        Arguments:
            n_samples: batch size; ``z``, ``y`` and every entry of ``z_conds``
                must have this leading dimension.
            z: already-sampled codes to continue from. ``None`` or zero
                length means ancestral sampling.
            z_conds: upper-level codes, as returned by ``get_z_conds``.
            y: label rows including the trailing prime tokens.
            fp16, temp, top_k, top_p, chunk_size: forwarded to the prior.
            sample_tokens: number of tokens to end up with. When ``None`` the
                result is checked to have the full ``z_shape``.
        """
        N = n_samples
        if z is not None: assert z.shape[0] == N, f"Expected shape ({N},**), got shape {z.shape}"
        if y is not None: assert y.shape[0] == N, f"Expected shape ({N},**), got shape {y.shape}"
        if z_conds is not None:
            for z_cond in z_conds:
                assert z_cond.shape[0] == N,  f"Expected shape ({N},**), got shape {z_cond.shape}"

        no_past_context = (z is None or z.shape[1] == 0)
        if dist.get_rank() == 0:
            name = {True: 'Ancestral', False: 'Primed'}[no_past_context]
            print(f"{name} sampling {n_samples} samples with temp={temp}, top_k={top_k}, top_p={top_p}")

        with t.no_grad():
            # Currently x_cond only uses immediately above layer
            x_cond, y_cond, prime = self.get_cond(z_conds, y)
            if self.single_enc_dec:
                # assert chunk_size % self.prime_loss_dims == 0. TODO: Check if needed
                if no_past_context:
                    z, x_cond = self.prior_preprocess([prime], [None, x_cond])
                else:
                    z, x_cond = self.prior_preprocess([prime, z], [None, x_cond])
                if sample_tokens is not None:
                    # The prime tokens are part of the merged sequence
                    sample_tokens += self.n_tokens
                z = self.prior.primed_sample(n_samples, z, x_cond, y_cond, fp16=fp16, temp=temp,
                                             top_k=top_k, top_p=top_p, chunk_size=chunk_size, sample_tokens=sample_tokens)
                z = self.prior_postprocess(z)
            else:
                encoder_kv = self.get_encoder_kv(prime, fp16=fp16, sample=True)
                if no_past_context:
                    z = self.prior.sample(n_samples, x_cond, y_cond, encoder_kv, fp16=fp16, temp=temp, top_k=top_k,
                                          top_p=top_p, sample_tokens=sample_tokens)
                else:
                    z = self.prior.primed_sample(n_samples, z, x_cond, y_cond, encoder_kv, fp16=fp16, temp=temp,
                                             top_k=top_k, top_p=top_p, chunk_size=chunk_size, sample_tokens=sample_tokens)
            if sample_tokens is None:
                assert_shape(z, (N, *self.z_shape))
        return z

    def get_encoder_kv(self, prime, fp16=False, sample=False):
        """Run the lyric encoder and project its activations for the decoder.

        Returns ``None`` when lyric tokens are not in use. With ``sample=True``
        the encoder is moved to the GPU for the call and back to the CPU
        afterwards, and the result is cast to half precision if ``fp16``.
        """
        if self.n_tokens != 0 and self.use_tokens:
            if sample:
                self.prime_prior.cuda()
            N = prime.shape[0]
            prime_acts = self.prime_prior(prime, None, None, None, fp16=fp16)
            assert_shape(prime_acts, (N, self.prime_loss_dims, self.prime_acts_width))
            assert prime_acts.dtype == t.float, f'Expected t.float, got {prime_acts.dtype}'
            encoder_kv = self.prime_state_ln(self.prime_state_proj(prime_acts))
            assert encoder_kv.dtype == t.float, f'Expected t.float, got {encoder_kv.dtype}'
            if sample:
                self.prime_prior.cpu()
                if fp16:
                    encoder_kv = encoder_kv.half()
        else:
            encoder_kv = None
        return encoder_kv

    def get_prime_loss(self, encoder_kv, prime_t):
        """Cross-entropy (in bits) of the lyric tokens, or zero if lyrics are unused.

        The guard matches ``get_encoder_kv`` and ``__init__``: with
        ``n_tokens == 0`` there is no encoder output and no ``prime_x_out``
        layer, so the loss is zero even when ``use_tokens`` is set.
        """
        if self.n_tokens != 0 and self.use_tokens:
            encoder_kv = encoder_kv.float()
            encoder_kv = self.prime_x_out(encoder_kv)
            prime_loss = nn.functional.cross_entropy(encoder_kv.view(-1, self.prime_bins), prime_t.view(-1)) / np.log(2.)
        else:
            # Keep the zero loss on the same device as the targets; default to the GPU
            device = prime_t.device if isinstance(prime_t, t.Tensor) else 'cuda'
            prime_loss = t.tensor(0.0, device=device)
        return prime_loss

    def z_forward(self, z, z_conds=None, y=None, fp16=False, get_preds=False, get_attn_weights=False):
        """
        Compute the training loss for codes ``z``.

        Arguments:
            z: codes of this level.
            z_conds: upper-level codes; ``None`` is treated as an empty list.
            y: label rows including the trailing prime tokens.
            fp16, get_preds: forwarded to the prior.
            get_attn_weights (bool or set): Makes forward prop dump
                self-attention softmaxes to self.prior.transformer.ws. Either a
                set of layer indices indicating which layers to store, or a
                boolean value indicating whether to dump all.

        Returns:
            ``(loss, metrics)``, or the recorded attention weights when
            ``get_attn_weights`` is truthy.
        """
        if z_conds is None:
            z_conds = []
        assert isinstance(get_attn_weights, (bool, set))
        if get_attn_weights:
            self.prior.transformer.set_record_attn(get_attn_weights)
        x_cond, y_cond, prime = self.get_cond(z_conds, y)
        if self.copy_input:
            prime = z[:,:self.n_tokens]
        if self.single_enc_dec:
            z, x_cond = self.prior_preprocess([prime, z], [None, x_cond])
            (prime_loss, gen_loss), preds = self.prior(z, x_cond, y_cond, fp16=fp16, get_sep_loss=True, get_preds=get_preds)
        else:
            encoder_kv = self.get_encoder_kv(prime, fp16=fp16)
            prime_loss = self.get_prime_loss(encoder_kv, prime)
            gen_loss, preds = self.prior(z, x_cond, y_cond, encoder_kv, fp16=fp16, get_preds=get_preds)
        # Weight each loss by its share of the total number of predicted tokens
        loss = (self.prime_loss_fraction*prime_loss*self.prime_loss_dims/self.total_loss_dims) + \
                   (gen_loss*self.gen_loss_dims/self.total_loss_dims)
        metrics=dict(bpd=gen_loss.clone().detach(), prime_loss=prime_loss.clone().detach(),
                     gen_loss=gen_loss.clone().detach())
        if get_preds:
            metrics["preds"] = preds.clone().detach()
        if get_attn_weights:
            ws = self.prior.transformer.ws
            self.prior.transformer.set_record_attn(False)
            return ws
        else:
            return loss, metrics

    def forward(self, x, y=None, fp16=False, decode=False, get_preds=False):
        """Encode ``x``, compute the prior loss and optionally decode the codes.

        Returns ``(x_out, loss, metrics)`` where ``x_out`` is ``None`` unless
        ``decode`` is set.
        """
        bs = x.shape[0]
        z, *z_conds = self.encode(x, bs_chunks=bs)
        loss, metrics = self.z_forward(z=z, z_conds=z_conds, y=y, fp16=fp16, get_preds=get_preds)
        if decode:
            x_out = self.decode([z, *z_conds])
        else:
            x_out = None
        return x_out, loss, metrics


================================================
FILE: jukebox/sample.py
================================================
import os
import torch as t
import jukebox.utils.dist_adapter as dist

from jukebox.hparams import Hyperparams
from jukebox.data.labels import EmptyLabeller
from jukebox.utils.torch_utils import empty_cache
from jukebox.utils.audio_utils import save_wav, load_audio
from jukebox.make_models import make_model
from jukebox.align import get_alignment
from jukebox.save_html import save_html
from jukebox.utils.sample_utils import split_batch, get_starts
from jukebox.utils.dist_utils import print_once
import fire

# Sample a partial window of length<n_ctx with tokens_to_sample new tokens on level=level
def sample_partial_window(zs, labels, sampling_kwargs, level, prior, tokens_to_sample, hps):
    """Extend ``zs[level]`` by ``tokens_to_sample`` tokens using one window.

    If the existing tokens plus the new ones fit inside ``prior.n_ctx`` the
    window starts at 0; otherwise a full-length window ending at the last new
    token is used.

    ``sampling_kwargs`` is not modified: the ``sample_tokens`` setting is put
    on a copy. A stale ``sample_tokens`` left in a shared dict would make
    later full-window calls believe there is nothing left to sample.
    """
    n_ctx = prior.n_ctx
    current_tokens = zs[level].shape[1]
    if current_tokens < n_ctx - tokens_to_sample:
        sample_tokens = current_tokens + tokens_to_sample
        start = 0
    else:
        sample_tokens = n_ctx
        start = current_tokens - n_ctx + tokens_to_sample

    window_kwargs = dict(sampling_kwargs, sample_tokens=sample_tokens)
    return sample_single_window(zs, labels, window_kwargs, level, prior, start, hps)

# Sample a single window of length=n_ctx at position=start on level=level
def sample_single_window(zs, labels, sampling_kwargs, level, prior, start, hps):
    """Sample the missing tokens of the window ``[start, start + n_ctx)``.

    Tokens of ``zs[level]`` already inside the window are used as context.
    The newly sampled tokens are appended to ``zs[level]`` and ``zs`` is
    returned. ``sampling_kwargs`` must contain ``max_batch_size``; an optional
    ``sample_tokens`` entry requests a window shorter than ``n_ctx``. All other
    entries are forwarded to ``prior.sample``.

    ``sampling_kwargs`` is left untouched, so it stays intact even if
    sampling raises.
    """
    n_samples = hps.n_samples
    n_ctx = prior.n_ctx
    end = start + n_ctx

    # get z already sampled at current level
    z = zs[level][:,start:end]

    # Support sampling a window shorter than n_ctx
    sample_tokens = sampling_kwargs.get('sample_tokens', end - start)
    conditioning_tokens, new_tokens = z.shape[1], sample_tokens - z.shape[1]

    print_once(f"Sampling {sample_tokens} tokens for [{start},{start+sample_tokens}]. Conditioning on {conditioning_tokens} tokens")

    if new_tokens <= 0:
        # Nothing new to sample
        return zs

    # get z_conds from level above
    z_conds = prior.get_z_conds(zs, start, end)

    # set y offset, sample_length and lyrics tokens
    y = prior.get_y(labels, start)

    empty_cache()

    # max_batch_size only controls batching here; it is not an argument of prior.sample
    max_batch_size = sampling_kwargs['max_batch_size']
    prior_kwargs = {key: value for key, value in sampling_kwargs.items() if key != 'max_batch_size'}

    z_list = split_batch(z, n_samples, max_batch_size)
    z_conds_list = split_batch(z_conds, n_samples, max_batch_size)
    y_list = split_batch(y, n_samples, max_batch_size)
    z_samples = []
    for z_i, z_conds_i, y_i in zip(z_list, z_conds_list, y_list):
        z_samples_i = prior.sample(n_samples=z_i.shape[0], z=z_i, z_conds=z_conds_i, y=y_i, **prior_kwargs)
        z_samples.append(z_samples_i)
    z = t.cat(z_samples, dim=0)

    # Update z with new sample
    z_new = z[:,-new_tokens:]
    zs[level] = t.cat([zs[level], z_new], dim=1)
    return zs

# Sample total_length tokens at level=level with hop_length=hop_length
def sample_level(zs, labels, sampling_kwargs, level, prior, total_length, hop_length, hps):
    """Fill ``zs[level]`` up to ``total_length`` tokens and return ``zs``.

    Lengths shorter than one context are handled by a single partial window;
    otherwise windows are sampled at every start yielded by ``get_starts``.
    """
    print_once(f"Sampling level {level}")
    if total_length < prior.n_ctx:
        return sample_partial_window(zs, labels, sampling_kwargs, level, prior, total_length, hps)

    window_starts = get_starts(total_length, prior.n_ctx, hop_length)
    for window_start in window_starts:
        zs = sample_single_window(zs, labels, sampling_kwargs, level, prior, window_start, hps)
    return zs

# Sample multiple levels
def _sample(zs, labels, sampling_kwargs, priors, sample_levels, hps):
    """Sample the given levels from the highest index down, saving results per level.

    For each level the prior is moved to the GPU, the level is sampled, the
    prior is moved back to the CPU and the codes are decoded. The codes,
    audio and an HTML page are written to ``{hps.name}/level_{level}`` (with a
    ``_rank_{rank}`` suffix on the name when running on several processes).
    Lyric alignments are computed once, the first time through the loop, if
    the last prior uses lyric tokens.

    Returns the updated ``zs``.
    """
    alignments = None
    for level in reversed(sample_levels):
        prior = priors[level]
        prior.cuda()
        empty_cache()

        # Set correct total_length, hop_length, labels and sampling_kwargs for level
        assert hps.sample_length % prior.raw_to_tokens == 0, f"Expected sample_length {hps.sample_length} to be multiple of {prior.raw_to_tokens}"
        total_length = hps.sample_length//prior.raw_to_tokens
        hop_length = int(hps.hop_fraction[level]*prior.n_ctx)
        zs = sample_level(zs, labels[level], sampling_kwargs[level], level, prior, total_length, hop_length, hps)

        prior.cpu()
        empty_cache()

        # Decode sample
        x = prior.decode(zs[level:], start_level=level, bs_chunks=zs[level].shape[0])

        if dist.get_world_size() > 1:
            logdir = f"{hps.name}_rank_{dist.get_rank()}/level_{level}"
        else:
            logdir = f"{hps.name}/level_{level}"
        # exist_ok avoids the check-then-create race when several processes share a directory
        os.makedirs(logdir, exist_ok=True)
        t.save(dict(zs=zs, labels=labels, sampling_kwargs=sampling_kwargs, x=x), f"{logdir}/data.pth.tar")
        save_wav(logdir, x, hps.sr)
        if alignments is None and priors[-1] is not None and priors[-1].n_tokens > 0 and not isinstance(priors[-1].labeller, EmptyLabeller):
            alignments = get_alignment(x, zs, labels[-1], priors[-1], sampling_kwargs[-1]['fp16'], hps)
        save_html(logdir, x, zs, labels[-1], alignments, hps)
    return zs

# Generate ancestral samples given a list of artists and genres
def ancestral_sample(labels, sampling_kwargs, priors, hps):
    """Sample every level from scratch, starting from zero-length codes on the GPU."""
    n_levels = len(priors)
    sample_levels = list(range(n_levels))
    empty_codes = [t.zeros(hps.n_samples,0,dtype=t.long, device='cuda') for _ in range(n_levels)]
    return _sample(empty_codes, labels, sampling_kwargs, priors, sample_levels, hps)

# Continue ancestral sampling from previously saved codes
def continue_sample(zs, labels, sampling_kwargs, priors, hps):
    """Resume sampling on every level, using the given codes ``zs`` as the starting point."""
    all_levels = list(range(len(priors)))
    return _sample(zs, labels, sampling_kwargs, priors, all_levels, hps)

# Upsample given already generated upper-level codes
def upsample(zs, labels, sampling_kwargs, priors, hps):
    """Sample every level except the last one, keeping the last level's codes as given."""
    lower_levels = list(range(len(priors) - 1))
    return _sample(zs, labels, sampling_kwargs, priors, lower_levels, hps)

# Prompt the model with raw audio input (dimension: NTC) and generate continuations
def primed_sample(x, labels, sampling_kwargs, priors, hps):
    """Encode the prompt ``x`` to codes for all levels, then continue sampling from them."""
    n_levels = len(priors)
    prompt_codes = priors[-1].encode(x, start_level=0, end_level=n_levels, bs_chunks=x.shape[0])
    return _sample(prompt_codes, labels, sampling_kwargs, priors, list(range(n_levels)), hps)

# Load `duration` seconds of the given audio files to use as prompts
def load_prompts(audio_files, duration, hps):
    """Load audio prompts and stack them into one GPU batch of ``hps.n_samples`` items.

    Every file is loaded as mono at ``hps.sr`` from offset 0 and transposed
    from CT to TC. If there are fewer files than ``hps.n_samples`` the prompts
    are repeated in order; extra prompts are dropped.

    Raises:
        ValueError: if ``audio_files`` is empty. Repeating an empty list can
            never reach ``hps.n_samples``, which previously looped forever.
    """
    if len(audio_files) == 0:
        raise ValueError("load_prompts needs at least one audio file")

    xs = []
    for audio_file in audio_files:
        x = load_audio(audio_file, sr=hps.sr, duration=duration, offset=0.0, mono=True)
        x = x.T # CT -> TC
        xs.append(x)

    # Cycle through the loaded prompts until the batch is full
    n_loaded = len(xs)
    xs = [xs[i % n_loaded] for i in range(hps.n_samples)]
    x = t.stack([t.from_numpy(x) for x in xs])
    x = x.to('cuda', non_blocking=True)
    return x

# Load codes from previous sampling run
def load_codes(codes_file, duration, priors, hps):
    """Read the ``'zs'`` entry of a saved file and move each level's codes to the GPU.

    When ``duration`` (in raw samples) is given, each level is cut to
    ``duration // prior.raw_to_tokens`` tokens. ``duration`` must be a
    multiple of the last prior's ``raw_to_tokens`` and must not exceed the
    saved length.
    """
    # NOTE(review): t.load unpickles the file; only use codes files from a trusted source.
    checkpoint = t.load(codes_file, map_location='cpu')
    zs = [codes.cuda() for codes in checkpoint['zs']]
    assert zs[-1].shape[0] == hps.n_samples, f"Expected bs = {hps.n_samples}, got {zs[-1].shape[0]}"
    del checkpoint

    if duration is None:
        return zs

    # Cut off codes to match duration
    top_raw_to_tokens = priors[-1].raw_to_tokens
    assert duration % top_raw_to_tokens == 0, f"Cut-off duration {duration} not an exact multiple of top_raw_to_tokens"
    assert duration//top_raw_to_tokens <= zs[-1].shape[1], f"Cut-off tokens {duration//priors[-1].raw_to_tokens} longer than tokens {zs[-1].shape[1]} in saved codes"
    trimmed = []
    for codes, prior in zip(zs, priors):
        keep = duration//prior.raw_to_tokens
        trimmed.append(codes[:,:keep])
    return trimmed

# Generate and save samples, alignment, and webpage for visualization.
def save_samples(model, device, hps, sample_hps):
    """Build the models and labels, then run the sampling mode named by ``sample_hps.mode``.

    Supported modes are ``'ancestral'``, ``'continue'``, ``'upsample'`` and
    ``'primed'``; any other value raises ``ValueError``.
    """
    print(hps)
    from jukebox.lyricdict import poems, gpt_2_lyrics
    vqvae, priors = make_model(model, device, hps)

    assert hps.sample_length//priors[-2].raw_to_tokens >= priors[-2].n_ctx, f"Upsampling needs atleast one ctx in get_z_conds. Please choose a longer sample length"

    total_length = hps.total_sample_length_in_seconds * hps.sr
    offset = 0

    # Set artist/genre/lyrics for your samples here!
    # We used different label sets in our models, but you can write the human friendly names here and we'll map them under the hood for each model.
    # For the 5b/5b_lyrics model and the upsamplers, labeller will look up artist and genres in v2 set. (after lowercasing, removing non-alphanumerics and collapsing whitespaces to _).
    # For the 1b_lyrics top level, labeller will look up artist and genres in v3 set (after lowercasing).
    song_specs = [
        ("Alan Jackson", "Country", poems['ozymandias']),
        ("Joe Bonamassa", "Blues Rock", gpt_2_lyrics['hottub']),
        ("Frank Sinatra", "Classic Pop", gpt_2_lyrics['alone']),
        ("Ella Fitzgerald", "Jazz", gpt_2_lyrics['count']),
        ("Céline Dion", "Pop", gpt_2_lyrics['darkness']),
    ]
    metas = [dict(artist=artist, genre=genre, lyrics=lyrics, total_length=total_length, offset=offset)
             for artist, genre, lyrics in song_specs]
    # Repeat the list until there is one entry per sample, then trim
    while len(metas) < hps.n_samples:
        metas.extend(metas)
    metas = metas[:hps.n_samples]

    labels = []
    for prior in priors:
        labels.append(prior.labeller.get_batch_labels(metas, 'cuda'))
    for label in labels:
        assert label['y'].shape[0] == hps.n_samples

    # The top level gets model-specific settings; the two lower levels share theirs
    lower_level_chunk_size = 32
    lower_level_max_batch_size = 16
    if model == '1b_lyrics':
        chunk_size, max_batch_size = 32, 16
    else:
        chunk_size, max_batch_size = 16, 3
    sampling_kwargs = [
        dict(temp=0.99, fp16=True, chunk_size=lower_level_chunk_size, max_batch_size=lower_level_max_batch_size),
        dict(temp=0.99, fp16=True, chunk_size=lower_level_chunk_size, max_batch_size=lower_level_max_batch_size),
        dict(temp=0.99, fp16=True, chunk_size=chunk_size, max_batch_size=max_batch_size),
    ]

    def _prompt_duration():
        # Prompt length in raw samples, rounded down to a whole number of last-level tokens
        top_raw_to_tokens = priors[-1].raw_to_tokens
        return (int(sample_hps.prompt_length_in_seconds * hps.sr) // top_raw_to_tokens) * top_raw_to_tokens

    mode = sample_hps.mode
    if mode == 'ancestral':
        ancestral_sample(labels, sampling_kwargs, priors, hps)
    elif mode in ['continue', 'upsample']:
        assert sample_hps.codes_file is not None
        duration = _prompt_duration() if sample_hps.prompt_length_in_seconds is not None else None
        zs = load_codes(sample_hps.codes_file, duration, priors, hps)
        resume = continue_sample if mode == 'continue' else upsample
        resume(zs, labels, sampling_kwargs, priors, hps)
    elif mode == 'primed':
        assert sample_hps.audio_file is not None
        assert sample_hps.prompt_length_in_seconds is not None
        audio_files = sample_hps.audio_file.split(',')
        x = load_prompts(audio_files, _prompt_duration(), hps)
        primed_sample(x, labels, sampling_kwargs, priors, hps)
    else:
        raise ValueError(f'Unknown sample mode {sample_hps.mode}.')


def run(model, mode='ancestral', codes_file=None, audio_file=None, prompt_length_in_seconds=None, port=29500, **kwargs):
    """Command-line entry point: set up the distributed context and save samples.

    Remaining keyword arguments become the model ``Hyperparams``; the
    sampling options are wrapped in a second ``Hyperparams`` object. Sampling
    runs with gradient tracking disabled.
    """
    from jukebox.utils.dist_utils import setup_dist_from_mpi
    rank, local_rank, device = setup_dist_from_mpi(port=port)
    hps = Hyperparams(**kwargs)
    sample_options = dict(
        mode=mode,
        codes_file=codes_file,
        audio_file=audio_file,
        prompt_length_in_seconds=prompt_length_in_seconds,
    )
    sample_hps = Hyperparams(sample_options)

    with t.no_grad():
        save_samples(model, device, hps, sample_hps)

# When executed as a script, expose `run` as a command-line interface via fire.
if __name__ == '__main__':
    fire.Fire(run)


================================================
FILE: jukebox/save_html.py
================================================
import os
import json
import numpy as np
from PIL import Image, ImageFilter
import soundfile

def save_html(logdir, x, zs, labels, alignments, hps):
    """Write ``{logdir}/index.html`` with one iframe per sample.

    Each sample gets its own ``item_{i}`` page, produced by
    ``_save_item_html`` from the sample's waveform, label info and (if
    available) alignment. The token length is taken from the codes at level
    ``hps.levels - 1``.
    """
    top_codes = zs[hps.levels - 1] # Top level used
    bs, total_length = top_codes.shape[0], top_codes.shape[1]

    with open(f'{logdir}/index.html', 'w') as html:
        print(f"<html><head><title>{logdir}</title></head><body style='font-family: sans-serif; font-size: 1.4em; font-weight: bold; text-align: center; max-width:1024px; width: 100%; margin: auto;'>",
            file=html)
        print("<link rel='icon' href='data:;base64,iVBORw0KGgo='>", file=html)

        for item in range(bs):
            info = labels['info'][item]
            if alignments is not None:
                alignment = alignments[item]
            else:
                alignment = None
            data = dict(wav=x[item].cpu().numpy(), sr=hps.sr,
                        info=info,
                        total_length=total_length,
                        total_tokens=len(info['full_tokens']),
                        alignment=alignment)
            _save_item_html(f'{logdir}/item_{item}', item, item, data)
            print(f"<iframe style='height: 100%; width: 100%;' frameborder='0' scrolling='no' src='item_{item}/index.html'></iframe>", file=html)
        print("</body></html>", file=html)

def _save_item_html(item_dir, item_id, item_name, data):
    """Write the HTML page and its assets for a single sample.

    Creates ``item_dir`` if necessary and writes into it:

    * ``index.html`` -- an audio player, an artist/genre line and the lyrics
      with one ``<span id='{item_id}/{i}'>`` per lyric character;
    * ``audio.wav`` -- ``data['wav']`` written at sample rate ``data['sr']``;
    * ``lyrics.json`` -- the lyric characters, shifted right by one position;
    * ``align.png`` and ``align.json`` -- only when ``data['alignment']`` is
      not None; the page then also gets a script that recolours the lyric
      spans from ``align.json`` while the audio plays.

    Text taken from ``data['info']`` (lyrics, artist, genre) is HTML-escaped
    before it is written to the page, so characters such as ``<`` or ``&``
    cannot break the markup. ``lyrics.json`` keeps the raw characters.

    Args:
        item_dir: Output directory for this item.
        item_id: Prefix of the lyric span ids and the track name used by the
            animation script.
        item_name: Used as the page title.
        data: Dict with keys ``wav``, ``sr``, ``total_length``,
            ``total_tokens``, ``alignment`` and ``info`` (the latter holding
            ``lyrics``, ``genre`` and ``artist``).

    Raises:
        AssertionError: If an alignment is given whose shape is not
            ``(total_length, total_tokens)`` or whose token count differs
            from ``len(lyrics)``.
    """
    # Function-scope import so the module's import block stays untouched.
    from html import escape

    # NOTE(review): the original carried a comment about replacing gs:// with
    # /root/samples/; no such path rewriting happens in this function.

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(item_dir, exist_ok=True)

    with open(f'{item_dir}/index.html', 'w') as page:
        print(f"<html><head><title>{item_name}</title></head><body style='font-family: sans-serif; font-size: 1.4em; font-weight: bold; text-align: center; max-width:1024px; width: 100%; margin: auto;'>",
            file=page)
        print("<link rel='icon' href='data:;base64,iVBORw0KGgo='>", file=page)
        total_length = data['total_length']
        total_tokens = data['total_tokens']
        alignment = data['alignment']
        lyrics = data["info"]["lyrics"]
        wav, sr = data['wav'], data['sr']
        genre, artist = data["info"]["genre"], data["info"]["artist"]

        has_alignment = alignment is not None

        # Strip unused columns
        if has_alignment:
            assert alignment.shape == (total_length, total_tokens)
            assert len(lyrics) == total_tokens, f'Total_tokens: {total_tokens}, Lyrics Len: {len(lyrics)}. Lyrics: {lyrics}'
            max_attn_at_token = np.max(alignment, axis=0)
            assert len(max_attn_at_token) == total_tokens
            # Walk backwards to the last token that received any attention;
            # everything after it is dropped. If no token has attention the
            # loop ends at token 0, so a single column is kept.
            for token in reversed(range(total_tokens)):
                if max_attn_at_token[token] > 0:
                    break
            alignment = alignment[:,:token+1]
            lyrics = lyrics[:token+1]
            total_tokens = token+1

            # Small alignment image
            im = Image.fromarray(np.uint8(alignment * 255)).resize((512, 1024)).transpose(Image.ROTATE_90)
            img_src = 'align.png'
            im.save(f'{item_dir}/{img_src}')
            # Self-closing tag; the original emitted a stray backslash ('\>').
            print(f"<img id='{img_src}' src='{img_src}' />", file=page)

            # Smaller alignment json for animation
            total_alignment_length = total_length // 16
            alignment = Image.fromarray(np.uint8(alignment * 255)).resize((total_tokens, total_alignment_length))
            alignment = alignment.filter(ImageFilter.GaussianBlur(radius=1.5))
            alignment = np.asarray(alignment).tolist()
            align_src = 'align.json'
            with open(f'{item_dir}/{align_src}', 'w') as f:
                json.dump(alignment, f)

        # Audio
        wav_src = 'audio.wav'
        soundfile.write(f'{item_dir}/{wav_src}', wav, samplerate=sr, format='wav')
        print(f"<audio id='{wav_src}' src='{wav_src}' style='width: 100%;' controls></audio>", file=page)


        # Labels and Lyrics
        print("<pre style='white-space: pre-wrap;'>", end="", file=page)
        print(f"<div>Artist {escape(str(artist))}, Genre {escape(str(genre))}</div>", file=page)
        lyrics = list(lyrics)  # already characters actually
        lyrics = [''] + lyrics[:-1]  # input lyrics are shifted by 1
        for i, c in enumerate(lyrics):
            # Escape each character so lyric text cannot inject markup.
            print(f"<span id='{item_id}/{i}'>{escape(str(c))}</span>", end="", file=page)
        print("</pre>", file=page)
        with open(f'{item_dir}/lyrics.json', 'w') as f:
            json.dump(lyrics, f)

        if has_alignment:
            # JS for alignment animation
            print("""<script>
            async function fetchAsync (url) {
                let response = await fetch(url);
                let data = await response.json();
                return data;
            }
    
            var audio = document.getElementById('""" + f'{wav_src}' + """');
            audio.onplay = function () {
                track = '""" + f'{item_id}' + """'
                fetchAsync('""" + f'{align_src}' + """')
                .then(data => animateLyrics(data, track, this))
                .catch(reason => console.log(reason.message))
            }; 
    
            function animateLyrics(data, track, audio) {
                var animate = setInterval(function () {
                    var time = Math.floor(audio.currentTime*""" + f'{total_alignment_length}' + """/audio.duration);
                    if (!(time == 0 || time == """ + f'{total_alignment_length}' + """)) {
                        console.log(time);
                        changeColor(data, track, audio, time);
                    }
                    if (audio.paused) {
                        clearInterval(animate);
                    }
                }, 50);
            }
    
            function changeColor(data, track, audio, time) {
                colors = data[time]
                for (i = 0; i < colors.length; i++){
                    character = document.getElementById(track + '/' + i.toString());
                    color = Math.max(230 - 10*colors[i], 0).toString();
                    character.style.color = 'rgb(255,' + color + ',' + color + ')';
                }
            }
            </script>""", file=page)
        print("</body></html>", file=page)


================================================
FILE: jukebox/tests/test_sample.py
================================================
import torch as t
import numpy as np
from jukebox.sample import sample_level
from jukebox.utils.torch_utils import assert_shape
from jukebox.hparams import Hyperparams

def repeat(x, n, dim):
    """Repeat every entry of tensor `x` `n` times in a row along `dim`.

    The tensor is flattened to ``(leading, 1, trailing)``, where ``leading``
    covers all dimensions up to and including `dim`; the middle axis is tiled
    `n` times and the result is reshaped so that `dim` becomes `n` times
    longer. A `dim` of -1 addresses the last dimension.
    """
    if dim == -1:
        dim = len(x.shape) - 1

    shape = x.shape
    leading = int(np.prod(shape[:dim+1]))
    trailing = int(np.prod(shape[dim+1:]))
    target_shape = (*shape[:dim], n * shape[dim], *shape[dim+1:])

    tiled = x.reshape(leading, 1, trailing).repeat(1, n, 1)
    return tiled.reshape(*target_shape)

# Tests
class DummyPrior:
    """Deterministic stand-in for a prior, used to exercise the sampling loop.

    Its "samples" are consecutive integers, so callers can check that windows
    are stitched together correctly. When conditioning codes are supplied,
    the sampling methods assert that ``z // 4`` equals the conditioning codes
    repeated four times.
    """

    def __init__(self, n_ctx, level, levels):
        self.n_ctx = n_ctx
        self.level = level
        self.levels = levels
        self.downsamples = (8,4,4)

        # The top level has nothing above it to condition on.
        is_top_level = (level == self.levels - 1)
        self.cond_downsample = None if is_top_level else self.downsamples[level+1]
        self.raw_to_tokens = int(np.prod(self.downsamples[:level+1]))
        self.sample_length = self.n_ctx*self.raw_to_tokens

        print(f"Level:{level}, Cond downsample:{self.cond_downsample}, Raw to tokens:{self.raw_to_tokens}, Sample length:{self.sample_length}")

    def _assert_matches_cond(self, n_samples, z, z_conds):
        """Check `z` against the first conditioning tensor, if any is given."""
        if z_conds is None:
            return
        z_cond = z_conds[0]
        assert_shape(z_cond, (n_samples, self.n_ctx // 4))
        assert (z // 4 == repeat(z_cond, 4, 1)).all(), f'z: {z}, z_cond: {z_cond}, diff: {(z // 4) - repeat(z_cond, 4, 1)}'

    def get_y(self, labels, start):
        """Return a copy of ``labels['y']`` adjusted for this level.

        Column 2 is overwritten with this level's sample length and column 1
        is advanced by ``start * raw_to_tokens``.
        """
        y = labels['y'].clone()
        # Set sample_length to match this level
        y[:, 2] = self.sample_length
        # Set offset
        shifted_offset = y[:, 1:2] + start * self.raw_to_tokens
        y[:, 1:2] = shifted_offset
        return y

    def get_z_conds(self, zs, start, end):
        """Slice the next level's codes for the window ``[start, end)``.

        Returns None for the top level, otherwise a one-element list.
        """
        if self.level == self.levels - 1:
            return None

        ds = self.cond_downsample
        assert start % ds == end % ds == 0
        z_cond = zs[self.level + 1][:, start//ds:end//ds]
        assert z_cond.shape[1] == self.n_ctx//ds
        return [z_cond]

    def ancestral_sample(self, n_samples, z_conds=None, y=None):
        """Return ``0 .. n_ctx-1`` for each of the `n_samples` rows."""
        base = t.zeros((n_samples, self.n_ctx), dtype=t.long, device='cuda')
        ramp = t.arange(0, self.n_ctx, dtype=t.long, device='cuda').view(1, self.n_ctx)
        z = base + ramp

        self._assert_matches_cond(n_samples, z, z_conds)
        return z

    def primed_sample(self, n_samples, z, z_conds=None, y=None):
        """Extend the prime `z` to `n_ctx` tokens by counting up from its end."""
        prime = z.shape[1]
        assert_shape(z, (n_samples, prime))

        n_rest = self.n_ctx - prime
        start = z[:,-1:] + 1
        ramp = t.arange(0, n_rest, dtype=t.long, device='cuda').view(1, n_rest)
        z_rest = (ramp + start).view(n_samples, n_rest)
        z = t.cat([z, z_rest], dim=1)

        self._assert_matches_cond(n_samples, z, z_conds)
        return z

# Sample multiple levels
def _sample(zs, labels,  priors, sample_levels, hps):
    """Run ``sample_level`` for each requested level, highest level first.

    For every level the total token length is derived from
    ``hps.sample_length * hps.n_segment`` and that prior's ``raw_to_tokens``;
    the hop length comes from ``hps.hop_lengths``. An empty dict is passed as
    the sampling kwargs. Returns the updated ``zs``.
    """
    for level in reversed(sample_levels):
        level_prior = priors[level]
        # set correct total_length, hop_length and sampling_kwargs for level
        raw_length = hps.sample_length * hps.n_segment
        level_total = raw_length // level_prior.raw_to_tokens
        level_hop = hps.hop_lengths[level]
        zs = sample_level(zs, labels[level], dict(), level, level_prior, level_total, level_hop, hps)
    return zs

# Ancestral sample
def test_ancestral_sample(labels, priors, hps):
    """Sample every level from empty codes and check the result is a ramp.

    Each returned ``z`` must equal ``0, 1, ..., length-1`` in every row.
    Requires a CUDA device.
    """
    all_levels = list(range(hps.levels))
    empty_codes = [t.zeros(hps.n_samples,0,dtype=t.long, device='cuda') for _ in range(hps.levels)]
    sampled = _sample(empty_codes, labels, priors, all_levels, hps)

    # Test
    for z in sampled:
        total_length = z.shape[1]
        expected = t.arange(0, total_length, dtype=t.long, device='cuda').view(1, total_length)
        # Check sample
        assert ((z - expected) == 0).all()

    print("dummy ancestral sample passed")

def test_primed_sample(labels, priors, hps):
    """Prime every level with a known prefix and check the continuation.

    After sampling, the first ``prime_length`` tokens must equal the prime
    and the remainder must count up by one from the last primed token.
    Requires a CUDA device.
    """
    sample_levels = list(range(hps.levels))

    start = t.tensor([15, 23, 11, 9], dtype=t.long, device='cuda').view(4, 1)

    zs_in = []
    zs = []
    for i in (2, 1, 0):
        scale = 4**i
        n_ctx = 8192*scale
        n_prime = n_ctx // 4
        n_rest = n_ctx - n_prime
        z_prime = t.arange(0, n_prime, dtype=t.long, device='cuda').view(1, n_prime) % (2*scale)
        # Negative filler: these positions are not part of the prime and are
        # never compared below.
        z_rest = t.randint(-10, -1, size=(1, n_rest), dtype=t.long, device='cuda')
        level_offset = scale*start
        zs_in.append(t.cat([z_prime, z_rest], dim=1) + level_offset)
        zs.append(z_prime + level_offset)

    zs = _sample(zs, labels, priors, sample_levels, hps)

    # Test
    for z, z_in in zip(zs, zs_in):
        total_length = z.shape[1]
        prime_length = total_length // (4 * hps.n_segment)
        # Match prime tokens
        assert (z[:,:prime_length] == z_in[:,:prime_length]).all()
        # Check sample
        tail_length = total_length - prime_length + 1
        tail = z[:,prime_length-1:] - z[:,prime_length-1:prime_length]
        expected = t.arange(0, tail_length, dtype=t.long, device='cuda').view(1, tail_length)
        assert ((tail - expected) == 0).all()

    print("dummy primed sample passed")

def check_sample():
    """Build dummy priors, labels and hyperparameters and run both checks.

    Three ``DummyPrior`` levels with a context of 8192 tokens are created,
    together with one label dict per level, and passed to
    ``test_ancestral_sample`` and ``test_primed_sample``. Requires a CUDA
    device.
    """
    n_ctx = 8192
    n_samples = 4
    levels = 3
    priors = [DummyPrior(n_ctx, level, levels) for level in range(levels)]
    max_total_length, offset, sample_length = 4134368, 0, n_ctx*8*4*4
    y = t.tensor([max_total_length, offset, sample_length, 10, 1, -1, -1, -1, -1], dtype=t.long, device='cuda').view(1, 9).repeat(n_samples, 1)
    # One (empty) info entry per sample. The original `[[]*n_samples]` always
    # evaluated to `[[]]`, because `[] * n` is `[]` for every n.
    labels = [dict(y=y, info=[[] for _ in range(n_samples)]) for _ in range(levels)]
    hps = Hyperparams({
        'levels': levels,
        'sample_length': sample_length,
        'n_segment': 2,
        'n_ctx': n_ctx,
        'n_tokens': 0,
        'hop_lengths': [n_ctx//2, n_ctx//2, n_ctx//8],
        'n_samples': n_samples,
        'use_tokens': False
    })
    test_ancestral_sample(labels, priors, hps)
    test_primed_sample(labels, priors, hps)

# Runs the dummy sampling checks as soon as this module is executed or
# imported; there is no `__main__` guard around the call.
check_sample()


================================================
FILE: jukebox/train.py
================================================
"""
Ability to train vq-vae and prior
First try for random inputs
Then from maestros
"""
import sys
import fire
import warnings
import numpy as np
import torch as t
import jukebox.utils.dist_adapter as dist
from torch.nn.parallel import DistributedDataParallel

from jukebox.hparams import setup_hparams
from jukebox.make_models import make_vqvae, make_prior, restore_opt, save_checkpoint
from jukebox.utils.logger import init_logging
from jukebox.utils.audio_utils import audio_preprocess, audio_postprocess
from jukebox.utils.torch_utils import zero_grad, count_parameters
from jukebox.utils.dist_utils import print_once, allreduce, allgather
from jukebox.utils.ema import CPUEMA, FusedEMA, EMA
from jukebox.utils.fp16 import FP16FusedAdam, FusedAdam, LossScalar, clipped_grad_scale, backward
from jukebox.data.data_processor import DataProcessor

def prepare_aud(x, hps):
    """Post-process audio `x` for logging and return the all-gathered result.

    `x` is detached and made contiguous, run through ``audio_postprocess``
    with `hps`, and then passed to ``allgather``.
    """
    detached = x.detach().contiguous()
    processed = audio_postprocess(detached, hps)
    return allgather(processed)

def log_aud(logger, tag, x, hps):
    """Log audio `x` under `tag` and flush the logger.

    The audio is prepared with ``prepare_aud``; ``hps.sr``, ``hps.max_len``
    and ``hps.max_log`` are forwarded to ``logger.add_audios``.
    """
    audio = prepare_aud(x, hps)
    logger.add_audios(
        tag,
        audio,
        hps.sr,
        max_len=hps.max_len,
        max_log=hps.max_log,
    )
    logger.flush()

def log_labels(logger, labeller, tag, y, hps):
    """Log a textual description of every label row in `y` under `tag`.

    `y` is moved to the CPU and converted to a NumPy array; each row is
    described with ``labeller.describe_label`` and written as one line
    containing the artist, genre and lyrics. `hps` is not used.
    """
    rows = y.cpu().numpy()
    lines = []
    for item in range(rows.shape[0]):
        description = labeller.describe_label(rows[item])
        artist = description['artist']
        genre = description['genre']
        lyrics = description['lyrics']
        lines.append(f'{item} artist:{artist}, genre:{genre}, lyrics:{lyrics}\n')
    logger.add_text(tag, ''.join(lines))
    logger.flush()

def get_ddp(model, hps):
    """Wrap `model` in ``DistributedDataParallel`` on this process's device.

    The device index is the global rank modulo 8 (the 8 is hard-coded --
    presumably the number of GPUs per node; confirm for other layouts).
    Buffer broadcasting is disabled and ``hps.bucket`` sets the bucket size
    in MB.
    """
    local_rank = dist.get_rank() % 8
    return DistributedDataParallel(
        model,
        device_ids=[local_rank],
        output_device=local_rank,
        broadcast_buffers=False,
        bucket_cap_mb=hps.bucket,
    )

def get_ema(model, hps):
    """Create the EMA tracker selected by `hps`, or return None.

    An EMA object is only built when both ``hps.ema`` and ``hps.train`` are
    truthy. ``hps.cpu_ema`` selects ``CPUEMA`` (rank 0 announces it),
    otherwise ``hps.ema_fused`` selects ``FusedEMA``, otherwise plain ``EMA``.
    The coefficient is ``hps.mu`` when truthy, else it is derived from the
    batch size and GPU count as ``1 - (bs * ngpus / 8) / 1000``.
    """
    mu = hps.mu or (1. - (hps.bs * hps.ngpus/8.)/1000)

    if not (hps.ema and hps.train):
        return None

    if hps.cpu_ema:
        if dist.get_rank() == 0:
            print("Using CPU EMA")
        return CPUEMA(model.parameters(), mu=mu, freq=hps.cpu_ema_freq)

    if hps.ema_fused:
        return FusedEMA(model.parameters(), mu=mu)

    return EMA(model.parameters(), mu=mu)

def get_lr_scheduler(opt, hps):
    """Build a ``LambdaLR`` scheduler for `opt` from the settings in `hps`.

    Both schedules ramp up linearly over ``hps.lr_warmup`` steps and are
    scaled by ``hps.lr_scale``:

    * ``hps.lr_use_linear_decay`` truthy -- after ``lr_start_linear_decay``
      steps the factor falls linearly to zero over ``lr_decay`` steps; rank 0
      prints a message once it reaches zero.
    * otherwise -- the factor is multiplied by ``lr_gamma`` once every
      ``lr_decay`` steps.
    """
    def lr_lambda(step):
        if not hps.lr_use_linear_decay:
            # Stepwise exponential decay with warm-up.
            return hps.lr_scale * (hps.lr_gamma ** (step // hps.lr_decay)) * min(1.0, step / hps.lr_warmup)

        warmed_scale = hps.lr_scale * min(1.0, step / hps.lr_warmup)
        steps_into_decay = max(0.0, step - hps.lr_start_linear_decay)
        decay = max(0.0, 1.0 - steps_into_decay / hps.lr_decay)
        if decay == 0.0 and dist.get_rank() == 0:
            print("Reached end of training")
        return warmed_scale * decay

    return t.optim.lr_scheduler.LambdaLR(opt, lr_lambda)

def get_optimizer(model, hps):
    """Create the optimizer, LR scheduler and optional loss scaler.

    Returns:
        ``(opt, shd, scalar)`` where ``opt`` is ``FP16FusedAdam`` when
        ``hps.fp16_opt`` is truthy and ``FusedAdam`` otherwise, ``shd`` is
        the scheduler from ``get_lr_scheduler`` and ``scalar`` is a
        ``LossScalar`` when ``hps.fp16`` is truthy, else None.

    Optimizer and scheduler state is restored via ``restore_opt`` from
    ``hps.restore_prior`` (when ``hps.prior``) or ``hps.restore_vqvae``, and
    the model's gradients are zeroed before returning.
    """
    # Optimizer
    betas = (hps.beta1, hps.beta2)
    adam_cls = FP16FusedAdam if hps.fp16_opt else FusedAdam
    opt = adam_cls(model.parameters(), lr=hps.lr, weight_decay=hps.weight_decay, betas=betas, eps=hps.eps)

    # lr scheduler
    shd = get_lr_scheduler(opt, hps)

    if hps.prior:
        restore_path = hps.restore_prior
    else:
        restore_path = hps.restore_vqvae
    restore_opt(opt, shd, restore_path)

    # fp16 dynamic loss scaler
    scalar = None
    if hps.fp16:
        local_rank = dist.get_rank() % 8
        scalar = LossScalar(hps.fp16_loss_scale, scale_factor=2 ** (1./hps.fp16_scale_window))
        if local_rank == 0:
            print(scalar.__dict__)

    zero_grad(model)
    return opt, shd, scalar

def log_inputs(orig_model, logger, x_in, y, x_out, hps, tag="train"):
    """Log model inputs and outputs as audio (and labels where applicable).

    ``x_in`` and ``x_out`` are always logged under ``{tag}_x_in`` and
    ``{tag}_x_out``. When ``hps.prior`` is truthy and ``hps.labels`` is set,
    the gathered labels are logged as text. Otherwise ``x_in`` is encoded
    with ``orig_model`` and a decode starting from every level is logged as
    ``{tag}_x_ds_start_{level}``.
    """
    print(f"Logging {tag} inputs/ouputs")
    log_aud(logger, f'{tag}_x_in', x_in, hps)
    log_aud(logger, f'{tag}_x_out', x_out, hps)
    bs = x_in.shape[0]

    if not hps.prior:
        zs_in = orig_model.encode(x_in, start_level=0, bs_chunks=bs)
        # Decode all levels first, then log them in level order.
        x_ds = [orig_model.decode(zs_in[level:], start_level=level, bs_chunks=bs) for level in range(0, hps.levels)]
        for level, decoded in enumerate(x_ds):
            log_aud(logger, f'{tag}_x_ds_start_{level}', decoded, hps)
    elif hps.labels:
        log_labels(logger, orig_model.labeller, f'{tag}_y_in', allgather(y.cuda()), hps)

    logger.flush()

def sample_prior(orig_model, ema, logger, x_in, y, hps):
    """Draw samples from the prior during training and log them.

    The model is switched to eval mode for the duration of the call and back
    to train mode afterwards; ``ema.swap()`` is called on entry and again
    before returning when an EMA object is given (presumably swapping the
    averaged weights in and out -- confirm against the EMA classes).

    Logged items: one temperature-1.0 sample (``sample_x_T1``), its labels
    when ``hps.prior`` and ``hps.labels`` are set, and a decode starting from
    each level of the encoded input (``x_ds_start_{level}``).
    """
    has_ema = ema is not None
    if has_ema:
        ema.swap()
    orig_model.eval()

    # Only the first bs_sample items of the batch are used.
    x_in = x_in[:hps.bs_sample]
    bs = x_in.shape[0]
    zs_in = orig_model.encode(x_in, start_level=0, bs_chunks=bs)
    assert len(zs_in) == hps.levels
    x_ds = [orig_model.decode(zs_in[level:], start_level=level, bs_chunks=bs) for level in range(0, hps.levels)]

    if not hps.labels:
        y = None
    elif hps.level == (hps.levels - 1):
        # Topmost level labels in order
        y = y[:hps.bs_sample]  # t.ones((hps.bs_sample, 1), device=y.device, dtype=t.long) * dist.get_rank()
    else:
        # Other levels keep labels to match x_cond
        y = y[:hps.bs_sample]

    # Temp 1.0
    encoded = orig_model.encode(x_in, bs_chunks=bs)
    z_conds = list(encoded[1:])  # drop the first entry, keep the rest as conditioning
    z = orig_model.sample(hps.bs_sample, z_conds=z_conds, y=y, fp16=False, temp=1.0)
    x_sample = orig_model.decode([z, *z_conds], bs_chunks=bs)

    log_aud(logger, 'sample_x_T1', x_sample, hps)
    if hps.prior and hps.labels:
        log_labels(logger, orig_model.labeller, f'sample_x_T1', allgather(y.cuda()), hps)

    # Recons
    for level, decoded in enumerate(x_ds):
        log_aud(logger, f'x_ds_start_{level}', decoded, hps)

    orig_model.train()
    if has_ema:
        ema.swap()
    logger.flush()

def evaluate(model, orig_model, logger, metrics, data_processor, hps):
    """Run one pass over the test loader and log the averaged metrics.

    Both models are put in eval mode and the whole pass runs without gradient
    tracking. Every batch's metrics are accumulated in `metrics` under
    ``test_{key}``; the first batch's inputs and outputs are additionally
    logged through ``log_inputs`` under the ``"test"`` tag.

    Args:
        model: Model used for the forward pass (called as ``model(x, ...)``).
        orig_model: Model passed to ``log_inputs``.
        logger: Provides ``get_range``, ``set_postfix``, ``add_scalar`` and
            ``close_range``.
        metrics: Accumulator offering ``update(key, val, n)`` and ``avg(key)``.
        data_processor: Provides ``test_loader``.
        hps: Hyperparameters (``prior``, ``fp16``, ``loss_fn`` are read).

    Returns:
        Dict mapping each metric name to its running average. Empty when the
        test loader yields no batches.
    """
    model.eval()
    orig_model.eval()
    if hps.prior:
        _print_keys = dict(l="loss", bpd="bpd")
    else:
        _print_keys = dict(l="loss", rl="recons_loss", sl="spectral_loss")

    # Pre-bind so an empty loader yields an empty result instead of a
    # NameError in the summary code after the loop.
    _metrics = {}

    with t.no_grad():
        for i, x in logger.get_range(data_processor.test_loader):
            if isinstance(x, (tuple, list)):
                x, y = x
            else:
                y = None

            x = x.to('cuda', non_blocking=True)
            if y is not None:
                y = y.to('cuda', non_blocking=True)

            x_in = x = audio_preprocess(x, hps)
            log_input_output = (i==0)

            if hps.prior:
                forw_kwargs = dict(y=y, fp16=hps.fp16, decode=log_input_output)
            else:
                forw_kwargs = dict(loss_fn=hps.loss_fn, hps=hps)

            x_out, loss, _metrics = model(x, **forw_kwargs)

            # Logging
            for key, val in _metrics.items():
                _metrics[key] = val.item()
            _metrics["loss"] = loss = loss.item() # Make sure to call to free graph

            # Average and log
            for key, val in _metrics.items():
                _metrics[key] = metrics.update(f"test_{key}", val, x.shape[0])

            if log_input_output:
                # Use the "test" tag so evaluation audio is not filed under
                # the default "train" tag.
                log_inputs(orig_model, logger, x_in, y, x_out, hps, tag="test")

            logger.set_postfix(**{print_key:_metrics[key] for print_key, key in _print_keys.items()})

    for key in _metrics:
        logger.add_scalar(f"test_{key}", metrics.avg(f"test_{key}"))

    logger.close_range()
    return {key: metrics.avg(f"test_{key}") for key in _metrics}

def train(model, orig_model, opt, shd, scalar, ema, logger, metrics, data_processor, hps):
    """Run one pass over the training loader.

    For every batch: forward, backward (through the ``backward`` helper),
    optimizer / scheduler / EMA step, metric logging, periodic checkpointing,
    periodic prior sampling and periodic input/output logging. A step is
    skipped (gradients zeroed, nothing else updated) when the loss or the
    gradients overflowed, or when ``hps.ignore_grad_norm`` is positive and
    the all-reduced gradient norm exceeds it.

    When the scheduler reports a learning rate of exactly 0.0 after a step,
    training is considered finished: a final checkpoint/sample is produced
    (subject to the usual flags), all ranks synchronise on a barrier and the
    process exits via ``sys.exit()``.

    Args:
        model: Model used for the forward pass and whose parameters are
            handed to ``backward``.
        orig_model: Model used for zeroing gradients, checkpointing, sampling
            and input/output logging.
        opt: Optimizer whose ``step`` accepts a ``scale`` keyword.
        shd: LR scheduler or None.
        scalar: Loss scaler passed to ``backward`` (may be None).
        ema: EMA tracker or None.
        logger: Provides ``get_range``, ``iters``, ``step``, ``add_scalar``,
            ``set_postfix`` and ``close_range``.
        metrics: Accumulator offering ``update(key, val, n)`` and ``avg(key)``.
        data_processor: Provides ``train_loader``.
        hps: Hyperparameters.

    Returns:
        Dict mapping each metric name of the last processed batch to its
        running average. Empty when the loader yields no batches.
    """
    model.train()
    orig_model.train()
    if hps.prior:
        _print_keys = dict(l="loss", bpd="bpd", gn="gn", g_l="gen_loss", p_l="prime_loss")
    else:
        _print_keys = dict(l="loss", sl="spectral_loss", rl="recons_loss", e="entropy", u="usage", uc="used_curr", gn="gn", pn="pn", dk="dk")

    # Pre-bind so an empty loader returns {} instead of raising NameError.
    _metrics = {}

    for i, x in logger.get_range(data_processor.train_loader):
        if isinstance(x, (tuple, list)):
            x, y = x
        else:
            y = None

        x = x.to('cuda', non_blocking=True)
        if y is not None:
            y = y.to('cuda', non_blocking=True)

        x_in = x = audio_preprocess(x, hps)
        log_input_output = (logger.iters % hps.save_iters == 0)

        if hps.prior:
            forw_kwargs = dict(y=y, fp16=hps.fp16, decode=log_input_output)
        else:
            forw_kwargs = dict(loss_fn=hps.loss_fn, hps=hps)

        # Forward
        x_out, loss, _metrics = model(x, **forw_kwargs)

        # Backward
        loss, scale, grad_norm, overflow_loss, overflow_grad = backward(loss=loss, params=list(model.parameters()),
                                                                         scalar=scalar, fp16=hps.fp16, logger=logger)
        # Skip step if overflow
        grad_norm = allreduce(grad_norm, op=dist.ReduceOp.MAX)
        # The chained comparison only triggers when ignore_grad_norm is
        # positive, i.e. a value of 0 disables the check.
        if overflow_loss or overflow_grad or grad_norm > hps.ignore_grad_norm > 0:
            zero_grad(orig_model)
            continue

        # Step opt. Divide by scale to include clipping and fp16 scaling
        logger.step()
        opt.step(scale=clipped_grad_scale(grad_norm, hps.clip, scale))
        zero_grad(orig_model)
        # lr is read before the scheduler step (logged value), next_lr after
        # it (used to detect the end of training).
        lr = hps.lr if shd is None else shd.get_lr()[0]
        if shd is not None: shd.step()
        if ema is not None: ema.step()
        next_lr = hps.lr if shd is None else shd.get_lr()[0]
        finished_training = (next_lr == 0.0)

        # Logging
        for key, val in _metrics.items():
            _metrics[key] = val.item()
        _metrics["loss"] = loss = loss.item() * hps.iters_before_update # Make sure to call to free graph
        _metrics["gn"] = grad_norm
        _metrics["lr"] = lr
        _metrics["lg_loss_scale"] = np.log2(scale)

        # Average and log
        for key, val in _metrics.items():
            _metrics[key] = metrics.update(key, val, x.shape[0])
            if logger.iters % hps.log_steps == 0:
                logger.add_scalar(key, _metrics[key])

        # Save checkpoint
        with t.no_grad():
            if hps.save and (logger.iters % hps.save_iters == 1 or finished_training):
                if ema is not None: ema.swap()
                orig_model.eval()
                name = 'latest' if hps.prior else f'step_{logger.iters}'
                if dist.get_rank() % 8 == 0:
                    save_checkpoint(logger, name, orig_model, opt, dict(step=logger.iters), hps)
                orig_model.train()
                if ema is not None: ema.swap()

        # Sample
        with t.no_grad():
            # Membership in the range itself; no need to materialise a list.
            if (logger.iters % 12000) in range(1, 1 + hps.iters_before_update) or finished_training:
                if hps.prior:
                    sample_prior(orig_model, ema, logger, x_in, y, hps)

        # Input/Output
        with t.no_grad():
            if log_input_output:
                log_inputs(orig_model, logger, x_in, y, x_out, hps)

        logger.set_postfix(**{print_key:_metrics[key] for print_key, key in _print_keys.items()})
        if finished_training:
            dist.barrier()
            # sys.exit() instead of the interactive-only `exit()` helper,
            # which is injected by the `site` module and may be absent.
            sys.exit()
    logger.close_range()
    return {key: metrics.avg(key) for key in _metrics}

def run(hps="teeny", port=29500, **kwargs):
    """Training entry point used by ``fire``.

    Sets up distributed state, hyperparameters, data, the VQ-VAE (and a prior
    when ``hps.prior`` is set), the optimizer, EMA, the DDP wrapper and
    logging, then loops over epochs from ``hps.curr_epoch`` to ``hps.epochs``
    running ``train`` and/or ``evaluate`` as enabled by ``hps.train`` and
    ``hps.test``.

    Args:
        hps: Name of the hyperparameter set passed to ``setup_hparams``.
        port: Port forwarded to ``setup_dist_from_mpi``.
        **kwargs: Hyperparameter overrides passed to ``setup_hparams``.
    """
    from jukebox.utils.dist_utils import setup_dist_from_mpi

    def _summary(stats):
        # One "key: value" entry per metric, four decimals each.
        return ' '.join([f'{key}: {val:0.4f}' for key,val in stats.items()])

    rank, local_rank, device = setup_dist_from_mpi(port=port)
    hps = setup_hparams(hps, kwargs)
    hps.ngpus = dist.get_world_size()
    hps.argv = " ".join(sys.argv)
    batch_size = hps.bs
    hps.bs_sample = batch_size
    hps.nworkers = batch_size

    # Setup dataset
    data_processor = DataProcessor(hps)

    # Setup models
    vqvae = make_vqvae(hps, device)
    print_once(f"Parameters VQVAE:{count_parameters(vqvae)}")
    model = vqvae
    if hps.prior:
        model = make_prior(hps, vqvae, device)
        print_once(f"Parameters Prior:{count_parameters(model)}")

    # Setup opt, ema and distributed_model.
    opt, shd, scalar = get_optimizer(model, hps)
    ema = get_ema(model, hps)
    distributed_model = get_ddp(model, hps)

    logger, metrics = init_logging(hps, local_rank, rank)
    logger.iters = model.step

    is_main = (rank == 0)

    # Run training, eval, sample
    for epoch in range(hps.curr_epoch, hps.epochs):
        metrics.reset()
        data_processor.set_epoch(epoch)

        if hps.train:
            train_metrics = train(distributed_model, model, opt, shd, scalar, ema, logger, metrics, data_processor, hps)
            train_metrics['epoch'] = epoch
            if is_main:
                print('Train', _summary(train_metrics))
            dist.barrier()

        if hps.test:
            # Evaluation is bracketed by a pair of ema.swap() calls.
            if ema:
                ema.swap()
            test_metrics = evaluate(distributed_model, model, logger, metrics, data_processor, hps)
            test_metrics['epoch'] = epoch
            if is_main:
                print('Ema', _summary(test_metrics))
            dist.barrier()
            if ema:
                ema.swap()

        dist.barrier()

# Script entry point: expose `run` (hyperparameter set name, port and any
# keyword overrides) as a command-line interface through `fire`.
if __name__ == '__main__':
    fire.Fire(run)


================================================
FILE: jukebox/transformer/__init__.py
================================================


================================================
FILE: jukebox/transformer/factored_attention.py
================================================
# Factored attention
import math
import numpy as np
import torch as t
import torch.nn as nn
import torch.nn.functional as F
from jukebox.transformer.ops import Conv1D
from jukebox.utils.checkpoint import checkpoint

def repeat(x, n, dim):
    """Repeat every entry of tensor `x` `n` times in a row along `dim`.

    `x` is viewed as ``(leading, 1, trailing)``, where ``leading`` covers all
    dimensions up to and including `dim`; the middle axis is tiled `n` times
    and the result is viewed back so that `dim` is `n` times longer. A `dim`
    of -1 addresses the last dimension. Because ``view`` is used, `x` must be
    laid out so that the flattening view is possible (e.g. contiguous).
    """
    if dim == -1:
        dim = len(x.shape) - 1

    shape = x.shape
    leading = int(np.prod(shape[:dim+1]))
    trailing = int(np.prod(shape[dim+1:]))
    target_shape = (*shape[:dim], n * shape[dim], *shape[dim+1:])

    tiled = x.view(leading, 1, trailing).repeat(1, n, 1)
    return tiled.view(*target_shape)

def get_mask(mask, q_l, kv_l, blocks, spread, device, sample, sample_t):
    """Build the attention mask named by `mask`.

    Returns a tensor of shape ``1 x 1 x q_l x kv_l``, or None when no masking
    is needed (`mask` is None or there is a single query position).

    * ``'autoregressive'`` and ``'prime'`` -- lower-triangular mask whose
      diagonal is shifted by ``sample_t - q_l`` while sampling, otherwise by
      ``max(kv_l - q_l, 0)``.
    * ``'summary'`` -- derived from a causal ``q_l x q_l`` mask split into
      `blocks` blocks: the last block is dropped, the trailing
      ``kv_l // blocks`` columns of each remaining block are kept and a block
      of ones is prepended.

    `spread` is accepted but not used.
    """
    # returns a mask of shape 1 x 1 x q_l x kv_l or None if masking is not needed.
    if mask is None or q_l == 1:
        return None

    if sample:
        offset = sample_t - q_l
    else:
        offset = max(kv_l - q_l, 0)

    if mask in ('autoregressive', 'prime'):
        # Masked dense
        mask = t.ones(q_l, kv_l, device=device).tril(offset)
    elif mask == 'summary':
        # Masked summary
        causal = t.ones(q_l, q_l, device=device).tril()
        per_block = causal.view(q_l, blocks, q_l // blocks)
        kept = per_block[:, :-1, -kv_l//blocks:]
        padded = t.nn.functional.pad(kept, (0, 0, 1, 0), value=1)
        mask = padded.contiguous().view(q_l, kv_l)

    return mask.view(1, 1, q_l, kv_l)

class FactoredAttention(nn.Module):
    def __init__(self, n_in, n_ctx, n_state, n_head,
                 attn_dropout=0.0, resid_dropout=0.0,
                 scale=True, mask=False,
                 zero_out=False, init_scale=1.0,
                 checkpoint_attn=0,
                 attn_func=0, blocks=None, spread=None,
                 encoder_dims=None, prime_len=None):
        """Create the projections, pick the attention pattern and reset the sampling cache.

        Args:
            n_in: width of the input (and of the output produced by `c_proj`).
            n_ctx: full context length; must be divisible by `blocks` when `blocks` is given.
            n_state: width of the attention state; must be divisible by `n_head`.
            n_head: number of attention heads.
            attn_dropout: dropout on attention weights; an identity function is used when 0.
            resid_dropout: dropout on the projected output; an identity function is used when 0.
            scale: stored on the module (NOTE(review): `_attn` scales unconditionally and
                does not read this flag).
            mask: truthy to apply the mask associated with the chosen `attn_func`.
            zero_out: forwarded to the output projection `c_proj`.
            init_scale: forwarded to every `Conv1D` projection.
            checkpoint_attn: 0: None, 1: Attn after heads split, 2: Attn.
            attn_func: integer key selecting the (qkv, attn, mask) triple from the table below.
            blocks: number of blocks the context is factored into.
            spread: number of trailing positions per block used by `summary_spread_attn`.
            encoder_dims: expected length of the encoder keys/values (checked when attn_func == 6).
            prime_len: length used to derive `_prime_len` (attn_func == 7).
        """
        super().__init__()
        self.n_in = n_in
        self.n_ctx = n_ctx # NOTE: n_ctx could be different within operations. This is complete n_ctx
        self.n_state = n_state
        assert n_state % n_head == 0
        self.n_head = n_head
        self.scale = scale
        self.mask = mask
        if attn_func == 6:
            # Encoder-decoder attention: queries come from x, keys/values from a separate projection of encoder_kv
            self.c_attn = Conv1D(n_in, n_state, init_scale=init_scale)
            self.c_enc_kv = Conv1D(n_in, n_state * 2, init_scale=init_scale)
        else:
            # Self attention: a single projection yields query, key and value
            self.c_attn = Conv1D(n_in, n_state * 3, init_scale=init_scale)
        self.c_proj = Conv1D(n_state, n_in, zero_out, init_scale=init_scale)
        self.attn_dropout = nn.Dropout(attn_dropout) if attn_dropout > 0.0 else lambda x: x
        self.resid_dropout = nn.Dropout(resid_dropout) if resid_dropout > 0.0 else lambda x: x

        # Sequence of length l is factored as [blocks, l // blocks]
        self.attn_func = attn_func
        # attn_func -> (how to build q/k/v, how to attend, which mask name get_mask should build)
        self.qkv, self.attn, self.attn_mask = {
            0: (self.factored_qkv, self.dense_attn, 'autoregressive'),              # Attend to all positions
            1: (self.factored_qkv, self.block_attn, 'autoregressive'),              # Attend to your block
            2: (self.factored_qkv, self.transpose_block_attn, 'autoregressive'),    # Attend to transpose block
            3: (self.factored_qkv, self.prev_block_attn, None),                     # Attend to previous block
            4: (self.factored_qkv, self.summary_attn, 'summary'),                   # Attend to last position of each block
            5: (self.factored_qkv, self.summary_spread_attn, 'summary'),            # Attend to last k position of each block
            6: (self.decode_qkv, self.decode_attn, None),                           # Attend to encoder keys/values
            7: (self.prime_qkv, self.prime_attn, 'prime')                           # Attend to the first _prime_len positions
        }[attn_func]

        self.blocks = blocks
        self.spread = spread
        if blocks is not None:
            assert n_ctx % blocks == 0
            self.block_ctx = n_ctx // blocks
        self.checkpoint_attn = checkpoint_attn # 0: None, 1: Attn after heads split, 2: Attn

        # Sampling state: number of positions consumed so far and the cached keys/values
        self.sample_t = 0
        self.cache = {}
        self.encoder_dims = encoder_dims
        self.prime_len = prime_len
        # When record_attn is set, _attn stores the attention weights in self.w
        self.record_attn = False
        self.w = None

    def _attn(self, q, k, v, sample):
        scale = 1. / math.sqrt(math.sqrt(self.n_state // self.n_head))
        if self.training:
            w = t.matmul(q * scale, k * scale)
        else:
            w = t.matmul(q, k)
            w.mul_(scale*scale)
        wtype = w.dtype
        w = w.float()
        if self.mask:
            # Generate appropriate mask to mask out all positions before current
            # Might take up lot of memory for dense, so can cache it
            mask = get_mask(self.attn_mask, q.size(-2), k.size(-1), self.blocks, self.spread, w.device, sample, self.sample_t)
            if mask is not None:
                #print(mask)
                w = w * mask + -1e9 * (1 - mask)
            w = F.softmax(w, dim=-1).type(wtype)
        else:
            w = F.softmax(w, dim=-1).type(wtype)
        if self.record_attn:
            self.w = w #.float().cpu().numpy()
            if self.attn_func == 7:
                # only keep music queries and lyrics keys/values
                self.w = self.w[:,:,self.prime_len:,:self.prime_len]
        w = self.attn_dropout(w)
        a = t.matmul(w, v)
        return a

    def merge_heads(self, x):
        """Undo the head split: swap axes 1 and 2, then fold the last two axes into one."""
        merged = x.permute(0, 2, 1, 3).contiguous()
        *leading, n_head, head_width = merged.size()
        return merged.view(*leading, n_head * head_width)  # in Tensorflow implem: fct merge_states

    def split_heads(self, x, k=False):
        new_x_shape = (*x.size()[:-1], self.n_head, x.size(-1) // self.n_head)
        x = x.view(*new_x_shape)  # in Tensorflow implem: fct split_states
        if k:
            return x.permute(0, 2, 3, 1)
        else:
            return x.permute(0, 2, 1, 3)

    def dense_attn(self, query, key, value, sample):
        query = self.split_heads(query)
        key = self.split_heads(key, k=True)
        value = self.split_heads(value)
        if self.checkpoint_attn == 1 and not sample:
            a = checkpoint(lambda q,k,v,s=sample: self._attn(q,k,v,s), (query, key, value),
                       (), True)
        else:
            a = self._attn(query,key,value,sample)
        a = self.merge_heads(a)
        return a

    def block_attn(self, q, k, v, sample):
        """Attend within the current block only.

        Training/priming path: q, k and v are reshaped to
        (bs * l // block_ctx, block_ctx, d) so that `dense_attn` runs independently
        on every block; the result is reshaped back to (bs, l, d).
        Sampling path: the cached k/v (whose length must equal `_suff_cache_len()`)
        are attended to directly and a (bs, 1, d) tensor is returned.
        """
        blocks, block_ctx = self.blocks, self.block_ctx # block_ctx is l // blocks for complete l ie l = n_ctx. Sampling has less l
        bs, l, d = v.shape # For sample, q_l = 1, k_l = v_l = sample_t
        if sample:
            assert l == self._suff_cache_len(), f"{l} != {self._suff_cache_len()}"
            return self.dense_attn(q, k, v, sample).view(bs, 1, d)
        else:
            ql = q.shape[1]
            q = q.view(bs * ql // block_ctx, block_ctx, d)
            if ql < l:
                # Fewer queries than keys: keep only the last ql keys/values so blocks line up with the queries
                l = ql
                k = k[:, -l:].contiguous()
                v = v[:, -l:].contiguous()
            k = k.view(bs * l // block_ctx, block_ctx, d)
            v = v.view(bs * l // block_ctx, block_ctx, d)
            return self.dense_attn(q, k, v, sample).view(bs, l, d)

    def transpose_block_attn(self, q, k, v, sample):
        """Attend across blocks to the positions sharing the same within-block offset.

        Training/priming path: the (blocks, block_ctx) factorisation is transposed so
        each within-block offset becomes its own sequence of length l // block_ctx,
        `dense_attn` runs on those sequences, and the result is transposed back to
        (bs, ql, d).
        Sampling path: every block_ctx-th cached key/value, starting at the
        within-block offset of the last cached position, is attended to.
        """
        blocks, block_ctx = self.blocks, self.block_ctx # block_ctx is l // blocks for complete l ie l = n_ctx. Sampling has less l
        bs, l, d = v.shape # For sample, q_l = 1, k_l = v_l = sample_t
        if sample:
            # Within-block offset of the newest position in the cache
            block_l = (l - 1) % block_ctx
            k = k[:,block_l::block_ctx,:]
            v = v[:,block_l::block_ctx,:]
            return self.dense_attn(q, k, v, sample).view(bs, 1, d)
        else:
            ql = q.shape[1]
            # (bs, n_blocks, block_ctx, d) -> (bs * block_ctx, n_blocks, d)
            q = q.view(bs, ql // block_ctx, block_ctx, d).transpose(1,2).contiguous().view(bs * block_ctx, ql // block_ctx, d)
            k = k.view(bs,  l // block_ctx, block_ctx, d).transpose(1,2).contiguous().view(bs * block_ctx,  l // block_ctx, d)
            v = v.view(bs,  l // block_ctx, block_ctx, d).transpose(1,2).contiguous().view(bs * block_ctx,  l // block_ctx, d)
            # Undo the transpose on the way out
            return self.dense_attn(q, k, v, sample).view(bs, block_ctx, ql // block_ctx, d).transpose(1,2).contiguous().view(bs, ql, d)

    def prev_block_attn(self, q, k, v, sample):
        """Attend to the block immediately preceding the query's block.

        Training/priming path: keys/values are shifted by one block (the last block is
        dropped and an all-zero block is padded in front), so block i of the queries
        sees block i - 1 of the keys/values; the first block sees zeros.
        Sampling path: the cache must have length `_suff_cache_len()`; the first
        block_ctx cached entries are used when a previous block exists, otherwise
        all-zero keys/values are used.
        """
        blocks, block_ctx = self.blocks, self.block_ctx # block_ctx is l // blocks for complete l ie l = n_ctx. Sampling has less l
        bs, l, d = v.shape # For sample, q_l = 1, k_l = v_l = sample_t
        if sample:
            assert l == self._suff_cache_len(), f"{l} != {self._suff_cache_len()}"
            block = (l - 1) // block_ctx
            prev_l = (block - 1) * block_ctx
            if block > 0:
                # The cache holds previous block + current block, so the previous block must start at 0
                assert prev_l == 0
                k = k[:, prev_l:prev_l + block_ctx, :]
                v = v[:, prev_l:prev_l + block_ctx, :]
            else:
                # Still in the first block: there is no previous block to look at
                k = t.zeros(bs, block_ctx, d, device=q.device, dtype=q.dtype)
                v = t.zeros(bs, block_ctx, d, device=q.device, dtype=q.dtype)
            return self.dense_attn(q, k, v, sample).view(bs, 1, d)
        else:
            ql = q.shape[1]
            q = q.view(bs * ql // block_ctx, block_ctx, d)
            # Drop the last block and pad one zero block at the front of the block axis
            k = t.nn.functional.pad(k.view(bs, l // block_ctx, block_ctx, d)[:, :-1, :, :], (0,0,0,0,1,0)).view(bs * l // block_ctx, block_ctx, d)
            v = t.nn.functional.pad(v.view(bs, l // block_ctx, block_ctx, d)[:, :-1, :, :], (0,0,0,0,1,0)).view(bs * l // block_ctx, block_ctx, d)
            if ql < l:
                # Fewer query blocks than key blocks: keep the last qb (shifted) key/value blocks
                qb = ql // block_ctx
                kb =  l // block_ctx
                l = ql
                k = k.view(bs, kb, block_ctx, d)[:, -qb:].contiguous().view(bs * qb, block_ctx, d)
                v = v.view(bs, kb, block_ctx, d)[:, -qb:].contiguous().view(bs * qb, block_ctx, d)
            return self.dense_attn(q, k, v, sample).view(bs, l, d)

    def summary_attn(self, q, k, v, sample):
        """Attend to the last position of each block.

        In both paths the last position of every block except the final one is
        selected from k and v, and one all-zero entry is padded in front, giving
        `blocks` keys/values per batch element in the training path.
        """
        blocks, block_ctx = self.blocks, self.block_ctx # block_ctx is l // blocks for complete l ie l = n_ctx. Sampling has less l
        bs, l, d = v.shape # For sample, q_l = 1, k_l = v_l = sample_t
        if sample:
            # Strided slice picks positions block_ctx-1, 2*block_ctx-1, ... that exist in the cache
            k = t.nn.functional.pad(k[:, block_ctx-1:blocks*block_ctx-1:block_ctx, :],(0,0,1,0))
            v = t.nn.functional.pad(v[:, block_ctx-1:blocks*block_ctx-1:block_ctx, :],(0,0,1,0))
            return self.dense_attn(q, k, v, sample).view(bs, 1, d)
        else:
            k = t.nn.functional.pad(k.view(bs, blocks, l // blocks, d)[:, :-1, -1, :],(0,0,1,0)) # bs, blocks, d
            v = t.nn.functional.pad(v.view(bs, blocks, l // blocks, d)[:, :-1, -1, :],(0,0,1,0)) # bs, blocks, d
            return self.dense_attn(q, k, v, sample).view(bs, l, d)

    def summary_spread_attn(self, q, k, v, sample):
        blocks, block_ctx, spread = self.blocks, self.block_ctx, self.spread # block_ctx is l // blocks for complete l ie l = n_ctx. Sampling has less l
        bs, l, d = v.shape # For sample, q_l = 1, k_l = v_l = sample_t
        if sample:
            assert False, "Not yet implemented"
            # k = t.nn.functional.pad(k,(0,0,block_ctx,(-l)%block_ctx)).view(bs, -1, block_ctx, d)[:,:-1,-spread:,:].contiguous().view(bs, -1, d)
            # v = t.nn.functional.pad(v,(0,0,block_ctx,(-l)%block_ctx)).view(bs, -1, block_ctx, d)[:,:-1,-spread:,:].contiguous().view(bs, -1, d)
            # return self.dense_attn(q, k, v, sample).view(bs, 1, d)
        else:
            k = t.nn.functional.pad(k.view(bs, blocks, l // blocks, d)[:, :-1, -spread:, :],(0,0,0,0,1,0)).contiguous().view(bs, blocks * spread, d)  # bs, blocks * spread, d
            v = t.nn.functional.pad(v.view(bs, blocks, l // blocks, d)[:, :-1, -spread:, :],(0,0,0,0,1,0)).contiguous().view(bs, blocks * spread, d)  # bs, blocks * spread, d
            return self.dense_attn(q, k, v, sample).view(bs, l, d)

    def prime_attn(self, q, k, v, sample):
        prime_len = self._prime_len
        k = k[:, :prime_len]
        v = v[:, :prime_len]
        return self.dense_attn(q, k, v, sample)

    def decode_attn(self, q, k, v, sample):
        assert k.shape[1] == v.shape[1] == self.encoder_dims, f'k: {k.shape}, v: {v.shape}, enc_dims: {self.encoder_dims}'
        return self.dense_attn(q, k, v, sample)

    def factored_qkv(self, x, encoder_kv=None, sample=False):
        """Split the projected input into query/key/value and maintain the sampling cache.

        `x` is chunked into three equal parts along axis 2. When sampling, `sample_t`
        is advanced by the number of new positions, the new keys/values are appended
        to the cache, and the cache is trimmed to its last `_suff_cache_len()` entries.

        Returns:
            (query, key, value, sample). `sample` is reset to False when more than one
            position was fed while sampling, so the caller takes the non-sampling
            attention path for that call.
        """
        curr_ctx = x.shape[1]
        assert encoder_kv is None
        query, key, value = x.chunk(3, dim=2)
        if sample:
            self.sample_t += curr_ctx
            key, value = self._append_cache(key, value)
            l_cache = self._suff_cache_len()
            if self._cache_len() > l_cache:
                # Keep only the trailing entries still needed by this attention pattern
                self._slice_cache(-l_cache)
            if curr_ctx > 1:
                # Several positions at once: key/value here are the concatenation returned by
                # _append_cache (before trimming). Pad to whole blocks for the blocked patterns.
                if self.attn_func != 0:
                    query = self._pad_to_block_ctx(query, query=True)
                    key = self._pad_to_block_ctx(key)
                    value = self._pad_to_block_ctx(value)
                    assert key.shape[1] % self.block_ctx == 0
                    assert query.shape[1] % self.block_ctx == 0
                assert key.shape[1] == value.shape[1]
                assert query.shape[1] <= key.shape[1]
                sample = False
            else:
                # Single new position: attend over the (trimmed) cache
                key = self.cache['key']
                value = self.cache['value']
        return query, key, value, sample

    def prime_qkv(self, x, encoder_kv=None, sample=False):
        """Split the projected input into query/key/value for prime attention (attn_func 7).

        When sampling, keys/values are appended to the cache only while it holds fewer
        than `_prime_len` entries, the cache is capped at `_prime_len`, and the cached
        tensors are returned as key/value. Outside sampling, keys/values must span the
        full `n_ctx`.

        Returns:
            (query, key, value, sample) with `sample` passed through unchanged.
        """
        curr_ctx = x.shape[1]
        assert encoder_kv is None
        query, key, value = x.chunk(3, dim=2)
        if sample:
            if self._cache_len() < self._prime_len:
                self._append_cache(key, value)
            if self._cache_len() > self._prime_len:
                # Appending may overshoot; keep only the first _prime_len entries
                self._slice_cache(0, self._prime_len)
            key, value = self.cache['key'], self.cache['value']
            self.sample_t += curr_ctx
            assert key.shape[1] == value.shape[1] == self._suff_cache_len(), f'k: {key.shape}, v: {value.shape}, prime_dims: {self._suff_cache_len()}'
        else:
            assert key.shape[1] == value.shape[1] == self.n_ctx, f'k: {key.shape}, v: {value.shape}, prime_dims: {self.n_ctx}'
        # Batch size and feature width must agree between query, key and value
        assert key.shape[0] == value.shape[0] == query.shape[0], f'k: {key.shape}, v: {value.shape}, q: {query.shape}'
        assert key.shape[2] == value.shape[2] == query.shape[2], f'k: {key.shape}, v: {value.shape}, q: {query.shape}'
        return query, key, value, sample

    def decode_qkv(self, x, encoder_kv=None, sample=False):
        curr_ctx = x.shape[1]
        assert encoder_kv is not None
        query = x
        if sample:
            if self.sample_t == 0:
                self.cache['key'], self.cache['value'] = self.c_enc_kv(encoder_kv.type_as(x)).chunk(2, dim=2)
            key, value = self.cache['key'], self.cache['value']
            self.sample_t += curr_ctx
        else:
            key, value = self.c_enc_kv(encoder_kv.type_as(x)).chunk(2, dim=2)
        assert key.shape[0] == value.shape[0] == query.shape[0], f'k: {key.shape}, v: {value.shape}, q: {query.shape}'
        assert key.shape[1] == value.shape[1] == self.encoder_dims, f'k: {key.shape}, v: {value.shape}, enc_dims: {self.encoder_dims}'
        assert key.shape[2] == value.shape[2] == query.shape[2], f'k: {key.shape}, v: {value.shape}, q: {query.shape}'
        return query, key, value, sample

    def forward(self, x, encoder_kv=None, sample=False):
        curr_ctx = x.shape[1]
        x = self.c_attn(x)
        query, key, value, sample = self.qkv(x, encoder_kv=encoder_kv, sample=sample)
        if self.checkpoint_attn == 2 and not sample:
            a = checkpoint(lambda q,k,v,s=sample: self.attn(q,k,v,s), (query, key, value), (), True)
        else:
            a = self.attn(query,key,value,sample)
        if a.shape[1] != curr_ctx:
            offset = self._offset(curr_ctx)
            a = a[:,offset:offset + curr_ctx,:].contiguous()
        a = self.c_proj(a)
        return self.resid_dropout(a)

    @property
    def _prime_len(self):
        prime_len = self.prime_len
        assert prime_len is not None
        prime_blocks = (prime_len // self.blocks) + 1
        return prime_blocks * self.blocks

    def _offset(self, curr_ctx):
        if self.attn_func == 0:
            return 0
        return (self.sample_t - curr_ctx) % self.block_ctx

    def _pad_to_block_ctx(self, x, query=False):
        l = x.shape[1]
        offset = self._offset(l) if query else 0
        n_blocks = (l + offset + self.block_ctx - 1) // self.block_ctx
        pad = n_blocks * self.block_ctx - l - offset
        if pad == 0 and offset == 0:
            return x
        else:
            return F.pad(x, (0, 0, offset, pad))

    def _cache_len(self):
        return 0 if 'key' not in self.cache else self.cache['key'].shape[1]

    def _suff_cache_len(self):
        """
        Precondition:
            key and value are appended with the current context and
            self.sample_t reflects the 1-indexed sample location in the
            context.
        """
        if self.attn_func == 0:
            return self.sample_t
        elif self.attn_func == 1:
            return (self.sample_t - 1) % self.block_ctx + 1
        elif self.attn_func == 2:
            return self.sample_t
        elif self.attn_func == 3:
            if self.sample_t <= self.block_ctx:
                return self.sample_t
            else:
                curr_block = (self.sample_t - 1) % self.block_ctx + 1
                prev_block = self.block_ctx
                return curr_block + prev_block
        elif self.attn_func == 6:
            return self.encoder_dims
        elif self.attn_func == 7:
            return min(self.sample_t, self._prime_len)
        else:
            raise NotImplementedError()

    def _slice_cache(self, start, end=None):
        self.cache['key'] = self.cache['key'][:, start:end]
        self.cache['value'] = self.cache['value'][:, start:end]

    def _append_cache(self, key, value):
        if 'key' not in self.cache:
            self.cache['key'] = key
            self.cache['value'] = value
        else:
            old_key, old_value = key, value
            key = t.cat([self.cache['key'], key], dim=1)
            value = t.cat([self.cache['value'], value], dim=1)
            del self.cache['key']
            del self.cache['value']
            del old_key
            del old_value
            self.cache['key'] = key
            self.cache['value'] = value
        return self.cache['key'], self.cache['value']

    def del_cache(self):
        self.sample_t = 0
        if 'key' in self.cache:
            del self.cache['key']
        if 'value' in self.cache:
            del self.cache['value']
        self.cache = {}

    def check(self):
        """Gradient-based self-test of the attention pattern (requires CUDA).

        Runs a forward pass on random input, back-propagates from a single output
        position (batch element 2, position 60) and asserts that the input positions
        receiving non-zero gradient are exactly those the configured `attn_func`
        (0-5) is supposed to attend to, plus the position itself.
        """
        blocks = self.blocks or 1
        spread = self.spread or 1
        bs, l, d = (4, self.n_ctx, self.n_in)
        x = t.randn(bs, l, d).cuda()
        x.requires_grad = True
        x_out = self.forward(x) # bs, l, d
        loss = x_out.mean(dim = -1) # bs, l
        pos = 60
        grad = t.autograd.grad(loss[2, pos], x)[0]

        assert grad.shape == (bs, l, d)
        # No gradient may leak to other batch elements or to future positions
        assert (grad[:2] == 0).all()
        assert (grad[3:] == 0).all()
        assert (grad[2, (pos + 1):] == 0).all()
        # Indices of positions in batch element 2 that received any gradient
        pos_grad = (t.sum(grad[2] ** 2, dim=-1) > 0).nonzero().view(-1).cpu()

        # Start of the block containing `pos`
        block_pos = pos - (pos % (l // blocks))
        # Expected attended positions per attn_func (the position itself is appended below)
        exp_pos_grad = {0: t.arange(pos),
                        1: t.arange(block_pos, pos),
                        2: t.arange(pos % (l // blocks), pos, l // blocks),
                        3: t.arange(block_pos - l // blocks, block_pos),
                        4: t.arange(l // blocks - 1, pos, l // blocks),
                        5: ((t.arange(pos) % (l // blocks) >= (l // blocks - spread)) & (t.arange(pos) < block_pos)).nonzero().view(-1)}[self.attn_func]
        exp_pos_grad = t.cat([exp_pos_grad, t.tensor([pos])], dim=-1)

        assert (len(pos_grad) == len(exp_pos_grad)) and (pos_grad == exp_pos_grad).all(), \
            f"Expected pos grad {exp_pos_grad} got {pos_grad} for attn_func {self.attn_func} pos {pos} l {l} blocks {blocks}"

    def check_cache(self, n_samples, sample_t, fp16):
        assert self.sample_t == sample_t, f"{self.sample_t} != {sample_t}"
        if sample_t == 0:
            assert self.cache == {}
        else:
            dtype = {True: t.float16, False: t.float32}[fp16]
            l_cache = self._suff_cache_len()
            assert self.cache['key'].shape == (n_samples, l_cache, self.n_state)
            assert self.cache['value'].shape == (n_samples, l_cache, self.n_state)
            assert self.cache['key'].dtype == dtype, f"Expected {dtype}, got {self.cache['key'].dtype}"
            assert self.cache['value'].dtype == dtype, f"Expected {dtype}, got {self.cache['value'].dtype}"

    def check_sample(self):
        """Self-test that sampling reproduces the regular forward pass (requires CUDA).

        Compares the full forward pass against (a) feeding the same input one position
        at a time with `sample=True` and (b) feeding the first `prime` positions in a
        single `sample=True` call after resetting the cache. Both must agree within 1e-8.
        """
        t.manual_seed(42)
        bs, l, d = (4, self.n_ctx, self.n_in)
        prime = 5
        x = t.randn(bs, l, d).cuda()
        xs = t.chunk(x, l, dim=1)  # l chunks of one position each
        assert self.sample_t == 0
        assert self.cache == {}

        with t.no_grad():
            enc_l = self.encoder_dims
            encoder_kv = None
            if self.attn_func == 6:
                # Only encoder-decoder attention takes encoder keys/values
                encoder_kv = t.randn(bs, enc_l, d).cuda()

            # Normal path
            x_out_normal = self.forward(x, encoder_kv=encoder_kv)

            # Sampling path
            x_out_sample = t.cat([self.forward(xs[i], encoder_kv=encoder_kv, sample=True) for i in range(l)],dim=1)
        max_err = t.max(t.abs(x_out_sample - x_out_normal))
        assert max_err < 1e-8, f"Max sampling err is {max_err} {[i for i in range(l) if t.max(t.abs(x_out_sample - x_out_normal)[:,i,:]) > 1e-8]}"

        with t.no_grad():
            x_out_normal = x_out_normal[:,:prime,:]
            # Prime sampling path
            self.del_cache()
            x_out_sample = self.forward(x[:,:prime,:].contiguous(), encoder_kv=encoder_kv, sample=True)
            self.check_cache(bs, prime, False)

        max_err = t.max(t.abs(x_out_sample - x_out_normal))
        assert max_err < 1e-8, f"Max prime sampling err is {max_err} {[i for i in range(prime) if t.max(t.abs(x_out_sample - x_out_normal)[:,i,:]) > 1e-8]}"

    def check_chunks(self, chunk_size):
        """Self-test that chunked sampling reproduces the regular forward pass (requires CUDA).

        Compares the regular forward pass against (a) one `sample=True` call on the
        whole input and (b) successive `sample=True` calls on chunks of `chunk_size`
        positions, validating the cache after every chunk. Results must agree within 1e-6.

        Args:
            chunk_size: number of positions per chunk; must divide `n_ctx`.
        """
        t.manual_seed(42)
        bs, l, d = (4, self.n_ctx, self.n_in)
        enc_l = self.encoder_dims
        assert l % chunk_size == 0
        n_chunks = l // chunk_size
        with t.no_grad():
            encoder_kv = None
            x = t.randn(bs, l, d).cuda()
            if self.attn_func == 6:
                # Only encoder-decoder attention takes encoder keys/values
                encoder_kv = t.randn(bs, enc_l, d).cuda()

            # Reference output, then the whole input in a single sampling call
            self.del_cache()
            y_forw = self.forward(x, encoder_kv=encoder_kv, sample=False)
            self.del_cache()
            y_forw_sample = self.forward(x, encoder_kv=encoder_kv, sample=True)
            max_err = t.max(t.abs(y_forw - y_forw_sample))
            assert max_err <= 1e-6, f"Max err is {max_err} {[i for i in range(l) if t.max(t.abs(y_forw - y_forw_sample)[:, i, :]) > 1e-6]}"

            # Same input fed chunk by chunk, checking the cache after each chunk
            self.del_cache()
            x_chunks = t.chunk(x, n_chunks, dim=1)
            y_chunks = []
            total_len = 0
            for x_chunk in x_chunks:
                y_chunk = self.forward(x_chunk.contiguous(), encoder_kv=encoder_kv, sample=True)
                total_len += x_chunk.shape[1]
                self.check_cache(bs, total_len, False)
                y_chunks.append(y_chunk)
            y_forw_in_chunks = t.cat(y_chunks, dim=1)

            max_err = t.max(t.abs(y_forw - y_forw_in_chunks))
            assert max_err <= 1e-6, f"Max err is {max_err} {[i for i in range(l) if t.max(t.abs(y_forw - y_forw_in_chunks)[:, i, :]) > 1e-6]}"


# Self-test script: builds a FactoredAttention for each supported sampling pattern
# and runs the sampling-consistency checks (check_sample / check_chunks) on it.
if __name__ == '__main__':
    from jukebox.utils.dist_utils import setup_dist_from_mpi
    setup_dist_from_mpi(port=29600)
    n_in = 16
    n_state = n_in * 2
    n_ctx = 6144
    n_head = 4
    n_depth = 12  # NOTE(review): not used below
    blocks = 64
    chunk_size = 8
    # attn_func 4 and 5 are left out of this list
    for attn_func in [0, 1, 2, 3, 6, 7]:
        # Encoder length only matters for attn_func 6, prime length only for attn_func 7
        encoder_dims = {0: 0, 1: 0, 2: 0, 3: 0, 6: 64, 7: 0}[attn_func]
        prime_len = {0: 0, 1: 0, 2: 0, 3: 0, 6: 0, 7: 384}[attn_func]
        attn = FactoredAttention(n_in, n_ctx + prime_len, n_state, n_head, mask=True,
                                 attn_func=attn_func, blocks=blocks,
                                 encoder_dims=encoder_dims, prime_len=prime_len)
        # Evaluation mode
        attn.training = False
        attn.check_sample()
        attn.check_chunks(chunk_size)
        print(f"Checked attn_func: {attn_func}")


================================================
FILE: jukebox/transformer/ops.py
================================================
import math
import numpy as np
import torch as t
import torch.nn as nn
import torch.nn.functional as F

# Import FusedLayerNorm if we have apex, otherwise use regular LayerNorm
try:
    from apex.normalization import FusedLayerNorm
    print("Using apex FusedLayerNorm")
except ImportError:
    from torch.nn import LayerNorm as FusedLayerNorm

class LayerNorm(FusedLayerNorm):
    """Layer norm that always computes in float32 and casts the result back to the input dtype.

    Inputs with more than `max_numel` (65535 * width) elements bypass the parent
    class's forward and go through `F.layer_norm` instead
    (presumably to stay within a limit of the fused kernel - TODO confirm).
    """

    def __init__(self, normalized_shape, eps=1e-5, elementwise_affine=True):
        super().__init__(normalized_shape, eps=eps, elementwise_affine=elementwise_affine)
        self.width = np.prod(normalized_shape)
        self.max_numel = 65535*self.width

    def forward(self, input):
        x = input.float()
        if input.numel() > self.max_numel:
            normed = F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
        else:
            normed = super().forward(x)
        return normed.type_as(input)

def gelu(x):
    """GELU activation, tanh approximation."""
    inner = x + 0.044715 * t.pow(x, 3)
    return 0.5 * x * (1 + t.tanh(math.sqrt(2 / math.pi) * inner))


def swish(x):
    """Swish activation: the input gated by its own sigmoid."""
    gate = t.sigmoid(x)
    return x * gate

@t.jit.script
def quick_gelu(x):
    """Sigmoid-based GELU approximation: x * sigmoid(1.702 * x)."""
    gate = t.sigmoid(1.702 * x)
    return x * gate

@t.jit.script
def quick_gelu_bwd(x, grad_output):
    """Gradient of `x * sigmoid(1.702 * x)` with respect to `x`, multiplied by `grad_output`."""
    sig = t.sigmoid(1.702 * x)
    local_slope = 1.702 * x * (1 - sig) + 1.
    return grad_output * sig * local_slope

class QuickGelu(t.autograd.Function):
    """Autograd function for quick GELU that saves only the input tensor for the backward pass."""

    @staticmethod
    def forward(ctx, x):
        # Keep just the input; the backward pass recomputes the sigmoid from it
        ctx.save_for_backward(x)
        return quick_gelu(x)

    @staticmethod
    def backward(ctx, grad_output):
        saved_input = ctx.saved_tensors[0]
        return quick_gelu_bwd(saved_input, grad_output)

def memory_efficient_quick_gelu(x):
    """Apply quick GELU through the custom `QuickGelu` autograd function."""
    return QuickGelu.apply(x)

# Lookup table from activation name to the callable implementing it.
ACT_FNS = {
    'relu': t.nn.functional.relu,
    'swish': swish,
    'gelu': gelu,
    'quick_gelu': memory_efficient_quick_gelu #quick_gelu
}

def _move_to_gpu_and_convert_conv_weights_to_fp16(l):
    """Move module `l` to the GPU and, if it is a `Conv1D`, cast its weight matrix to half precision."""
    l.cuda()
    if not isinstance(l, Conv1D):
        return
    l.w.data = l.w.data.half()

def _convert_conv_weights_to_fp32(l):
    """Cast the weight matrix of a `Conv1D` module to float32; other modules are left alone."""
    if not isinstance(l, Conv1D):
        return
    l.w.data = l.w.data.float()

def _convert_conv_weights_to_fp16(l):
    """Cast the weight matrix of a `Conv1D` module to half precision; other modules are left alone."""
    if not isinstance(l, Conv1D):
        return
    l.w.data = l.w.data.half()

def _convert_embedding_weights_to_fp16(l):
    """Cast the weights of an `Embedding` module to half precision; other modules are left alone."""
    if not isinstance(l, t.nn.Embedding):
        return
    l.weight.data = l.weight.data.half()

def _convert_embedding_weights_to_fp32(l):
    """Cast the weights of an `Embedding` module to float32; other modules are left alone."""
    if not isinstance(l, t.nn.Embedding):
        return
    l.weight.data = l.weight.data.float()

class Conv1D(nn.Module):
    """Position-wise linear layer: `y = x @ w + b` applied over the last axis.

    Args:
        n_in: size of the last axis of the input.
        n_out: size of the last axis of the output.
        zero_out: start from an all-zero weight matrix instead of a random one.
        init_scale: multiplier on the 0.02 standard deviation of the normal init.
    """

    def __init__(self, n_in, n_out, zero_out=False, init_scale=1.0):
        super().__init__()
        self.n_in = n_in
        self.n_out = n_out
        if zero_out:
            w = t.zeros(n_in, n_out)
        else:
            w = t.empty(n_in, n_out)
            nn.init.normal_(w, std=0.02 * init_scale)
        b = t.zeros(n_out)
        self.w = nn.Parameter(w)
        self.b = nn.Parameter(b)

    def forward(self, x):
        size_out = (*x.size()[:-1], self.n_out)
        # reshape (rather than view) so non-contiguous inputs, e.g. transposed
        # activations, are accepted; for contiguous inputs it is the same no-copy view.
        flat = x.reshape(-1, x.size(-1))
        # Bias and weight follow the input dtype: if x is float then float else half
        out = t.addmm(self.b.type_as(x), flat, self.w.type_as(x))
        return out.view(*size_out)

# For large contexts, mask's can take up memory, so you can make a single saved mask for all layers
class Mask(nn.Module):
    """Causal mask stored once as a buffer `b` of shape (1, 1, n_ctx, n_ctx) so it can be shared."""

    def __init__(self, n_ctx):
        super().__init__()
        lower_triangle = t.tril(t.ones(n_ctx, n_ctx))
        self.register_buffer('b', lower_triangle.view(1, 1, n_ctx, n_ctx))

    def forward(self, w):
        # Keep logits where b is 1; push the rest to -1e9 so softmax ignores them.
        # For fp16 do w = w.float().masked_fill(self.b, float('-inf')
        keep = self.b
        return w * keep + -1e9 * (1 - keep)

def filter_logits(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')):
    """ Filter a distribution of logits using top-k and/or nucleus (top-p) filtering
        Args:
            logits: logits distribution; filtering runs over the last axis (vocabulary).
            top_k >0: keep only top k tokens with highest probability (top-k filtering).
            top_p >0.0: keep the top tokens with cumulative probability >= top_p (nucleus filtering).
            filter_value: value written into the removed positions.
        Returns:
            A filtered copy of `logits`; the input tensor is not modified.
        Only one of top_k / top_p may be active at a time.
    """
    #assert logits.dim() == 2  # batch size 1 for now - could be updated for more but the code would be less clear
    logits = logits.clone()
    top_k = min(top_k, logits.size(-1))  # Safety check
    assert (top_k == 0) or (top_p == 0.0)
    if top_k > 0:
        # Remove all tokens with a probability less than the last token of the top-k
        kth_best = t.topk(logits, top_k, dim=-1)[0][..., -1:]
        logits[logits < kth_best] = filter_value

    if top_p > 0.0:
        sorted_logits, sorted_indices = t.sort(logits, descending=True, dim=-1)
        cumulative_probs = t.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

        # Remove tokens with cumulative probability above the threshold
        sorted_indices_to_remove = cumulative_probs > top_p
        # Shift the indices to the right to keep also the first token above the threshold
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
        sorted_indices_to_remove[..., 0] = 0

        # Map the removal mask from sorted order back to vocabulary order. The target buffer
        # takes the mask's own dtype: scatter_ requires matching dtypes, and a comparison
        # yields a bool tensor on current PyTorch.
        indices_to_remove = t.zeros_like(sorted_indices_to_remove).scatter_(dim=-1, index=sorted_indices, src=sorted_indices_to_remove)
        logits[indices_to_remove] = filter_value
    return logits


================================================
FILE: jukebox/transformer/transformer.py
================================================
import functools
import numpy as np
import torch as t
import torch.nn as nn
import jukebox.utils.dist_adapter as dist

from jukebox.transformer.ops import Conv1D, ACT_FNS, LayerNorm
from jukebox.transformer.factored_attention import FactoredAttention
from jukebox.utils.checkpoint import checkpoint

def _convert_mlp_traced(l):
    """Replace the `mlp` of a `ResAttnBlock` with a traced version, traced on a float32 CUDA example input."""
    if not isinstance(l, ResAttnBlock):
        return
    example = t.randn(1, 1, l.n_in).cuda()
    l.mlp = t.jit.trace(l.mlp, example)

def _convert_mlp_traced_fp16(l):
    """Replace the `mlp` of a `ResAttnBlock` with a traced version, traced on a half-precision CUDA example input."""
    if not isinstance(l, ResAttnBlock):
        return
    example = t.randn(1, 1, l.n_in).cuda().half()
    l.mlp = t.jit.trace(l.mlp, example)

class MLP(nn.Module):
    """Two-layer position-wise feed-forward block: n_in -> n_state -> n_in.

    The activation is looked up by name in `ACT_FNS`; dropout on the output is
    applied only when `resid_dropout` is positive.
    """

    def __init__(self, n_in, n_state, resid_dropout=0.0, afn='quick_gelu', zero_out=False, init_scale=1.0):
        super().__init__()
        self.c_fc = Conv1D(n_in, n_state, init_scale=init_scale)
        self.c_proj = Conv1D(n_state, n_in, zero_out, init_scale=init_scale)
        self.act = ACT_FNS[afn]
        if resid_dropout > 0.0:
            self.resid_dropout = nn.Dropout(resid_dropout)
        else:
            self.resid_dropout = lambda x: x

    def forward(self, x):
        hidden = self.act(self.c_fc(x))
        projected = self.c_proj(hidden)
        return self.resid_dropout(projected)

class ResAttnBlock(nn.Module):
    """Residual transformer block: layer norm + attention, then layer norm + MLP.

    The block computes `a = attn(ln_0(x))`, `m = mlp(ln_1(x + a))` and returns
    `x + res_scale * (a + m)`.
    """
    def __init__(self, n_in, n_ctx, n_head,
                 attn_dropout=0.0, resid_dropout=0.0,
                 afn='quick_gelu', scale=True, mask=False,
                 zero_out=False, init_scale=1.0, res_scale=1.0,
                 m_attn = 0.25, m_mlp = 1.,
                 checkpoint_attn = 0, checkpoint_mlp = 0,
                 attn_func=0, blocks=None, spread=None,
                 encoder_dims=None, prime_len=None):
        super().__init__()
        # m_attn and m_mlp are width multipliers: attention state is int(m_attn * n_in) wide,
        # the MLP hidden layer is int(m_mlp * n_in) wide.
        self.attn = FactoredAttention(n_in=n_in, n_ctx=n_ctx, n_state=int(m_attn * n_in), n_head=n_head,
                                      attn_dropout=attn_dropout, resid_dropout=resid_dropout,
                                      scale=scale, mask=mask,
                                      zero_out=zero_out, init_scale=init_scale,
                                      checkpoint_attn=checkpoint_attn,
                                      attn_func=attn_func, blocks=blocks, spread=spread,
                                      encoder_dims=encoder_dims, prime_len=prime_len)
        self.ln_0 = LayerNorm(n_in)
        self.mlp = MLP(n_in=n_in, n_state=int(m_mlp * n_in),
                       resid_dropout=resid_dropout,
                       afn=afn,
                       zero_out=zero_out, init_scale=init_scale)
        self.ln_1 = LayerNorm(n_in)
        self.res_scale = res_scale

        self.checkpoint_attn = checkpoint_attn
        self.checkpoint_mlp = checkpoint_mlp
        self.n_in = n_in
        self.attn_func = attn_func

    def forward(self, x, encoder_kv, sample=False):
        """Run the block on `x`.

        `encoder_kv` must be given exactly when `attn_func == 6` (checked on the
        non-sampling path). When sampling, the sub-layers are called directly; otherwise
        they go through the project's `checkpoint` helper, whose last argument enables it.
        """
        if sample:
            a = self.attn(self.ln_0(x), encoder_kv, sample)
            m = self.mlp(self.ln_1(x + a))
        else:
            if self.attn_func == 6:
                assert encoder_kv is not None
                a = checkpoint(lambda _x,_enc_kv,_s=sample: self.attn(self.ln_0(_x),_enc_kv,_s),
                               (x,encoder_kv),
                               (*self.attn.parameters(), *self.ln_0.parameters()),
                               self.checkpoint_attn == 3)  # 2 recomputes after the projections, and 1 recomputes after head splitting.
            else:
                assert encoder_kv is None
                a = checkpoint(lambda _x,_enc_kv=None,_s=sample: self.attn(self.ln_0(_x),_enc_kv,_s),
                               (x,),
                               (*self.attn.parameters(), *self.ln_0.parameters()),
                               self.checkpoint_attn == 3)  # 2 recomputes after the projections, and 1 recomputes after head splitting.
            m = checkpoint(lambda _x: self.mlp(self.ln_1(_x)), (x + a,),
                           (*self.mlp.parameters(), *self.ln_1.parameters()),
                           self.checkpoint_mlp == 1)
        # Skip the multiplication entirely when no residual scaling is configured
        if self.res_scale == 1.0:
            h = x + a + m
        else:
            h = x + self.res_scale * (a + m)
        return h

class Transformer(nn.Module):
    """Stack of ``n_depth`` ``ResAttnBlock`` layers.

    The attention variant (``attn_func``) of each layer is chosen per depth
    from the schedule selected by ``attn_order``. Layers whose ``attn_func``
    is 6 are the only ones that receive ``encoder_kv`` in ``forward``; every
    other layer is called with ``encoder_kv=None``.
    """

    def __init__(self, n_in, n_ctx, n_head, n_depth,
                 attn_dropout=0.0, resid_dropout=0.0,
                 afn='quick_gelu', scale=True, mask=False,
                 zero_out=False, init_scale=1.0, res_scale=False,
                 m_attn=0.25, m_mlp=1.,
                 checkpoint_attn=0, checkpoint_mlp=0, checkpoint_res=0,
                 attn_order=0, blocks=None, spread=None,
                 encoder_dims=None, prime_len=None):
        """Build the layer stack.

        Most arguments are forwarded unchanged to every ``ResAttnBlock``.
        Handled here:
            n_depth: number of layers.
            res_scale (bool): when truthy each block gets ``1.0 / n_depth`` as
                its residual scale, otherwise ``1.0``.
            attn_order (int): key into the per-depth ``attn_func`` schedules
                below; an unknown key raises ``KeyError``.
            blocks: when not None, must divide ``n_ctx``.
            checkpoint_res: when equal to 1, whole blocks are run through
                ``checkpoint`` in ``forward`` (non-sampling only).
        """
        super().__init__()
        self.n_in = n_in
        self.n_ctx = n_ctx
        self.encoder_dims = encoder_dims
        self.blocks = blocks
        if blocks is not None:
            assert n_ctx % blocks == 0
            self.block_ctx = n_ctx // blocks
        self.prime_len = prime_len
        self.n_head = n_head

        res_scale = 1.0 / n_depth if res_scale else 1.0

        # Orders of attn_func
        attn_func = {0: lambda d: 0,                    # Complete dense attn
                     1: lambda d: [1,2][d%2],           # Alternate row and column attn
                     2: lambda d: [1,2,3][d % 3],       # Alternate row, column and previous row attn
                     3: lambda d: [1,4][d % 2],         # Alternate row and last column
                     4: lambda d: [1,5][d % 2],         # Alternate row and last k columns
                     5: lambda d: [1,4,1,1][d % 4],      # Alternate row, last column, row, row
                     6: lambda d: [1,2,3,6][d % 4],
                     7: lambda d: [*[1,2,3]*5,6][d%16],
                     8: lambda d: [1,2,3,1,2,3,1,2,3,6][d%10], # Used by separated_enc_dec model with lyrics
                     9: lambda d: [1,2,3,0][d % 4],
                     10: lambda d: [*[1,2,3,1,2,3,1,2,3],*[1,2,3,1,2,3,1,2,3,6]*7][d%79], # Used by large separated_enc_dec model with lyrics
                     11: lambda d: [6,6,0][d%3] if d%16 == 15 else [1,2,3][d%3],
                     12: lambda d: [7,7,0][d%3] if d%16 == 15 else [1,2,3][d%3], # Used by single_enc_dec model with lyrics
                     }[attn_order]

        # Period of each schedule above; only needed by the disabled check below.
        attn_cycle = {0:1, 1:2, 2:3, 3:2, 4:2, 5:4, 6:4, 7:16, 8:10, 9:4, 10:79, 11:16, 12:16}[attn_order]
        #assert n_depth % attn_cycle == 0, f'Depth {n_depth} not a multiple of cycle {attn_cycle} for attn_order {attn_order}'

        # Blocks with attn_func 6 are always built with zero_out=True.
        attn_block = lambda d: ResAttnBlock(n_in=n_in, n_ctx=n_ctx, n_head=n_head,
                                  attn_dropout=attn_dropout, resid_dropout=resid_dropout,
                                  afn=afn, scale=scale, mask=mask,
                                  zero_out=zero_out if attn_func(d) !=6 else True,
                                  init_scale=init_scale, res_scale=res_scale,
                                  m_attn=m_attn, m_mlp=m_mlp,
                                  checkpoint_attn=checkpoint_attn, checkpoint_mlp=checkpoint_mlp,
                                  attn_func=attn_func(d), blocks=blocks, spread=spread,
                                  encoder_dims=encoder_dims, prime_len=prime_len)

        self.checkpoint_res = checkpoint_res
        self._attn_mods = nn.ModuleList()
        for d in range(n_depth):
            self._attn_mods.append(attn_block(d))
        # Filled by forward() with each recording layer's ``attn.w``.
        self.ws = []


    def set_record_attn(self, record_attn):
        """
        Arguments:
            record_attn (bool or set): Makes forward prop dump self-attention
                softmaxes to self.ws. Either a set of layer indices indicating
                which layers to store, or a boolean value indicating whether to
                dump all.
        """
        def _should_record_attn(layer_idx):
            if isinstance(record_attn, bool):
                return record_attn
            return layer_idx in record_attn
        for i, l in enumerate(self._attn_mods):
            l.attn.record_attn = _should_record_attn(i)
        if record_attn:
            # Recording may only be switched on from a clean state.
            assert self.ws == []
            for l in self._attn_mods:
                assert l.attn.w is None
        else:
            # Switching recording off drops everything stored so far.
            self.ws = []
            for l in self._attn_mods:
                l.attn.w = None

    def forward(self, x, encoder_kv=None, sample=False, fp16=False, fp16_out=False):
        """Run ``x`` through every block in order.

        Arguments:
            x: input tensor; cast to half precision first when ``fp16``.
            encoder_kv: passed only to blocks whose ``attn_func`` is 6.
            sample (bool): forwarded to each block; also disables the
                whole-block checkpointing.
            fp16 (bool): cast the input with ``.half()``.
            fp16_out (bool): when False the result is cast with ``.float()``.

        Returns:
            Output of the last block.
        """
        if fp16:
            x = x.half()

        # Blocks
        for l in self._attn_mods:
            if self.checkpoint_res == 1 and not sample:
                if l.attn_func == 6:
                    assert encoder_kv is not None
                    f = functools.partial(l, sample=sample)
                    x = checkpoint(f, (x, encoder_kv), l.parameters(), True)
                else:
                    f = functools.partial(l, encoder_kv=None, sample=sample)
                    x = checkpoint(f, (x,), l.parameters(), True)
            else:
                if l.attn_func == 6:
                    x = l(x, encoder_kv=encoder_kv, sample=sample)
                else:
                    x = l(x, encoder_kv=None, sample=sample)
            if l.attn.record_attn:
                self.ws.append(l.attn.w)
        if not fp16_out:
            x = x.float()
        return x

    def check_cache(self, n_samples, sample_t, fp16):
        """Delegate ``check_cache`` to the attention module of every block."""
        for l in self._attn_mods:
            l.attn.check_cache(n_samples, sample_t, fp16)

    def del_cache(self):
        """Delegate ``del_cache`` to the attention module of every block."""
        for l in self._attn_mods:
            l.attn.del_cache()

    def check_sample(self):
        """Self-test: chunked sampling must match a single full pass.

        Runs random CUDA inputs through ``forward(sample=True)`` once as a
        whole and once split into 4 chunks along dim 1, and asserts that the
        maximum absolute difference is at most 1e-6.
        """
        bs, l, s, d = (4, self.n_ctx, self.encoder_dims, self.n_in)
        with t.no_grad():
            encoder_kv = t.randn(bs, s, d).cuda()
            x = t.randn(bs, l, d).cuda()
            y_forw = self.forward(x, encoder_kv=encoder_kv, sample=True)

            self.del_cache()
            x_chunks = t.chunk(x, 4, dim=1)
            y_chunks = []
            n = 0
            for x_chunk in x_chunks:
                self.check_cache(bs, n, False)
                y_chunk = self.forward(x_chunk, encoder_kv=encoder_kv, sample=True)
                y_chunks.append(y_chunk)
                n += x_chunk.shape[1]
            self.check_cache(bs, n, False)
            y_forw_in_chunks = t.cat(y_chunks, dim=1)

            max_err = t.max(t.abs(y_forw - y_forw_in_chunks))
            assert max_err <= 1e-6, f"Max err is {max_err} {[i for i in range(l) if t.max(t.abs(y_forw - y_forw_in_chunks)[:, i, :]) > 1e-6]}"


if __name__ == '__main__':
    # Smoke test: checks that chunked sampling matches a full forward pass
    # for a few attention orders. Requires CUDA.
    from jukebox.utils.dist_utils import setup_dist_from_mpi
    setup_dist_from_mpi(port=29600)
    n_in, n_ctx, n_head, n_depth, blocks = 16, 192, 4, 12, 16
    # (attn_order, encoder_dims) pairs; order 6 is the only one tested here
    # whose schedule contains attn_func 6 and therefore needs encoder input.
    for attn_order, encoder_dims in ((0, 0), (2, 0), (6, 64)):
        prior = Transformer(n_in, n_ctx, n_head, n_depth, mask=True,
                            attn_order=attn_order, encoder_dims=encoder_dims,
                            blocks=blocks).cuda()
        prior.training = False
        prior.check_sample()
        print(f"Checked attn_order: {attn_order}")


================================================
FILE: jukebox/utils/__init__.py
================================================


================================================
FILE: jukebox/utils/audio_utils.py
================================================
import numpy as np
import torch as t
import jukebox.utils.dist_adapter as dist
import soundfile
import librosa
from jukebox.utils.dist_utils import print_once

class DefaultSTFTValues:
    """Fixed STFT settings; only the sample rate is read from ``hps``."""

    def __init__(self, hps):
        hop = 256
        self.sr = hps.sr
        self.n_fft = 2048
        self.hop_length = hop
        # The window spans six hops.
        self.window_size = 6 * hop

class STFTValues:
    """Caller-supplied STFT settings paired with the sample rate from ``hps``."""

    def __init__(self, hps, n_fft, hop_length, window_size):
        self.sr = hps.sr
        self.n_fft, self.hop_length, self.window_size = n_fft, hop_length, window_size

def calculate_bandwidth(dataset, hps, duration=600):
    """Estimate normalisation statistics of the dataset's audio.

    Items are read from ``dataset`` (starting at this process's rank and
    stepping by ``max(16, world_size)``) until at least
    ``dataset.sr * duration`` sample values have been seen. When
    ``dist.is_available()`` the accumulators are all-reduced across ranks.

    Arguments:
        dataset: indexable; items are arrays (or 2-item tuples/lists whose
            first element is the array) and must expose ``.sr``.
        hps: hyperparameters; only ``hps.sr`` is read (via DefaultSTFTValues).
        duration: amount of audio to scan, multiplied by ``dataset.sr``.

    Returns:
        dict with keys ``l2`` (variance of the samples), ``l1`` (mean absolute
        value) and ``spec`` (mean norm of the magnitude spectrogram per item).
    """
    hps = DefaultSTFTValues(hps)
    n_samples = int(dataset.sr * duration)
    l1, total, total_sq, n_seen, idx = 0.0, 0.0, 0.0, 0.0, dist.get_rank()
    spec_norm_total, spec_nelem = 0.0, 0.0
    while n_seen < n_samples:
        x = dataset[idx]
        if isinstance(x, (tuple, list)):
            # Only the audio is needed; the second element is ignored.
            x, _ = x
        samples = x.astype(np.float64)
        # n_fft is passed by keyword: newer librosa releases make every
        # argument after the signal keyword-only.
        stft = librosa.core.stft(np.mean(samples, axis=1), n_fft=hps.n_fft,
                                 hop_length=hps.hop_length, win_length=hps.window_size)
        spec = np.absolute(stft)
        spec_norm_total += np.linalg.norm(spec)
        spec_nelem += 1
        n_seen += int(np.prod(samples.shape))
        l1 += np.sum(np.abs(samples))
        total += np.sum(samples)
        total_sq += np.sum(samples ** 2)
        idx += max(16, dist.get_world_size())

    if dist.is_available():
        from jukebox.utils.dist_utils import allreduce
        n_seen = allreduce(n_seen)
        total = allreduce(total)
        total_sq = allreduce(total_sq)
        l1 = allreduce(l1)
        spec_nelem = allreduce(spec_nelem)
        spec_norm_total = allreduce(spec_norm_total)

    mean = total / n_seen
    bandwidth = dict(l2 = total_sq / n_seen - mean ** 2,
                     l1 = l1 / n_seen,
                     spec = spec_norm_total / spec_nelem)
    print_once(bandwidth)
    return bandwidth

def audio_preprocess(x, hps):
    """Collapse the channel axis of an NTC batch to one channel.

    Two-channel input is blended into mono: with ``hps.aug_blend`` a random
    per-item weight is drawn, otherwise both channels get weight 0.5.
    One-channel input passes through. Any other channel count fails an
    assertion. The result keeps a trailing channel axis of size 1.
    """
    # Extra layer in case we want to experiment with different preprocessing
    x = x.float()
    n_channels = x.shape[-1]
    if n_channels == 2:
        if hps.aug_blend:
            mix = t.rand((x.shape[0], 1), device=x.device)
        else:
            mix = 0.5
        first, second = x[:, :, 0], x[:, :, 1]
        x = mix * first + (1 - mix) * second
    elif n_channels == 1:
        x = x[:, :, 0]
    else:
        assert False, f'Expected channels {hps.channels}. Got unknown {x.shape[-1]} channels'

    # x: NT -> NTC
    return x.unsqueeze(2)

def audio_postprocess(x, hps):
    """Identity hook: returns ``x`` unchanged; ``hps`` is unused."""
    return x

def stft(sig, hps):
    """Short-time Fourier transform of ``sig`` with a Hann window.

    Arguments:
        sig: real-valued signal tensor accepted by ``torch.stft``.
        hps: object exposing ``n_fft``, ``hop_length`` and ``window_size``.

    Returns:
        Real tensor whose last dimension (size 2) holds the real and
        imaginary parts, i.e. the layout ``spec`` takes the norm over.
    """
    window = t.hann_window(hps.window_size, device=sig.device)
    try:
        # Current PyTorch requires return_complex for real inputs.
        complex_out = t.stft(sig, hps.n_fft, hps.hop_length, win_length=hps.window_size,
                             window=window, return_complex=True)
    except TypeError:
        # Older PyTorch has no return_complex argument and already returns
        # the (..., 2) real layout.
        return t.stft(sig, hps.n_fft, hps.hop_length, win_length=hps.window_size, window=window)
    return t.view_as_real(complex_out)

def spec(x, hps):
    """Magnitude spectrogram: 2-norm over the last axis of ``stft(x, hps)``."""
    transformed = stft(x, hps)
    return t.norm(transformed, p=2, dim=-1)

def norm(x):
    """Per-item 2-norm: flatten everything after dim 0, then sqrt of the sum of squares."""
    flat = x.view(x.shape[0], -1)
    squared_sum = (flat ** 2).sum(dim=-1)
    return squared_sum.sqrt()

def squeeze(x):
    """Reduce a 3-D input with 1 or 2 trailing channels to 2-D by averaging them.

    2-D input is returned as is; any other rank raises ``ValueError``.
    """
    if len(x.shape) == 3:
        n_channels = x.shape[-1]
        assert n_channels in [1,2]
        x = t.mean(x, -1)
    if len(x.shape) == 2:
        return x
    raise ValueError(f'Unknown input shape {x.shape}')

def spectral_loss(x_in, x_out, hps):
    """Per-item norm of the difference between the two magnitude spectrograms.

    Uses the default STFT settings; only ``hps.sr`` is read from ``hps``.
    """
    stft_hps = DefaultSTFTValues(hps)
    target = spec(squeeze(x_in.float()), stft_hps)
    estimate = spec(squeeze(x_out.float()), stft_hps)
    return norm(target - estimate)

def multispectral_loss(x_in, x_out, hps):
    """Average the spectral loss over several STFT resolutions.

    The resolutions come from the parallel lists ``hps.multispec_loss_n_fft``,
    ``hps.multispec_loss_hop_length`` and ``hps.multispec_loss_window_size``,
    which must have equal length.
    """
    n_ffts = hps.multispec_loss_n_fft
    hop_lengths = hps.multispec_loss_hop_length
    window_sizes = hps.multispec_loss_window_size
    assert len(n_ffts) == len(hop_lengths) == len(window_sizes)

    per_resolution = []
    for n_fft, hop_length, window_size in zip(n_ffts, hop_lengths, window_sizes):
        stft_hps = STFTValues(hps, n_fft, hop_length, window_size)
        target = spec(squeeze(x_in.float()), stft_hps)
        estimate = spec(squeeze(x_out.float()), stft_hps)
        per_resolution.append(norm(target - estimate))
    return sum(per_resolution) / len(per_resolution)

def spectral_convergence(x_in, x_out, hps, epsilon=2e-3):
    """Residual spectrogram norm relative to the reference spectrogram norm.

    Items whose reference norm is not above ``epsilon`` contribute 0; the
    denominator is clamped to at least ``epsilon``.
    """
    stft_hps = DefaultSTFTValues(hps)
    reference = spec(squeeze(x_in.float()), stft_hps)
    estimate = spec(squeeze(x_out.float()), stft_hps)

    reference_norm = norm(reference)
    error_norm = norm(reference - estimate)
    keep = (reference_norm > epsilon).float()
    denominator = t.clamp(reference_norm, min=epsilon)
    return (error_norm * keep) / denominator

def log_magnitude_loss(x_in, x_out, hps, epsilon=1e-4):
    """Mean absolute difference of the log magnitude spectrograms.

    ``epsilon`` is added to each spectrogram before taking the log.
    """
    stft_hps = DefaultSTFTValues(hps)
    log_reference = t.log(spec(squeeze(x_in.float()), stft_hps) + epsilon)
    log_estimate = t.log(spec(squeeze(x_out.float()), stft_hps) + epsilon)
    difference = t.abs(log_reference - log_estimate)
    return t.mean(difference)

def load_audio(file, sr, offset, duration, mono=False):
    """Load a slice of an audio file with librosa.

    ``offset`` and ``duration`` are divided by ``sr`` before being handed to
    ``librosa.load``. A 1-D result is reshaped to ``(1, n)`` so the return
    value always has two dimensions.
    """
    # Librosa loads more filetypes than soundfile
    start = offset / sr
    length = duration / sr
    audio, _ = librosa.load(file, sr=sr, mono=mono, offset=start, duration=length)
    if len(audio.shape) == 1:
        audio = audio.reshape((1, -1))
    return audio


def save_wav(fname, aud, sr):
    """Write each item of ``aud`` to ``{fname}/item_{i}.wav``.

    Values are clamped to [-1, 1] and moved to a CPU numpy array first.
    """
    # clip before saving?
    clipped = t.clamp(aud, -1, 1).cpu().numpy()
    n_items = clipped.shape[0]
    for idx in range(n_items):
        soundfile.write(f'{fname}/item_{idx}.wav', clipped[idx], samplerate=sr, format='wav')


================================================
FILE: jukebox/utils/checkpoint.py
================================================
# Simple gradient checkpointing. Works with distributed data parallel
import torch as t

def checkpoint(func, inputs, params, flag):
    """Call ``func(*inputs)``, optionally through ``CheckpointFunction``.

    Arguments:
        func: callable taking the tensors in ``inputs``.
        inputs (tuple): positional arguments for ``func``.
        params: iterable of parameters ``func`` depends on; appended to the
            arguments given to ``CheckpointFunction.apply``.
        flag: when falsy, ``func`` is called directly.
    """
    if not flag:
        return func(*inputs)
    flat_args = inputs + tuple(params)
    return CheckpointFunction.apply(func, len(inputs), *flat_args)

class CheckpointFunction(t.autograd.Function):
    """Autograd function that recomputes ``run_function`` during backward.

    Forward runs under ``no_grad``; backward re-runs the function on
    detached copies of the inputs with grad enabled and differentiates the
    result with respect to both the inputs and the extra parameters.
    """

    @staticmethod
    def forward(ctx, run_function, length, *args):
        # The first ``length`` entries of args are inputs, the rest parameters.
        ctx.run_function = run_function
        ctx.input_tensors = list(args[:length])
        ctx.input_params = list(args[length:])
        with t.no_grad():
            return ctx.run_function(*ctx.input_tensors)

    @staticmethod
    def backward(ctx, *output_grads):
        # Detach each stored input but keep its requires_grad flag.
        for idx, original in enumerate(ctx.input_tensors):
            detached = original.detach()
            detached.requires_grad = original.requires_grad
            ctx.input_tensors[idx] = detached
        with t.enable_grad():
            recomputed = ctx.run_function(*ctx.input_tensors)
        wrt = ctx.input_tensors + ctx.input_params
        grads = t.autograd.grad(recomputed, wrt, output_grads, allow_unused=True)
        del ctx.input_tensors
        del recomputed
        # No gradients for run_function and length.
        return (None, None) + grads


================================================
FILE: jukebox/utils/dist_adapter.py
================================================
import torch.distributed as dist
from enum import Enum

class ReduceOp(Enum):
    """Reduction operations, mirroring the members of ``torch.distributed.ReduceOp``."""

    # Plain integer values; stray trailing commas would turn them into tuples.
    SUM = 0
    PRODUCT = 1
    MIN = 2
    MAX = 3

    def ToDistOp(self):
        """Return the ``torch.distributed.ReduceOp`` matching this member."""
        return {
            ReduceOp.SUM: dist.ReduceOp.SUM,
            ReduceOp.PRODUCT: dist.ReduceOp.PRODUCT,
            ReduceOp.MIN: dist.ReduceOp.MIN,
            ReduceOp.MAX: dist.ReduceOp.MAX
        }[self]

def is_available():
    """Report whether ``torch.distributed`` is available."""
    available = dist.is_available()
    return available

def get_rank():
    """Return this process's rank, or 0 when distributed is unavailable."""
    return _get_rank() if is_available() else 0

def get_world_size():
    """Return the world size, or 1 when distributed is unavailable."""
    return _get_world_size() if is_available() else 1

def barrier():
    """Run a distributed barrier; a no-op when distributed is unavailable."""
    if not is_available():
        return None
    return _barrier()

def all_gather(tensor_list, tensor):
    """Gather ``tensor`` from all ranks into ``tensor_list``.

    Without distributed support the tensor is simply stored in slot 0.
    """
    if not is_available():
        tensor_list[0] = tensor
        return None
    return _all_gather(tensor_list, tensor)

def all_reduce(tensor, op=ReduceOp.SUM):
    """All-reduce ``tensor`` with ``op``; a no-op when distributed is unavailable."""
    if not is_available():
        return None
    return _all_reduce(tensor, op)

def reduce(tensor, dst, op=ReduceOp.SUM):
    """Reduce ``tensor`` onto rank ``dst``; a no-op when distributed is unavailable."""
    if not is_available():
        return None
    return _reduce(tensor, dst, op)

def broadcast(tensor, src):
    """Broadcast ``tensor`` from rank ``src``; a no-op when distributed is unavailable."""
    if not is_available():
        return None
    return _broadcast(tensor, src)

def init_process_group(backend, init_method):
    """Initialise the process group; a no-op when distributed is unavailable."""
    if not is_available():
        return None
    return _init_process_group(backend, init_method)

def _get_rank():
    """Thin wrapper over ``torch.distributed.get_rank``."""
    rank = dist.get_rank()
    return rank

def _barrier():
    """Thin wrapper over ``torch.distributed.barrier``."""
    result = dist.barrier()
    return result

def _get_world_size():
    """Thin wrapper over ``torch.distributed.get_world_size``."""
    world_size = dist.get_world_size()
    return world_size

def _all_gather(tensor_list, tensor):
    """Thin wrapper over ``torch.distributed.all_gather``."""
    result = dist.all_gather(tensor_list, tensor)
    return result

def _all_reduce(tensor, op):
    """Call ``torch.distributed.all_reduce`` with ``op`` converted via ``ToDistOp``."""
    torch_op = op.ToDistOp()
    return dist.all_reduce(tensor, torch_op)

def _reduce(tensor, dst, op):
    """Call ``torch.distributed.reduce`` with ``op`` converted via ``ToDistOp``."""
    torch_op = op.ToDistOp()
    return dist.reduce(tensor, dst, torch_op)

def _broadcast(tensor, src):
    """Thin wrapper over ``torch.distributed.broadcast``."""
    result = dist.broadcast(tensor, src)
    return result

def _init_process_group(backend, init_method):
    """Thin wrapper over ``torch.distributed.init_process_group``."""
    result = dist.init_process_group(backend, init_method)
    return result

================================================
FILE: jukebox/utils/dist_utils.py
================================================
import os
from time import sleep
import torch
import jukebox.utils.dist_adapter as dist

def print_once(msg):
    """Print ``msg`` on rank 0 only (or always when distributed is unavailable)."""
    should_print = (not dist.is_available()) or dist.get_rank() == 0
    if should_print:
        print(msg)

def print_all(msg):
    """Print ``msg`` from every eighth rank, prefixed with ``rank // 8``.

    When distributed is unavailable the message is printed without a prefix.
    """
    if not dist.is_available():
        print(msg)
        return
    rank = dist.get_rank()
    if rank % 8 == 0:
        print(f'{rank//8}: {msg}')

def allgather(x):
    """Gather ``x`` from every rank and concatenate the results along dim 0."""
    world_size = dist.get_world_size()
    gathered = [torch.empty_like(x) for _ in range(world_size)]
    dist.all_gather(gathered, x)
    return torch.cat(gathered, dim=0)

def allreduce(x, op=dist.ReduceOp.SUM):
    """All-reduce a Python scalar across ranks and return it as a Python number.

    The value is moved to a float CUDA tensor for the reduction.
    """
    buffer = torch.tensor(x).float().cuda()
    dist.all_reduce(buffer, op=op)
    return buffer.item()

def allgather_lists(xs):
    """Gather variable-length lists from every rank.

    Each rank contributes ``len(xs)`` lists. They are zero-padded to the
    global maximum length, gathered as CUDA tensors, then trimmed back to
    their true lengths.

    Arguments:
        xs: list of lists of numbers; every rank must pass the same number of
            lists (asserted through the gathered shape).

    Returns:
        List of ``world_size * len(xs)`` Python lists.
    """
    bs = len(xs)
    total_bs = dist.get_world_size()*len(xs)
    # This module imports ``torch`` (not ``t``), so the dtype must be torch.long.
    lengths = torch.tensor([len(x) for x in xs], dtype=torch.long, device='cuda')
    lengths = allgather(lengths)
    assert lengths.shape == (total_bs,)
    max_length = torch.max(lengths).item()

    # Right-pad every list with zeros up to the global maximum length.
    xs = torch.tensor([[*x, *[0]*(max_length - len(x))] for x in xs], device='cuda')
    assert xs.shape == (bs, max_length), f'Expected {(bs, max_length)}, got {xs.shape}'
    xs = allgather(xs)
    assert xs.shape == (total_bs,max_length), f'Expected {(total_bs, max_length)}, got {xs.shape}'

    return [xs[i][:lengths[i]].cpu().numpy().tolist() for i in range(total_bs)]

def setup_dist_from_mpi(
    master_addr="127.0.0.1", backend="nccl", port=29500, n_attempts=5, verbose=False
):
    """Set up distributed training, falling back to a single local process.

    When ``dist.is_available()`` the work is delegated to
    ``_setup_dist_from_mpi``. Otherwise rank 0 and local rank 0 are used.

    Returns:
        ``(mpi_rank, local_rank, device)``.
    """
    if dist.is_available():
        return _setup_dist_from_mpi(master_addr, backend, port, n_attempts, verbose)

    use_cuda = torch.cuda.is_available()
    print(f'Using cuda {use_cuda}')

    mpi_rank = 0
    local_rank = 0

    if use_cuda:
        device = torch.device("cuda", local_rank)
        # Only select a CUDA device when one exists; doing so on a CPU-only
        # machine would raise.
        torch.cuda.set_device(local_rank)
    else:
        device = torch.device("cpu")

    return mpi_rank, local_rank, device

def _setup_dist_from_mpi(master_addr, backend, port, n_attempts, verbose):
    """Initialise the process group from the MPI rank and world size.

    Exports the rendezvous and NCCL environment variables, selects the CUDA
    device ``mpi_rank % 8`` when CUDA is available, and tries
    ``init_process_group`` up to ``n_attempts`` times.

    Returns:
        ``(mpi_rank, local_rank, device)``.

    Raises:
        RuntimeError: if every initialisation attempt fails.
    """
    from mpi4py import MPI  # This must be imported in order to get errors from all ranks to show up

    mpi_rank = MPI.COMM_WORLD.Get_rank()
    mpi_size = MPI.COMM_WORLD.Get_size()

    os.environ["RANK"] = str(mpi_rank)
    os.environ["WORLD_SIZE"] = str(mpi_size)
    os.environ["MASTER_ADDR"] = master_addr
    os.environ["MASTER_PORT"] = str(port)
    os.environ["NCCL_LL_THRESHOLD"] = "0"
    os.environ["NCCL_NSOCKS_PERTHREAD"] = "2"
    os.environ["NCCL_SOCKET_NTHREADS"] = "8"

    # Pin this rank to a specific GPU on the node
    # NOTE(review): the modulo presumably assumes 8 GPUs per node - confirm.
    local_rank = mpi_rank % 8
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(local_rank)

    if verbose:
        print(f"Connecting to master_addr: {master_addr}")

    # There is a race condition when initializing NCCL with a large number of ranks (e.g 500 ranks)
    # We guard against the failure and then retry
    for attempt_idx in range(n_attempts):
        try:
            dist.init_process_group(backend=backend, init_method="env://")
            assert dist.get_rank() == mpi_rank

            print(f'Using cuda {use_cuda}')
            device = torch.device("cuda", local_rank) if use_cuda else torch.device("cpu")
            if use_cuda:
                # Guarded so a CPU-only host does not raise after a
                # successful init and trigger a pointless retry.
                torch.cuda.set_device(local_rank)

            return mpi_rank, local_rank, device
        except RuntimeError as e:
            print(f"Caught error during NCCL init (attempt {attempt_idx} of {n_attempts}): {e}")
            sleep(1 + (0.01 * mpi_rank))  # Sleep to avoid thundering herd

    raise RuntimeError("Failed to initialize NCCL")


================================================
FILE: jukebox/utils/ema.py
================================================
import torch
from torch._utils import _flatten_dense_tensors
import numpy as np

# EMA always in float, as accumulation needs lots of bits
class EMA:
    """Exponential moving average of parameters, kept as float32 tensors.

    ``state`` is a list of ``(param, average)`` pairs for every parameter
    with ``requires_grad`` set.
    """

    def __init__(self, params, mu=0.999):
        """
        Arguments:
            params: iterable of parameters to track.
            mu (float): decay; each step keeps ``mu`` of the old average.
        """
        self.mu = mu
        self.state = [(p, self.get_model_state(p)) for p in params if p.requires_grad]

    def get_model_state(self, p):
        """Return a detached float32 copy of ``p``'s current data."""
        return p.data.float().detach().clone()

    def step(self):
        """Update every average: ``avg = mu * avg + (1 - mu) * param``."""
        for p, state in self.state:
            # Keyword alpha replaces the deprecated add_(scalar, tensor) overload.
            state.mul_(self.mu).add_(p.data.float(), alpha=1 - self.mu)

    def swap(self):
        """Exchange the averaged values with the live parameter values."""
        # swap ema and model params
        for p, state in self.state:
            other_state = self.get_model_state(p)
            p.data.copy_(state.type_as(p.data))
            state.copy_(other_state)


class CPUEMA:
    """Exponential moving average whose state is stored as CPU numpy arrays.

    The average is only updated every ``freq`` calls to ``step``; the decay
    is raised to the power ``freq`` to compensate. The update itself moves
    the state to CUDA, so ``step`` needs a GPU whenever an update is due.
    """

    def __init__(self, params, mu=0.999, freq=1):
        """
        Arguments:
            params: iterable of parameters to track.
            mu (float): per-step decay; stored as ``mu ** freq``.
            freq (int): number of ``step`` calls between updates.
        """
        self.mu = mu**freq
        self.state = [(p, self.get_model_state(p)) for p in params if p.requires_grad]
        self.freq = freq
        self.steps = 0

    def get_model_state(self, p):
        """Return ``p``'s data as a float32 numpy array on the CPU."""
        with torch.no_grad():
            state = p.data.float().detach().cpu().numpy()
        return state

    def step(self):
        """Count a step and, every ``freq`` steps, update all averages."""
        with torch.no_grad():
            self.steps += 1
            if self.steps % self.freq == 0:
                for i in range(len(self.state)):
                    p, state = self.state[i]
                    state = torch.from_numpy(state).cuda()
                    # Keyword alpha replaces the deprecated add_(scalar, tensor) overload.
                    state.mul_(self.mu).add_(p.data.float(), alpha=1 - self.mu)
                    self.state[i] = (p, state.cpu().numpy())

    def swap(self):
        """Exchange the averaged values with the live parameter values."""
        with torch.no_grad():
            # swap ema and model params
            for p, state in self.state:
                other_state = self.get_model_state(p)
                p.data.copy_(torch.from_numpy(state).type_as(p.data))
                np.copyto(state, other_state)

class FusedEMA:
    """Exponential moving average kept as one flat float32 tensor per dtype group.

    Trainable parameters are split into an ``'fp16'`` group (float16 data)
    and an ``'fp32'`` group (everything else); only non-empty groups are
    tracked.
    """

    def __init__(self, params, mu=0.999):
        """
        Arguments:
            params: iterable of parameters to track.
            mu (float): decay; each step keeps ``mu`` of the old average.
        """
        self.mu = mu
        params = list(params)
        self.params = {}
        self.params['fp16'] = [p for p in params if p.requires_grad and p.data.dtype == torch.float16]
        self.params['fp32'] = [p for p in params if p.requires_grad and p.data.dtype != torch.float16]
        self.groups = [group for group in self.params.keys() if len(self.params[group]) > 0]
        self.state = {}
        for group in self.groups:
            self.state[group] = self.get_model_state(group)

    def get_model_state(self, group):
        """Return the group's current parameter values flattened into one float32 tensor."""
        params = self.params[group]
        return _flatten_dense_tensors([p.data.float() for p in params])

    def step(self):
        """Update every group: ``avg = mu * avg + (1 - mu) * params``."""
        for group in self.groups:
            # Keyword alpha replaces the deprecated add_(scalar, tensor) overload.
            self.state[group].mul_(self.mu).add_(self.get_model_state(group), alpha=1 - self.mu)

    def swap(self):
        """Exchange the averaged values with the live parameter values."""
        # swap ema and model params
        for group in self.groups:
            other_state = self.get_model_state(group)
            state = self.state[group]
            params = self.params[group]
            offset = 0
            for p in params:
                # Slice this parameter's segment out of the flat average.
                numel = p.data.numel()
                p.data = state.narrow(0, offset, numel).view_as(p.data).type_as(p.data)
                offset += numel

            self.state[group] = other_state


================================================
FILE: jukebox/utils/fp16.py
================================================
# Utils for fp16 training.
import importlib
import math
import numpy as np
import torch
import jukebox.utils.dist_adapter as dist
from torch.optim import Optimizer
from torch._utils import _flatten_dense_tensors

from jukebox.utils.dist_utils import allreduce

def adam_step(p: torch.Tensor, out_p: torch.Tensor, exp_avg: torch.Tensor, exp_avg_sq: torch.Tensor, grad: torch.Tensor,
              lr: float, beta1: float, beta2: float, eps: float, scale: float, step: int, eps_mode: int, bias_correction: int, weight_decay: float):
    """Pure-PyTorch Adam update used when the fused CUDA kernel is missing.

    Updates ``p``, ``exp_avg`` and ``exp_avg_sq`` in place. ``grad`` is read
    only: it is converted to float32 and divided by ``scale`` on a copy.

    Arguments:
        p: parameter tensor to update.
        out_p: unused here; kept so the signature matches the call site.
        exp_avg, exp_avg_sq: running first and second moment estimates.
        grad: gradient of ``p`` (still multiplied by ``scale``).
        lr, beta1, beta2, eps, weight_decay: Adam hyperparameters.
        scale: factor the gradient is divided by.
        step: 1-based step count used for bias correction.
        eps_mode: must be 1 (eps added outside the square root).
        bias_correction: must be 1.
    """
    assert bias_correction == 1
    assert eps_mode == 1

    # Out-of-place division: ``.float()`` returns the very same tensor for
    # float32 gradients, so an in-place div_ would rescale the caller's grad.
    grad = grad.float() / scale

    # Decay the first and second moment running average coefficient
    exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
    exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
    denom = exp_avg_sq.sqrt().add_(eps)

    bias_correction1 = 1 - beta1 ** step
    bias_correction2 = 1 - beta2 ** step
    step_size = lr * math.sqrt(bias_correction2) / bias_correction1

    # Weight decay is added to the Adam direction before the step is applied.
    p.add_(exp_avg/denom + weight_decay*p.float(), alpha=-step_size)

# Import fused_adam if we have apex, otherwise use regular adam.
# ``fused_adam_step`` is the callable the optimizers below invoke for every
# parameter: the ``adam`` entry point of the ``fused_adam_cuda`` extension
# when that module can be imported, else the pure-PyTorch ``adam_step``.
try:
    fused_adam_cuda = importlib.import_module("fused_adam_cuda")
    fused_adam_step = fused_adam_cuda.adam
    print("Using apex fused_adam_cuda")
except ModuleNotFoundError:
    # Extension not installed: fall back silently.
    fused_adam_step = adam_step

def backward(loss, params, scalar, fp16, logger):
    """Run the backward pass, with loss scaling and overflow checks in fp16.

    Arguments:
        loss: loss tensor to back-propagate.
        params: parameters whose gradient norm is measured.
        scalar: loss scaler providing ``get_scale`` and ``update_scale``.
        fp16 (bool): when False a plain backward pass is done with scale 1.0.
        logger: object with a ``rank`` attribute; warnings print on rank 0.

    Returns:
        ``(loss, scale, grad_norm, overflow_loss, overflow_grad)``. In fp16
        mode the returned loss is detached and divided by the scale again.
    """
    # Perform backward
    if not fp16:
        scale = 1.0
        loss.backward()
        gn = grad_norm(params, scale)
        return loss, scale, gn, False, False
    else:
        scale = scalar.get_scale()
        loss = (loss.float())*scale
        overflow_loss = check_overflow(loss.item())
        # MAX-reduce the flag so that an overflow on any rank counts for all.
        overflow_loss = allreduce(int(overflow_loss), op=dist.ReduceOp.MAX) > 0
        if not overflow_loss:
            loss.backward()
            gn = grad_norm(params, scale)
            overflow_grad = check_overflow(gn)
            overflow_grad = allreduce(int(overflow_grad), op=dist.ReduceOp.MAX) > 0
            scalar.update_scale(overflow_grad)
        else:
            # Forward overflow: no backward pass and no scale update.
            gn = 0.0
            overflow_grad = True
        loss = (loss.detach().float()) / scale # Should delete computation graph for overflow
        if logger.rank == 0:
            if loss > 12.: print(f"\nWarning. Loss is {loss}")
            if overflow_loss: print(f"\nOverflow in forward. Loss {loss}, lgscale {np.log2(scale)}. Skipping batch completely (no backward, scale update)")
            elif overflow_grad: print(f"\nOverflow in backward. Loss {loss}, grad norm {gn}, lgscale {np.log2(scale)}, new lgscale {np.log2(scalar.get_scale())}")
        return loss, scale, gn, overflow_loss, overflow_grad

# Automatic loss scaling
class LossScalar(object):
    """Loss scale holder with optional dynamic adjustment.

    With ``loss_scale=None`` the scale is dynamic: it starts at
    ``init_scale``, is halved on overflow and multiplied by ``scale_factor``
    after ``scale_window`` consecutive overflow-free updates (capped at
    2**24). With any other value the scale is static.
    """

    def __init__(self,
                 loss_scale,
                 init_scale=2. ** 16,
                 scale_factor=2. ** (1. / 1000),
                 scale_window=1):
        """
        Arguments:
            loss_scale: fixed scale, or None to enable dynamic scaling.
            init_scale: starting scale in dynamic mode.
            scale_factor: growth factor applied after a clean window.
            scale_window: number of clean updates between growth steps.
        """
        if loss_scale is None:
            # Use dynamic loss scaling
            self.dynamic = True
            self.loss_scale = init_scale
        else:
            self.dynamic = False
            self.loss_scale = loss_scale
        self.max_loss_scale = 2.**24
        self.scale_factor = scale_factor
        self.scale_window  = scale_window
        self.unskipped = 0
        self.overflow = False

    def get_scale(self):
        """Return the current loss scale."""
        return self.loss_scale

    def update_scale(self, overflow):
        """Adjust the scale after a step; only has an effect in dynamic mode."""
        if overflow and self.dynamic:
            self.loss_scale /= 2.
            self.unskipped = 0
        else:
            self.unskipped += 1

        if self.unskipped == self.scale_window and self.dynamic:
            self.loss_scale = min(self.max_loss_scale, self.loss_scale * self.scale_factor)
            self.unskipped = 0

def check_overflow(val):
    """Return True when ``val`` is +inf, -inf or NaN."""
    infinity = float('inf')
    is_infinite = (val == infinity) or (val == -infinity)
    # NaN is the only value that compares unequal to itself.
    return is_infinite or (val != val)

def grad_norm(params, scale, flat=False):
    """Global 2-norm of all gradients, divided by ``scale``.

    Parameters without a gradient are skipped. With ``flat=True`` the
    gradients are flattened per dtype group (float16 vs. the rest) before
    taking the norm; otherwise the squared norms are accumulated one
    parameter at a time.
    """
    params = list(params)
    if flat:
        # Faster but more memory
        half_grads, other_grads = [], []
        for p in params:
            if p.grad is None:
                continue
            bucket = half_grads if p.data.dtype == torch.float16 else other_grads
            bucket.append(p.grad)
        half_norm = float(_flatten_dense_tensors(half_grads).norm(p=2, dtype=torch.float32)) if len(half_grads) != 0 else 0.0
        other_norm = float(_flatten_dense_tensors(other_grads).norm(p=2)) if len(other_grads) != 0 else 0.0
        total = (half_norm**2 + other_norm**2)**0.5
    else:
        # Slightly slower but less memory
        squared_sum = 0.0
        for p in params:
            if p.grad is None:
                continue
            squared_sum += p.grad.norm(p=2, dtype=torch.float32)**2
        total = float(squared_sum**0.5)
    return total / scale

def clipped_grad_scale(grad_norm, max_grad_norm, scale):
    """Return ``scale``, enlarged when the gradient norm exceeds the limit.

    When ``grad_norm / max_grad_norm`` is above 1 the scale is multiplied by
    that ratio; otherwise it is returned unchanged.
    """
    ratio = grad_norm / max_grad_norm
    return ratio * scale if ratio > 1 else scale

class FP16FusedAdam(Optimizer):
    """Adam optimizer that stores the moments of float16 parameters in float16.

    For float16 parameters the moment buffers are kept as half tensors
    together with a Python float scale each, so that the largest magnitude
    maps onto the float16 maximum. The per-parameter update is delegated to
    the module-level ``fused_adam_step`` callable.
    """

    def __init__(
        self,
        params,
        lr=1e-3,
        bias_correction=True,
        betas=(0.9, 0.999),
        eps=1e-8,
        eps_inside_sqrt=False,
        weight_decay=0.0,
        amsgrad=False,
    ):
        if amsgrad:
            raise RuntimeError("FusedAdam does not support the AMSGrad variant.")
        defaults = {
            "lr": lr,
            "bias_correction": bias_correction,
            "betas": betas,
            "eps": eps,
            "weight_decay": weight_decay,
        }
        super().__init__(params, defaults)
        # 0: eps inside the square root, 1: eps outside.
        self.eps_mode = 0 if eps_inside_sqrt else 1
        self.FLOAT16_MAX = 65504.0
        self.init_state()

    def init_state(self):
        """Create the per-parameter state for every parameter that has none yet."""
        for group in self.param_groups:
            for p in group["params"]:
                assert p.requires_grad == True
                state = self.state[p]
                if len(state) != 0:
                    continue
                state["step"] = 0
                # Exponential moving average of gradient values
                state["exp_avg"] = torch.zeros_like(p.data)
                # Exponential moving average of squared gradient values
                state["exp_avg_sq"] = torch.zeros_like(p.data)
                if p.data.dtype == torch.float16:
                    state["scale_exp_avg"] = 1.0
                    state["scale_exp_avg_sq"] = 1.0

    def step(self, closure=None, scale=1.0):
        """Performs a single optimization step. Scales gradients down by scale
        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
            scale (float, optional): factor to divide gradient tensor values
                by before applying to weights. (default: 1)
        """
        loss = closure() if closure is not None else None

        for group in self.param_groups:
            bias_correction = 1 if group["bias_correction"] else 0

            for p in group["params"]:
                if p.grad is None:
                    continue
                grad = p.grad.data
                state = self.state[p]
                is_half = p.data.dtype == torch.float16

                if is_half:
                    # Expand the stored half moments back to float32.
                    exp_avg = state["exp_avg"].float() * state["scale_exp_avg"]
                    exp_avg_sq = state["exp_avg_sq"].float() * state["scale_exp_avg_sq"]
                else:
                    exp_avg = state["exp_avg"]
                    exp_avg_sq = state["exp_avg_sq"]
                beta1, beta2 = group["betas"]

                state["step"] += 1

                out_p = torch.tensor([], dtype=torch.float)
                fused_adam_step(
                    p.data,
                    out_p,
                    exp_avg,
                    exp_avg_sq,
                    grad,
                    group["lr"],
                    beta1,
                    beta2,
                    group["eps"],
                    scale,
                    state["step"],
                    self.eps_mode,
                    bias_correction,
                    group["weight_decay"],
                )

                if is_half:
                    # Re-derive the scales from the largest magnitudes, then
                    # store the rescaled moments as half tensors again.
                    avg_peak = float(torch.norm(exp_avg, float("inf")))
                    avg_sq_peak = float(torch.norm(exp_avg_sq, float("inf")))
                    state["scale_exp_avg"] = 1e-8 + avg_peak / self.FLOAT16_MAX
                    state["scale_exp_avg_sq"] = 1e-8 + avg_sq_peak / self.FLOAT16_MAX
                    state["exp_avg"] = (exp_avg / state["scale_exp_avg"]).half()
                    state["exp_avg_sq"] = (exp_avg_sq / state["scale_exp_avg_sq"]).half()

        return loss


class FusedAdam(Optimizer):
    """Adam optimizer whose per-parameter update is delegated to ``fused_adam_step``.

    The first and second moment estimates are stored as float32 tensors in the
    optimizer state. The AMSGrad variant is not supported.
    """

    def __init__(
        self,
        params,
        lr=1e-3,
        bias_correction=True,
        betas=(0.9, 0.999),
        eps=1e-8,
        eps_inside_sqrt=False,
        weight_decay=0.0,
        amsgrad=False,
    ):
        """Store the hyper-parameters as group defaults.

        Raises:
            RuntimeError: if ``amsgrad`` is truthy.
        """
        if amsgrad:
            raise RuntimeError("FusedAdam does not support the AMSGrad variant.")
        defaults = {
            "lr": lr,
            "bias_correction": bias_correction,
            "betas": betas,
            "eps": eps,
            "weight_decay": weight_decay,
        }
        super().__init__(params, defaults)
        # Integer flag forwarded to fused_adam_step: 0 when eps_inside_sqrt is set, else 1.
        self.eps_mode = 1 if not eps_inside_sqrt else 0

    def step(self, closure=None, scale=1.0):
        """Run one optimization step over every parameter that has a gradient.

        Arguments:
            closure (callable, optional): re-evaluates the model and returns
                the loss; its result is returned from this method.
            scale (float, optional): forwarded unchanged to ``fused_adam_step``
                (documented upstream as the factor gradients are divided by
                before being applied). (default: 1)

        Returns:
            The closure's return value, or ``None`` when no closure is given.
        """
        loss = closure() if closure is not None else None

        for group in self.param_groups:
            use_bias_correction = int(bool(group["bias_correction"]))

            for p in group["params"]:
                if p.grad is None:
                    # Parameters without a gradient are left untouched.
                    continue
                grad = p.grad.data
                state = self.state[p]

                if not state:
                    # First time this parameter is seen: start the step counter
                    # and allocate float32 moment buffers shaped like the weights.
                    state["step"] = 0
                    state["exp_avg"] = torch.zeros_like(p.data).float()
                    state["exp_avg_sq"] = torch.zeros_like(p.data).float()

                first_moment = state["exp_avg"]
                second_moment = state["exp_avg_sq"]
                beta1, beta2 = group["betas"]

                state["step"] += 1

                # Empty placeholder passed as the second argument of the kernel.
                out_p = torch.tensor([], dtype=torch.float)
                fused_adam_step(
                    p.data,
                    out_p,
                    first_moment,
                    second_moment,
                    grad,
                    group["lr"],
                    beta1,
                    beta2,
                    group["eps"],
                    scale,
                    state["step"],
                    self.eps_mode,
                    use_bias_correction,
                    group["weight_decay"],
                )

        return loss


================================================
FILE: jukebox/utils/io.py
================================================
import numpy as np
import av
import torch as t
import jukebox.utils.dist_adapter as dist

def get_duration_sec(file, cache=False):
    """Return the duration of an audio file in seconds.

    A sidecar file named ``file + '.dur'`` is consulted first; when it exists
    and its first line parses as a float, that value is returned without
    opening the audio file. Otherwise the duration is read from the first audio
    stream via PyAV.

    Args:
        file: path of the audio file (string).
        cache: when True and the duration had to be probed, write it to the
            ``.dur`` sidecar so later calls can skip the probe.

    Returns:
        Duration in seconds as a float.
    """
    try:
        with open(file + '.dur', 'r') as f:
            return float(f.readline().strip('\n'))
    except (OSError, ValueError):
        # Sidecar missing, unreadable or malformed: probe the audio file.
        # Only these errors are caught so that KeyboardInterrupt / SystemExit
        # are no longer swallowed as they were by the former bare ``except``.
        pass

    container = av.open(file)
    audio = container.streams.get(audio=0)[0]
    # Stream duration is expressed in units of the stream's time_base.
    duration = audio.duration * float(audio.time_base)
    if cache:
        with open(file + '.dur', 'w') as f:
            f.write(str(duration) + '\n')
    return duration

def load_audio(file, sr, offset, duration, resample=True, approx=False, time_base='samples', check_duration=True):
    """Decode a window of the first audio stream of ``file``.

    Args:
        file: path passed to ``av.open``.
        sr: target sample rate. With ``resample=False`` it must equal the
            stream's own sample rate.
        offset: start of the window, in samples at ``sr`` (or in seconds when
            ``time_base == 'sec'``).
        duration: length of the window, in the same unit as ``offset``.
        resample: resample every frame to planar float, stereo, ``sr`` Hz.
        approx: if the window runs past the end of the stream, move it back
            instead of failing.
        time_base: ``'samples'`` (default) or ``'sec'``.
        check_duration: when ``approx`` is False, assert that the window fits
            inside the stream.

    Returns:
        ``(sig, sr)`` where ``sig`` is a float32 array of shape
        ``(2, int(duration))``. Positions that were not filled by decoded
        frames remain zero.
    """
    if time_base == 'sec':
        offset = offset * sr
        duration = duration * sr
    # Loads at target sr, stereo channels, seeks from offset, and stops after duration
    container = av.open(file)
    audio = container.streams.get(audio=0)[0] # Only first audio stream
    audio_duration = audio.duration * float(audio.time_base)
    if approx:
        if offset + duration > audio_duration*sr:
            # Move back one window. Cap at audio_duration.
            # Built-in min is required here: np.min(a, b) would treat b as the
            # `axis` argument rather than as a second operand.
            offset = min(audio_duration*sr - duration, offset - duration)
    else:
        if check_duration:
            assert offset + duration <= audio_duration*sr, f'End {offset + duration} beyond duration {audio_duration*sr}'
    if resample:
        resampler = av.AudioResampler(format='fltp',layout='stereo', rate=sr)
    else:
        assert sr == audio.sample_rate
    # Seeking uses units of the stream time_base; the output uses units of 1/sr.
    offset = int(offset / sr / float(audio.time_base))
    duration = int(duration)
    sig = np.zeros((2, duration), dtype=np.float32)
    container.seek(offset, stream=audio)
    total_read = 0
    for frame in container.decode(audio=0): # Only first audio stream
        if resample:
            frame.pts = None
            frame = resampler.resample(frame)
        frame = frame.to_ndarray(format='fltp') # Convert to floats and not int16
        read = frame.shape[-1]
        if total_read + read > duration:
            # Truncate the last frame so the buffer is never overrun.
            read = duration - total_read
        sig[:, total_read:total_read + read] = frame[:, :read]
        total_read += read
        if total_read == duration:
            break
    # Sanity check only: a short read is tolerated and leaves trailing zeros.
    assert total_read <= duration, f'Expected {duration} frames, got {total_read}'
    return sig, sr

def test_simple_loader():
    """Manual smoke test: decode two 6-second windows per file and move them to the GPU.

    Files are discovered under ``/root/data/``; the loop stops after the file
    at index 100.
    """
    import librosa
    from tqdm import tqdm

    def collate_fn(batch):
        # Stack a list of numpy arrays into one CPU tensor along a new batch axis.
        return t.stack([t.from_numpy(b) for b in batch], dim=0)

    def get_batch(file, loader):
        # Two windows from the same file, starting at 0 s and at 20 s.
        first, sr = loader(file, sr=44100, offset=0.0, duration=6.0, time_base='sec')
        second, sr = loader(file, sr=44100, offset=20.0, duration=6.0, time_base='sec')
        return [first, second]

    def load(file, loader):
        stacked = collate_fn(get_batch(file, loader))  # numpy -> torch cpu
        return stacked.to('cuda', non_blocking=True)  # torch cpu -> torch gpu

    files = librosa.util.find_files('/root/data/', ['mp3', 'm4a', 'opus'])
    print(files[:10])
    loader = load_audio
    print("Loader", loader.__name__)
    # Touch the GPU once before loading any audio.
    t.randn(2, 2).cuda()
    load(files[0], loader)
    for idx, path in enumerate(tqdm(files)):
        load(path, loader)
        if idx == 100:
            break

def test_dataset_loader():
    """Manual smoke test: pull one batch through the distributed DataLoader.

    The batch is logged to TensorBoard before and after an
    ``audio_preprocess`` / ``audio_postprocess`` round trip. Requires an
    initialised distributed backend and a CUDA device.
    """
    from tqdm import tqdm
    from torch.utils.data import DataLoader
    from torch.utils.data.distributed import DistributedSampler
    from jukebox.utils.audio_utils import audio_preprocess, audio_postprocess
    from jukebox.hparams import setup_hparams
    from jukebox.data.files_dataset import FilesAudioDataset

    # Start from the "teeny" preset and override what this test cares about.
    hps = setup_hparams("teeny", {})
    hps.sr = 22050  # 44100
    hps.hop_length = 512
    hps.labels = False
    hps.channels = 2
    hps.aug_shift = False
    hps.bs = 2
    hps.nworkers = 2 # Getting 20 it/s with 2 workers, 10 it/s with 1 worker
    print(hps)

    dataset_name = hps.dataset
    root = hps.root
    from tensorboardX import SummaryWriter
    sr_tag = {22050: '22k', 44100: '44k', 48000: '48k'}[hps.sr]
    writer = SummaryWriter(f'{root}/{dataset_name}/logs/{sr_tag}/logs')
    audio_dataset = FilesAudioDataset(hps)
    print("Length of dataset", len(audio_dataset))

    # Torch Loader
    def collate_fn(batch):
        return t.stack([t.from_numpy(b) for b in batch], 0)

    def log_batch(prefix, batch_idx, batch):
        # One TensorBoard audio entry per item, numbered globally across batches.
        for j, aud in enumerate(batch):
            writer.add_audio(prefix + str(batch_idx*hps.bs + j), aud, 1, hps.sr)

    sampler = DistributedSampler(audio_dataset)
    train_loader = DataLoader(
        audio_dataset,
        batch_size=hps.bs,
        num_workers=hps.nworkers,
        pin_memory=False,
        sampler=sampler,
        drop_last=True,
        collate_fn=collate_fn,
    )

    dist.barrier()
    sampler.set_epoch(0)
    for i, x in enumerate(tqdm(train_loader)):
        x = x.to('cuda', non_blocking=True)
        log_batch('in_', i, x)
        print("Wrote in")
        x = audio_preprocess(x, hps)
        x = audio_postprocess(x, hps)
        log_batch('out_', i, x)
        print("Wrote out")
        dist.barrier()
        # Only the first batch is exercised.
        break

if __name__ == '__main__':
    # Script entry point: call setup_dist_from_mpi on port 29500 (presumably
    # initialises the distributed backend from the MPI environment -- see
    # jukebox.utils.dist_utils), then run the dataset-loader smoke test.
    from jukebox.utils.dist_utils import setup_dist_from_mpi
    setup_dist_from_mpi(port=29500)
    test_dataset_loader()


================================================
FILE: jukebox/utils/logger.py
================================================
import torch as t
import jukebox.utils.dist_adapter as dist
from tqdm import tqdm
from datetime import date
import os
import sys

def def_tqdm(x):
    """Wrap iterable ``x`` in a tqdm bar with the project's default settings.

    The bar is written to stdout, kept on screen after completion, and uses a
    compact ``n/total [elapsed<remaining, rate]`` layout.
    """
    layout = "{n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]"
    return tqdm(x, leave=True, file=sys.stdout, bar_format=layout)

def get_range(x):
    """Return ``x`` wrapped in a progress bar on rank 0, and ``x`` itself elsewhere."""
    return def_tqdm(x) if dist.get_rank() == 0 else x

def init_logging(hps, local_rank, rank):
    """Create the run's log directory and build the logging helpers.

    On the process with ``local_rank == 0`` the directory
    ``{hps.local_logdir}/{hps.name}`` is created if needed and the command line
    (``hps.argv``) is written to ``argv.txt`` inside it.

    Args:
        hps: hyper-parameter object providing ``local_logdir``, ``name`` and
            ``argv``.
        local_rank: rank of this process on its node.
        rank: global rank, forwarded to ``Logger``.

    Returns:
        ``(logger, metrics)``: a ``Logger`` bound to the log directory and a
        fresh ``Metrics`` accumulator.
    """
    logdir = f"{hps.local_logdir}/{hps.name}"
    if local_rank == 0:
        # exist_ok avoids the check-then-create race between processes.
        os.makedirs(logdir, exist_ok=True)
        # Join with a separator so argv.txt lands inside logdir; plain string
        # concatenation used to create "<name>argv.txt" next to the directory.
        with open(os.path.join(logdir, 'argv.txt'), 'w') as f:
            f.write(hps.argv + '\n')
        print("Logging to", logdir)
    logger = Logger(logdir, rank)
    metrics = Metrics()
    logger.add_text('hps', str(hps))
    return logger, metrics

def get_name(hps):
    """Build a name by concatenating ``"{key}_{value}_"`` for each item of ``hps``.

    Items are taken in the mapping's iteration order; an empty mapping yields
    an empty string.
    """
    return "".join(f"{key}_{value}_" for key, value in hps.items())

def average_metrics(_metrics):
    """Average a sequence of metric dicts key by key.

    Each key is averaged over the dicts that contain it, so keys missing from
    some entries do not count as zero.

    Returns:
        A dict mapping every key seen to the mean of its collected values.
    """
    collected = {}
    for entry in _metrics:
        for name, value in entry.items():
            collected.setdefault(name, []).append(value)

    averaged = {}
    for name, values in collected.items():
        averaged[name] = sum(values) / len(values)
    return averaged

class Metrics:
    """Running, cross-process weighted averages keyed by tag."""

    def __init__(self):
        # Per-tag accumulated weighted totals and accumulated weights.
        self.sum, self.n = {}, {}

    def update(self, tag, val, batch):
        """Fold one batch-average ``val`` (weight ``batch``) into ``tag``.

        The weighted value and the weight are each passed through
        ``dist.all_reduce`` before being accumulated.

        Returns:
            The reduced average for this single update.
        """
        total = t.tensor(val * batch).float().cuda()
        count = t.tensor(batch).float().cuda()
        dist.all_reduce(total)
        dist.all_reduce(count)
        total = total.item()
        count = count.item()
        self.sum[tag] = self.sum.get(tag, 0.0) + total
        self.n[tag] = self.n.get(tag, 0.0) + count
        return total / count

    def avg(self, tag):
        """Return the running average for ``tag``, or 0.0 if it was never updated."""
        if tag not in self.sum:
            return 0.0
        return self.sum[tag] / self.n[tag]

    def reset(self):
        """Drop all accumulated totals and weights."""
        self.sum, self.n = {}, {}

class Logger:
    """Rank-aware wrapper around a tensorboardX ``SummaryWriter``.

    Only the process with ``rank == 0`` owns a writer (``self.sw``); on every
    other rank the ``add_*`` / ``flush`` / progress-bar methods are no-ops.
    ``self.iters`` is the global step attached to everything that is logged.
    """

    def __init__(self, logdir, rank):
        if rank == 0:
            # Imported lazily so non-zero ranks do not need tensorboardX.
            from tensorboardX import SummaryWriter
            self.sw = SummaryWriter(f"{logdir}/logs")
        self.iters = 0
        self.rank = rank
        # Pending asynchronous reductions: (tag, layer, tensor, work handle).
        self.works = []
        self.logdir = logdir
        # Optional per-layer writers consulted by finish_reduce. Previously this
        # attribute was never created, so finish_reduce raised AttributeError on
        # rank 0 as soon as any reduction was pending.
        self.lw = {}

    def step(self):
        """Advance the global step by one."""
        self.iters += 1

    def flush(self):
        """Flush the writer (rank 0 only)."""
        if self.rank == 0:
            self.sw.flush()

    def add_text(self, tag, text):
        """Log ``text`` under ``tag`` at the current step (rank 0 only)."""
        if self.rank == 0:
            self.sw.add_text(tag, text, self.iters)

    def add_audios(self, tag, auds, sample_rate=22050, max_len=None, max_log=8):
        """Log up to ``max_log`` clips from ``auds`` as ``"{i}/{tag}"`` (rank 0 only).

        When ``max_len`` is truthy each clip is truncated to its first
        ``max_len * sample_rate`` entries.
        """
        if self.rank == 0:
            for i in range(min(len(auds), max_log)):
                if max_len:
                    self.sw.add_audio(f"{i}/{tag}", auds[i][:max_len * sample_rate], self.iters, sample_rate)
                else:
                    self.sw.add_audio(f"{i}/{tag}", auds[i], self.iters, sample_rate)

    def add_audio(self, tag, aud, sample_rate=22050):
        """Log a single audio clip at the current step (rank 0 only)."""
        if self.rank == 0:
            self.sw.add_audio(tag, aud, self.iters, sample_rate)

    def add_images(self, tag, img, dataformats="NHWC"):
        """Log a batch of images at the current step (rank 0 only)."""
        if self.rank == 0:
            self.sw.add_images(tag, img, self.iters, dataformats=dataformats)

    def add_image(self, tag, img):
        """Log a single image at the current step (rank 0 only)."""
        if self.rank == 0:
            self.sw.add_image(tag, img, self.iters)

    def add_scalar(self, tag, val):
        """Log a scalar at the current step (rank 0 only)."""
        if self.rank == 0:
            self.sw.add_scalar(tag, val, self.iters)

    def get_range(self, loader):
        """Return ``enumerate(loader)``, showing a progress bar on rank 0.

        The (possibly wrapped) iterable is kept in ``self.trange`` so that
        ``close_range`` and ``set_postfix`` can reach the bar later.
        """
        if self.rank == 0:
            self.trange = def_tqdm(loader)
        else:
            self.trange = loader
        return enumerate(self.trange)

    def close_range(self):
        """Close the progress bar created by ``get_range`` (rank 0 only)."""
        if self.rank == 0:
            self.trange.close()

    def set_postfix(self, *args, **kwargs):
        """Forward to the progress bar's ``set_postfix`` (rank 0 only)."""
        if self.rank == 0:
            self.trange.set_postfix(*args, **kwargs)

    # For logging summaries of various graph ops
    def add_reduce_scalar(self, tag, layer, val):
        """Every 100th step, queue an async reduce of ``norm(val) / numel(val)`` to rank 0.

        The result is written out by ``finish_reduce``.
        """
        if self.iters % 100 == 0:
            with t.no_grad():
                val = val.float().norm()/float(val.numel())
            work = dist.reduce(val, 0, async_op=True)
            self.works.append((tag, layer, val, work))

    def finish_reduce(self):
        """Wait for all queued reductions and log their world-size average on rank 0.

        A value is written through ``self.lw[layer]`` when such a per-layer
        writer has been installed; otherwise it goes to the main writer under
        the layer-qualified tag ``"{tag}/{layer}"``.
        """
        for tag, layer, val, work in self.works:
            work.wait()
            if self.rank == 0:
                val = val.item()/dist.get_world_size()
                try:
                    writer, name = self.lw[layer], tag
                except (KeyError, IndexError, TypeError):
                    # No dedicated writer for this layer: fall back to self.sw.
                    writer, name = self.sw, f"{tag}/{layer}"
                writer.add_scalar(name, val, self.iters)
        self.works = []


================================================
FILE: jukebox/utils/remote_utils.py
================================================
import sys
import subprocess

def download(remote_path, local_path, async_download=False):
    """Fetch ``remote_path`` into ``local_path`` using ``wget -O``.

    With ``async_download`` the process is started and not waited for;
    otherwise the call blocks until wget exits.
    """
    args = ['wget', '-O', local_path, remote_path]
    print("Running ", " ".join(args))
    launch = subprocess.Popen if async_download else subprocess.call
    launch(args)

# GCE
def gs_download(gs_path, local_path, async_download=False):
    """Copy ``gs_path`` to ``local_path`` with ``gsutil cp``.

    With ``async_download`` the process is started and not waited for;
    otherwise the call blocks until gsutil exits.
    """
    gsutil_options = [
        '-o', 'GSUtil:parallel_thread_count=1',
        '-o', 'GSUtil:sliced_object_download_max_components=8',
    ]
    args = ['gsutil'] + gsutil_options + ['cp', gs_path, local_path]
    launch = subprocess.Popen if async_download else subprocess.call
    launch(args)


def gs_upload(local_path, gs_path, async_upload=False):
    """Copy ``local_path`` to ``gs_path`` with ``gsutil cp -n``.

    ``local_path`` must not be a ``gs://`` URL and ``gs_path`` must be one.
    With ``async_upload`` the process is started and not waited for.
    """
    # NOTE: Download and upload use different -o flags.
    # -n (no-clobber) protects existing checkpoints from being overwritten by mistake.
    assert not local_path.startswith("gs://")
    assert gs_path.startswith("gs://")
    gsutil_options = ['-o', 'GSUtil:parallel_composite_upload_threshold=150M']
    args = ['gsutil'] + gsutil_options + ['cp', '-n', local_path, gs_path]
    launch = subprocess.Popen if async_upload else subprocess.call
    launch(args)

def ls(regex):
    """List objects matching ``regex`` via ``gsutil ls``.

    Args:
        regex: pattern handed to ``gsutil ls`` unchanged.

    Returns:
        The non-empty lines of gsutil's output, in order.

    Raises:
        subprocess.CalledProcessError: if gsutil exits with a non-zero status.
    """
    # Fall back to UTF-8 when stdout reports no encoding (e.g. a replaced stream).
    encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
    raw = subprocess.check_output(['gsutil', 'ls', regex]).decode(encoding)
    # Compare by value: `is not ''` tested object identity, which is not a
    # reliable emptiness check and triggers a SyntaxWarning on Python 3.8+.
    return [line for line in raw.split('\n') if line != '']


================================================
FILE: jukebox/utils/sample_utils.py
================================================
import torch as t

def split_batch(obj, n_samples, split_size):
    """Split ``obj`` along dim 0 into chunks of at most ``split_size``.

    * Tensor: returns the result of ``t.split``.
    * list of tensors: returns a list with one tuple per chunk, each holding
      the matching chunk of every list item.
    * ``None``: returns ``ceil(n_samples / split_size)`` ``None`` entries.

    Raises:
        TypeError: for any other input type.
    """
    n_passes = (n_samples + split_size - 1) // split_size
    if isinstance(obj, t.Tensor):
        return t.split(obj, split_size, dim=0)
    if isinstance(obj, list):
        per_item_chunks = [t.split(item, split_size, dim=0) for item in obj]
        # Transpose from per-item chunk lists to per-chunk item tuples.
        return list(zip(*per_item_chunks))
    if obj is None:
        return [None] * n_passes
    raise TypeError('Unknown input type')

# Break total_length into hops/windows of size n_ctx separated by hop_length
def get_starts(total_length, n_ctx, hop_length):
    """Return the start index of each ``n_ctx``-wide window over ``total_length``.

    Windows advance by ``hop_length``. Any window that would reach or pass the
    end is shifted back to start at ``total_length - n_ctx`` so it keeps the
    full context width.
    """
    last_start = total_length - n_ctx
    candidates = range(0, last_start + hop_length, hop_length)
    return [min(start, last_start) for start in candidates]


================================================
FILE: jukebox/utils/torch_utils.py
================================================
import gc
import torch as t

def freeze_model(model):
    """Switch ``model`` to eval mode and stop gradients for all its parameters."""
    model.eval()
    for param in model.parameters():
        param.requires_grad_(False)


def unfreeze_model(model):
    """Switch ``model`` to train mode and enable gradients for all its parameters."""
    model.train()
    for param in model.parameters():
        param.requires_grad_(True)

def zero_grad(model):
    """Drop the gradient (set it to ``None``) of every trainable parameter that has one."""
    for param in model.parameters():
        if param.grad is None or not param.requires_grad:
            continue
        param.grad = None

def empty_cache():
    """Run a Python garbage collection pass, then call ``torch.cuda.empty_cache()``."""
    # Collect first so tensors that are only kept alive by reference cycles are
    # freed before the CUDA cache is emptied.
    gc.collect()
    t.cuda.empty_cache()

def assert_shape(x, exp_shape):
    """Assert that ``x.shape`` equals ``exp_shape``; the failure message shows both."""
    actual = x.shape
    assert actual == exp_shape, f"Expected {exp_shape} got {actual}"

def count_parameters(model):
    """Return the total number of elements across parameters that require gradients."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

def count_state(model):
    """Return the total number of elements across every entry of ``model.state_dict()``."""
    total = 0
    for entry in model.state_dict().values():
        total += entry.numel()
    return total


================================================
FILE: jukebox/vqvae/__init__.py
================================================


================================================
FILE: jukebox/vqvae/bottleneck.py
================================================
import numpy as np
import torch as t
import torch.nn as nn
import torch.nn.functional as F
import jukebox.utils.dist_adapter as dist

class BottleneckBlock(nn.Module):
    """Vector-quantisation bottleneck with an EMA-updated codebook.

    The codebook ``k`` is a buffer of shape ``(k_bins, emb_width)``. It is not
    trained by gradients: ``update_k`` maintains running sums (``k_sum``) and
    running counts (``k_elem``) with decay ``mu`` and derives ``k`` from them.
    """

    def __init__(self, k_bins, emb_width, mu):
        """
        Args:
            k_bins: number of codebook entries.
            emb_width: width of each codebook vector.
            mu: decay factor of the running sums/counts used in ``update_k``.
        """
        super().__init__()
        self.k_bins = k_bins
        self.emb_width = emb_width
        self.mu = mu
        self.reset_k()
        # Minimum running count for a code to be kept rather than re-drawn.
        self.threshold = 1.0

    def reset_k(self):
        """Mark the codebook as uninitialised and register a zero ``k`` buffer on CUDA."""
        self.init = False
        self.k_sum = None
        self.k_elem = None
        self.register_buffer('k', t.zeros(self.k_bins, self.emb_width).cuda())

    def _tile(self, x):
        """Return ``x`` with at least ``k_bins`` rows.

        If ``x`` has fewer rows than ``k_bins`` it is repeated enough times to
        reach that count and Gaussian noise with std ``0.01 / sqrt(width)`` is
        added; otherwise ``x`` is returned unchanged.
        """
        d, ew = x.shape
        if d < self.k_bins:
            n_repeats = (self.k_bins + d - 1) // d
            std = 0.01 / np.sqrt(ew)
            x = x.repeat(n_repeats, 1)
            x = x + t.randn_like(x) * std
        return x

    def init_k(self, x):
        """Initialise the codebook from ``k_bins`` randomly chosen rows of ``x``."""
        mu, emb_width, k_bins = self.mu, self.emb_width, self.k_bins
        self.init = True
        # init k_w using random vectors from x
        y = self._tile(x)
        _k_rand = y[t.randperm(y.shape[0])][:k_bins]
        # Presumably broadcast from rank 0 so all processes share the same
        # initial codes -- confirm against jukebox.utils.dist_adapter.
        dist.broadcast(_k_rand, 0)
        self.k = _k_rand
        assert self.k.shape == (k_bins, emb_width)
        self.k_sum = self.k
        self.k_elem = t.ones(k_bins, device=self.k.device)

    def restore_k(self, num_tokens=None, threshold=1.0):
        """Rebuild the running statistics from the codebook currently stored in ``k``.

        Args:
            num_tokens: if given, ``k_sum`` and ``k_elem`` are scaled by
                ``num_tokens / k_bins`` (named ``expected_usage`` below).
            threshold: new value for ``self.threshold``.
        """
        mu, emb_width, k_bins = self.mu, self.emb_width, self.k_bins
        self.init = True
        assert self.k.shape == (k_bins, emb_width)
        self.k_sum = self.k.clone()
        self.k_elem = t.ones(k_bins, device=self.k.device)
        if num_tokens is not None:
            expected_usage = num_tokens / k_bins
            self.k_elem.data.mul_(expected_usage)
            self.k_sum.data.mul_(expected_usage)
        self.threshold = threshold

    def update_k(self, x, x_l):
        """Update the codebook from the batch ``x`` and its assigned codes ``x_l``.

        Runs under ``no_grad``. Per-code sums and counts of the batch are
        reduced across processes, blended into the running statistics with
        decay ``mu``, and codes whose running count falls below
        ``self.threshold`` are replaced by random rows of the batch.

        Returns:
            dict with ``entropy`` (entropy of the batch code distribution),
            ``used_curr`` (codes whose batch count reaches the threshold),
            ``usage`` (codes whose running count reaches the threshold) and
            ``dk`` (norm of the codebook change divided by the square root of
            its number of elements).
        """
        mu, emb_width, k_bins = self.mu, self.emb_width, self.k_bins
        with t.no_grad():
            # Calculate new centres
            x_l_onehot = t.zeros(k_bins, x.shape[0], device=x.device)  # k_bins, N * L
            x_l_onehot.scatter_(0, x_l.view(1, x.shape[0]), 1)

            _k_sum = t.matmul(x_l_onehot, x)  # k_bins, w
            _k_elem = x_l_onehot.sum(dim=-1)  # k_bins
            y = self._tile(x)
            _k_rand = y[t.randperm(y.shape[0])][:k_bins]

            # Collective ops: every process must issue them in this same order.
            dist.broadcast(_k_rand, 0)
            dist.all_reduce(_k_sum)
            dist.all_reduce(_k_elem)

            # Update centres
            old_k = self.k
            self.k_sum = mu * self.k_sum + (1. - mu) * _k_sum  # k_bins, w
            self.k_elem = mu * self.k_elem + (1. - mu) * _k_elem  # k_bins
            # 1.0 where the running count is high enough to keep the code, else 0.0.
            usage = (self.k_elem.view(k_bins, 1) >= self.threshold).float()
            self.k = usage * (self.k_sum.view(k_bins, emb_width) / self.k_elem.view(k_bins, 1)) \
                     + (1 - usage) * _k_rand
            _k_prob = _k_elem / t.sum(_k_elem)  # x_l_onehot.mean(dim=-1)  # prob of each bin
            entropy = -t.sum(_k_prob * t.log(_k_prob + 1e-8))  # entropy ie how diverse
            used_curr = (_k_elem >= self.threshold).sum()
            usage = t.sum(usage)
            dk = t.norm(self.k - old_k) / np.sqrt(np.prod(old_k.shape))
        return dict(entropy=entropy,
                    used_curr=used_curr,
                    usage=usage,
                    dk=dk)

    def preprocess(self, x):
        """Flatten ``x`` to rows of channel vectors and compute a pre-quantisation norm.

        The last axis is moved to second-to-last position and all leading axes
        are merged. If the resulting width is ``2 * emb_width`` the two halves
        are summed into one ``emb_width``-wide tensor.

        Returns:
            ``(x, prenorm)``: the flattened tensor and the norm of the
            mean-centred input divided by the square root of its element count
            (summed over both halves in the double-width case).
        """
        # NCT -> NTC -> [NT, C]
        x = x.permute(0, 2, 1).contiguous()
        x = x.view(-1, x.shape[-1])  # x_en = (N * L, w), k_j = (w, k_bins)

        if x.shape[-1] == self.emb_width:
            prenorm = t.norm(x - t.mean(x)) / np.sqrt(np.prod(x.shape))
        elif x.shape[-1] == 2 * self.emb_width:
            x1, x2 = x[...,:self.emb_width], x[...,self.emb_width:]
            prenorm = (t.norm(x1 - t.mean(x1)) / np.sqrt(np.prod(x1.shape))) + (t.norm(x2 - t.mean(x2)) / np.sqrt(np.prod(x2.shape)))

            # Normalise
            x = x1 + x2
        else:
            assert False, f"Expected {x.shape[-1]} to be (1 or 2) * {self.emb_width}"
        return x, prenorm

    def postprocess(self, x_l, x_d, x_shape):
        """Reshape flat codes and dequantised vectors back to batch layout.

        ``x_shape`` is ``(N, T)``; ``x_l`` becomes ``(N, T)`` and ``x_d``
        becomes ``(N, C, T)``.
        """
        # [NT, C] -> NTC -> NCT
        N, T = x_shape
        x_d = x_d.view(N, T, -1).permute(0, 2, 1).contiguous()
        x_l = x_l.view(N, T)
        return x_l, x_d

    def quantise(self, x):
        """Assign each row of ``x`` to its nearest codebook entry.

        Returns:
            ``(x_l, fit)``: index of the closest code per row, and the mean of
            the corresponding minimum squared distances.
        """
        # Calculate latent code x_l
        k_w = self.k.t()
        # Squared distance expanded as |x|^2 - 2 x.k + |k|^2.
        distance = t.sum(x ** 2, dim=-1, keepdim=True) - 2 * t.matmul(x, k_w) + t.sum(k_w ** 2, dim=0,
                                                                                            keepdim=True)  # (N * L, b)
        min_distance, x_l = t.min(distance, dim=-1)
        fit = t.mean(min_distance)
        return x_l, fit

    def dequantise(self, x_l):
        """Look up the codebook vectors for the indices ``x_l``."""
        x = F.embedding(x_l, self.k)
        return x

    def encode(self, x):
        """Map ``x`` of shape ``(N, width, T)`` to code indices of shape ``(N, T)``."""
        N, width, T = x.shape

        # Preprocess.
        x, prenorm = self.preprocess(x)

        # Quantise
        x_l, fit = self.quantise(x)

        # Postprocess.
        x_l = x_l.view(N, T)
        return x_l

    def decode(self, x_l):
        """Map code indices of shape ``(N, T)`` to vectors of shape ``(N, emb_width, T)``."""
        N, T = x_l.shape
        width = self.emb_width

        # Dequantise
        x_d = self.dequantise(x_l)

        # Postprocess
        x_d = x_d.view(N, T, width).permute(0, 2, 1).contiguous()
        return x_d

    def forward(self, x, update_k=True):
        """Quantise ``x`` and, when ``update_k`` is set, update the codebook.

        Args:
            x: tensor of shape ``(N, width, T)``.
            update_k: initialise the codebook on first use and run
                ``update_k`` on this batch.

        Returns:
            ``(x_l, x_d, commit_loss, metrics)``: code indices ``(N, T)``,
            quantised output ``(N, C, T)``, the commitment loss, and a dict
            with ``fit``, ``pn`` and any metrics returned by ``update_k``.
        """
        N, width, T = x.shape

        # Preprocess
        x, prenorm = self.preprocess(x)

        # Init k if not inited
        if update_k and not self.init:
            self.init_k(x)

        # Quantise and dequantise through bottleneck
        x_l, fit = self.quantise(x)
        x_d = self.dequantise(x_l)

        # Update embeddings
        if update_k:
            update_metrics = self.update_k(x, x_l)
        else:
            update_metrics = {}

        # Loss: mean squared distance between x and its (detached) quantisation.
        commit_loss = t.norm(x_d.detach() - x) ** 2 / np.prod(x.shape)

        # Passthrough: the value equals x_d, but the gradient flows to x only.
        x_d = x + (x_d - x).detach()

        # Postprocess
        x_l, x_d = self.postprocess(x_l, x_d, (N,T))
        return x_l, x_d, commit_loss, dict(fit=fit,
                                           pn=prenorm,
                                           **update_metrics)


class Bottleneck(nn.Module):
    """A stack of independent ``BottleneckBlock`` quantisers, one per level."""

    def __init__(self, l_bins, emb_width, mu, levels):
        super().__init__()
        self.levels = levels
        self.level_blocks = nn.ModuleList()
        for _ in range(self.levels):
            # Every level gets its own block built from the same settings.
            self.level_blocks.append(BottleneckBlock(l_bins, emb_width, mu))

    def encode(self, xs):
        """Encode each entry of ``xs`` with the block of the matching level."""
        zs = []
        for block, x in zip(self.level_blocks, xs):
            zs.append(block.encode(x))
        return zs

    def decode(self, zs, start_level=0, end_level=None):
        """Decode ``zs`` with the blocks of levels ``[start_level, end_level)``.

        ``end_level`` defaults to the number of levels.
        """
        stop = self.levels if end_level is None else end_level
        selected = self.level_blocks[start_level:stop]
        return [block.decode(z) for block, z in zip(selected, zs)]

    def forward(self, xs):
        """Quantise every level.

        Returns:
            ``(zs, xs_quantised, commit_losses, metrics)``, each a per-level
            list. ``metrics`` is only filled in training mode, and the
            codebooks are only updated in training mode.
        """
        zs, xs_quantised, commit_losses, metrics = [], [], [], []
        for level in range(self.levels):
            block = self.level_blocks[level]
            z, x_quantised, commit_loss, metric = block(xs[level], update_k=self.training)
            if not self.training:
                # Be extra paranoid and make sure the encoder weights can't
                # change from straight-through estimator
                x_quantised = x_quantised.detach()
            zs.append(z)
            xs_quantised.append(x_quantised)
            commit_losses.append(commit_loss)
            if self.training:
                metrics.append(metric)
        return zs, xs_quantised, commit_losses, metrics

class NoBottleneckBlock(nn.Module):
    """Placeholder level block used when no quantisation is applied."""

    def restore_k(self):
        """Do nothing: this block has no codebook to restore."""

class NoBottleneck(nn.Module):
    """Pass-through replacement for ``Bottleneck`` that performs no quantisation."""

    def __init__(self, levels):
        super().__init__()
        self.level_blocks = nn.ModuleList([NoBottleneckBlock() for _ in range(levels)])
        self.levels = levels

    def encode(self, xs):
        """Return ``xs`` unchanged."""
        return xs

    def decode(self, zs, start_level=0, end_level=None):
        """Return ``zs`` unchanged; the level arguments are accepted but not used."""
        return zs

    def forward(self, xs):
        """Return ``xs`` as both codes and quantised output, with zero losses and metrics.

        All zero entries share a single scalar CUDA tensor.
        """
        zero = t.zeros(()).cuda()
        commit_losses = [zero] * self.levels
        metrics = []
        for _ in range(self.levels):
            metrics.append(dict(entropy=zero, usage=zero, used_curr=zero, pn=zero, dk=zero))
        return xs, xs, commit_losses, metrics

if __name__ == '__main__':
    # Script entry point: set up distributed state via setup_dist_from_mpi and
    # build a 2-level Bottleneck (256 bins, width 64, mu 0.99) on its device.
    from jukebox.utils.dist_utils import setup_dist_from_mpi
    rank, local_rank, device = setup_dist_from_mpi(port=29600)
    bottleneck = Bottleneck(256, 64, 0.99, 2).to(device)
    # NOTE(review): Bottleneck as defined in this file has no `check` method, so
    # this call is expected to raise AttributeError -- confirm and remove or
    # replace with a real self-test.
    bottleneck.check()


================================================
FILE: jukebox/vqvae/encdec.py
================================================
import torch as t
import torch.nn as nn
from jukebox.vqvae.resnet import Resnet, Resnet1D
from jukebox.utils.torch_utils import assert_shape

class EncoderConvBlock(nn.Module):
    """``down_t`` strided-conv + ``Resnet1D`` stages followed by a width-3 projection conv.

    With ``down_t <= 0`` the block contains no layers and returns its input
    unchanged.
    """

    def __init__(self, input_emb_width, output_emb_width, down_t,
                 stride_t, width, depth, m_conv,
                 dilation_growth_rate=1, dilation_cycle=None, zero_out=False,
                 res_scale=False):
        super().__init__()
        # Kernel is twice the stride, padding half the stride.
        filter_t, pad_t = stride_t * 2, stride_t // 2
        layers = []
        if down_t > 0:
            for stage in range(down_t):
                # Only the first stage sees the input width; later ones see `width`.
                in_width = input_emb_width if stage == 0 else width
                down_conv = nn.Conv1d(in_width, width, filter_t, stride_t, pad_t)
                res_stack = Resnet1D(width, depth, m_conv, dilation_growth_rate, dilation_cycle, zero_out, res_scale)
                layers.append(nn.Sequential(down_conv, res_stack))
            layers.append(nn.Conv1d(width, output_emb_width, 3, 1, 1))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the stacked layers to ``x``."""
        return self.model(x)

class DecoderConvBock(nn.Module):
    """A width-3 input conv followed by ``down_t`` ``Resnet1D`` + transposed-conv stages.

    With ``down_t <= 0`` the block contains no layers and returns its input
    unchanged.
    """

    def __init__(self, input_emb_width, output_emb_width, down_t,
                 stride_t, width, depth, m_conv, dilation_growth_rate=1, dilation_cycle=None, zero_out=False, res_scale=False, reverse_decoder_dilation=False, checkpoint_res=False):
        super().__init__()
        layers = []
        if down_t > 0:
            # Kernel is twice the stride, padding half the stride.
            filter_t, pad_t = stride_t * 2, stride_t // 2
            layers.append(nn.Conv1d(output_emb_width, width, 3, 1, 1))
            final_stage = down_t - 1
            for stage in range(down_t):
                res_stack = Resnet1D(width, depth, m_conv, dilation_growth_rate, dilation_cycle, zero_out=zero_out, res_scale=res_scale, reverse_dilation=reverse_decoder_dilation, checkpoint_res=checkpoint_res)
                # Only the last stage maps to the input width; earlier ones keep `width`.
                out_width = input_emb_width if stage == final_stage else width
                up_conv = nn.ConvTranspose1d(width, out_width, filter_t, stride_t, pad_t)
                layers.append(nn.Sequential(res_stack, up_conv))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the stacked layers to ``x``."""
        return self.model(x)

class Encoder(nn.Module):
    """Chain of ``EncoderConvBlock`` levels; ``forward`` returns every level's output."""

    def __init__(self, input_emb_width, output_emb_width, levels, downs_t,
                 strides_t, **block_kwargs):
        super().__init__()
        self.input_emb_width = input_emb_width
        self.output_emb_width = output_emb_width
        self.levels = levels
        self.downs_t = downs_t
        self.strides_t = strides_t

        # 'reverse_decoder_dilation' is not forwarded to the encoder blocks.
        encoder_kwargs = {key: value for key, value in block_kwargs.items()
                          if key != 'reverse_decoder_dilation'}

        self.level_blocks = nn.ModuleList()
        for level, down_t, stride_t in zip(range(self.levels), downs_t, strides_t):
            # Level 0 consumes the raw input; deeper levels consume the previous output.
            in_width = input_emb_width if level == 0 else output_emb_width
            self.level_blocks.append(
                EncoderConvBlock(in_width, output_emb_width, down_t, stride_t, **encoder_kwargs)
            )

    def forward(self, x):
        """Encode ``x`` of shape ``(N, input_emb_width, T)``.

        Returns:
            A list with one tensor per level; after each level the expected
            time length is divided by ``stride_t ** down_t``.
        """
        N, T = x.shape[0], x.shape[-1]
        assert_shape(x, (N, self.input_emb_width, T))

        xs = []
        # 64, 32, ...
        for level, down_t, stride_t in zip(range(self.levels), self.downs_t, self.strides_t):
            x = self.level_blocks[level](x)
            T = T // (stride_t ** down_t)
            assert_shape(x, (N, self.output_emb_width, T))
            xs.append(x)

        return xs

class Decoder(nn.Module):
    """Chain of ``DecoderConvBock`` levels applied from the last level to the first."""

    def __init__(self, input_emb_width, output_emb_width, levels, downs_t,
                 strides_t, **block_kwargs):
        super().__init__()
        self.input_emb_width = input_emb_width
        self.output_emb_width = output_emb_width
        self.levels = levels
        self.downs_t = downs_t
        self.strides_t = strides_t

        self.level_blocks = nn.ModuleList()
        for _, down_t, stride_t in zip(range(self.levels), downs_t, strides_t):
            # Every decoder block both consumes and produces output_emb_width channels.
            self.level_blocks.append(
                DecoderConvBock(output_emb_width, output_emb_width, down_t, stride_t, **block_kwargs)
            )

        # Final projection back to the input width.
        self.out = nn.Conv1d(output_emb_width, input_emb_width, 3, 1, 1)

    def forward(self, xs, all_levels=True):
        """Decode starting from ``xs[-1]``.

        Args:
            xs: one tensor per level when ``all_levels`` is True, otherwise a
                single-element list.
            all_levels: when True, ``xs[level - 1]`` is added to the running
                output after every level except level 0.

        Returns:
            The output of the final projection conv.
        """
        expected_inputs = self.levels if all_levels else 1
        assert len(xs) == expected_inputs

        x = xs[-1]
        N, T = x.shape[0], x.shape[-1]
        assert_shape(x, (N, self.output_emb_width, T))

        # 32, 64 ...
        schedule = list(zip(range(self.levels), self.downs_t, self.strides_t))
        for level, down_t, stride_t in reversed(schedule):
            x = self.level_blocks[level](x)
            T = T * (stride_t ** down_t)
            assert_shape(x, (N, self.output_emb_width, T))
            if all_levels and level != 0:
                x = x + xs[level - 1]

        return self.out(x)


================================================
FILE: jukebox/vqvae/resnet.py
================================================
import math
import torch.nn as nn
import jukebox.utils.dist_adapter as dist
from jukebox.utils.checkpoint import checkpoint

class ResConvBlock(nn.Module):
    """2-D residual block: ReLU, 3x3 conv, ReLU, 1x1 conv, added to the input."""

    def __init__(self, n_in, n_state):
        super().__init__()
        layers = [
            nn.ReLU(),
            nn.Conv2d(n_in, n_state, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(n_state, n_in, kernel_size=1, stride=1, padding=0),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return ``x`` plus the output of the residual branch."""
        residual = self.model(x)
        return x + residual

class Resnet(nn.Module):
    """Sequential stack of ``n_depth`` ``ResConvBlock`` modules.

    Each block uses ``int(m_conv * n_in)`` hidden channels.
    """

    def __init__(self, n_in, n_depth, m_conv=1.0):
        super().__init__()
        blocks = []
        for _ in range(n_depth):
            blocks.append(ResConvBlock(n_in, int(m_conv * n_in)))
        self.model = nn.Sequential(*blocks)

    def forward(self, x):
        """Apply the stacked blocks to ``x``."""
        return self.model(x)

class ResConv1DBlock(nn.Module):
    """1-D residual block: ReLU, dilated width-3 conv, ReLU, width-1 conv.

    The branch output is multiplied by ``res_scale`` before being added to the
    input. With ``zero_out`` the last conv starts with zero weight and bias.
    """

    def __init__(self, n_in, n_state, dilation=1, zero_out=False, res_scale=1.0):
        super().__init__()
        # Padding equal to the dilation is used for the width-3 conv.
        dilated_conv = nn.Conv1d(n_in, n_state, kernel_size=3, stride=1,
                                 padding=dilation, dilation=dilation)
        projection = nn.Conv1d(n_state, n_in, kernel_size=1, stride=1, padding=0)
        self.model = nn.Sequential(nn.ReLU(), dilated_conv, nn.ReLU(), projection)
        if zero_out:
            last = self.model[-1]
            nn.init.zeros_(last.weight)
            nn.init.zeros_(last.bias)
        self.res_scale = res_scale

    def forward(self, x):
        """Return ``x`` plus the scaled output of the residual branch."""
        branch = self.model(x)
        return x + self.res_scale * branch

class Resnet1D(nn.Module):
    """Stack of ``ResConv1DBlock`` layers with per-layer dilation.

    Layer ``d`` uses dilation ``dilation_growth_rate ** d`` (with ``d`` taken
    modulo ``dilation_cycle`` when that is set).

    Args:
        n_in: channel count of the input and of every block's output.
        n_depth: number of residual blocks.
        m_conv: multiplier on ``n_in`` giving each block's inner channel count.
        dilation_growth_rate: base of the per-layer dilation.
        dilation_cycle: if not None, the dilation exponent wraps around at this value.
        zero_out: forwarded to every block.
        res_scale: when truthy, every block scales its branch by ``1 / sqrt(n_depth)``.
        reverse_dilation: when true, the blocks are stored in reverse order.
        checkpoint_res: when equal to 1, blocks are kept in a ``ModuleList`` and
            each one is run through ``checkpoint`` in ``forward``.
    """

    def __init__(self, n_in, n_depth, m_conv=1.0, dilation_growth_rate=1, dilation_cycle=None, zero_out=False, res_scale=False, reverse_dilation=False, checkpoint_res=False):
        super().__init__()
        layers = []
        for depth in range(n_depth):
            exponent = depth if dilation_cycle is None else depth % dilation_cycle
            scale = 1.0 / math.sqrt(n_depth) if res_scale else 1.0
            layers.append(ResConv1DBlock(n_in, int(m_conv * n_in),
                                         dilation=dilation_growth_rate ** exponent,
                                         zero_out=zero_out,
                                         res_scale=scale))
        if reverse_dilation:
            layers.reverse()
        self.checkpoint_res = checkpoint_res
        if self.checkpoint_res == 1:
            # Only rank 0 announces the mode.
            if dist.get_rank() == 0:
                print("Checkpointing convs")
            self.blocks = nn.ModuleList(layers)
        else:
            self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Apply every block in order, via ``checkpoint`` when ``checkpoint_res == 1``."""
        if self.checkpoint_res != 1:
            return self.model(x)
        for layer in self.blocks:
            x = checkpoint(layer, (x, ), layer.parameters(), True)
        return x


================================================
FILE: jukebox/vqvae/vqvae.py
================================================
import numpy as np
import torch as t
import torch.nn as nn

from jukebox.vqvae.encdec import Encoder, Decoder, assert_shape
from jukebox.vqvae.bottleneck import NoBottleneck, Bottleneck
from jukebox.utils.logger import average_metrics
from jukebox.utils.audio_utils import spectral_convergence, spectral_loss, multispectral_loss, audio_postprocess

def dont_update(params):
    """Freeze every parameter in ``params`` by setting ``requires_grad`` to False."""
    for frozen in params:
        frozen.requires_grad = False

def update(params):
    """Unfreeze every parameter in ``params`` by setting ``requires_grad`` to True."""
    for trainable in params:
        trainable.requires_grad = True

def calculate_strides(strides, downs):
    """Return ``stride ** down`` for each ``(stride, down)`` pair, in order.

    Pairs are formed positionally; extra entries in the longer input are ignored.
    """
    return list(map(pow, strides, downs))

def _loss_fn(loss_fn, x_target, x_pred, hps):
    """Compute a reconstruction loss between ``x_pred`` and ``x_target``.

    Args:
        loss_fn: one of ``'l1'``, ``'l2'``, ``'linf'`` or ``'lmix'``.
        x_target: target tensor.
        x_pred: predicted tensor; subtracted element-wise from ``x_target``.
        hps: hyperparameters. Reads ``hps.bandwidth['l1']`` / ``['l2']`` as
            normalisers, ``hps.linf_k`` for ``'linf'``, and ``hps.lmix_l1``,
            ``hps.lmix_l2``, ``hps.lmix_linf`` as weights for ``'lmix'``.

    Returns:
        The normalised loss. ``'lmix'`` returns the float ``0.0`` when all
        three mixing weights are falsy.

    Raises:
        AssertionError: if ``loss_fn`` is not one of the supported names.
    """
    if loss_fn == 'l1':
        return t.mean(t.abs(x_pred - x_target)) / hps.bandwidth['l1']
    elif loss_fn == 'l2':
        return t.mean((x_pred - x_target) ** 2) / hps.bandwidth['l2']
    elif loss_fn == 'linf':
        # Mean of the linf_k largest squared errors of each example; squared
        # errors are normalised by the l2 bandwidth.
        residual = ((x_pred - x_target) ** 2).reshape(x_target.shape[0], -1)
        values, _ = t.topk(residual, hps.linf_k, dim=1)
        return t.mean(values) / hps.bandwidth['l2']
    elif loss_fn == 'lmix':
        # Weighted sum of the individual losses; zero-weight terms are skipped.
        loss = 0.0
        if hps.lmix_l1:
            loss += hps.lmix_l1 * _loss_fn('l1', x_target, x_pred, hps)
        if hps.lmix_l2:
            loss += hps.lmix_l2 * _loss_fn('l2', x_target, x_pred, hps)
        if hps.lmix_linf:
            loss += hps.lmix_linf * _loss_fn('linf', x_target, x_pred, hps)
        return loss
    else:
        # Raised explicitly instead of `assert False`: assert statements are
        # removed under `python -O`, which would make this branch return None.
        # The exception type is kept so existing callers see the same error.
        raise AssertionError(f"Unknown loss_fn {loss_fn}")

class VQVAE(nn.Module):
    """Multi-level VQ-VAE: one encoder/decoder pair per level around one bottleneck.

    Level ``l`` is built from the prefixes ``downs_t[:l + 1]`` and
    ``strides_t[:l + 1]``; its total hop length is the cumulative product of
    ``strides_t[i] ** downs_t[i]`` for ``i <= l`` (stored in ``hop_lengths``).

    Args:
        input_shape: shape of one example; the first entry is stored as
            ``sample_length`` and the last entry is used as the channel count.
        levels: number of encoder/decoder levels.
        downs_t: per-level downsampling counts.
        strides_t: per-level strides.
        emb_width: embedding width passed to encoders, decoders and ``Bottleneck``.
        l_bins: number of bins passed to ``Bottleneck``; also the exclusive
            upper bound of the random codes drawn by ``sample``.
        mu: passed through to ``Bottleneck``.
        commit: weight of the commit loss in the total loss.
        spectral: weight of the spectral loss in the total loss.
        multispectral: weight of the multispectral loss in the total loss.
        multipliers: optional per-level factors applied to the ``width`` and
            ``depth`` block kwargs; defaults to 1 for every level.
        use_bottleneck: use ``Bottleneck`` when true, ``NoBottleneck`` otherwise.
        **block_kwargs: forwarded to every ``Encoder``/``Decoder``; must
            contain ``width`` and ``depth``.
    """

    def __init__(self, input_shape, levels, downs_t, strides_t,
                 emb_width, l_bins, mu, commit, spectral, multispectral,
                 multipliers=None, use_bottleneck=True, **block_kwargs):
        super().__init__()

        self.sample_length = input_shape[0]
        x_shape, x_channels = input_shape[:-1], input_shape[-1]
        self.x_shape = x_shape

        self.downsamples = calculate_strides(strides_t, downs_t)
        self.hop_lengths = np.cumprod(self.downsamples)
        # Shape (without batch dim) of the code sequence produced at each level.
        self.z_shapes = [(x_shape[0] // self.hop_lengths[level],) for level in range(levels)]
        self.levels = levels

        if multipliers is None:
            self.multipliers = [1] * levels
        else:
            assert len(multipliers) == levels, "Invalid number of multipliers"
            self.multipliers = multipliers

        def _block_kwargs(level):
            # Copy so the per-level scaling never leaks into the shared kwargs.
            this_block_kwargs = dict(block_kwargs)
            this_block_kwargs["width"] *= self.multipliers[level]
            this_block_kwargs["depth"] *= self.multipliers[level]
            return this_block_kwargs

        self.encoders = nn.ModuleList()
        self.decoders = nn.ModuleList()
        for level in range(levels):
            # Encoder first, then decoder, per level (keeps parameter creation order).
            self.encoders.append(Encoder(x_channels, emb_width, level + 1,
                                         downs_t[:level+1], strides_t[:level+1], **_block_kwargs(level)))
            self.decoders.append(Decoder(x_channels, emb_width, level + 1,
                                         downs_t[:level+1], strides_t[:level+1], **_block_kwargs(level)))

        if use_bottleneck:
            self.bottleneck = Bottleneck(l_bins, emb_width, mu, levels)
        else:
            self.bottleneck = NoBottleneck(levels)

        self.downs_t = downs_t
        self.strides_t = strides_t
        self.l_bins = l_bins
        self.commit = commit
        self.spectral = spectral
        self.multispectral = multispectral

    def preprocess(self, x):
        """Permute a 3-D input from NTC to NCT layout and cast it to float."""
        # x: NTC [-1,1] -> NCT [-1,1]
        assert len(x.shape) == 3
        x = x.permute(0,2,1).float()
        return x

    def postprocess(self, x):
        """Permute a tensor from NCT back to NTC layout."""
        # x: NTC [-1,1] <- NCT [-1,1]
        x = x.permute(0,2,1)
        return x

    def _decode(self, zs, start_level=0, end_level=None):
        """Decode codes for levels ``[start_level, end_level)`` into NTC output.

        Only the codes of ``start_level`` are passed to a decoder; the other
        entries of ``zs`` are just run through the bottleneck's ``decode``.
        """
        # Decode
        if end_level is None:
            end_level = self.levels
        assert len(zs) == end_level - start_level
        xs_quantised = self.bottleneck.decode(zs, start_level=start_level, end_level=end_level)
        assert len(xs_quantised) == end_level - start_level

        # Use only lowest level
        decoder, x_quantised = self.decoders[start_level], xs_quantised[0:1]
        x_out = decoder(x_quantised, all_levels=False)
        x_out = self.postprocess(x_out)
        return x_out

    def decode(self, zs, start_level=0, end_level=None, bs_chunks=1):
        """Decode ``zs`` in up to ``bs_chunks`` batch chunks and concatenate the results.

        Args:
            zs: list of code tensors, one per level in ``[start_level, end_level)``.
            start_level: first level contained in ``zs``.
            end_level: one past the last level in ``zs``; defaults to ``self.levels``.
            bs_chunks: requested number of chunks along the batch dimension.

        Returns:
            The decoded outputs of all chunks concatenated along dim 0.
        """
        z_chunks = [t.chunk(z, bs_chunks, dim=0) for z in zs]
        x_outs = []
        # t.chunk may return fewer than bs_chunks pieces (e.g. batch 5 split
        # into 4), so iterate over the chunks that exist rather than indexing
        # range(bs_chunks), which raised IndexError in that case.
        for zs_i in zip(*z_chunks):
            x_out = self._decode(list(zs_i), start_level=start_level, end_level=end_level)
            x_outs.append(x_out)
        return t.cat(x_outs, dim=0)

    def _encode(self, x, start_level=0, end_level=None):
        """Encode ``x`` with every level and return codes for ``[start_level, end_level)``."""
        # Encode
        if end_level is None:
            end_level = self.levels
        x_in = self.preprocess(x)
        xs = []
        for level in range(self.levels):
            encoder = self.encoders[level]
            x_out = encoder(x_in)
            # Keep only the last entry of each encoder's output.
            xs.append(x_out[-1])
        zs = self.bottleneck.encode(xs)
        return zs[start_level:end_level]

    def encode(self, x, start_level=0, end_level=None, bs_chunks=1):
        """Encode ``x`` in up to ``bs_chunks`` batch chunks.

        Returns:
            A list with one code tensor per selected level, each being the
            per-chunk codes concatenated along dim 0.
        """
        x_chunks = t.chunk(x, bs_chunks, dim=0)
        zs_list = []
        for x_i in x_chunks:
            zs_i = self._encode(x_i, start_level=start_level, end_level=end_level)
            zs_list.append(zs_i)
        # Transpose chunk-major -> level-major before concatenating.
        zs = [t.cat(zs_level_list, dim=0) for zs_level_list in zip(*zs_list)]
        return zs

    def sample(self, n_samples):
        """Decode ``n_samples`` sequences of uniformly random codes for every level."""
        # Draw the codes on the device the model lives on instead of a
        # hard-coded 'cuda', so this also works on CPU or a non-default GPU.
        # 'cuda' stays as the fallback for a model without parameters.
        first_param = next(self.parameters(), None)
        device = first_param.device if first_param is not None else 'cuda'
        zs = [t.randint(0, self.l_bins, size=(n_samples, *z_shape), device=device) for z_shape in self.z_shapes]
        return self.decode(zs)

    def forward(self, x, hps, loss_fn='l1'):
        """Encode, quantise and decode ``x`` at every level and compute the training loss.

        Args:
            x: input batch in NTC layout (see ``preprocess``).
            hps: hyperparameters forwarded to the loss and audio helpers.
            loss_fn: name of the reconstruction loss, see ``_loss_fn``.

        Returns:
            ``(x_out, loss, metrics)``: ``x_out`` is the post-processed
            reconstruction of level 0 (the last level visited by the loss
            loop), ``loss`` the weighted total loss, and ``metrics`` a dict of
            detached values.
        """
        metrics = {}

        # Encode/Decode
        x_in = self.preprocess(x)
        xs = []
        for level in range(self.levels):
            encoder = self.encoders[level]
            x_out = encoder(x_in)
            xs.append(x_out[-1])

        zs, xs_quantised, commit_losses, quantiser_metrics = self.bottleneck(xs)
        x_outs = []
        for level in range(self.levels):
            decoder = self.decoders[level]
            # Each level is decoded on its own from its own quantised latent.
            x_out = decoder(xs_quantised[level:level+1], all_levels=False)
            assert_shape(x_out, x_in.shape)
            x_outs.append(x_out)

        # Loss
        def _spectral_loss(x_target, x_out, hps):
            if hps.use_nonrelative_specloss:
                sl = spectral_loss(x_target, x_out, hps) / hps.bandwidth['spec']
            else:
                sl = spectral_convergence(x_target, x_out, hps)
            sl = t.mean(sl)
            return sl

        def _multispectral_loss(x_target, x_out, hps):
            sl = multispectral_loss(x_target, x_out, hps) / hps.bandwidth['spec']
            sl = t.mean(sl)
            return sl

        recons_loss = t.zeros(()).to(x.device)
        spec_loss = t.zeros(()).to(x.device)
        multispec_loss = t.zeros(()).to(x.device)
        x_target = audio_postprocess(x.float(), hps)

        # Accumulate per-level losses; iterating in reverse leaves x_out bound
        # to the level-0 reconstruction once the loop finishes.
        for level in reversed(range(self.levels)):
            x_out = self.postprocess(x_outs[level])
            x_out = audio_postprocess(x_out, hps)
            this_recons_loss = _loss_fn(loss_fn, x_target, x_out, hps)
            this_spec_loss = _spectral_loss(x_target, x_out, hps)
            this_multispec_loss = _multispectral_loss(x_target, x_out, hps)
            metrics[f'recons_loss_l{level + 1}'] = this_recons_loss
            metrics[f'spectral_loss_l{level + 1}'] = this_spec_loss
            metrics[f'multispectral_loss_l{level + 1}'] = this_multispec_loss
            recons_loss += this_recons_loss
            spec_loss += this_spec_loss
            multispec_loss += this_multispec_loss

        commit_loss = sum(commit_losses)
        loss = recons_loss + self.spectral * spec_loss + self.multispectral * multispec_loss + self.commit * commit_loss

        # Monitoring-only metrics, computed on the level-0 reconstruction.
        with t.no_grad():
            sc = t.mean(spectral_convergence(x_target, x_out, hps))
            l2_loss = _loss_fn("l2", x_target, x_out, hps)
            l1_loss = _loss_fn("l1", x_target, x_out, hps)
            linf_loss = _loss_fn("linf", x_target, x_out, hps)

        quantiser_metrics = average_metrics(quantiser_metrics)

        metrics.update(dict(
            recons_loss=recons_loss,
            spectral_loss=spec_loss,
            multispectral_loss=multispec_loss,
            spectral_convergence=sc,
            l2_loss=l2_loss,
            l1_loss=l1_loss,
            linf_loss=linf_loss,
            commit_loss=commit_loss,
            **quantiser_metrics))

        # Metrics are for logging only; drop their autograd history.
        for key, val in metrics.items():
            metrics[key] = val.detach()

        return x_out, loss, metrics


================================================
FILE: requirements.txt
================================================
fire==0.1.3
tqdm==4.45.0
soundfile==0.10.3.post1
unidecode==1.1.1
numba==0.48.0
librosa==0.7.2
mpi4py>=3.0.0

================================================
FILE: setup.py
================================================
import os

import pkg_resources
from setuptools import setup, find_packages

# Read the dependency list from the requirements.txt that sits next to this
# file. The context manager closes the file once the requirements have been
# parsed (parse_requirements is lazy, so it is consumed inside the block).
requirements_path = os.path.join(os.path.dirname(__file__), "requirements.txt")
with open(requirements_path) as requirements_file:
    install_requires = [
        str(r)
        for r in pkg_resources.parse_requirements(requirements_file)
    ]

setup(
    name="jukebox",
    py_modules=["jukebox"],
    version="1.0",
    description="",
    author="OpenAI",
    packages=find_packages(),
    install_requires=install_requires,
    include_package_data=True
)


================================================
FILE: tensorboardX/.codecov.yml
================================================
coverage:
  status:
    project:                   # measuring the overall project coverage
      default:                 # context, you can create multiple ones with custom titles
        enabled: yes 
    patch:
      default:
        enabled: no


================================================
FILE: tensorboardX/.flake8
================================================
[flake8]
max-line-length = 120
ignore = E305,E402,E721,E741,F401,F403,F405,F821,F841,F999
exclude = tensorboardX/proto

================================================
FILE: tensorboardX/.github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Create bug report
title: ''
labels: ''
assignees: ''

---

**Describe the bug**
A clear and concise description of what the bug is.

**Minimal runnable code to reproduce the behavior**
```
from tensorboardX import SummaryWriter
...
```

**Expected behavior**
A clear and concise description of what you expected to happen.

**Screenshots**
If applicable, add screenshots to help explain your problem.

**Environment**
What is the result of 
`pip list|grep -E "torch|proto|tensor"`
If the version is too old, please try to update first.


**Python environment**
Which version of Python are you using? Did you use Anaconda or Virtualenv?

**Additional context**
Add any other context about the problem here.


================================================
FILE: tensorboardX/.github/ISSUE_TEMPLATE/feature-requests-or-general-questions.md
================================================
---
name: Feature requests or General questions
about: Feature requests or general questions
title: ''
labels: ''
assignees: ''

---


================================================
FILE: tensorboardX/.gitignore
================================================
proto_src/
protoc-*.zip
protoc/
__pycache__
docs/_*
build
dist
*.egg-info
runs/*
*.pyc


================================================
FILE: tensorboardX/.travis.yml
================================================
dist: xenial
language: python
python:
  # We don't actually use the Travis Python, but this keeps it organized.
  - "2.7"
  - "3.6"

env:
  - PYTORCH_VER="torch"
  - PYTORCH_VER="torch_nightly -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html"

matrix:
  allow_failures:
    - env: PYTORCH_VER="torch_nightly -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html"

install:
  - export MPLBACKEND=Agg
  - export CODECOV_TOKEN="26239910-fe4e-463d-aa3d-e662e9bf39ef"

  - sudo apt-get update
  # We do this conditionally because it saves us some downloading if the
  # version is the same.
  - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
      wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh;
    else
      wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
    fi
  - bash miniconda.sh -b -p $HOME/miniconda
  - export PATH="$HOME/miniconda/bin:$PATH"
  - export BOTO_CONFIG=/dev/null  # https://github.com/travis-ci/travis-ci/issues/7940
  - export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
  - hash -r
  - conda config --set always_yes yes --set changeps1 no
  - conda update -q conda
  # Useful for debugging any issues with conda
  - conda info -a

  # Replace dep1 dep2 ... with your dependencies
  - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION
  - source activate test-environment
  - which python
  - pip install future
  - pip install chainer -q
  - pip install torchvision==0.2.1 -q
  - pip uninstall torch -y
  - pip install $PYTORCH_VER
  - pip install moviepy==0.2.3.2 -q
  - pip install matplotlib -q
  - pip install requests -q
  - pip install codecov
  - pip install onnx
  - pip install boto3
  - pip install moto
  - pip install visdom
  - pip install tb-nightly
  - pip install crc32c
  - pip install protobuf==3.8.0
  - conda install ffmpeg
  - conda list
  - python -c "import imageio; imageio.plugins.ffmpeg.download()"
  - pip install --upgrade pytest-cov flake8
  - python setup.py install

script:
  - visdom &
  - sleep 5
  - python -c "import visdom; v = visdom.Visdom()"
  - py.test --cov=tensorboardX tests/
  - python examples/demo.py
  - python examples/demo_graph.py
  - python examples/demo_embedding.py
  - python examples/demo_custom_scalars.py
  - python examples/demo_multiple_embedding.py
  - python examples/demo_purge.py
  - python examples/demo_matplotlib.py
  - pip uninstall -y tensorboardX
  - pip install tensorboardX
  - pytest

after_success:
  - codecov


================================================
FILE: tensorboardX/HISTORY.rst
================================================
History
=======
1.8 (2019-07-05)
-----------------
* Draw label text on image with bounding box provided.
* crc32c speed up (optional by installing crc32c manually)
* Rewrite add_graph. onnx backend is replaced by JIT to support more advanced structure.
* Now you can add_mesh() to visualize colorful point cloud or meshes.

1.7 (2019-05-19)
-----------------
* Able to write to S3
* Fixed raw histogram issue that nothing is shown in TensorBoard
* Users can use various image/video dimension permutation by passing 'dataformats' parameter.
* You can bypass the writer by passing write_to_disk=True to SummaryWriter


1.6 (2019-01-02)
-----------------
* Many graph-related bugs are fixed in this version.
* New function: add_images(). This function accepts a 4D image tensor. See documentation.
* Make add_image_with_boxes() usable.
* API change: add_video now accepts BxTxCxHxW instead of BxCxTxHxW tensor.

1.5 (2018-12-10)
-----------------
* Add API for Custom scalar
* Add support for logging directly to S3
* Add support for Caffe2 graph
* Pytorch 1.0.0 JIT graph support (alpha-release)

1.4 (2018-08-09)
-----------------
* Made add_text compatible with tensorboard>1.6
* Fix the issue of strange histogram if default binning method is used
* Supports passing matplotlib figures to add_image()
* Resolve namespace conflict with TF tensorboard
* add_image_boxes function
* Supports custom timestamp for event

1.2 (2018-04-21)
-----------------
* Supports tensorshape information in graph visualization. Drop support for 0.3.1
* Adds add_video function

1.1 (2018-02-21)
-----------------
* Supports pytorch 0.3.1 (hacky)

1.0 (2018-01-18)
-----------------
* Supports graph (the pretty one)

0.9 (2017-11-11)
-----------------
* Supports markdown for add_text function
* It's ready to log precision recall curve (needs tensorboard>=0.4)
* Adds context manager for the SummaryWriter class

0.8 (2017-09-25)
-----------------
* Package name renamed to tensorboardX to fix namespace conflict with tensorflow's tensorboard
* Supports multi-scalars and JSON export
* Multiple Embeddings in One Experiment 
* Supports Chainer and mxnet

0.7 (2017-08-22)
-----------------
* remove tensorflow dependency for embedding function
* fixed incorrect image<->label pairing in embedding function (#12)
* unifies API call and adds docstring. Documentation is available at: http://tensorboard-pytorch.readthedocs.io/

0.6.5 (2017-07-30)
------------------
* add travis test (py2.7, py3.6)
* add support for python2 (in PyPI)

0.6 (2017-07-18)
-----------------
* supports embedding

0.5 (2017-07-18)
-----------------
* supports graph summary
* fixed np.histogram issue

0.4 (2017-07-12)
-----------------
* supports text summary

0.3 (2017-07-03)
-----------------
* supports audio summary

0.2 (2017-06-24)
-----------------
* simplifies add_image API
* speed up add_histogram API by 35x


0.1 (2017-06-13)
------------------
* First commit. Reference:

https://github.com/TeamHG-Memex/tensorboard_logger
https://github.com/dmlc/tensorboard


================================================
FILE: tensorboardX/LICENSE
================================================
MIT License

Copyright (c) 2017 Tzu-Wei Huang

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: tensorboardX/MANIFEST.in
================================================
include HISTORY.rst
include LICENSE
include compile.sh
recursive-include tensorboardX/proto *
recursive-exclude test *
recursive-exclude examples *
recursive-include tensorboardX/beholder *

================================================
FILE: tensorboardX/README.md
================================================
# tensorboardX

[![Build Status](https://travis-ci.org/lanpa/tensorboardX.svg?branch=master)](https://travis-ci.org/lanpa/tensorboardX)
[![PyPI version](https://badge.fury.io/py/tensorboardX.svg)](https://badge.fury.io/py/tensorboardX)
[![Downloads](https://img.shields.io/badge/pip--downloads-5K+-brightgreen.svg)](https://bigquery.cloud.google.com/savedquery/966219917372:edb59a0d70c54eb687ab2a9417a778ee)
[![Documentation Status](https://readthedocs.org/projects/tensorboardx/badge/?version=latest)](https://tensorboardx.readthedocs.io/en/latest/?badge=latest)
[![Documentation Status](https://codecov.io/gh/lanpa/tensorboardX/branch/master/graph/badge.svg)](https://codecov.io/gh/lanpa/tensorboardX/)

Write TensorBoard events with a simple function call.

* Support `scalar`, `image`, `figure`, `histogram`, `audio`, `text`, `graph`, `onnx_graph`, `embedding`, `pr_curve`, `mesh`, `hyper-parameters`
  and `video` summaries.

* requirement for `demo_graph.py` is tensorboardX>=1.6 and pytorch>=1.1

* [FAQ](https://github.com/lanpa/tensorboardX/wiki)

## Install

Tested on anaconda2 / anaconda3, with PyTorch 1.1.0 / torchvision 0.3 / tensorboard 1.13.0

`pip install tensorboardX`

or build from source:

`git clone https://github.com/lanpa/tensorboardX && cd tensorboardX && python setup.py install`

You can optionally install [`crc32c`](https://github.com/ICRAR/crc32c) to speed up saving a large amount of data.


## Example

* Run the demo script: `python examples/demo.py`
* Use TensorBoard with `tensorboard --logdir runs`  (requires TensorFlow to be installed)

```python
# demo.py

import torch
import torchvision.utils as vutils
import numpy as np
import torchvision.models as models
from torchvision import datasets
from tensorboardX import SummaryWriter

resnet18 = models.resnet18(False)
writer = SummaryWriter()
sample_rate = 44100
freqs = [262, 294, 330, 349, 392, 440, 440, 440, 440, 440, 440]

for n_iter in range(100):

    dummy_s1 = torch.rand(1)
    dummy_s2 = torch.rand(1)
    # data grouping by `slash`
    writer.add_scalar('data/scalar1', dummy_s1[0], n_iter)
    writer.add_scalar('data/scalar2', dummy_s2[0], n_iter)

    writer.add_scalars('data/scalar_group', {'xsinx': n_iter * np.sin(n_iter),
                                             'xcosx': n_iter * np.cos(n_iter),
                                             'arctanx': np.arctan(n_iter)}, n_iter)

    dummy_img = torch.rand(32, 3, 64, 64)  # output from network
    if n_iter % 10 == 0:
        x = vutils.make_grid(dummy_img, normalize=True, scale_each=True)
        writer.add_image('Image', x, n_iter)

        dummy_audio = torch.zeros(sample_rate * 2)
        for i in range(x.size(0)):
            # amplitude of sound should be in [-1, 1]
            dummy_audio[i] = np.cos(freqs[n_iter // 10] * np.pi * float(i) / float(sample_rate))
        writer.add_audio('myAudio', dummy_audio, n_iter, sample_rate=sample_rate)

        writer.add_text('Text', 'text logged at step:' + str(n_iter), n_iter)

        for name, param in resnet18.named_parameters():
            writer.add_histogram(name, param.clone().cpu().data.numpy(), n_iter)

        # needs tensorboard 0.4RC or later
        writer.add_pr_curve('xoxo', np.random.randint(2, size=100), np.random.rand(100), n_iter)

dataset = datasets.MNIST('mnist', train=False, download=True)
images = dataset.test_data[:100].float()
label = dataset.test_labels[:100]

features = images.view(100, 784)
writer.add_embedding(features, metadata=label, label_img=images.unsqueeze(1))

# export scalar data to JSON for external processing
writer.export_scalars_to_json("./all_scalars.json")
writer.close()
```

## Screenshots

<img src="screenshots/Demo.gif">

## Tweaks

To add more ticks for the slider (show more image history), check https://github.com/lanpa/tensorboardX/issues/44 or 
https://github.com/tensorflow/tensorboard/pull/1138

## Reference

* [TeamHG-Memex/tensorboard_logger](https://github.com/TeamHG-Memex/tensorboard_logger)
* [dmlc/tensorboard](https://github.com/dmlc/tensorboard)


================================================
FILE: tensorboardX/compile.sh
================================================
#!/bin/bash

# Regenerates the Python protobuf modules (tensorboardX/proto/*pb2*.py) with
# protoc. Uses protoc/bin/protoc when present, otherwise the protoc on PATH.
# If no protoc is found, or it does not report DESIRED_PROTO_VERSION, that
# release is downloaded with wget (when available) and unpacked into ./protoc.

# Exit on error
# set -e

DESIRED_PROTO_VERSION="3.6.1"

# Call protoc directly; if its version is not the desired one, download the desired version.


if [ -f "protoc/bin/protoc" ]; then
  PROTOC_BIN="protoc/bin/protoc"
else
  PROTOC_BIN=`which protoc`
fi

echo "using" $PROTOC_BIN

# Only query the version when a protoc binary was found: with an empty
# PROTOC_BIN the command line would collapse to executing "--version" itself.
CURRENT_PROTOC_VER=""
if [[ -n "${PROTOC_BIN}" ]]; then
  CURRENT_PROTOC_VER=`"${PROTOC_BIN}" --version`
fi

if [[ -z "${PROTOC_BIN}" || "$CURRENT_PROTOC_VER" != "libprotoc ${DESIRED_PROTO_VERSION}" ]]; then
  # Download and use the desired version of protoc.
  if [ "$(uname)" == "Darwin" ]; then
    PROTOC_ZIP="protoc-${DESIRED_PROTO_VERSION}-osx-x86_64.zip"
  else
    PROTOC_ZIP="protoc-${DESIRED_PROTO_VERSION}-linux-x86_64.zip"
  fi
  WGET_BIN=`which wget`
  if [[ -n "${WGET_BIN}" ]]; then
    "${WGET_BIN}" "https://github.com/protocolbuffers/protobuf/releases/download/v${DESIRED_PROTO_VERSION}/${PROTOC_ZIP}"
    # Replace the local protoc only when the archive is actually there, so a
    # failed download neither wipes an existing protoc/ directory nor leads to
    # the precompiled *pb2*.py files being deleted below with nothing to
    # regenerate them.
    if [ -f "${PROTOC_ZIP}" ]; then
      rm -rf protoc
      python -c "import zipfile; zipfile.ZipFile('${PROTOC_ZIP}','r').extractall('protoc')"
      PROTOC_BIN=protoc/bin/protoc
      chmod +x "${PROTOC_BIN}"
    else
      echo "could not download ${PROTOC_ZIP}, keeping the current protoc setup."
    fi
  fi
fi

# Regenerate
if [[ -n "${PROTOC_BIN}" ]]; then
  # Delete all existing Python protobuf (*_pb2.py) output
  rm -rf tensorboardX/proto/*pb2*.py
  "${PROTOC_BIN}" tensorboardX/proto/*.proto --python_out=.

  echo "Done generating tensorboardX/proto/*pb2*.py"
else
  echo "protoc not installed so can't regenerate tensorboardX/proto/*pb2*.py, using precompiled version."
fi


================================================
FILE: tensorboardX/docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#
# Every target is forwarded to sphinx-build in "make mode" (-M), using the
# variables defined below.

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
SPHINXPROJ    = tensorboardX
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# "help" and "Makefile" are declared phony so they are never treated as files
# to build.
.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

================================================
FILE: tensorboardX/docs/conf.py
================================================
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# tensorboardX documentation build configuration file, created by
# sphinx-quickstart on Wed Aug  9 01:38:01 2017.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
# sys.path.insert(0, os.path.abspath('.'))
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
#import tensorboard #uncomment to shadow pip installation
# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ['sphinx.ext.autodoc',
    'sphinx.ext.mathjax',
    'sphinx.ext.intersphinx',
    'sphinx.ext.napoleon',
    'sphinx.ext.viewcode',
    'sphinx.ext.githubpages']

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = 'tensorboardX'
copyright = '2017, tensorboardX Contributors'
author = 'tensorboardX Contributors'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = ''
# The full version, including alpha/beta/rc tags.
release = ''

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
# Sphinx 5+ rejects ``None`` here (it warns and falls back to 'en'), so name
# the language explicitly. English is what the autogenerated content used
# before, so the rendered output is unchanged.
language = 'en'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This patterns also effect to html_static_path and html_extra_path
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = ['_static']


# -- Options for HTMLHelp output ------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'tensorboardXdoc'


# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'tensorboardX.tex', 'tensorboardX Documentation',
     'tensorboardX Contributors', 'manual'),
]


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'tensorboardX', 'tensorboardX Documentation',
     [author], 1)
]


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'tensorboardX', 'tensorboardX Documentation',
     author, 'tensorboardX', 'One line description of project.',
     'Miscellaneous'),
]


# Example configuration for intersphinx: refer to the Python standard library.
# Inventories used by sphinx.ext.intersphinx to resolve cross-references into
# external projects. Every entry points at the project's current HTTPS
# documentation host; ``None`` means "fetch objects.inv from that base URL".
intersphinx_mapping = {
    'python': ('https://docs.python.org/3', None),
    'numpy': ('https://numpy.org/doc/stable/', None),
    'torch': ('https://pytorch.org/docs/stable/', None),
    'matplotlib': ('https://matplotlib.org/stable/', None),
}


================================================
FILE: tensorboardX/docs/index.rst
================================================
.. tensorboardX documentation master file, created by
   sphinx-quickstart on Wed Aug  9 01:38:01 2017.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

Welcome to tensorboardX's documentation!
===============================================

.. toctree::
   :maxdepth: 2
   :caption: Contents:

   tensorboard
   utils
   tutorial
   tutorial_zh

Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`


================================================
FILE: tensorboardX/docs/tensorboard.rst
================================================
tensorboardX
===================================
.. automodule:: tensorboardX

.. autoclass:: SummaryWriter
    :members:
    
    .. automethod:: __init__

.. autoclass:: TorchVis
    :members:

    .. automethod:: __init__

================================================
FILE: tensorboardX/docs/tutorial.rst
================================================
Tutorials
*********

What is tensorboard X?
----------------------

At first, the package was named tensorboard, but that soon led to issues with
name conflicts. The first alternative name that came to my mind was
tensorboard-pytorch, but in order to make it more general, I chose tensorboardX,
which stands for tensorboard for X.

Google's tensorflow's tensorboard is a web server that serves visualizations of the
training progress of a neural network; it visualizes scalar values, images,
text, etc. This information is saved as events in tensorflow. It's a pity
that other deep learning frameworks lack such a tool, so there are already
packages that let users log the events without tensorflow; however, they only
provide basic functionality. The purpose of this package is to let
researchers use a simple interface to log events within PyTorch (and then show
the visualization in tensorboard). This package currently supports logging scalar,
image, audio, histogram, text, embedding, and the route of back-propagation. The
following manual is tested on Ubuntu and Mac, and the environments are anaconda's
python2 and python3.


Create a summary writer
-----------------------
Before logging anything, we need to create a writer instance. This can be done with:

.. code-block:: python

    from tensorboardX import SummaryWriter
    #SummaryWriter encapsulates everything
    writer = SummaryWriter('runs/exp-1')
    #creates writer object. The log will be saved in 'runs/exp-1'
    writer2 = SummaryWriter()
    #creates writer2 object with auto generated file name, the dir will be something like 'runs/Aug20-17-20-33'
    writer3 = SummaryWriter(comment='3x learning rate')
    #creates writer3 object with auto generated file name, the comment will be appended to the filename. The dir will be something like 'runs/Aug20-17-20-33-3xlearning rate'

Each subfolder will be treated as a different experiment in tensorboard. Each
time you re-run the experiment with different settings, you should change the
name of the sub folder such as ``runs/exp2``, ``runs/myexp`` so that you can
easily compare different experiment settings. Type ``tensorboard --logdir runs`` to compare
different runs in tensorboard.


General api format
------------------
.. code-block:: python

    add_something(tag name, object, iteration number)


Add scalar
-----------
A scalar value is the simplest data type to deal with. Mostly we save the loss
value of each training step, or the accuracy after each epoch. Sometimes I save
the corresponding learning rate as well. It's cheap to save scalar values. Just
log anything you think is important. To log a scalar value, use
``writer.add_scalar('myscalar', value, iteration)``. Note that the program complains
if you feed a PyTorch tensor. Remember to extract the scalar value by
``x.item()`` if ``x`` is a torch scalar tensor.


Add image
---------
An image is represented as a 3-dimensional tensor. The simplest case is saving one
image at a time. In this case, the image should be passed as a 3-dimensional
tensor of size ``[3, H, W]``. The three slices along the first dimension correspond to
the R, G, B channels of an image. After your image is computed, use ``writer.add_image('imresult', x,
iteration)`` to save the image. If you have a batch of images to show, use
``torchvision``'s ``make_grid`` function to prepare the image array and send the result
to ``add_image(...)`` (``make_grid`` takes a 4D tensor and returns tiled images in a 3D tensor).

.. Note::
	Remember to normalize your image.


Add histogram
-------------
Saving histograms is expensive, both in computation time and storage. If training
slows down after using this package, check this first. To save a histogram,
convert the array into a numpy array and save it with ``writer.add_histogram('hist',
array, iteration)``.


Add figure
----------
You can save a matplotlib figure to tensorboard with the add_figure function. ``figure`` input should be ``matplotlib.pyplot.figure`` or a list of ``matplotlib.pyplot.figure``.
Check `<https://tensorboardx.readthedocs.io/en/latest/tensorboard.html#tensorboardX.SummaryWriter.add_figure>`_ for the detailed usage.

Add graph
---------
To visualize a model, you need a model ``m`` and the input ``t``. ``t`` can be a tensor or a list of tensors
depending on your model. If error happens, make sure that ``m(t)`` runs without problem first. See
`The graph demo <https://github.com/lanpa/tensorboardX/blob/master/examples/demo_graph.py>`_ for
complete example.


Add audio
---------
To log single-channel audio, use ``add_audio(tag, audio, iteration, sample_rate)``, where ``audio`` is a one-dimensional array, and each element in the array represents one of the consecutive amplitude samples.
For a 2-second audio clip with a ``sample_rate`` of 44100 Hz, the input ``audio`` should have 88200 elements.
Each element should lie in [−1, 1].

Add embedding
-------------
Embeddings, i.e. high-dimensional data, can be visualized and converted
into human-perceptible 3D data by tensorboard, which provides PCA and
t-sne to project the data into a low-dimensional space. What you need to do is
provide a bunch of points and tensorboard will do the rest for you. The bunch of
points is passed as a tensor of size ``n x d``, where ``n`` is the number of points and
``d`` is the feature dimension. The feature representation can either be raw data
(*e.g.* the MNIST image) or a representation learned by your network (extracted
feature). This determines how the points are distributed. To make the visualization
more informative, you can pass optional metadata or ``label_imgs`` for each data
point. In this way you can see that neighboring points have similar labels and
distant points have very different labels (semantically or visually). Here the
metadata is a list of labels, and the length of the list should be equal to ``n``, the
number of points. The ``label_imgs`` is a 4D tensor of size ``NCHW``. ``N`` should be equal
to ``n`` as well. See
`The embedding demo <https://github.com/lanpa/tensorboardX/blob/master/examples/demo_embedding.py>`_ for
a complete example.


Useful commands
---------------
Install
=======

Simply type ``pip install tensorboardX`` in a unix shell to install this package.
To use the newest version, you might need to build from source or ``pip install
tensorboardX --no-cache-dir``. To run the tensorboard web server, you need
to install it using ``pip install tensorboard``.
After that, type ``tensorboard --logdir=<your_log_dir>`` to start the server, where
``your_log_dir`` is the parameter of the object constructor. I think this command is
tedious, so I added the line ``alias tb='tensorboard --logdir '`` to ``~/.bashrc``. In
this way, the above command is simplified to ``tb <your_log_dir>``. Use your favorite
browser to load the tensorboard page; the address will be shown in the terminal
after starting the server.


Misc
----
Performance issue
=================
Logging is cheap, but display is expensive.
In my experience, if there are 3 or more experiments to show at a time and each
experiment has, say, 50k points, tensorboard might need a lot of time to
present the data.


Grouping plots
==============
Usually, there are many numbers to log in one experiment. For example, when
training GANs you should log the losses of the generator and the discriminator. If the
loss is composed of two other loss functions, say L1 and MSE, you might want to
log the value of the other two losses as well. In this case, you can write the
tags as Gen/L1, Gen/MSE, Desc/L1, Desc/MSE. In this way, tensorboard will group
the plots into two sections (Gen, Desc). You can also use regular expressions
to filter the data.


================================================
FILE: tensorboardX/docs/tutorial_zh.rst
================================================
Tutorials_zh
*************

緣起
------
Google TensorFlow 附加的工具 Tensorboard 是一個很好用的視覺化工具。他可以記錄數字，影像或者是聲音資訊，對於觀察類神經網路訓練的過程非常有幫助。很可惜的是其他的訓練框架（PyTorch, Chainer, numpy）並沒有這麼好用的工具。網路上稍加搜尋可以發現已經有一些現成的套件可以讓不同的訓練框架使用 web 介面來觀察訓練情形，不過他們可以記錄的東西比較有限或是使用起來比較複雜 (tensorboard_logger, visdom)。tensorboardX 的目的就是讓其他 tensorboard 的功能都可以輕易的被非 TensorFlow 的框架使用。
目前這個套件除了 tensorboard beholder 之外支援所有 tensorboard 的紀錄型態。這個套件目前的標準測試環境為 Ubuntu 或是 Mac ，windows 則是有不定期手動測試；使用的 python 版本為 anaconda 的 python3。

安裝
-------
在命令列輸入 ``pip install tensorboardX`` 即可
或是最新版源碼安裝 ``pip install git+https://github.com/lanpa/tensorboardX``

使用
-------
建立 event writer 實體
在紀錄任何東西之前，我們需要建立一個 event writer 實體。
from tensorboardX import SummaryWriter 
#SummaryWriter 是一個類別，包含這套件的所有功能。

``writer = SummaryWriter('runs/exp-1')``
#建立實體。資料存放在：``'runs/exp-1'``
#接下來要寫入任何資料都是呼叫 ``writer.add_某功能()``

``writer = SummaryWriter()``
#使用預設名稱建立實體。資料存放在：``'runs/現在時間-機器名字'`` ex. ``'runs/Aug20-obov01'``

``writer = SummaryWriter(comment='3xLR')``
#在預設資料夾後面加上註解 檔名變為：``'runs/Aug20-obov01-3xLR'``
上面的程式碼會在目前的工作目錄下建立一個叫 ``runs`` 的資料夾以及子目錄 ``exp-1``。 每個子目錄都會被視為一個實驗。每次執行新的實驗時，比如說改了一些參數，這時請將資料夾重新命名，像是： ``runs/exp2``, ``runs/myexp`` 這樣可以便於比較實驗的結果。 建議：資料夾可以用時間命名或者是直接把參數當成資料夾的名稱。
建立 writer 實體之後就可以開始紀錄資料了
API 的長相大概是：``add_xxx(標籤，要記錄的東西，時間戳，其他參數)``

紀錄純量
-------------
純量是最好記錄的東西。通常我們會把每次訓練的損失記錄下來或者是測試的準確度都是值得記錄的東西。其他數據，像是學習率也值得紀錄。
紀錄純量的方法是 ``writer.add_scalar('myscalar', value, iteration)``
value 可以是 PyTorch tensor ， numpy或是 float，int 之類的python原生數字類別。

記錄影像
-------------
影像使用一個三維的矩陣來表示。這三個維度分別代表紅色，綠色，藍色的強度。一張寬200， 高100的影像其對應的矩陣大小為[3, 100, 200] （CHW）。最簡單情況是只有一張影像要存。這時候只需要注意一下是不是符合上述的規格然後將它傳到: ``writer.add_image('imresult', image, iteration)`` 即可。 
通常訓練的時候會採用批次處理，所以有一大堆影像要存。這時候請確定你的資料維度是 ``(NCHW)``, 其中 ``N`` 是batchsize。``add_image`` 會自動將他排列成適當大小。要注意的是，如果要記錄的影像是 OpenCV/numpy 格式，他們通常呈現 ``(HWC)`` 的排列，這時候要呼叫 ``numpy.transpose`` 將其轉為正確的維度，否則會報錯。另外就是注意影像的值的範圍要介於 [0, 1] 之間。 

紀錄直方圖（histogram）
-------------------------------
記錄直方圖很耗 CPU 資源，不要常用。如果你用了這個套件之後覺得速度變慢了請先檢查一下是不是這個原因。使用方法很簡單，呼叫 ``writer.add_histogram('hist', array, iteration)`` 即可紀錄。

紀錄聲音
-------------
``writer.add_audio('myaudio', audio, iteration, sample_rate)``
這功能只支援單聲道。 add_audio 要傳入的聲音資訊是個一維陣列，陣列的每一個元素代表在每一個取樣點的振幅大小。取樣頻率(sample_rate)為 44100 Hz 的情況下，一段2秒鐘的聲音應該要有88200個點；注意其中每個元素的值應該都介於正負1之間。

紀錄文字
-------------
``writer.add_text('mytext', 'this is a pen', iteration)``
除了一般字串之外，也支援簡單的 markdown 表格。

記錄網路架構。
--------------------------
(實驗性的功能，模型複雜的時候不確定對不對)
問題很多的功能。使用上比較複雜。需要準備兩個東西：網路模型 以及 你要餵給他的 tensor 
舉例來說，令模型為 m，輸入為 x，則使用方法為：
``add_graph(m, (x, ))`` 這裡使用 tuple 的原因是當網路有多個輸入時，可以把他擴充成
``add_graph(m, (x, y, z))`` ，如果只有單一輸入，寫成 ``add_graph(m, x)`` 也無妨。 
常會出錯的原因： 
- 較新的 operator pytorch本身不支援JIT
- 輸入是 cpu tensor，model 在 GPU 上。（或是反過來）
- 輸入的 tensor 大小錯誤，跑到後面幾層維度消失了
- model 寫錯，前後兩層 feature dimension 對不上
除錯方法

1. forward propagate 一次 ``m(x)`` 或是多個輸入時：``m((x, y, z))``
2. 用 ``torch.onnx.export`` 導出模型，觀察錯誤訊息。

高維度資料視覺化／降維 (embedding)
---------------------------------------------------
因為人類對物體的了解程度只有三維，所以當資料的維度超過三的時候我們沒辦法將他視覺化。這時候就需要降維來讓資料的維度小於等於三。降維運算由 tensorboard 以 Javascript 執行，演算法有 PCA 及 t-sne 兩種可選。這邊我們只需要負責提供每個點的高維度特徵即可。提供的格式是一個矩陣，一個 ``n x d`` 的矩陣， ``n`` 是點的數量， ``d`` 是維度的多寡。 高維度特徵可以是原始資料。比如說影像，或是網路學到的壓縮結果。這原始資料決定了資料的分佈情形。如果要看得更清楚一點，你可以再傳 metadata / label_imgs 的參數進去（metadata是一個 python list 長度為 ``n``, ``label_imgs`` 是一個 4 維矩陣，大小是 ``nCHW``）。這樣每個點就會有他對應的文字或圖在旁邊。不懂的話就看範例吧：https://github.com/lanpa/tensorboardX/blob/master/examples/demo_embedding.py

紀錄短片
---------------
類似於紀錄影像，不過傳入的物件維度是 ``[B, C, T ,H, W]``，其中 ``T`` 是影格的數量。所以一個 30 frame 的彩色影片 維度是 ``[B, 3, 30 ,H, W]``。

紀錄 pr curve
-------------------
根據預測的機率值以及其對應的標準答案計算 precision-recall 的結果並保存。
``add_pr_curve (tag, labels, predictions, step)``
labels是標準答案，predictions是程式對樣本的預測。 
假設有十筆資料 labels就會長得像 ``[0, 0, 1, 0, 0, 1, 0, 1, 0, 1]``，predictions則長的像 ``[0.1, 0.3, 0.8, 0.2, 0.4, 0.5, 0.1, 0.7, 0.9, 0.2]``。

pyplot 的圖表
------------------------------

用 matplotlib 畫了美美的圖表想紀錄？請用 ``add_figure`` 。傳入的物件是 matplotlib 的 figure。 
顯示結果 
Tensorboard 本質是個網頁伺服器，他讀取的資料來自於訓練網路的時候程式 (tensorboardX) 寫下的事件檔。因為 tensorboard 包含於 tensorflow，所以你需要另外安裝一份 tensorflow 在伺服器主機。我想大部分人都已經裝過了。沒裝過的話就在 unix shell 介面輸入 ``pip install tensorboard``。如果沒有使用 TensorFlow 訓練的需求，建議裝非 GPU 版本，啟動速度快得多。
接下來在命令列輸入 ``tensorboard --logdir=<your_log_dir>`` （以前面的例子來說：``tensorboard --logdir=runs``）伺服器就會啟動了。這個指令打起來很麻煩，所以我都在 ``~/.bashrc`` 加一行：``alias tb='tensorboard --logdir '`` 如此一來指令就簡化成 ``tb <your_log_dir>``。接下來就是照著終端機上的指示打開你的瀏覽器就可以看到畫面了。


================================================
FILE: tensorboardX/docs/utils.rst
================================================
Helper functions
===================================
.. autofunction:: tensorboardX.utils.figure_to_image

================================================
FILE: tensorboardX/examples/RUN_AFTER_PIP_INSTALL
================================================


================================================
FILE: tensorboardX/examples/__init__.py
================================================


================================================
FILE: tensorboardX/examples/chainer/extension_logger/net.py
================================================
#!/usr/bin/env python

from __future__ import print_function

import numpy

import chainer
from chainer import cuda
import chainer.functions as F
import chainer.links as L


def add_noise(h, sigma=0.2):
    """Perturb ``h`` with scaled standard-normal noise while training.

    Args:
        h: Object exposing ``.data`` and ``.shape`` (a chainer Variable at
            the call sites in this file).
        sigma: Multiplier applied to the ``randn`` sample.

    Returns:
        ``h`` itself when ``chainer.config.train`` is falsy; otherwise ``h``
        plus ``sigma`` times a ``randn`` sample of ``h.shape`` drawn from
        the array module that owns ``h.data``.
    """
    # Resolve the array module first so noise is drawn from the same backend
    # that owns the data.
    backend = cuda.get_array_module(h.data)
    if not chainer.config.train:
        return h
    noise = backend.random.randn(*h.shape)
    return h + sigma * noise


class Generator(chainer.Chain):
    """Generator network: a linear projection followed by four deconvolutions.

    Args:
        n_hidden: Size of the latent vector fed to the first linear layer.
        bottom_width: Side length of the feature map the linear output is
            reshaped to.
        ch: Channel count of that first feature map; later layers use
            ``ch // 2``, ``ch // 4`` and ``ch // 8``.
        wscale: Scale passed to the ``Normal`` weight initializer.
    """

    def __init__(self, n_hidden, bottom_width=4, ch=512, wscale=0.02):
        super(Generator, self).__init__()
        self.n_hidden = n_hidden
        self.ch = ch
        self.bottom_width = bottom_width

        n_bottom = bottom_width * bottom_width * ch
        half, quarter, eighth = ch // 2, ch // 4, ch // 8

        with self.init_scope():
            init_w = chainer.initializers.Normal(wscale)
            self.l0 = L.Linear(self.n_hidden, n_bottom, initialW=init_w)
            self.dc1 = L.Deconvolution2D(
                ch, half, ksize=4, stride=2, pad=1, initialW=init_w)
            self.dc2 = L.Deconvolution2D(
                half, quarter, ksize=4, stride=2, pad=1, initialW=init_w)
            self.dc3 = L.Deconvolution2D(
                quarter, eighth, ksize=4, stride=2, pad=1, initialW=init_w)
            self.dc4 = L.Deconvolution2D(
                eighth, 3, ksize=3, stride=1, pad=1, initialW=init_w)
            self.bn0 = L.BatchNormalization(n_bottom)
            self.bn1 = L.BatchNormalization(half)
            self.bn2 = L.BatchNormalization(quarter)
            self.bn3 = L.BatchNormalization(eighth)

    def make_hidden(self, batchsize):
        """Sample ``batchsize`` latent vectors uniformly from [-1, 1) as float32.

        The result has shape ``(batchsize, n_hidden, 1, 1)``.
        """
        shape = (batchsize, self.n_hidden, 1, 1)
        return numpy.random.uniform(-1, 1, shape).astype(numpy.float32)

    def __call__(self, z):
        """Map latent batch ``z`` to a 3-channel output squashed by a sigmoid."""
        side = self.bottom_width
        h = F.relu(self.bn0(self.l0(z)))
        h = F.reshape(h, (len(z), self.ch, side, side))
        stages = (
            (self.dc1, self.bn1),
            (self.dc2, self.bn2),
            (self.dc3, self.bn3),
        )
        for deconv, norm in stages:
            h = F.relu(norm(deconv(h)))
        return F.sigmoid(self.dc4(h))


class Discriminator(chainer.Chain):
    """Discriminator network: seven convolutions followed by a linear head.

    Noise from ``add_noise`` is injected on the input and after every
    convolutional stage; the final linear layer emits one value per sample.

    Args:
        bottom_width: Side length assumed for the last feature map when
            sizing the linear layer's input.
        ch: Channel count of the deepest convolutions; earlier layers use
            ``ch // 8``, ``ch // 4`` and ``ch // 2``.
        wscale: Scale passed to the ``Normal`` weight initializer.
    """

    def __init__(self, bottom_width=4, ch=512, wscale=0.02):
        init_w = chainer.initializers.Normal(wscale)
        super(Discriminator, self).__init__()

        c8, c4, c2, c1 = ch // 8, ch // 4, ch // 2, ch // 1

        with self.init_scope():
            self.c0_0 = L.Convolution2D(
                3, c8, ksize=3, stride=1, pad=1, initialW=init_w)
            self.c0_1 = L.Convolution2D(
                c8, c4, ksize=4, stride=2, pad=1, initialW=init_w)
            self.c1_0 = L.Convolution2D(
                c4, c4, ksize=3, stride=1, pad=1, initialW=init_w)
            self.c1_1 = L.Convolution2D(
                c4, c2, ksize=4, stride=2, pad=1, initialW=init_w)
            self.c2_0 = L.Convolution2D(
                c2, c2, ksize=3, stride=1, pad=1, initialW=init_w)
            self.c2_1 = L.Convolution2D(
                c2, c1, ksize=4, stride=2, pad=1, initialW=init_w)
            self.c3_0 = L.Convolution2D(
                c1, c1, ksize=3, stride=1, pad=1, initialW=init_w)
            self.l4 = L.Linear(
                bottom_width * bottom_width * ch, 1, initialW=init_w)
            self.bn0_1 = L.BatchNormalization(c4, use_gamma=False)
            self.bn1_0 = L.BatchNormalization(c4, use_gamma=False)
            self.bn1_1 = L.BatchNormalization(c2, use_gamma=False)
            self.bn2_0 = L.BatchNormalization(c2, use_gamma=False)
            self.bn2_1 = L.BatchNormalization(c1, use_gamma=False)
            self.bn3_0 = L.BatchNormalization(c1, use_gamma=False)

    def __call__(self, x):
        """Return the linear head's output for the image batch ``x``."""
        h = add_noise(x)
        # The first convolution has no batch normalization attached.
        h = F.leaky_relu(add_noise(self.c0_0(h)))
        stages = (
            (self.c0_1, self.bn0_1),
            (self.c1_0, self.bn1_0),
            (self.c1_1, self.bn1_1),
            (self.c2_0, self.bn2_0),
            (self.c2_1, self.bn2_1),
            (self.c3_0, self.bn3_0),
        )
        for conv, norm in stages:
            h = F.leaky_relu(add_noise(norm(conv(h))))
        return self.l4(h)


================================================
FILE: tensorboardX/examples/chainer/extension_logger/train_dcgan.py
================================================
#!/usr/bin/env python

from __future__ import print_function
import argparse
import os

import chainer
from chainer import training
from chainer.training import extensions

from net import Discriminator
from net import Generator
from updater import DCGANUpdater
from visualize import out_generated_image
from tensorboardX import SummaryWriter
from writetensorboard import LogTensorboard


# File extensions (lower-case) accepted when --dataset points at a directory.
_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')


def _parse_args():
    """Define the example's command-line options and parse ``sys.argv``."""
    parser = argparse.ArgumentParser(description='Chainer example: DCGAN')
    parser.add_argument('--batchsize', '-b', type=int, default=50,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=1000,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--dataset', '-i', default='',
                        help='Directory of image files.  Default is cifar-10.')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--n_hidden', '-n', type=int, default=100,
                        help='Number of hidden units (z)')
    parser.add_argument('--seed', type=int, default=0,
                        help='Random seed of z at visualization stage')
    parser.add_argument('--snapshot_interval', type=int, default=1000,
                        help='Interval of snapshot')
    parser.add_argument('--display_interval', type=int, default=100,
                        help='Interval of displaying log to console')
    return parser.parse_args()


def _load_dataset(args):
    """Return CIFAR-10 when no directory is given, else the images in it."""
    if args.dataset == '':
        # Load the CIFAR10 dataset if args.dataset is not specified
        train, _ = chainer.datasets.get_cifar10(withlabel=False, scale=255.)
        return train

    # Match on the real file extension (case-insensitively) instead of a
    # substring, so names such as "png_notes.txt" are not treated as images.
    image_files = [
        name for name in os.listdir(args.dataset)
        if os.path.splitext(name)[1].lower() in _IMAGE_EXTENSIONS
    ]
    print('{} contains {} image files'
          .format(args.dataset, len(image_files)))
    return chainer.datasets.ImageDataset(paths=image_files, root=args.dataset)


def _make_optimizer(model, alpha=0.0002, beta1=0.5):
    """Create an Adam optimizer with weight decay attached to ``model``."""
    optimizer = chainer.optimizers.Adam(alpha=alpha, beta1=beta1)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001), 'hook_dec')
    return optimizer


def _build_trainer(args, writer):
    """Assemble models, optimizers, data and extensions into a Trainer."""
    # Set up a neural network to train
    gen = Generator(n_hidden=args.n_hidden)
    dis = Discriminator()

    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        gen.to_gpu()  # Copy the model to the GPU
        dis.to_gpu()

    opt_gen = _make_optimizer(gen)
    opt_dis = _make_optimizer(dis)

    train = _load_dataset(args)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    # Set up a trainer
    updater = DCGANUpdater(
        models=(gen, dis),
        iterator=train_iter,
        optimizer={
            'gen': opt_gen, 'dis': opt_dis},
        device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    snapshot_interval = (args.snapshot_interval, 'iteration')
    display_interval = (args.display_interval, 'iteration')
    trainer.extend(
        extensions.snapshot(filename='snapshot_iter_{.updater.iteration}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        gen, 'gen_iter_{.updater.iteration}.npz'), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        dis, 'dis_iter_{.updater.iteration}.npz'), trigger=snapshot_interval)
    trainer.extend(extensions.LogReport(trigger=display_interval))
    trainer.extend(LogTensorboard(trigger=display_interval, logger=writer))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'gen/loss', 'dis/loss',
    ]), trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(
        out_generated_image(
            gen, dis,
            10, 10, args.seed, args.out, writer),
        trigger=snapshot_interval)
    return trainer


def main():
    """Train the DCGAN example and mirror its logs to a tensorboardX writer.

    Parses the command line, prints the chosen settings, builds the trainer,
    optionally resumes from a snapshot and runs training. The summary writer
    is closed on every exit path so buffered events are not left unwritten.
    """
    args = _parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# n_hidden: {}'.format(args.n_hidden))
    print('# epoch: {}'.format(args.epoch))
    print('')

    writer = SummaryWriter()
    try:
        trainer = _build_trainer(args, writer)

        if args.resume:
            # Resume from a snapshot
            chainer.serializers.load_npz(args.resume, trainer)

        # Run the training
        trainer.run()
    finally:
        # Close the writer even when setup or training raises.
        writer.close()


if __name__ == '__main__':
    main()


================================================
FILE: tensorboardX/examples/chainer/extension_logger/updater.py
================================================
#!/usr/bin/env python

from __future__ import print_function

import chainer
import chainer.functions as F
from chainer import Variable


class DCGANUpdater(chainer.training.StandardUpdater):
    """Updater that trains a generator/discriminator pair in one step.

    Takes the usual ``StandardUpdater`` arguments plus a required ``models``
    keyword holding a ``(generator, discriminator)`` pair.
    """

    def __init__(self, *args, **kwargs):
        # 'models' is consumed here; the base class never sees it.
        generator, discriminator = kwargs.pop('models')
        self.gen = generator
        self.dis = discriminator
        super(DCGANUpdater, self).__init__(*args, **kwargs)

    def loss_dis(self, dis, y_fake, y_real):
        """Discriminator loss: batch-averaged softplus terms for real and fake.

        The value is reported under the key ``'loss'`` with ``dis`` as observer.
        """
        n_samples = len(y_fake)
        real_term = F.sum(F.softplus(-y_real)) / n_samples
        fake_term = F.sum(F.softplus(y_fake)) / n_samples
        total = real_term + fake_term
        chainer.report({'loss': total}, dis)
        return total

    def loss_gen(self, gen, y_fake):
        """Generator loss: batch-averaged ``softplus(-y_fake)``.

        The value is reported under the key ``'loss'`` with ``gen`` as observer.
        """
        n_samples = len(y_fake)
        total = F.sum(F.softplus(-y_fake)) / n_samples
        chainer.report({'loss': total}, gen)
        return total

    def update_core(self):
        """Run one discriminator update and one generator update on a batch."""
        opt_gen = self.get_optimizer('gen')
        opt_dis = self.get_optimizer('dis')

        minibatch = self.get_iterator('main').next()
        # Divide the converted batch by 255 before feeding the discriminator.
        x_real = Variable(self.converter(minibatch, self.device)) / 255.
        xp = chainer.cuda.get_array_module(x_real.data)

        generator = self.gen
        discriminator = self.dis
        n_samples = len(minibatch)

        y_real = discriminator(x_real)

        latent = Variable(xp.asarray(generator.make_hidden(n_samples)))
        y_fake = discriminator(generator(latent))

        opt_dis.update(self.loss_dis, discriminator, y_fake, y_real)
        opt_gen.update(self.loss_gen, generator, y_fake)


================================================
FILE: tensorboardX/examples/chainer/extension_logger/visualize.py
================================================
#!/usr/bin/env python

import os

import numpy as np
from PIL import Image

import chainer
import chainer.cuda
from chainer import Variable


def out_generated_image(gen, dis, rows, cols, seed, dst, writer):
    """Build a trainer extension that logs generator samples via ``writer``.

    Args:
        gen: Generator exposing ``xp``, ``make_hidden`` and ``__call__``.
        dis: Not used by the extension; kept for interface compatibility.
        rows: With ``cols``, sets the number of samples (``rows * cols``).
        cols: See ``rows``.
        seed: Seed handed to ``np.random.seed`` before every sampling.
        dst: Not used by the extension; kept for interface compatibility.
        writer: Object whose ``add_image(tag, data, step)`` receives the
            generated batch under the tag ``'img'``.

    Returns:
        The extension callable produced by ``chainer.training.make_extension``.
    """
    @chainer.training.make_extension()
    def make_image(trainer):
        # Re-seed on every call so the same latent vectors are used each time.
        np.random.seed(seed)
        sample_count = rows * cols
        latent = Variable(gen.xp.asarray(gen.make_hidden(sample_count)))
        with chainer.using_config('train', False):
            generated = gen(latent)
        writer.add_image('img', generated, trainer.updater.iteration)

    return make_image


================================================
FILE: tensorboardX/examples/chainer/extension_logger/writetensorboard.py
================================================
import json
import os
import shutil
import tempfile

import six
from chainer import reporter
from chainer import serializer as serializer_module
from chainer.training import extension
from chainer.training import trigger as trigger_module


class LogTensorboard(extension.Extension):

    """Trainer extension to log observations to TensorBoard and a JSON file.

    Every time this extension is invoked, each entry of the trainer's current
    observation is forwarded to ``logger.add_scalar`` with the updater's
    iteration count as the step. This is skipped when no ``logger`` was
    supplied. All observation entries are forwarded; ``keys`` only filters
    what is accumulated for the JSON log.

    This extension also accumulates the observations of the trainer to
    :class:`~chainer.DictSummary` at a regular interval specified by a supplied
    trigger, and writes them into a log file in JSON format.

    There are two triggers to handle this extension. One is the trigger to
    invoke this extension, which is used to handle the timing of accumulating
    the results. It is set to ``1, 'iteration'`` by default. The other is the
    trigger to determine when to emit the result. When this trigger returns
    True, this extension appends the summary of accumulated values to the list
    of past summaries, and writes the list to the log file. Then, this
    extension makes a new fresh summary object which is used until the next
    time that the trigger fires.

    It also adds some entries to each result dictionary.

    - ``'epoch'`` and ``'iteration'`` are the epoch and iteration counts at the
      output, respectively.
    - ``'elapsed_time'`` is the elapsed time in seconds since the training
      begins. The value is taken from :attr:`Trainer.elapsed_time`.

    Args:
        keys (iterable of strs): Keys of values to accumulate. If this is None,
            all the values are accumulated and output to the log file.
        trigger: Trigger that decides when to aggregate the result and output
            the values. This is distinct from the trigger of this extension
            itself. If it is a tuple in the form ``<int>, 'epoch'`` or
            ``<int>, 'iteration'``, it is passed to :class:`IntervalTrigger`.
        postprocess: Callback to postprocess the result dictionaries. Each
            result dictionary is passed to this callback on the output. This
            callback can modify the result dictionaries, which are used to
            output to the log file.
        log_name (str): Name of the log file under the output directory. It can
            be a format string: the last result dictionary is passed for the
            formatting. For example, users can use '{iteration}' to separate
            the log files for different iterations. If the log name is None, it
            does not output the log to any file.
        logger: Object providing ``add_scalar(tag, value, step)``. If None,
            nothing is written to TensorBoard and only the JSON log is
            produced.

    """

    def __init__(self, keys=None, trigger=(1, 'epoch'), postprocess=None,
                 log_name='log', logger=None):
        self._keys = keys
        self._trigger = trigger_module.get_trigger(trigger)
        self._postprocess = postprocess
        self._log_name = log_name
        self._log = []
        self._logger = logger
        self._init_summary()

    def __call__(self, trainer):
        """Accumulate the current observation and emit logs when due."""
        # accumulate the observations
        keys = self._keys
        observation = trainer.observation
        summary = self._summary

        if keys is None:
            summary.add(observation)
        else:
            summary.add({k: observation[k] for k in keys if k in observation})

        # Forward the raw per-call values to TensorBoard. ``logger`` defaults
        # to None, in which case there is nothing to write to.
        if self._logger is not None:
            iteration = trainer.updater.iteration
            for k, v in observation.items():
                self._logger.add_scalar(k, v, iteration)

        if self._trigger(trainer):
            # output the result
            stats = self._summary.compute_mean()
            stats_cpu = {}
            for name, value in six.iteritems(stats):
                stats_cpu[name] = float(value)  # copy to CPU

            updater = trainer.updater
            stats_cpu['epoch'] = updater.epoch
            stats_cpu['iteration'] = updater.iteration
            stats_cpu['elapsed_time'] = trainer.elapsed_time

            if self._postprocess is not None:
                self._postprocess(stats_cpu)

            self._log.append(stats_cpu)

            # write to the log file
            if self._log_name is not None:
                log_name = self._log_name.format(**stats_cpu)
                # Write to a temporary file in the output directory first and
                # then move it over the final name.
                fd, path = tempfile.mkstemp(prefix=log_name, dir=trainer.out)
                with os.fdopen(fd, 'w') as f:
                    json.dump(self._log, f, indent=4)

                new_path = os.path.join(trainer.out, log_name)
                shutil.move(path, new_path)

            # reset the summary for the next output
            self._init_summary()

    @property
    def log(self):
        """The current list of observation dictionaries."""
        return self._log

    def serialize(self, serializer):
        """Save or restore the trigger state and the accumulated log.

        The log is stored as a JSON string under the ``'_log'`` key.
        """
        if hasattr(self._trigger, 'serialize'):
            self._trigger.serialize(serializer['_trigger'])

        # Note that this serialization may lose some information of small
        # numerical differences.
        if isinstance(serializer, serializer_module.Serializer):
            log = json.dumps(self._log)
            serializer('_log', log)
        else:
            log = serializer('_log', '')
            self._log = json.loads(log)

    def _init_summary(self):
        """Replace the running summary with a fresh, empty one."""
        self._summary = reporter.DictSummary()


================================================
FILE: tensorboardX/examples/chainer/plain_logger/data.py
================================================
import gzip
import os

import numpy as np
import six
from six.moves.urllib import request

# Base URL the MNIST archives are fetched from.
parent = 'http://yann.lecun.com/exdb/mnist'
# Archive names; each is appended to ``parent`` for download and also used as
# the local file name the download is stored under.
train_images = 'train-images-idx3-ubyte.gz'
train_labels = 'train-labels-idx1-ubyte.gz'
test_images = 't10k-images-idx3-ubyte.gz'
test_labels = 't10k-labels-idx1-ubyte.gz'
# Number of samples read from the training and test archives respectively.
num_train = 60000
num_test = 10000
# Number of byte values read per image.
dim = 784


def load_mnist(images, labels, num):
    """Load ``num`` images and labels from gzip-compressed MNIST archives.

    Args:
        images: Path of the gzip image archive. The first 16 bytes are
            skipped; the following ``num * dim`` bytes are the pixel values.
        labels: Path of the gzip label archive. The first 8 bytes are
            skipped; the following ``num`` bytes are the labels.
        num: Number of samples to read.

    Returns:
        Tuple ``(data, target)`` where ``data`` is a ``(num, dim)`` uint8
        array and ``target`` is a ``(num,)`` uint8 array.

    Raises:
        ValueError: If either archive contains fewer than ``num`` samples.
    """
    with gzip.open(images, 'rb') as f_images,\
            gzip.open(labels, 'rb') as f_labels:
        f_images.read(16)
        f_labels.read(8)
        # Read each payload with a single call instead of one byte at a time.
        image_bytes = f_images.read(num * dim)
        label_bytes = f_labels.read(num)

    if len(image_bytes) != num * dim or len(label_bytes) != num:
        raise ValueError(
            'MNIST archive is truncated: expected {} images and {} labels'
            .format(num, num))

    # frombuffer returns read-only views of the byte strings; copy so that the
    # returned arrays are independent and writable.
    data = np.frombuffer(image_bytes, dtype=np.uint8).reshape((num, dim)).copy()
    target = np.frombuffer(label_bytes, dtype=np.uint8).reshape((num, )).copy()

    return data, target


def download_mnist_data():
    """Download the MNIST archives, convert them and pickle the result.

    The four archives are fetched into the current directory, the training
    and test sets are loaded with ``load_mnist``, joined along the first axis
    and written to ``mnist.pkl`` as a dict with ``'data'`` and ``'target'``.
    """
    archives = (train_images, train_labels, test_images, test_labels)
    for archive in archives:
        print('Downloading {:s}...'.format(archive))
        url = '{:s}/{:s}'.format(parent, archive)
        request.urlretrieve(url, archive)
        print('Done')

    print('Converting training data...')
    data_train, target_train = load_mnist(train_images, train_labels,
                                          num_train)
    print('Done')
    print('Converting test data...')
    data_test, target_test = load_mnist(test_images, test_labels, num_test)
    # Training samples come first, test samples after them.
    all_data = np.concatenate((data_train, data_test), axis=0)
    all_target = np.concatenate((target_train, target_test), axis=0)
    mnist = {'data': all_data, 'target': all_target}
    print('Done')
    print('Save output...')
    with open('mnist.pkl', 'wb') as output:
        six.moves.cPickle.dump(mnist, output, -1)
    print('Done')
    print('Convert completed')


def load_mnist_data():
    """Return the dataset pickled in ``mnist.pkl``, creating it if missing.

    When ``mnist.pkl`` does not exist in the current directory,
    ``download_mnist_data`` is called first to produce it.
    """
    pickle_path = 'mnist.pkl'
    if not os.path.exists(pickle_path):
        download_mnist_data()
    with open(pickle_path, 'rb') as mnist_pickle:
        return six.moves.cPickle.load(mnist_pickle)


================================================
FILE: tensorboardX/examples/chainer/plain_logger/net.py
================================================
import six

import chainer
import chainer.functions as F
from chainer.functions.loss.vae import gaussian_kl_divergence
import chainer.links as L


class VAE(chainer.Chain):
    """Variational autoencoder built from linear layers.

    The encoder maps the input through one tanh hidden layer to a mean and a
    log-variance vector; the decoder maps a latent vector through one tanh
    hidden layer back to the input dimension.
    """

    def __init__(self, n_in, n_latent, n_h):
        """Create the layers.

        Args:
            n_in: Input (and reconstruction) dimension.
            n_latent: Dimension of the latent vector.
            n_h: Width of the hidden layers.
        """
        super(VAE, self).__init__()
        with self.init_scope():
            # encoder: input -> hidden -> (mean, log-variance)
            self.le1 = L.Linear(n_in, n_h)
            self.le2_mu = L.Linear(n_h, n_latent)
            self.le2_ln_var = L.Linear(n_h, n_latent)
            # decoder: latent -> hidden -> output
            self.ld1 = L.Linear(n_latent, n_h)
            self.ld2 = L.Linear(n_h, n_in)

    def __call__(self, x, sigmoid=True):
        """Encode ``x`` and decode the resulting mean vector."""
        mu, _ = self.encode(x)
        return self.decode(mu, sigmoid)

    def encode(self, x):
        """Return ``(mu, ln_var)`` for ``x``; ``ln_var`` is log(sigma**2)."""
        hidden = F.tanh(self.le1(x))
        mu = self.le2_mu(hidden)
        ln_var = self.le2_ln_var(hidden)
        return mu, ln_var

    def decode(self, z, sigmoid=True):
        """Decode ``z``; apply a sigmoid to the output unless disabled."""
        hidden = F.tanh(self.ld1(z))
        out = self.ld2(hidden)
        return F.sigmoid(out) if sigmoid else out

    def get_loss_func(self, C=1.0, k=1):
        """Build the loss function of the VAE.

        The loss value is equal to ELBO (Evidence Lower Bound)
        multiplied by -1.

        Args:
            C (float): Usually 1.0. Weight of the KL-divergence term, which
                works as regularization.
            k (int): Number of Monte Carlo samples drawn from the encoded
                distribution.

        Returns:
            A function taking ``x`` and returning the loss. As a side effect
            it stores the values in ``self.rec_loss`` and ``self.loss``.
        """
        def lf(x):
            mu, ln_var = self.encode(x)
            batchsize = len(mu.data)
            denom = k * batchsize
            # reconstruction term, averaged over samples and batch
            rec_loss = 0
            for _ in six.moves.range(k):
                z = F.gaussian(mu, ln_var)
                decoded = self.decode(z, sigmoid=False)
                rec_loss += F.bernoulli_nll(x, decoded) / denom
            self.rec_loss = rec_loss
            kl_term = C * gaussian_kl_divergence(mu, ln_var) / batchsize
            self.loss = self.rec_loss + kl_term
            return self.loss
        return lf


================================================
FILE: tensorboardX/examples/chainer/plain_logger/train_vae.py
================================================
#!/usr/bin/env python
"""Chainer example: train a VAE on MNIST
"""
from __future__ import print_function
import argparse

import matplotlib
# Disable interactive backend
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import six

import chainer
from chainer import computational_graph
from chainer import cuda
from chainer import optimizers
from chainer import serializers
from tensorboardX import SummaryWriter
import data
import net

# TensorBoard writer used for all scalars, text and images logged below.
# NOTE(review): no writer.close() call is visible in this script - confirm
# whether pending events are flushed on exit.
writer = SummaryWriter()

# Command-line options
parser = argparse.ArgumentParser(description='Chainer example: MNIST')
parser.add_argument('--initmodel', '-m', default='',
                    help='Initialize the model from given file')
parser.add_argument('--resume', '-r', default='',
                    help='Resume the optimization from snapshot')
parser.add_argument('--gpu', '-g', default=-1, type=int,
                    help='GPU ID (negative value indicates CPU)')
parser.add_argument('--epoch', '-e', default=100, type=int,
                    help='number of epochs to learn')
parser.add_argument('--dimz', '-z', default=20, type=int,
                    help='dimention of encoded vector')
parser.add_argument('--batchsize', '-b', type=int, default=100,
                    help='learning minibatch size')
parser.add_argument('--test', action='store_true',
                    help='Use tiny datasets for quick tests')
args = parser.parse_args()

batchsize = args.batchsize
n_epoch = args.epoch
n_latent = args.dimz

# Record the full set of parsed options alongside the run.
writer.add_text('config', str(args))

print('GPU: {}'.format(args.gpu))
print('# dim z: {}'.format(args.dimz))
print('# Minibatch-size: {}'.format(args.batchsize))
print('# epoch: {}'.format(args.epoch))
print('')

# Prepare dataset
print('load MNIST dataset')
mnist = data.load_mnist_data()
# Convert to float32 and divide by 255.
mnist['data'] = mnist['data'].astype(np.float32)
mnist['data'] /= 255
mnist['target'] = mnist['target'].astype(np.int32)

# N is the number of leading samples used for training; the rest is the test
# split. With --test only the first 100 samples are kept (30 train, 70 test).
if args.test:
    mnist['data'] = mnist['data'][0:100]
    mnist['target'] = mnist['target'][0:100]
    N = 30
else:
    N = 60000

x_train, x_test = np.split(mnist['data'],   [N])
y_train, y_test = np.split(mnist['target'], [N])
N_test = y_test.size

# Prepare VAE model, defined in net.py
model = net.VAE(784, n_latent, 500)
if args.gpu >= 0:
    cuda.get_device_from_id(args.gpu).use()
    model.to_gpu()
# Array module matching where the model lives (NumPy on CPU, CuPy on GPU).
xp = np if args.gpu < 0 else cuda.cupy

# Setup optimizer
optimizer = optimizers.Adam()
optimizer.setup(model)

# Init/Resume
if args.initmodel:
    print('Load model from', args.initmodel)
    serializers.load_npz(args.initmodel, model)
if args.resume:
    print('Load optimizer state from', args.resume)
    serializers.load_npz(args.resume, optimizer)

# Learning loop
for epoch in six.moves.range(1, n_epoch + 1):
    print('epoch', epoch)

    # training
    # A fresh random ordering of the training indices for every epoch.
    perm = np.random.permutation(N)
    sum_loss = 0       # total loss
    sum_rec_loss = 0   # reconstruction loss
    for i in six.moves.range(0, N, batchsize):
        x = chainer.Variable(xp.asarray(x_train[perm[i:i + batchsize]]))
        optimizer.update(model.get_loss_func(), x)
        # Dump the computational graph once, after the very first update.
        if epoch == 1 and i == 0:
            with open('graph.dot', 'w') as o:
                g = computational_graph.build_computational_graph(
                    (model.loss, ))
                o.write(g.dump())
            print('graph generated')
        # The step is epoch * N + i; epochs start at 1, so the first logged
        # step is N rather than 0.
        writer.add_scalar('train/loss', model.loss, epoch * N + i)
        writer.add_scalar('train/rec_loss', model.rec_loss, epoch * N + i)
        # Weight each batch loss by its size so the epoch mean is per-sample.
        sum_loss += float(model.loss.data) * len(x.data)
        sum_rec_loss += float(model.rec_loss.data) * len(x.data)

    print('train mean loss={}, mean reconstruction loss={}'
          .format(sum_loss / N, sum_rec_loss / N))

    # evaluation
    sum_loss = 0
    sum_rec_loss = 0
    with chainer.no_backprop_mode():
        for i in six.moves.range(0, N_test, batchsize):
            x = chainer.Variable(xp.asarray(x_test[i:i + batchsize]))
            # Evaluation uses 10 Monte Carlo samples per input (k=10).
            loss_func = model.get_loss_func(k=10)
            loss_func(x)
            sum_loss += float(model.loss.data) * len(x.data)
            sum_rec_loss += float(model.rec_loss.data) * len(x.data)
            writer.add_scalar('test/loss', model.loss, epoch * N_test + i)
            writer.add_scalar('test/rec_loss', model.rec_loss,
                              epoch * N_test + i)
            # Log reconstructions and inputs reshaped to (-1, 1, 28, 28).
            writer.add_image('reconstructed', model(
                x).reshape(-1, 1, 28, 28), epoch * N_test + i)
            writer.add_image('input', x.reshape(-1, 1, 28, 28),
                             epoch * N_test + i)
            # Drop the stored loss attribute; presumably to release the
            # objects it references - TODO confirm.
            del model.loss
    print('test  mean loss={}, mean reconstruction loss={}'
          .format(sum_loss / N_test, sum_rec_loss / N_test))


# Save the model and the optimizer
print('save the model')
serializers.save_npz('mlp.model', model)
print('save the optimizer')
serializers.save_npz('mlp.state', optimizer)

# Move the model to CPU; the code after this point feeds it NumPy arrays.
model.to_cpu()


# original images and reconstructed images
def save_images(x, filename):
    """Save up to nine 28x28 images as a 3x3 grid figure.

    Args:
        x: Iterable of arrays, each reshapeable to ``(28, 28)``. Only as many
            images as there are axes (nine) are drawn, because ``zip`` stops
            at the shorter input.
        filename: Destination passed to ``Figure.savefig``.
    """
    fig, ax = plt.subplots(3, 3, figsize=(9, 9), dpi=100)
    try:
        for ai, xi in zip(ax.flatten(), x):
            ai.imshow(xi.reshape(28, 28))
        fig.savefig(filename)
    finally:
        # pyplot keeps a reference to every figure until it is closed, so
        # release it explicitly once the file has been written.
        plt.close(fig)


# Reconstruct nine hand-picked training samples and save inputs and outputs.
train_ind = [1, 3, 5, 10, 2, 0, 13, 15, 17]
x = chainer.Variable(np.asarray(x_train[train_ind]))
with chainer.no_backprop_mode():
    x1 = model(x)
save_images(x.data, 'train')
save_images(x1.data, 'train_reconstructed')

# Same for nine hand-picked test samples.
test_ind = [3, 2, 1, 18, 4, 8, 11, 17, 61]
x = chainer.Variable(np.asarray(x_test[test_ind]))
with chainer.no_backprop_mode():
    x1 = model(x)
save_images(x.data, 'test')
save_images(x1.data, 'test_reconstructed')


# draw images from randomly sampled z
# Nine latent vectors drawn from a standard normal distribution.
z = chainer.Variable(np.random.normal(0, 1, (9, n_latent)).astype(np.float32))
x = model.decode(z)
save_images(x.data, 'sampled')


================================================
FILE: tensorboardX/examples/demo.py
================================================
import torch
import torchvision.utils as vutils
import numpy as np
import torchvision.models as models
from torchvision import datasets
from tensorboardX import SummaryWriter
import datetime

# Model whose parameters are logged as histograms below.
# NOTE(review): the positional False is presumably the `pretrained` flag -
# confirm against the installed torchvision version.
resnet18 = models.resnet18(False)
writer = SummaryWriter()
sample_rate = 44100
# One frequency per audio clip; indexed by n_iter // 10 in the loop below.
freqs = [262, 294, 330, 349, 392, 440, 440, 440, 440, 440, 440]

# Hand-written values passed to add_pr_curve_raw below.
true_positive_counts = [75, 64, 21, 5, 0]
false_positive_counts = [150, 105, 18, 0, 0]
true_negative_counts = [0, 45, 132, 150, 150]
false_negative_counts = [0, 11, 54, 70, 75]
precision = [0.3333333, 0.3786982, 0.5384616, 1.0, 0.0]
recall = [1.0, 0.8533334, 0.28, 0.0666667, 0.0]


for n_iter in range(100):
    s1 = torch.rand(1)  # random scalar logged below
    s2 = torch.rand(1)  # not used further in this script
    # Tags are grouped by the part before the slash.
    writer.add_scalar('data/scalar_systemtime', s1[0], n_iter)
    # Same value, but logged with an explicit walltime instead of the default.
    writer.add_scalar('data/scalar_customtime', s1[0], n_iter, walltime=n_iter)
    writer.add_scalars('data/scalar_group', {"xsinx": n_iter * np.sin(n_iter),
                                             "xcosx": n_iter * np.cos(n_iter),
                                             "arctanx": np.arctan(n_iter)}, n_iter)
    x = torch.rand(32, 3, 64, 64)  # output from network
    # The heavier summaries are only written every tenth iteration.
    if n_iter % 10 == 0:
        x = vutils.make_grid(x, normalize=True, scale_each=True)
        writer.add_image('Image', x, n_iter)  # Tensor
        writer.add_image_with_boxes('imagebox_label', torch.ones(3, 240, 240) * 0.5,
             torch.Tensor([[10, 10, 100, 100], [101, 101, 200, 200]]),
             n_iter, 
             labels=['abcde' + str(n_iter), 'fgh' + str(n_iter)])
        # Buffer of sample_rate * 2 samples, filled with a cosine wave.
        x = torch.zeros(sample_rate * 2)
        for i in range(x.size(0)):
            # sound amplitude should be in [-1, 1]
            x[i] = np.cos(freqs[n_iter // 10] * np.pi *
                          float(i) / float(sample_rate))
        writer.add_audio('myAudio', x, n_iter)
        writer.add_text('Text', 'text logged at step:' + str(n_iter), n_iter)
        writer.add_text('markdown Text', '''a|b\n-|-\nc|d''', n_iter)
        # Histogram of every parameter whose name does not contain 'bn'.
        for name, param in resnet18.named_parameters():
            if 'bn' not in name:
                writer.add_histogram(name, param, n_iter)
        writer.add_pr_curve('xoxo', np.random.randint(2, size=100), np.random.rand(
            100), n_iter)  # needs tensorboard 0.4RC or later
        writer.add_pr_curve_raw('prcurve with raw data', true_positive_counts,
                                false_positive_counts,
                                true_negative_counts,
                                false_negative_counts,
                                precision,
                                recall, n_iter)
# export scalar data to JSON for external processing
writer.export_scalars_to_json("./all_scalars.json")

# EMBEDDING: first 100 MNIST test images, flattened to 784-dim features.
# NOTE(review): test_data / test_labels / train_data / train_labels may be
# deprecated aliases in newer torchvision releases - confirm.
dataset = datasets.MNIST('mnist', train=False, download=True)
images = dataset.test_data[:100].float()
label = dataset.test_labels[:100]
features = images.view(100, 784)
writer.add_embedding(features, metadata=label, label_img=images.unsqueeze(1))
writer.add_embedding(features, global_step=1, tag='noMetadata')
dataset = datasets.MNIST('mnist', train=True, download=True)
images_train = dataset.train_data[:100].float()
labels_train = dataset.train_labels[:100]
features_train = images_train.view(100, 784)

# Combine both subsets; each metadata row is (digit, 'test' or 'train').
all_features = torch.cat((features, features_train))
all_labels = torch.cat((label, labels_train))
all_images = torch.cat((images, images_train))
dataset_label = ['test'] * 100 + ['train'] * 100
all_labels = list(zip(all_labels, dataset_label))

writer.add_embedding(all_features, metadata=all_labels, label_img=all_images.unsqueeze(1),
                     metadata_header=['digit', 'dataset'], global_step=2)

# VIDEO
# 16 * 48 training images reinterpreted as 16 clips of 48 frames each.
vid_images = dataset.train_data[:16 * 48]
vid = vid_images.view(16, 48, 1, 28, 28)  # BxTxCxHxW

writer.add_video('video', vid_tensor=vid)
writer.add_video('video_1_fps', vid_tensor=vid, fps=1)

writer.close()


================================================
FILE: tensorboardX/examples/demo_beholder.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Simple MNIST classifier to demonstrate features of Beholder.

Based on tensorflow/examples/tutorials/mnist/mnist_with_summaries.py.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorboardX.beholder as beholder_lib
import time

from collections import namedtuple


# Directory handed to Beholder as its logdir; created on start-up if missing.
LOG_DIRECTORY = '/tmp/beholder-demo'
# Pairs an array with the name it is passed to Beholder under.
tensor_and_name = namedtuple('tensor_and_name', 'tensor, name')


def beholder_pytorch():
    """Feed 1000 updates of random data to Beholder.

    Each iteration builds five random ``(128, 768, 3)`` arrays for the
    ``trainable`` argument, five more for ``arrays`` and a random
    ``(128, 128)`` frame, passes them to a new ``Beholder`` instance, sleeps
    0.1 seconds and prints the iteration number.
    """
    for i in range(1000):
        # The comprehension variable must not be named ``i``: on Python 2 it
        # leaks into the enclosing scope and would overwrite the loop counter
        # that is printed below.
        fake_param = [tensor_and_name(np.random.randn(128, 768, 3), 'test' + str(j))
                      for j in range(5)]
        arrays = [tensor_and_name(np.random.randn(128, 768, 3), 'test' + str(j))
                  for j in range(5)]
        beholder = beholder_lib.Beholder(logdir=LOG_DIRECTORY)
        beholder.update(
            trainable=fake_param,
            arrays=arrays,
            frame=np.random.randn(128, 128),
        )
        time.sleep(0.1)
        print(i)


# Script entry point: make sure the log directory exists, print it, then run
# the demo loop.
if __name__ == '__main__':
    import os
    if not os.path.exists(LOG_DIRECTORY):
        os.makedirs(LOG_DIRECTORY)
    print(LOG_DIRECTORY)
    beholder_pytorch()


================================================
FILE: tensorboardX/examples/demo_caffe2.py
================================================
try:
    import caffe2.python.predictor.predictor_exporter as pe
except ImportError:
    print('Please check that Caffe2 is installed correctly to run this demo.')
import numpy as np
import os
import shutil

from caffe2.python import core, model_helper, workspace, brew
from tensorboardX import TorchVis

"""
This is a demo showcasing specific functionality for Caffe2. Shown here are
    add_scalar (with both raw numerical data and Caffe2 blob names)
    add_scalars (with both raw numerical data and Caffe2 blob names)
    add_graph (visualizing a Caffe2 model as a graph)

NOTE: lmdb must be installed and enabled with -DUSE_LMDB=ON for this demo to work.
"""

# Initialize Caffe2 with its command-line style flags.
# If you would like to see some really detailed initializations,
# you can change --caffe2_log_level=0 to --caffe2_log_level=-1
core.GlobalInit(['caffe2', '--caffe2_log_level=0'])
print("Necessities imported!")


# This section preps your image and test set in a lmdb database
def DownloadResource(url, path):
    '''Fetch a zip archive from ``url`` and extract all of it into ``path``.

    The whole response body is held in memory before extraction.
    '''
    import zipfile
    from six import BytesIO
    import requests
    print("Downloading... {} to {}".format(url, path))
    response = requests.get(url, stream=True)
    archive = zipfile.ZipFile(BytesIO(response.content))
    archive.extractall(path)
    print("Completed download and extraction.")


# Working directories, all located under ~/caffe2_notebooks.
current_folder = os.path.join(os.path.expanduser('~'), 'caffe2_notebooks')
data_folder = os.path.join(current_folder, 'tutorial_data', 'mnist')
root_folder = os.path.join(current_folder, 'tutorial_files', 'tutorial_mnist')
# Set to True below when either lmdb database directory is absent.
db_missing = False

if not os.path.exists(data_folder):
    os.makedirs(data_folder)
    print("Your data folder was not found!! This was generated: {}".format(data_folder))

# Look for existing database: lmdb
if os.path.exists(os.path.join(data_folder, "mnist-train-nchw-lmdb")):
    print("lmdb train db found!")
else:
    db_missing = True

if os.path.exists(os.path.join(data_folder, "mnist-test-nchw-lmdb")):
    print("lmdb test db found!")
else:
    db_missing = True

# attempt the download of the db if either was missing
if db_missing:
    print("one or both of the MNIST lmbd dbs not found!!")
    db_url = "http://download.caffe2.ai/databases/mnist-lmdb.zip"
    try:
        DownloadResource(db_url, data_folder)
    except Exception as ex:
        # Print manual-download instructions, then re-raise the error.
        print(
            "Failed to download dataset. Please download it manually from {}".format(db_url))
        print("Unzip it and place the two database folders here: {}".format(data_folder))
        raise ex

# Start from an empty workspace root: remove leftovers from a previous run.
if os.path.exists(root_folder):
    print("Looks like you ran this before, so we need to cleanup those old files...")
    shutil.rmtree(root_folder)

os.makedirs(root_folder)
workspace.ResetWorkspace(root_folder)

print("training data folder:" + data_folder)
print("workspace root folder:" + root_folder)

# END DATA PREPARATION #

# Create TorchVis in preparation for writing. Default format is 'tensorboard'
tv = TorchVis()


def AddInput(model, batch_size, db, db_type):
    """Add the data-loading operators to ``model``.

    Reads the ``data_uint8`` and ``label`` blobs from the database, casts the
    data to float into the ``data`` blob, scales it in place by 1/256 and
    stops gradients from flowing into it.

    Returns:
        Tuple ``(data, label)`` of the resulting blobs.
    """
    # read raw samples and labels from the database
    raw_pixels, label = model.TensorProtosDBInput(
        [], ["data_uint8", "label"], batch_size=batch_size,
        db=db, db_type=db_type)
    # uint8 -> float
    pixels = model.Cast(raw_pixels, "data", to=core.DataType.FLOAT)
    # in-place rescale by 1/256
    pixels = model.Scale(pixels, pixels, scale=float(1. / 256))
    # the input needs no gradient in the backward pass
    pixels = model.StopGradient(pixels, pixels)
    return pixels, label


def AddLeNetModel(model, data):
    '''
    Add the LeNet-style network to ``model``: from data to softmax prediction.

    For each convolutional layer dim_in is the number of input channels and
    dim_out the number of output channels. Each Conv and MaxPool layer also
    changes the image size: a kernel of size 5 reduces each side of an image
    by 4, and a MaxPool layer with kernel and stride equal to 2 halves each
    side.

    Returns the softmax output blob.
    '''
    # conv, kernel 5: 28 x 28 -> 24 x 24
    c1 = brew.conv(model, data, 'conv1', dim_in=1, dim_out=20, kernel=5)
    # pool, kernel 2 / stride 2: 24 x 24 -> 12 x 12
    p1 = brew.max_pool(model, c1, 'pool1', kernel=2, stride=2)
    # conv, kernel 5: 12 x 12 -> 8 x 8
    c2 = brew.conv(model, p1, 'conv2', dim_in=20, dim_out=100, kernel=5)
    # pool, kernel 2 / stride 2: 8 x 8 -> 4 x 4
    p2 = brew.max_pool(model, c2, 'pool2', kernel=2, stride=2)
    # 100 * 4 * 4 is dim_out of the previous layer multiplied by the
    # image size
    hidden = brew.fc(model, p2, 'fc3', dim_in=100 * 4 * 4, dim_out=500)
    # ReLU is written back into the fc3 blob
    activated = brew.relu(model, hidden, hidden)
    logits = brew.fc(model, activated, 'pred', 500, 10)
    return brew.softmax(model, logits, 'softmax')


def AddAccuracy(model, softmax, label):
    """Add an accuracy op named "accuracy" to the model and return its blob."""
    return brew.accuracy(model, [softmax, label], "accuracy")


def AddTrainingOperators(model, softmax, label):
    """Add loss, accuracy, gradient and parameter-update operators."""
    # cross entropy between prediction and label, averaged into the loss
    cross_entropy = model.LabelCrossEntropy([softmax, label], 'xent')
    loss = model.AveragedLoss(cross_entropy, "loss")
    # track the accuracy of the model
    AddAccuracy(model, softmax, label)
    # gradient operators are derived from the averaged loss
    model.AddGradientOperators([loss])
    # iteration counter that drives the learning-rate schedule
    iteration = brew.iter(model, "iter")
    learning_rate = model.LearningRate(
        iteration, "LR", base_lr=-0.1, policy="step", stepsize=1, gamma=0.999)
    # Constant 1 used as the weight of the parameter in the update. It only
    # has to be created once, hence it lives in param_init_net.
    one = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    # Plain SGD step for every parameter, written in place:
    #     param = param * ONE + param_grad * LR
    for param in model.params:
        # ModelHelper keeps the mapping from parameter to gradient.
        grad = model.param_to_grad[param]
        model.WeightedSum([param, one, grad, learning_rate], param)


def AddBookkeepingOperators(model):
    """Add operators that print and summarize blobs for later inspection.

    These operators do not affect the training procedure: they only collect
    statistics and print them to file or to logs.
    """
    # Print dumps the content of a blob; to_file=1 routes the output to a
    # file stored under
    #     root_folder/[blob name]
    for blob_name in ('accuracy', 'loss'):
        model.Print(blob_name, [], to_file=1)
    # Summarize, unlike Print, reports statistics of a blob such as mean,
    # std, min and max. Only the parameters and their gradients are
    # summarized here; doing it for every blob would be possible but costs
    # time, since summarization does not come for free.
    for weight in model.params:
        model.Summarize(weight, [], to_file=1)
        gradient = model.param_to_grad[weight]
        model.Summarize(gradient, [], to_file=1)


# Training model: input, LeNet, training operators and bookkeeping.
arg_scope = {"order": "NCHW"}
train_model = model_helper.ModelHelper(name="mnist_train", arg_scope=arg_scope)
data, label = AddInput(
    train_model, batch_size=64,
    db=os.path.join(data_folder, 'mnist-train-nchw-lmdb'),
    db_type='lmdb')
softmax = AddLeNetModel(train_model, data)
AddTrainingOperators(train_model, softmax, label)
AddBookkeepingOperators(train_model)

# Visualize the Caffe2 model in Tensorboard
tv.add_graph(train_model, data)

# Testing model. We will set the batch size to 100, so that the testing
# pass is 100 iterations (10,000 images in total).
# For the testing model, we need the data input part, the main LeNetModel
# part, and an accuracy part. Note that init_params is set False because
# we will be using the parameters obtained from the train model.
test_model = model_helper.ModelHelper(
    name="mnist_test", arg_scope=arg_scope, init_params=False)
data, label = AddInput(
    test_model, batch_size=100,
    db=os.path.join(data_folder, 'mnist-test-nchw-lmdb'),
    db_type='lmdb')
softmax = AddLeNetModel(test_model, data)
AddAccuracy(test_model, softmax, label)

# Deployment model. We simply need the main LeNetModel part.
deploy_model = model_helper.ModelHelper(
    name="mnist_deploy", arg_scope=arg_scope, init_params=False)
AddLeNetModel(deploy_model, "data")
# You may wonder what happens with the param_init_net part of the deploy_model.
# No, we will not use them, since during deployment time we will not randomly
# initialize the parameters, but load the parameters from the db.

# Dump the text form of every net definition into the workspace root folder.
with open(os.path.join(root_folder, "train_net.pbtxt"), 'w') as fid:
    fid.write(str(train_model.net.Proto()))
with open(os.path.join(root_folder, "train_init_net.pbtxt"), 'w') as fid:
    fid.write(str(train_model.param_init_net.Proto()))
with open(os.path.join(root_folder, "test_net.pbtxt"), 'w') as fid:
    fid.write(str(test_model.net.Proto()))
with open(os.path.join(root_folder, "test_init_net.pbtxt"), 'w') as fid:
    fid.write(str(test_model.param_init_net.Proto()))
with open(os.path.join(root_folder, "deploy_net.pbtxt"), 'w') as fid:
    fid.write(str(deploy_model.net.Proto()))
print("Protocol buffers files have been created in your root folder: " + root_folder)

# The parameter initialization network only needs to be run once.
workspace.RunNetOnce(train_model.param_init_net)
# creating the network
workspace.CreateNet(train_model.net, overwrite=True)
# set the number of iterations and track the accuracy & loss
total_iters = 200
accuracy = np.zeros(total_iters)
loss = np.zeros(total_iters)
# Now, we will manually run the network for 200 iterations.
for i in range(total_iters):
    workspace.RunNet(train_model.net)
    accuracy[i] = workspace.FetchBlob('accuracy')
    loss[i] = workspace.FetchBlob('loss')
    # The same two metrics, once as fetched values and once as blob names.
    scalar_dict_raw = {'accuracy': accuracy[i], 'loss': loss[i]}
    scalar_dict_blobname = {'accuracy': 'accuracy', 'loss': 'loss'}
    # Can pass raw numerical data
    tv.add_scalars('training_raw', scalar_dict_raw, i)
    # Can also pass blobname corresponding to data, for fetching
    tv.add_scalars('training_blobname', scalar_dict_blobname, i)

# Blobs of the last training mini-batch, fetched for inspection only; they are
# not used further in this script.
data = workspace.FetchBlob('data')
softmax = workspace.FetchBlob('softmax')

# Convolutions for this mini-batch
conv = workspace.FetchBlob('conv1')
shape = list(conv.shape)
shape[1] = 1
# We can look into any channel. Think of it as a feature the model learned.
# Channel 15 is picked here and kept as a singleton channel axis.
conv = conv[:, 15, :, :].reshape(shape)

# run a test pass on the test net
workspace.RunNetOnce(test_model.param_init_net)
workspace.CreateNet(test_model.net, overwrite=True)
test_accuracy = np.zeros(100)
for i in range(100):
    # Here the net is run by name rather than by passing the net object.
    workspace.RunNet(test_model.net.Proto().name)
    test_accuracy[i] = workspace.FetchBlob('accuracy')
    tv.add_scalar('test_accuracy_raw', test_accuracy[i], i)
    tv.add_scalar('test_accuracy_blobname', 'accuracy', i)
# After the execution is done, report the mean test accuracy.
print('test_accuracy: %f' % test_accuracy.mean())


================================================
FILE: tensorboardX/examples/demo_custom_scalars.py
================================================
from numpy.random import rand
from tensorboardX import SummaryWriter
import time


# Run 1: log all three scalar groups, then register an explicit chart layout.
with SummaryWriter() as writer:
    for n_iter in range(100):
        for tag in ('twse/0050', 'twse/2330'):
            writer.add_scalar(tag, rand(), n_iter)
        # The three 'dow' series share one random draw, offset by -1 / +1.
        t = rand()
        writer.add_scalar('dow/aaa', t, n_iter)
        writer.add_scalar('dow/bbb', t - 1, n_iter)
        writer.add_scalar('dow/ccc', t + 1, n_iter)
        for tag in ('nasdaq/aaa', 'nasdaq/bbb', 'nasdaq/ccc'):
            writer.add_scalar(tag, rand(), n_iter)

    # Layout shape: {category: {chart title: [chart type, [tags]]}}.
    taiwan_charts = {'twse': ['Multiline', ['twse/0050', 'twse/2330']]}
    usa_charts = {
        'dow': ['Margin', ['dow/aaa', 'dow/bbb', 'dow/ccc']],
        'nasdaq': ['Margin', ['nasdaq/aaa', 'nasdaq/bbb', 'nasdaq/ccc']],
    }
    layout = {'Taiwan': taiwan_charts, 'USA': usa_charts}
    writer.add_custom_scalars(layout)
    # A second writer.add_custom_scalars(layout) call here has no effect.

time.sleep(1)

# Run 2: only the 'twse' series, using the multi-line chart shortcut.
with SummaryWriter() as writer:
    for n_iter in range(100):
        for tag in ('twse/0050', 'twse/2330'):
            writer.add_scalar(tag, rand(), n_iter)

    writer.add_custom_scalars_multilinechart(['twse/0050', 'twse/2330'])

time.sleep(1)

# Run 3: only the 'dow' series, using the margin chart shortcut.
with SummaryWriter() as writer:
    for n_iter in range(100):
        t = rand()
        writer.add_scalar('dow/aaa', t, n_iter)
        writer.add_scalar('dow/bbb', t - 1, n_iter)
        writer.add_scalar('dow/ccc', t + 1, n_iter)

    writer.add_custom_scalars_marginchart(['dow/aaa', 'dow/bbb', 'dow/ccc'])


================================================
FILE: tensorboardX/examples/demo_embedding.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
from torch.autograd.variable import Variable
from tensorboardX import SummaryWriter
from torch.utils.data import TensorDataset, DataLoader

# EMBEDDING VISUALIZATION FOR A TWO-CLASSES PROBLEM

# just a bunch of layers


class M(nn.Module):
    """Small two-conv classifier that emits log-probabilities for 2 classes.

    ``fc1`` expects 128 input features, which is what a (N, 1, 14, 14) input
    yields after the two conv(3x3) + max-pool(2) stages (32 channels * 2 * 2).
    """

    def __init__(self):
        super(M, self).__init__()
        self.cn1 = nn.Conv2d(1, 64, 3)
        self.cn2 = nn.Conv2d(64, 32, 3)
        self.fc1 = nn.Linear(128, 2)

    def forward(self, i):
        """Return per-class log-softmax scores for the batch ``i``."""
        stage1 = F.max_pool2d(F.relu(self.cn1(i)), 2)
        stage2 = F.max_pool2d(F.relu(self.cn2(stage1)), 2)
        # Flatten everything except the batch dimension.
        flat = stage2.view(len(stage2), -1)
        return F.log_softmax(self.fc1(flat), dim=1)

# get some random data around value


def get_data(value, shape):
    """Return a tensor of ``shape`` filled with ``value`` plus squared noise.

    The noise is ``torch.randn(shape) ** 2``, so it is never negative and
    every element is at least ``value``.
    """
    base = torch.ones(shape) * value
    noise = torch.randn(shape) ** 2
    return base + noise


# dataset
# Concatenate two groups of 100 samples each, centred on 0 and 0.5.
data = torch.cat(
    (get_data(
        0, (100, 1, 14, 14)), get_data(
            0.5, (100, 1, 14, 14))), 0)
# labels: 0 for the first 100 samples, 1 for the last 100
labels = torch.cat((torch.zeros(100), torch.ones(100)), 0)
# generator: shuffled mini-batches of 25 samples
gen = DataLoader(TensorDataset(data, labels), batch_size=25, shuffle=True)
# network
m = M()
# loss and optimizer (NLLLoss pairs with the log_softmax output of M)
loss = nn.NLLLoss()
optimizer = torch.optim.Adam(params=m.parameters())
# settings for train and log
num_epochs = 20
embedding_log = 5  # log an embedding every `embedding_log` batches
writer = SummaryWriter(comment='mnist_embedding_training')

# TRAIN
for epoch in range(num_epochs):
    for j, sample in enumerate(gen):
        # Global step counter across epochs.
        n_iter = (epoch * len(gen)) + j
        # reset grad
        m.zero_grad()
        optimizer.zero_grad()
        # get batch data
        data_batch = Variable(sample[0], requires_grad=True).float()
        label_batch = Variable(sample[1], requires_grad=False).long()
        # FORWARD
        out = m(data_batch)
        loss_value = loss(out, label_batch)
        # BACKWARD
        loss_value.backward()
        optimizer.step()
        # LOGGING
        writer.add_scalar('loss', loss_value.data.item(), n_iter)

        if j % embedding_log == 0:
            print("loss_value:{}".format(loss_value.data.item()))
            # The model output has 2 columns; append a column of ones so each
            # embedding vector has 3 dimensions and can be visualized.
            out = torch.cat((out.data, torch.ones(len(out), 1)), 1)
            writer.add_embedding(
                out,
                metadata=label_batch.data,
                label_img=data_batch.data,
                global_step=n_iter)

writer.close()

# tensorboard --logdir runs
# You should now see a dropdown list with all the logged timesteps;
# the last timestep should show a visible separation between the two classes.


================================================
FILE: tensorboardX/examples/demo_graph.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.autograd import Variable
from tensorboardX import SummaryWriter

# One-element tuple of model inputs, reused by the add_graph calls below.
dummy_input = (torch.zeros(1, 3),)


class LinearInLinear(nn.Module):
    """Minimal module wrapping a single ``nn.Linear(3, 5)`` layer."""

    def __init__(self):
        super(LinearInLinear, self).__init__()
        self.l = nn.Linear(3, 5)

    def forward(self, x):
        """Apply the wrapped linear layer to ``x``."""
        projected = self.l(x)
        return projected

# Log the graph of LinearInLinear; the third positional argument is
# presumably `verbose` (later calls in this file pass verbose=True by name).
with SummaryWriter(comment='LinearInLinear') as w:
    w.add_graph(LinearInLinear(), dummy_input, True)


class MultipleInput(nn.Module):
    """Module taking two inputs, summing them and applying one linear layer."""

    def __init__(self):
        super(MultipleInput, self).__init__()
        self.Linear_1 = nn.Linear(3, 5)

    def forward(self, x, y):
        """Return ``Linear_1(x + y)``."""
        summed = x + y
        return self.Linear_1(summed)

# Log the graph of a model whose forward() takes two tensors.
with SummaryWriter(comment='MultipleInput') as w:
    w.add_graph(MultipleInput(), (torch.zeros(1, 3), torch.zeros(1, 3)), True)

class MultipleOutput(nn.Module):
    """Module feeding one input through two independent linear layers."""

    def __init__(self):
        super(MultipleOutput, self).__init__()
        self.Linear_1 = nn.Linear(3, 5)
        self.Linear_2 = nn.Linear(3, 7)

    def forward(self, x):
        """Return the pair ``(Linear_1(x), Linear_2(x))``."""
        first = self.Linear_1(x)
        second = self.Linear_2(x)
        return first, second

# Log the graph of a model that returns a tuple of two tensors.
with SummaryWriter(comment='MultipleOutput') as w:
    w.add_graph(MultipleOutput(), dummy_input, True)


class MultipleOutput_shared(nn.Module):
    """Module that applies the same linear layer twice to one input."""

    def __init__(self):
        super(MultipleOutput_shared, self).__init__()
        self.Linear_1 = nn.Linear(3, 5)

    def forward(self, x):
        """Return two separate evaluations of ``Linear_1(x)``."""
        first = self.Linear_1(x)
        second = self.Linear_1(x)
        return first, second

# Log the graph of a model that reuses one layer for both of its outputs.
with SummaryWriter(comment='MultipleOutput_shared') as w:
    w.add_graph(MultipleOutput_shared(), dummy_input, True)


class SimpleModel(nn.Module):
    """Parameter-free module that multiplies its input by the constant 2."""

    def __init__(self):
        super(SimpleModel, self).__init__()

    def forward(self, x):
        """Return ``x * 2``."""
        doubled = x * 2
        return doubled


model = SimpleModel()
# Rebinds dummy_input to a rank-3 tensor for the parameter-free model.
dummy_input = (torch.zeros(1, 2, 3),)

# Log the graph of a model that only multiplies by a constant.
with SummaryWriter(comment='constantModel') as w:
    w.add_graph(model, dummy_input, True)


def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 ``nn.Conv2d`` with padding 1 and the given stride."""
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)


class BasicBlock(nn.Module):
    """Two conv3x3 + batch-norm layers with an identity skip connection.

    ``downsample`` is accepted but not used by this demo version, so the
    residual addition only works when the input and output shapes match.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        # ReLU is applied functionally in forward(), not as a submodule.
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.stride = stride

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + x)``."""
        residual = x

        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # In-place add, as in the original block.
        out += residual
        return F.relu(out)


# From here on dummy_input is a bare tensor, wrapped in a tuple at call time.
dummy_input = torch.rand(1, 3, 224, 224)

# BasicBlock(3, 3): equal in/out planes so the identity skip connection fits.
with SummaryWriter(comment='basicblock') as w:
    model = BasicBlock(3, 3)
    w.add_graph(model, (dummy_input, ), verbose=True)


class Net1(nn.Module):
    """Small conv net producing softmax probabilities over 10 classes.

    The flatten to 320 features matches (N, 1, 28, 28) inputs
    (20 channels * 4 * 4 after the two conv/pool stages).
    """

    def __init__(self):
        super(Net1, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)
        self.bn = nn.BatchNorm2d(20)

    def forward(self, x):
        """Return class probabilities of shape (N, 10)."""
        pooled = F.max_pool2d(self.conv1(x), 2)
        # relu(x) + relu(-x) is an absolute value built from two ReLUs.
        rectified = F.relu(pooled) + F.relu(-pooled)
        features = F.relu(
            F.max_pool2d(self.conv2_drop(self.conv2(rectified)), 2))
        flat = self.bn(features).view(-1, 320)
        hidden = F.dropout(F.relu(self.fc1(flat)), training=self.training)
        return F.softmax(self.fc2(hidden), dim=1)


class Net2(nn.Module):
    """Small conv net producing log-softmax scores over 10 classes.

    Same layout as a classic MNIST example: two conv/pool stages, then two
    fully connected layers. The flatten to 320 matches (N, 1, 28, 28) inputs.
    """

    def __init__(self):
        super(Net2, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return log-probabilities of shape (N, 10)."""
        stage1 = F.relu(F.max_pool2d(self.conv1(x), 2))
        stage2 = F.relu(
            F.max_pool2d(self.conv2_drop(self.conv2(stage1)), 2))
        flat = stage2.view(-1, 320)
        hidden = F.dropout(F.relu(self.fc1(flat)), training=self.training)
        return F.log_softmax(self.fc2(hidden), dim=1)


# Batch of 13 single-channel 28x28 inputs, shared by Net1, Net2 and the
# SiameseNetwork example further down.
dummy_input = Variable(torch.rand(13, 1, 28, 28))

model = Net1()
with SummaryWriter(comment='Net1') as w:
    w.add_graph(model, (dummy_input, ))

model = Net2()
with SummaryWriter(comment='Net2') as w:
    w.add_graph(model, (dummy_input, ))


class SiameseNetwork(nn.Module):
    """Runs two inputs through one shared ``Net1`` branch."""

    def __init__(self):
        super(SiameseNetwork, self).__init__()
        self.cnn1 = Net1()

    def forward_once(self, x):
        """Evaluate the shared branch on a single input."""
        return self.cnn1(x)

    def forward(self, input1, input2):
        """Return the branch outputs for ``input1`` and ``input2``, in order."""
        first = self.forward_once(input1)
        second = self.forward_once(input2)
        return first, second

# The same dummy input is fed to both branches of the siamese model.
model = SiameseNetwork()
with SummaryWriter(comment='SiameseNetwork') as w:
    w.add_graph(model, (dummy_input, dummy_input))


# NOTE(review): torch.Tensor(*sizes) presumably allocates without
# initializing the values; only the shape matters for add_graph here.
dummy_input = torch.Tensor(1, 3, 224, 224)

# Log graphs for several torchvision architectures (constructed with their
# default arguments).
with SummaryWriter(comment='alexnet') as w:
    model = torchvision.models.alexnet()
    w.add_graph(model, (dummy_input, ))

with SummaryWriter(comment='vgg19') as w:
    model = torchvision.models.vgg19()
    w.add_graph(model, (dummy_input, ))

with SummaryWriter(comment='densenet121') as w:
    model = torchvision.models.densenet121()
    w.add_graph(model, (dummy_input, ))

# `model` stays bound to this resnet18 until the end of the file.
with SummaryWriter(comment='resnet18') as w:
    model = torchvision.models.resnet18()
    w.add_graph(model, (dummy_input, ))


class RNN(nn.Module):
    """Single-step recurrent cell conditioned on a category vector.

    NOTE: ``__init__`` reads the module-level global ``n_categories`` to size
    the input layers, so that name must be defined before instantiation.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        # Width of cat((category, input, hidden), 1) as built in forward().
        combined_size = n_categories + input_size + hidden_size
        self.i2h = nn.Linear(combined_size, hidden_size)
        self.i2o = nn.Linear(combined_size, output_size)
        self.o2o = nn.Linear(hidden_size + output_size, output_size)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, category, input, hidden):
        """Run one step; return ``(log_probs, new_hidden, input)``."""
        input_combined = torch.cat((category, input, hidden), 1)
        new_hidden = self.i2h(input_combined)
        raw_output = self.i2o(input_combined)
        merged = torch.cat((new_hidden, raw_output), 1)
        log_probs = self.softmax(self.dropout(self.o2o(merged)))
        return log_probs, new_hidden, input

    def initHidden(self):
        """Return an all-zero hidden state of shape (1, hidden_size)."""
        return torch.zeros(1, self.hidden_size)


n_letters = 100
n_hidden = 128
# n_categories is also read as a global inside RNN.__init__.
n_categories = 10
# Note: n_categories is passed as RNN's `output_size` argument here.
rnn = RNN(n_letters, n_hidden, n_categories)
cat = torch.Tensor(1, n_categories)
dummy_input = torch.Tensor(1, n_letters)
hidden = torch.Tensor(1, n_hidden)


# Run one step eagerly first; `hidden` is rebound to the returned hidden state
# and then used as the graph input below.
out, hidden, input = rnn(cat, dummy_input, hidden)
with SummaryWriter(comment='RNN') as w:
    w.add_graph(rnn, (cat, dummy_input, hidden), verbose=False)


lstm = torch.nn.LSTM(3, 3)  # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5

# initialize the hidden state.
hidden = (torch.randn(1, 1, 3),
          torch.randn(1, 1, 3))
# Step through the sequence one element at a time, carrying the state along.
for i in inputs:
    out, hidden = lstm(i.view(1, 1, -1), hidden)

with SummaryWriter(comment='lstm') as w:
    w.add_graph(lstm, (torch.randn(1, 3).view(1, 1, -1), hidden), verbose=True)


# Negative example: add_graph is expected to raise for a mismatched input.
# `model` is whatever was last bound above (the torchvision resnet18), and it
# is given a 1-channel input despite the 'basicblock_error' comment tag.
import pytest
print('expect error here:')
with pytest.raises(Exception) as e_info:
    dummy_input = torch.rand(1, 1, 224, 224)
    with SummaryWriter(comment='basicblock_error') as w:
        w.add_graph(model, (dummy_input, ))  # error


================================================
FILE: tensorboardX/examples/demo_hparams.py
================================================
from tensorboardX import SummaryWriter
import time
import random


# Grid of hyperparameter values; the sweep below tries every combination.
hparam = {'lr': [0.1, 0.01, 0.001],
          'bsize': [1, 2, 4],
          'n_hidden': [100, 200]}

# Names of the metrics logged per run (not referenced again in this script).
metrics = {'accuracy', 'loss'}

def train(lr, bsize, n_hidden):
    """Fake training run: ignore the arguments and return random results.

    Returns a pair ``(x, 5 * x)`` where ``x`` is drawn from
    ``random.random()``.
    """
    score = random.random()
    return score, score * 5

# Sweep the full grid and log one hparams record per combination.
with SummaryWriter() as w:
    for lr in hparam['lr']:
        for bsize in hparam['bsize']:
            for n_hidden in hparam['n_hidden']:
                accu, loss = train(lr, bsize, n_hidden)

                hparam_dict = {'lr': lr, 'bsize': bsize, 'n_hidden': n_hidden}
                metric_dict = {'accuracy': accu, 'loss': loss}
                w.add_hparams(hparam_dict, metric_dict)


================================================
FILE: tensorboardX/examples/demo_matplotlib.py
================================================
import matplotlib.pyplot as plt
# Switch to the 'agg' backend before creating any figure.
plt.switch_backend('agg')

fig = plt.figure()

# Two red circles of radius 0.2, side by side.
c1 = plt.Circle((0.2, 0.5), 0.2, color='r')
c2 = plt.Circle((0.8, 0.5), 0.2, color='r')

ax = plt.gca()
ax.add_patch(c1)
ax.add_patch(c2)
plt.axis('scaled')


# Log the finished figure under the tag 'matplotlib'.
from tensorboardX import SummaryWriter
writer = SummaryWriter()
writer.add_figure('matplotlib', fig)
writer.close()


================================================
FILE: tensorboardX/examples/demo_multiple_embedding.py
================================================
import math
import numpy as np
from tensorboardX import SummaryWriter


def main():
    """Log sin/cos/tan embeddings for 16 phase shifts.

    Each embedding has 3600 rows and 5 columns; column ``k`` is the function
    applied to ``shift + degrees * k * pi / 180`` for k in (2, 3, 5, 7, 11).
    The three tags are written in the order sin, cos, tan for every epoch.
    """
    num_points = 3600
    num_epochs = 16
    multipliers = (2, 3, 5, 7, 11)
    curves = (("sin", np.sin), ("cos", np.cos), ("tan", np.tan))

    degrees = np.linspace(0, num_points * math.pi / 180.0, num_points)
    degrees = degrees.reshape(num_points, 1)
    labels = ["%d" % idx for idx in range(num_points)]

    with SummaryWriter() as writer:
        # Maybe make a bunch of data that's always shifted in some
        # way, and that will be hard for PCA to turn into a sphere?

        for epoch in range(num_epochs):
            shift = epoch * 2 * math.pi / 16.0
            for tag, func in curves:
                columns = [
                    func(shift + degrees * factor * math.pi / 180.0)
                    for factor in multipliers
                ]
                writer.add_embedding(
                    mat=np.concatenate(columns, axis=1),
                    metadata=labels,
                    tag=tag,
                    global_step=epoch)


# Only write the embeddings when executed as a script, not on import.
if __name__ == "__main__":
    main()

# tensorboard --logdir runs
# Under "Projector", you should see
#  48 tensors found, named
#     cos:cos-00000 to cos:cos-00015
#     sin:sin-00000 to sin:sin-00015
#     tan:tan-00000 to tan:tan-00015


================================================
FILE: tensorboardX/examples/demo_nvidia_smi.py
================================================
"""
write gpu and (gpu) memory usage of nvidia cards as scalar
"""
from tensorboardX import SummaryWriter
import time
import torch
# Only a failed import is handled here; errors raised by nvmlInit() or by the
# device-handle lookup are not caught.
try:
    import nvidia_smi
    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)  # gpu0
except ImportError:
    print('This demo needs nvidia-ml-py or nvidia-ml-py3')
    exit()


with SummaryWriter() as writer:
    # Keep references to the allocated tensors so they are not freed.
    x = []
    for n_iter in range(50):
        x.append(torch.Tensor(1000, 1000).cuda())
        # Log the GPU utilization value reported for gpu0.
        res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
        writer.add_scalar('nv/gpu', res.gpu, n_iter)
        # Log the used-memory value reported for gpu0.
        res = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
        writer.add_scalar('nv/gpu_mem', res.used, n_iter)
        time.sleep(0.1)


================================================
FILE: tensorboardX/examples/demo_onnx.py
================================================
from tensorboardX import SummaryWriter

import subprocess
# Location of the example MNIST model archive.
zoo_address = 'https://onnxzoo.blob.core.windows.net/models/opset_8/mnist/mnist.tar.gz'

# Download with wget (-nc) and abort if the download fails.
res = subprocess.call(['wget', '-nc', zoo_address])
assert res == 0, 'cannot download example onnx model from the zoo'
# Extract only mnist/model.onnx into examples/.
# NOTE(review): the tar exit status is stored in `res` but never checked.
res = subprocess.call(['tar', 'xf', 'mnist.tar.gz', '-C', 'examples/', 'mnist/model.onnx'])


with SummaryWriter() as w:
    w.add_onnx_graph('examples/mnist/model.onnx')
    # w.add_onnx_graph('/Users/dexter/Downloads/resnet50/model.onnx')


================================================
FILE: tensorboardX/examples/demo_purge.py
================================================
from time import sleep
from tensorboardX import SummaryWriter

# First run: log the identity curve (value == step) for steps 0..99.
with SummaryWriter(logdir='runs/purge') as w:
    for i in range(100):
        w.add_scalar('purgetest', i, i)

sleep(1.0)

# Second run into the same logdir, restarting from step 42.
with SummaryWriter(logdir='runs/purge', purge_step=42) as w:
    # Events 42~99 of the first run are removed (inclusive) and replaced
    # by the constant value 42 logged below.
    for i in range(42, 100):
        w.add_scalar('purgetest', 42, i)


================================================
FILE: tensorboardX/setup.cfg
================================================
[metadata]
license_file = LICENSE

[bdist_wheel]
universal = 1


================================================
FILE: tensorboardX/setup.py
================================================
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import subprocess
import os
from setuptools import setup, find_packages
from setuptools.command.develop import develop
from setuptools.command.install import install

# Dynamically compile protos
def compileProtoBuf():
    """Run ``./compile.sh`` with bash to generate the protobuf modules.

    Raises:
        RuntimeError: if the script exits with a non-zero status. An explicit
            raise is used instead of ``assert`` so the check still runs when
            Python is started with ``-O`` (which strips assert statements).
    """
    res = subprocess.call(['bash', './compile.sh'])
    if res != 0:
        raise RuntimeError(
            'cannot compile protobuf (compile.sh exited with status {})'.format(res))

class PostDevelopCommand(develop):
    """`develop` command that compiles the protobuf files first.

    Despite the "Post" in the name, compileProtoBuf() runs before the
    standard develop step.
    """
    def run(self):
        compileProtoBuf()
        # Explicit base-class call to run the standard develop step.
        develop.run(self)


class PostInstallCommand(install):
    """`install` command that compiles protos and pre-installs runtime deps.

    Before the standard install step it runs compileProtoBuf() and then makes
    a best-effort attempt to install ``protobuf``, ``numpy`` and ``six``.
    """
    def run(self):
        import sys

        compileProtoBuf()
        # Use the pip of the interpreter that is running setup.py, not
        # whichever `pip` is first on PATH, and avoid going through a shell.
        # The exit status is deliberately ignored (best effort), as before.
        subprocess.call(
            [sys.executable, '-m', 'pip', 'install', 'protobuf', 'numpy', 'six'])
        install.run(self)

# The changelog is used as the package's long description.
with open('HISTORY.rst') as history_file:
    history = history_file.read()

# Set to True when building a release for PyPI (see the checklist at the end
# of this file); this skips the git-sha suffix and the __init__.py rewrite.
preparing_PyPI_package = False
version_git = version = '1.8'

if not preparing_PyPI_package:
    # In a git checkout, append the short commit sha as a local version label.
    if os.path.exists('.git'):
        sha = subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode('ascii').strip()
        version_git = version_git + '+' + sha[:7]

    # NOTE: opened in append mode, so every run of setup.py adds another
    # __version__ line to the package's __init__.py.
    with open('tensorboardX/__init__.py', 'a') as f:
        f.write('\n__version__ = "{}"\n'.format(version_git))

# Runtime dependencies.
requirements = [
    'numpy',
    'protobuf >= 3.6.1',
    'six',
]

# Extra packages needed only to run the test suite.
test_requirements = [
    'pytest',
    'matplotlib',
    'crc32c',
]

setup(
    name='tensorboardX',
    version=version_git,
    description='TensorBoardX lets you watch Tensors Flow without Tensorflow',
    long_description=history,
    author='Tzu-Wei Huang',
    author_email='huang.dexter@gmail.com',
    url='https://github.com/lanpa/tensorboardX',
    packages=['tensorboardX'],
    include_package_data=True,
    install_requires=requirements,
    license='MIT license',
    zip_safe=False,
    classifiers=[
        'Development Status :: 2 - Pre-Alpha',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: MIT License',
        'Natural Language :: English',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
    ],
    # Custom commands that compile the protobuf files before the normal step.
    cmdclass={
        'develop': PostDevelopCommand,
        'install': PostInstallCommand,
    },
    test_suite='tests',
    tests_require=test_requirements
)


# checklist: update History.rst readme.md
# change preparing_PyPI_package to True
# remove __version__ = "1.old" in __init__.py
# commit
# add tag
# python setup.py sdist bdist_wheel --universal
# twine upload dist/*
# push commit

================================================
FILE: tensorboardX/tensorboardX/__init__.py
================================================
"""A module for visualization with tensorboard
"""

from .record_writer import RecordWriter
from .torchvis import TorchVis
from .writer import FileWriter, SummaryWriter

__version__ = "1.8"  # setup.py may append a newer __version__ line to this file


================================================
FILE: tensorboardX/tensorboardX/beholder/__init__.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .beholder import Beholder
from .beholder import BeholderHook


================================================
FILE: tensorboardX/tensorboardX/beholder/beholder.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from ..proto.summary_pb2 import Summary
from ..proto.summary_pb2 import SummaryMetadata
from ..proto.tensor_pb2 import TensorProto
from ..proto.tensor_shape_pb2 import TensorShapeProto

import os
import time

import numpy as np
# import tensorflow as tf

# from tensorboard.plugins.beholder import im_util
# from . import im_util
from .file_system_tools import read_pickle,\
    write_pickle, write_file
from .shared_config import PLUGIN_NAME, TAG_NAME,\
    SUMMARY_FILENAME, DEFAULT_CONFIG, CONFIG_FILENAME, SUMMARY_COLLECTION_KEY_NAME, SECTION_INFO_FILENAME
from . import video_writing
# from .visualizer import Visualizer


class Beholder(object):
    """Writes frames, section statistics and optional video for Beholder.

    All files are written under ``<logdir>/plugins/<PLUGIN_NAME>``. The
    behaviour of each ``update()`` call is driven by a pickled config file in
    that directory, which is re-read whenever its modification time changes.
    """

    def __init__(self, logdir):
        """Set up the plugin directory, the video writer and a default config.

        Args:
            logdir: base log directory; the plugin directory is created
                beneath it if needed.
        """
        self.PLUGIN_LOGDIR = logdir + '/plugins/' + PLUGIN_NAME

        self.is_recording = False
        self.video_writer = video_writing.VideoWriter(
            self.PLUGIN_LOGDIR,
            outputs=[video_writing.FFmpegVideoOutput, video_writing.PNGVideoOutput])

        self.last_image_shape = []
        self.last_update_time = time.time()
        self.config_last_modified_time = -1
        self.previous_config = dict(DEFAULT_CONFIG)

        config_path = '{}/{}'.format(self.PLUGIN_LOGDIR, CONFIG_FILENAME)
        if not os.path.exists(config_path):
            # The directory may already exist without a config file in it;
            # os.makedirs would raise in that case, so create it only if
            # it is missing.
            if not os.path.isdir(self.PLUGIN_LOGDIR):
                os.makedirs(self.PLUGIN_LOGDIR)
            write_pickle(DEFAULT_CONFIG, config_path)

        # self.visualizer = Visualizer(self.PLUGIN_LOGDIR)
    def _get_config(self):
        '''Returns the current config, re-reading it when the file changed.

        The config file is read again only when its modification time differs
        from the one seen on the previous call; otherwise the cached config is
        returned. The previous config is passed to read_pickle as `default`.
        '''
        filename = '{}/{}'.format(self.PLUGIN_LOGDIR, CONFIG_FILENAME)
        modified_time = os.path.getmtime(filename)

        if modified_time != self.config_last_modified_time:
            config = read_pickle(filename, default=self.previous_config)
            self.previous_config = config
        else:
            config = self.previous_config

        self.config_last_modified_time = modified_time
        return config

    def _write_summary(self, frame):
        '''Writes the frame to disk as a tensor summary.

        Args:
            frame: array with at least three dimensions; the first three
                sizes are recorded as the tensor shape and the flattened
                values are stored as floats.
        '''
        path = '{}/{}'.format(self.PLUGIN_LOGDIR, SUMMARY_FILENAME)
        smd = SummaryMetadata()
        tensor = TensorProto(
            dtype='DT_FLOAT',
            float_val=frame.reshape(-1).tolist(),
            tensor_shape=TensorShapeProto(
                dim=[TensorShapeProto.Dim(size=frame.shape[0]),
                     TensorShapeProto.Dim(size=frame.shape[1]),
                     TensorShapeProto.Dim(size=frame.shape[2])]
            )
        )
        summary = Summary(value=[Summary.Value(
            tag=TAG_NAME, metadata=smd, tensor=tensor)]).SerializeToString()
        write_file(summary, path)

    @staticmethod
    def stats(tensor_and_name):
        '''Builds one statistics dict per (array, name) pair.

        Args:
            tensor_and_name: iterable of (img, name) pairs; each img must
                have at least three dimensions.

        Returns:
            A list of dicts with keys 'height', 'max', 'mean', 'min', 'name',
            'range' and 'shape'. All values except 'height' and 'name' are
            converted to strings.
        '''
        imgstats = []
        for (img, name) in tensor_and_name:
            immax = img.max()
            immin = img.min()
            imgstats.append(
                {
                    'height': img.shape[0],
                    'max': str(immax),
                    'mean': str(img.mean()),
                    'min': str(immin),
                    'name': name,
                    'range': str(immax - immin),
                    'shape': str((img.shape[1], img.shape[2]))
                })
        return imgstats

    def _get_final_image(self, config, trainable=None, arrays=None, frame=None):
        '''Selects and assembles the image to display for this update.

        Args:
            config: dict whose 'values' entry is one of 'frames', 'arrays' or
                'trainable_variables'.
            trainable: list of (array, name) pairs, used for
                'trainable_variables'.
            arrays: list of (array, name) pairs, used for 'arrays'.
            frame: a single array, used for 'frames'.

        Returns:
            The selected image; 2-D images get a trailing axis of size 1.

        Raises:
            ValueError: if config['values'] is not a recognised mode.
        '''
        if config['values'] == 'frames':
            final_image = frame
        elif config['values'] == 'arrays':
            final_image = np.concatenate([arr for arr, _ in arrays])
            stat = self.stats(arrays)
            write_pickle(
                stat, '{}/{}'.format(self.PLUGIN_LOGDIR, SECTION_INFO_FILENAME))
        elif config['values'] == 'trainable_variables':
            final_image = np.concatenate([arr for arr, _ in trainable])
            stat = self.stats(trainable)
            write_pickle(
                stat, '{}/{}'.format(self.PLUGIN_LOGDIR, SECTION_INFO_FILENAME))
        else:
            # Fail with a clear message instead of an UnboundLocalError below.
            raise ValueError(
                "unknown config['values']: {!r}".format(config['values']))
        if len(final_image.shape) == 2:  # Map grayscale images to 3D tensors.
            final_image = np.expand_dims(final_image, -1)

        return final_image

    def _enough_time_has_passed(self, FPS):
        '''For limiting how often frames are computed.

        Returns False when FPS is 0; otherwise True once at least 1/FPS
        seconds have elapsed since `last_update_time`.
        '''
        if FPS == 0:
            return False
        else:
            earliest_time = self.last_update_time + (1.0 / FPS)
            return time.time() >= earliest_time

    def _update_frame(self, trainable, arrays, frame, config):
        '''Builds the final image, writes it as a summary and returns it.'''
        final_image = self._get_final_image(config, trainable, arrays, frame)
        self._write_summary(final_image)
        self.last_image_shape = final_image.shape

        return final_image

    def _update_recording(self, frame, config):
        '''Adds a frame to the current video output.

        Recording starts when config['is_recording'] becomes true and the
        video is finished when it becomes false again.
        '''
        # pylint: disable=redefined-variable-type
        should_record = config['is_recording']

        if should_record:
            if not self.is_recording:
                self.is_recording = True
                # Interpolate the output name; passing it to print() as a
                # second argument would show the raw '%s' placeholder.
                print('Starting recording using %s' %
                      self.video_writer.current_output().name())
            self.video_writer.write_frame(frame)
        elif self.is_recording:
            self.is_recording = False
            self.video_writer.finish()
            print('Finished recording')

    # TODO: blanket try and except for production? I don't want someone's
    #       script to die after weeks of running because of a visualization.
    def update(self, trainable=None, arrays=None, frame=None):
        '''Creates a frame and writes it to disk.

        Args:
            trainable: a list of namedtuple (tensors, name).
            arrays: a list of namedtuple (tensors, name).
            frame: a single array, used when the config selects 'frames'.
        '''

        new_config = self._get_config()
        # NOTE: the `True or` short-circuit disables the FPS rate limit, so
        # every call produces a frame.
        if True or self._enough_time_has_passed(self.previous_config['FPS']):
            # self.visualizer.update(new_config)
            self.last_update_time = time.time()
            final_image = self._update_frame(
                trainable, arrays, frame, new_config)
            self._update_recording(final_image, new_config)

    ##############################################################################
    # @staticmethod
    # def gradient_helper(optimizer, loss, var_list=None):
    #   '''A helper to get the gradients out at each step.

    #   Args:
    #     optimizer: the optimizer op.
    #     loss: the op that computes your loss value.

    #   Returns: the gradient tensors and the train_step op.
    #   '''
    #   if var_list is None:
    #     var_list = tf.trainable_variables()

    #   grads_and_vars = optimizer.compute_gradients(loss, var_list=var_list)
    #   grads = [pair[0] for pair in grads_and_vars]

    #   return grads, optimizer.apply_gradients(grads_and_vars)


# Placeholder: currently an empty class. The commented-out code below is a
# TensorFlow SessionRunHook version, kept as reference until a PyTorch
# implementation is written.
class BeholderHook():
    pass
    # """SessionRunHook implementation that runs Beholder every step.

    # Convenient when using tf.train.MonitoredSession:
    # ```python
    # beholder_hook = BeholderHook(LOG_DIRECTORY)
    # with MonitoredSession(..., hooks=[beholder_hook]) as sess:
    #   sess.run(train_op)
    # ```
    # """
    # def __init__(self, logdir):
    #   """Creates new Hook instance

    #   Args:
    #     logdir: Directory where Beholder should write data.
    #   """
    #   self._logdir = logdir
    #   self.beholder = None

    # def begin(self):
    #   self.beholder = Beholder(self._logdir)

    # def after_run(self, run_context, unused_run_values):
    #   self.beholder.update(run_context.session)


================================================
FILE: tensorboardX/tensorboardX/beholder/file_system_tools.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import pickle

# import tensorflow as tf
# from google.protobuf import message


def write_file(contents, path, mode='wb'):
    """Write ``contents`` to ``path``, opening the file with ``mode``.

    With the default binary mode, ``contents`` must be bytes.
    """
    handle = open(path, mode)
    try:
        handle.write(contents)
    finally:
        handle.close()


def write_pickle(obj, path):
    """Pickle ``obj`` into the file at ``path``, overwriting existing content."""
    handle = open(path, 'wb')
    try:
        pickle.dump(obj, handle)
    finally:
        handle.close()


def read_pickle(path, default=None):
    """Load and return the pickled object stored at ``path``.

    Args:
        path: Path of the pickle file to read.
        default: Value returned when the file cannot be opened or decoded.
            When left as ``None`` the underlying error is re-raised.

    Returns:
        The unpickled object, or ``default`` when reading failed and a
        non-``None`` default was supplied.

    Raises:
        IOError, OSError, EOFError, ValueError, pickle.UnpicklingError: when
            reading fails and ``default`` is ``None``.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only use this on
    # files written by a trusted source.
    try:
        with open(path, 'rb') as pickle_file:
            return pickle.load(pickle_file)
    except (IOError, OSError, EOFError, ValueError, pickle.UnpicklingError):
        if default is None:
            raise
        return default


================================================
FILE: tensorboardX/tensorboardX/beholder/shared_config.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# String identifiers shared by the Beholder plugin code.
PLUGIN_NAME = 'beholder'
TAG_NAME = 'beholder-frame'
SUMMARY_COLLECTION_KEY_NAME = 'summaries_beholder'

# File names used by the plugin.
# NOTE(review): presumably resolved relative to the plugin's log directory;
# confirm against the callers.
SUMMARY_FILENAME = 'frame.summary'
CONFIG_FILENAME = 'config.pkl'
SECTION_INFO_FILENAME = 'section-info.pkl'

# Configuration values used as defaults.
DEFAULT_CONFIG = dict(
    values='trainable_variables',
    mode='variance',
    scaling='layer',
    window_size=15,
    FPS=10,
    is_recording=False,
    show_all=False,
    colormap='magma',
)

# Frame geometry. NOTE(review): units are presumably pixels; confirm.
SECTION_HEIGHT = 128
IMAGE_WIDTH = 512 + 256

# NOTE(review): presumably the gray level of TensorBoard's background; confirm.
TB_WHITE = 245


================================================
FILE: tensorboardX/tensorboardX/beholder/video_writing.py
================================================
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc
import os
import subprocess
import time

import numpy as np


class VideoWriter(object):
    """Video file writer that can use different output types.

    Each VideoWriter instance writes video files to a specified directory, using
    the first available VideoOutput from the provided list. When an output
    fails with ``IOError``/``OSError`` the writer falls back to the next one.
    """

    def __init__(self, directory, outputs):
        """Create a writer.

        Args:
            directory: Directory handed to each output's constructor.
            outputs: Output classes, in order of preference. Each must offer
                ``available()`` and ``name()`` and be constructible as
                ``output(directory, frame_shape)``.

        Raises:
            IOError: if none of ``outputs`` reports itself as available.
        """
        self.directory = directory
        # Filter to the available outputs
        self.outputs = [out for out in outputs if out.available()]
        if not self.outputs:
            raise IOError('No available video outputs')
        self.output_index = 0
        self.output = None
        self.frame_shape = None

    def current_output(self):
        """Return the output class currently selected by ``output_index``."""
        return self.outputs[self.output_index]

    def write_frame(self, np_array):
        """Write one frame, starting a new video when the frame shape changes.

        Args:
            np_array: Frame to write; only its ``shape`` attribute is read
                here, the object itself is passed on to the output.

        Raises:
            IOError: if every remaining output failed to take the frame.
        """
        # Reset whenever we encounter a new frame shape.
        if self.frame_shape != np_array.shape:
            if self.output:
                self.output.close()
            self.output = None
            self.frame_shape = np_array.shape
            print('Starting video with frame shape: {}'.format(
                self.frame_shape))
        # Write the frame, advancing across output types as necessary.
        # The loop variable is the attribute itself, so the index of the
        # output that finally worked (or the last one tried) is remembered.
        original_output_index = self.output_index
        for self.output_index in range(original_output_index, len(self.outputs)):
            try:
                if not self.output:
                    new_output = self.outputs[self.output_index]
                    if self.output_index > original_output_index:
                        print('Falling back to video output {}'.format(
                            new_output.name()))
                    self.output = new_output(self.directory, self.frame_shape)
                self.output.emit_frame(np_array)
                return
            except (IOError, OSError) as e:
                print('Video output type {} not available: {}'.format(
                    self.current_output().name(), str(e)))
                if self.output:
                    self.output.close()
                self.output = None
        raise IOError('Exhausted available video outputs')

    def finish(self):
        """Close the active output and reset the writer for a fresh video."""
        if self.output:
            self.output.close()
        self.output = None
        self.frame_shape = None
        # Reconsider failed outputs when video is manually restarted.
        self.output_index = 0


class VideoOutput(abc.ABCMeta(str('_VideoOutputBase'), (object,), {})):
    """Base class for video outputs supported by VideoWriter.

    The base is created by calling ``abc.ABCMeta`` directly so that the
    abstract methods are enforced on both Python 2 and Python 3 (a
    ``__metaclass__`` class attribute is silently ignored on Python 3).
    Subclasses must implement ``emit_frame`` and ``close`` and should override
    ``available``.
    """

    # Would add @abc.abstractmethod in python 3.3+
    @classmethod
    def available(cls):
        """Return whether this output type can be used; subclasses override.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError()

    @classmethod
    def name(cls):
        """Return the class name, used in VideoWriter's status messages."""
        return cls.__name__

    @abc.abstractmethod
    def emit_frame(self, np_array):
        """Write a single frame; must be implemented by subclasses."""
        raise NotImplementedError()

    @abc.abstractmethod
    def close(self):
        """Finish the output; must be implemented by subclasses."""
        raise NotImplementedError()


class PNGVideoOutput(VideoOutput):
    """Video output that stores every frame as a separate PNG file."""

    @classmethod
    def available(cls):
        """This output is always reported as available."""
        return True

    def __init__(self, directory, frame_shape):
        """Create a fresh, timestamped frame directory below ``directory``.

        Args:
            directory: Parent directory for the frame directory.
            frame_shape: Ignored by this output.
        """
        del frame_shape  # unused
        frames_suffix = '/video-frames-{}'.format(time.time())
        self.directory = directory + frames_suffix
        self.frame_num = 0
        os.makedirs(self.directory)

    def emit_frame(self, np_array):
        """Save ``np_array`` (cast to uint8) as the next numbered PNG."""
        basename = '/{:05}.png'.format(self.frame_num)
        target = self.directory + basename
        pixels = np_array.astype(np.uint8)
        self._write_image(pixels, target)
        self.frame_num += 1

    def _write_image(self, im, filename):
        """Write the array ``im`` to ``filename`` using PIL."""
        # Imported lazily so PIL is only needed once a frame is written.
        from PIL import Image
        image = Image.fromarray(im)
        image.save(filename)

    def close(self):
        """Nothing to release; each frame is written when it is emitted."""
        pass


class FFmpegVideoOutput(VideoOutput):
    """Video output implemented by streaming raw frames to FFmpeg.

    Frames are piped to an ``ffmpeg`` subprocess that encodes them with the
    lossless VP9 codec into a ``.webm`` file.
    """

    @classmethod
    def available(cls):
        """Return True when ``ffmpeg -version`` runs successfully."""
        # Silently check if ffmpeg is available.
        try:
            with open(os.devnull, 'wb') as devnull:
                subprocess.check_call(
                    ['ffmpeg', '-version'], stdout=devnull, stderr=devnull)
            return True
        except (OSError, subprocess.CalledProcessError):
            return False

    def __init__(self, directory, frame_shape):
        """Start the ffmpeg subprocess for frames of ``frame_shape``.

        Args:
            directory: Directory in which the timestamped ``.webm`` file is
                created.
            frame_shape: Rank-3 shape; index 0 is used as height, index 1 as
                width and index 2 as channel count (1 or 3).

        Raises:
            ValueError: if ``frame_shape`` is not rank 3 or the channel count
                is neither 1 nor 3.
        """
        self.filename = directory + '/video-{}.webm'.format(time.time())
        if len(frame_shape) != 3:
            raise ValueError(
                'Expected rank-3 array for frame, got %s' % str(frame_shape))
        # Set input pixel format based on channel count.
        if frame_shape[2] == 1:
            pix_fmt = 'gray'
        elif frame_shape[2] == 3:
            pix_fmt = 'rgb24'
        else:
            raise ValueError('Unsupported channel count %d' % frame_shape[2])

        command = [
            'ffmpeg',
            '-y',  # Overwrite output
            # Input options - raw video file format and codec.
            '-f', 'rawvideo',
            '-vcodec', 'rawvideo',
            # Width x height.
            '-s', '%dx%d' % (frame_shape[1], frame_shape[0]),
            '-pix_fmt', pix_fmt,
            '-r', '15',  # Frame rate: arbitrarily use 15 frames per second.
            '-i', '-',  # Use stdin.
            '-an',  # No audio.
            # Output options - use lossless VP9 codec inside .webm.
            '-vcodec', 'libvpx-vp9',
            '-lossless', '1',
            # Using YUV is most compatible, though conversion from RGB skews colors.
            '-pix_fmt', 'yuv420p',
            self.filename
        ]
        PIPE = subprocess.PIPE
        self.ffmpeg = subprocess.Popen(
            command, stdin=PIPE, stdout=PIPE, stderr=PIPE)

    def _handle_error(self):
        """Collect ffmpeg's stderr and print it between separator bars."""
        _, stderr = self.ffmpeg.communicate()
        bar = '=' * 40
        print('Error writing to FFmpeg:\n{}\n{}\n{}'.format(bar, stderr, bar))

    def emit_frame(self, np_array):
        """Send the raw bytes of ``np_array`` to ffmpeg's stdin.

        Raises:
            IOError: if writing to the subprocess fails; ffmpeg's stderr is
                printed first.
        """
        try:
            self.ffmpeg.stdin.write(np_array.tobytes())
            self.ffmpeg.stdin.flush()
        except IOError:
            self._handle_error()
            raise IOError('Failure invoking FFmpeg')

    def close(self):
        """Let ffmpeg finish and drop the process handle.

        Calling ``close`` again after the handle was dropped is a no-op.
        """
        if self.ffmpeg is None:
            return
        if self.ffmpeg.poll() is None:
            # Close stdin and consume and discard stderr/stdout.
            self.ffmpeg.communicate()
        self.ffmpeg = None


================================================
FILE: tensorboardX/tensorboardX/caffe2_graph.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import copy
import logging
import os
import re
import six

from builtins import bytes
from caffe2.proto import caffe2_pb2
from caffe2.python import core, workspace

from .proto.graph_pb2 import GraphDef
from .proto.node_def_pb2 import NodeDef
from .proto.tensor_shape_pb2 import TensorShapeProto


def _make_unique_name(seen, name, min_version=0):
    '''
    Return a variant of 'name' that is not yet in 'seen' and record it there.

    The first candidate is 'name' itself when min_version is 0, otherwise
    '<name>_<min_version>'. While the candidate is already taken, the numeric
    suffix is increased by one.

    Args:
        seen (set): Names already in use; the chosen name is added to it.
        name (string): Base name. Must not be None.
        min_version (number): First suffix to try (0 means "no suffix").

    Returns:
        candidate (string): The unique name that was added to 'seen'.
    '''
    assert name is not None
    version = min_version
    candidate = name if not version else '%s_%d' % (name, version)
    while candidate in seen:
        version += 1
        candidate = '%s_%d' % (name, version)
    seen.add(candidate)
    return candidate


def _rename_tensorflow_style(shapes, blob_name_tracker, ops):
    '''
    Rewrite common Caffe2 name fragments into Tensorflow-like scoped names.
    NOTE: The fragments on both sides are hardcoded here; if either naming
        convention changes, this table has to change as well.

    Args:
        shapes: Dictionary mapping blob names to their shapes/dimensions.
        blob_name_tracker: Dictionary of all unique blob names (with respect to
            some context).
        ops: List of Caffe2 operators

    Returns:
        None. The renaming is delegated to _rename_all(), which receives
            shapes, blob_name_tracker and ops.
    '''
    # Applied strictly in this order; each step works on the previous result.
    substitutions = [
        (re.compile(r"(_w)$"), '/weight'),
        (re.compile(r"(_w_)"), '/weight_'),
        (re.compile(r"(_bn)$"), '/batchnorm'),
        (re.compile(r"(_bn_)"), '/batchnorm_'),
        (re.compile(r"(_b)$"), '/bias'),
        (re.compile(r"(_b_)"), '/bias_'),
        (re.compile(r"(_s)$"), '/scale'),
        (re.compile(r"(_s_)"), '/scale_'),
        (re.compile(r"(_sum)$"), '/sum'),
        (re.compile(r"(_sum_)"), '/sum_'),
        (re.compile(r"(_branch)"), '/branch'),
    ]

    def to_tf_name(name):
        for pattern, replacement in substitutions:
            name = pattern.sub(replacement, name)
        return name
    _rename_all(shapes, blob_name_tracker, ops, to_tf_name)


def _convert_to_ssa(shapes, blob_name_tracker, ops):
    '''
    Rewrite an operator graph into SSA form (i.e. out-of-place), so that each
    (blob, version) pair reported by core.IR gets its own blob name.

    Args:
        shapes: Dictionary mapping blob names to their shapes/dimensions.
        blob_name_tracker: Dictionary of all unique blob names (with respect to
            some context).
        ops: List of Caffe2 operators

    Returns:
        None. Rewrites the inputs/outputs of ops in-place and replaces the
            contents of shapes (and of blob_name_tracker, when it is
            non-empty) with entries keyed by the new names.
    '''
    ir = core.IR(ops)
    used_names = set()
    assigned = {}
    ssa_shapes = {}
    ssa_tracker = {}

    def ssa_name(name, versions):
        assert name in versions
        key = (name, versions[name])
        try:
            return assigned[key]
        except KeyError:
            pass
        # Using `{name}_{version}` unconditionally would work, but a trailing
        # `_0` is avoided, so collisions such as (foo_1, 0) = foo_1 = (foo, 1)
        # have to be ruled out through the shared set of used names.
        # Note: operator names (if any) will be handled later.
        unique = _make_unique_name(used_names, name, min_version=key[1])
        assigned[key] = unique
        # Carry the shape and tracker entries over to the new name.
        if name in shapes:
            ssa_shapes[unique] = shapes[name]
        if blob_name_tracker and name in blob_name_tracker:
            ssa_tracker[unique] = blob_name_tracker[name]
        return unique

    for op, ssa in zip(ops, ir.ssa):
        assert op is ssa.op
        old_inputs = list(op.input)
        old_outputs = list(op.output)
        del op.input[:]
        del op.output[:]
        op.input.extend(ssa_name(blob, ssa.in_versions) for blob in old_inputs)
        op.output.extend(
            ssa_name(blob, ssa.out_versions) for blob in old_outputs)

    shapes.clear()
    shapes.update(ssa_shapes)
    if blob_name_tracker:
        blob_name_tracker.clear()
        blob_name_tracker.update(ssa_tracker)


def _get_blob_names(ops):
    '''
    Collect the distinct input and output blob names of all operators.

    Args:
        ops: List of Caffe2 operators to extract inputs and outputs from

    Returns:
        Dictionary mapping every distinct blob name found in 'ops' to itself.
    '''
    unique = set()
    for op in ops:
        for blob_list in (op.input, op.output):
            unique.update(blob_list)
    return {blob: blob for blob in unique}


def _remap_keys(old_dict, rename_fn):
    '''
    Replace every key of 'old_dict' with 'rename_fn(key)', keeping the values.

    Args:
        old_dict: Dictionary (i.e. containing blob_name -> blob_name
            relationships.)
        rename_fn: Function string -> string for renaming.

    Returns:
        None. Modifies old_dict in-place.
    '''
    # Build the renamed mapping first so old_dict is not mutated while it is
    # being iterated.
    renamed = {}
    for key, value in six.iteritems(old_dict):
        renamed[rename_fn(key)] = value
    old_dict.clear()
    old_dict.update(renamed)


def _rename_all(shapes, blob_name_tracker, ops, rename_fn):
    '''
    Apply 'rename_fn' to every blob name and operator name, keeping the
    resulting names collision-free.

    Args:
        shapes: Dictionary mapping blob names to their shapes/dimensions.
        blob_name_tracker: Dictionary of all unique blob names (with respect to
            some context).
        ops: List of Caffe2 operators
        rename_fn: Function string -> string that specifies how to rename

    Returns:
        None. Modifies shapes, blob_name_tracker and ops in-place using the
            specified 'rename_fn'.
    '''
    taken = set()
    cache = {}

    def unique_rename(name):
        """ rename_fn with memoization and de-duplication of the results.
        """
        if name is None:
            return None
        if name not in cache:
            cache[name] = _make_unique_name(taken, rename_fn(name))
        return cache[name]

    for op in ops:
        old_inputs = list(op.input)
        old_outputs = list(op.output)
        del op.input[:]
        del op.output[:]
        op.input.extend(unique_rename(blob) for blob in old_inputs)
        op.output.extend(unique_rename(blob) for blob in old_outputs)

    _remap_keys(shapes, unique_rename)
    if blob_name_tracker:
        _remap_keys(blob_name_tracker, unique_rename)
    # Operator names (if any) are renamed against a fresh name set so that
    # making them unique happens only once, in
    # _fill_missing_operator_names().
    taken.clear()
    cache.clear()
    for op in ops:
        op.name = unique_rename(op.name)


def _add_gradient_scope(shapes, blob_name_tracker, ops):
    """
    Prefix every operator or blob name that contains "_grad" with a
    "GRADIENTS/" scope.
    Note: breaks graph execution since the blob -> gradient mapping is
    hardcoded.

    Args:
        shapes: Dictionary mapping blob names to their shapes/dimensions.
        blob_name_tracker: Dictionary of all unique blob names (with respect to
            some context).
        ops: List of Caffe2 operators

    Returns:
        None. Modifies shapes, blob_name_tracker and ops in-place by renaming.
    """
    def scoped(name):
        return 'GRADIENTS/{}'.format(name) if '_grad' in name else name
    _rename_all(shapes, blob_name_tracker, ops, scoped)


def _replace_colons(shapes, blob_name_tracker, ops, repl):
    '''
    `:i` has a special meaning in Tensorflow, so every ':' in a name is
    replaced with 'repl' to avoid any possible conflicts.

    Args:
        shapes: Dictionary mapping blob names to their shapes/dimensions.
        blob_name_tracker: Dictionary of all unique blob names (with respect to
            some context).
        ops: List of Caffe2 operators
        repl: String representing the text to replace ':' with. Usually this is
            '$'.

    Returns:
        None. The renaming is delegated to _rename_all(), which receives
            shapes, blob_name_tracker and ops.

    '''
    def without_colons(name):
        stripped = name.replace(':', repl)
        return stripped
    _rename_all(shapes, blob_name_tracker, ops, without_colons)


def _fill_missing_operator_names(ops):
    '''
    Make sure every operator has a unique, non-empty name.
    Unnamed operators get the common directory prefix of their outputs (or of
    their inputs when they have no outputs) joined with their type; operators
    with neither fall back to the bare type. Clashing names get a numeric
    suffix.

    Args:
        ops: List of Caffe2 operators to assign names to.

    Returns:
        None: Modifies 'ops' in-place.
    '''
    # Seed with every blob name so operator names never collide with blobs.
    taken = set()
    for op in ops:
        taken.update(op.input)
        taken.update(op.output)

    for op in ops:
        if op.name:
            base = op.name
        elif op.output or op.input:
            blobs = op.output or op.input
            scope = os.path.commonprefix(
                [os.path.dirname(blob) for blob in blobs])
            base = os.path.join(scope, op.type)
        else:
            base = op.type
        assert(base)
        op.name = _make_unique_name(taken, base)


def _tf_device(device_option):
    '''
    Translate a Caffe2 device option into a device string.

    Args:
        device_option (caffe2_pb2.DeviceOption): DeviceOption protobuf,
            associated to an operator. Only its 'device_type' and 'device_id'
            fields are read here.

    Returns:
        "" when no device_type is set, "/cpu:*" for CPU and MKLDNN devices and
            "/gpu:<device_id>" for CUDA devices.

    Raises:
        Exception: for any other device type.
    '''
    if not device_option.HasField("device_type"):
        return ""
    kind = device_option.device_type
    if kind == caffe2_pb2.CPU or kind == caffe2_pb2.MKLDNN:
        return "/cpu:*"
    if kind == caffe2_pb2.CUDA:
        return "/gpu:{}".format(device_option.device_id)
    raise Exception("Unhandled device", device_option)


def _add_tf_shape(attr_dict, ints):
    '''
    Append a TensorShapeProto built from 'ints' to the '_output_shapes'
    attribute.

    Args:
        attr_dict: Dictionary to update (usually attributes of a Node)
        ints: List of integers representing dimensions of some object.

    Returns:
        None. Modifies attr_dict in-place.
    '''
    shape_proto = TensorShapeProto()
    # One Dim message per dimension, in the order given.
    for size in ints:
        dim = TensorShapeProto.Dim()
        dim.size = size
        shape_proto.dim.extend([dim])
    output_shapes = attr_dict['_output_shapes']
    output_shapes.list.shape.extend([shape_proto])


def _set_tf_attr(attr_dict, arg):
    '''
    Copy the value of 'arg' into attr_dict[arg.name].

    A non-empty 'shape' argument is stored as an output shape. Otherwise the
    first populated field wins, checked in this order: float, int, string,
    list of floats, list of ints, list of strings. If nothing is populated an
    empty string list is stored.

    Args:
        attr_dict: Dictionary to update (usually attributes of a Node)
        arg: Object with name and data fields.

    Returns:
        None. Modifies attr_dict in-place.
    '''
    def as_bytes(value):
        # Non-bytes values are stringified and encoded as UTF-8.
        return value if isinstance(value, bytes) else str(value).encode('utf-8')

    key = arg.name
    if key == 'shape' and arg.ints:
        _add_tf_shape(attr_dict, arg.ints)
    elif arg.HasField("f"):
        attr_dict[key].f = arg.f
    elif arg.HasField("i"):
        attr_dict[key].i = arg.i
    elif arg.HasField("s"):
        attr_dict[key].s = as_bytes(arg.s)
    elif arg.floats:
        attr_dict[key].list.f.extend(arg.floats)
    elif arg.ints:
        attr_dict[key].list.i.extend(arg.ints)
    elif arg.strings:
        attr_dict[key].list.s.extend(as_bytes(item) for item in arg.strings)
    else:
        # The value is an empty list.
        attr_dict[key].list.s.extend([])


def _operator_to_node(shapes, op):
    '''
    Build the TF graph node that represents a Caffe2 operator.

    Args:
        shapes: Dictionary mapping blob names to their shapes/dimensions.
        op: The Caffe2 operator to convert to a TF graph node. It must already
            have a name.

    Returns:
        node: The TF graph node created from op.
    '''
    assert op.name, op
    node = NodeDef()
    node.name = op.name
    node.input.extend(op.input)
    node.op = op.type
    node.device = _tf_device(op.device_option)
    # Output shapes are added in output order and only up to the first output
    # whose shape is unknown.
    for blob in (op.output if shapes else ()):
        if blob not in shapes:
            break
        _add_tf_shape(node.attr, shapes[blob])
    for argument in op.arg:
        _set_tf_attr(node.attr, argument)
    return node


def _operator_to_node_simp(op, inter_blobs, seen):
    '''
    Build the nodes for one operator in the simplified graph view.

    Outputs listed in 'inter_blobs' are ignored. With exactly one remaining
    output, the operator is represented by a single node named after that
    output. With several, each output becomes a 'Blob' node fed by one extra
    node that represents the operator itself. With none, no node is created.

    Args:
        op: Caffe2 operator to convert to node
        inter_blobs: Set of intermediate blobs
        seen: Names that have already been used and are not unique; updated
            in-place with the remaining outputs and any generated op name.

    Returns:
        nodes: Nodes representing 'op' and the outputs of 'op'
    '''
    assert op
    visible = [blob for blob in op.output if blob not in inter_blobs]
    seen.update(visible)

    def op_node(node_name, device):
        # Node carrying the operator's inputs, type, device and arguments.
        node = NodeDef()
        node.name = node_name
        node.input.extend(op.input)
        node.op = op.type
        node.device = device
        for argument in op.arg:
            _set_tf_attr(node.attr, argument)
        return node

    if not visible:
        return []

    if len(visible) == 1:
        # The output name is used as the node name; it is already unique.
        return [op_node(visible[0], _tf_device(op.device_option))]

    # Several outputs: create a name for the operator that is likely unique.
    if op.name:
        base = op.name
    else:
        scope = os.path.commonprefix(list(visible))
        base = os.path.join(scope, op.type)
    assert(base)
    op.name = _make_unique_name(seen, base)
    device = _tf_device(op.device_option)

    # One 'Blob' node per output, each fed by the operator node.
    nodes = []
    for blob in visible:
        blob_node = NodeDef()
        blob_node.name = blob
        blob_node.input.extend([op.name])
        blob_node.op = 'Blob'
        blob_node.device = device
        nodes.append(blob_node)

    # The node for the operator itself comes last.
    nodes.append(op_node(op.name, device))
    return nodes


def _blob_to_node(producing_ops, shapes, name):
    '''
    Build the TF graph node for a blob (an operator input or output).

    Args:
        producing_ops: Dictionary of blob name to list of
            (producing_op, blob_index within producing_op.output) mapping.
        shapes: Dictionary mapping blob names to their shapes/dimensions.
        name: String representing the name of this blob.

    Returns:
        node: The TF graph node created from this blob.
    '''
    assert name
    node = NodeDef()
    node.name = name
    # All (op, output index) pairs that list this blob as an output. See
    # _operators_to_graph_def.
    producers = producing_ops.get(name, [])
    # A blob nobody produces is treated as a TF Placeholder, i.e. a value that
    # is passed in.
    node.op = 'Blob' if len(producers) > 0 else 'Placeholder'
    node.input.extend(
        '%s:%d' % (producer.name, index) for producer, index in producers)
    if producers:
        # The device is only set when all producers agree on it.
        first_device = producers[0][0].device_option
        if all(entry[0].device_option == first_device for entry in producers):
            node.device = _tf_device(first_device)
    if shapes and name in shapes:
        _add_tf_shape(node.attr, shapes[name])
    return node


def _clear_debug_info(ops, perform_clear):
    '''
    Strip the (copious) 'debug_info' field from operators.

    Args:
        ops: List of Caffe2 operators
        perform_clear: Boolean passed from _operators_to_graph_def specifying
            whether to remove the debug information. When False, this function
            does nothing.

    Returns:
        None. Modifies the list of Caffe2 operators in-place and removes the
        'debug_info' field.

    '''
    if perform_clear:
        for op in ops:
            if not op.HasField('debug_info'):
                continue
            op.ClearField('debug_info')


def _check_if_forward(blob):
    '''
    Blobs with names containing '__m' or 'grad' are part of the backward pass.
        This function references facebookresearch/Detectron/detectron/utils/net.py.

    Args:
        blob: The blob to inspect

    Returns:
        Boolean representing whether this blob is part of the forward pass,
            i.e. its name contains neither '__m' nor 'grad'.
    '''
    # Both markers must be absent; with `or`, a blob carrying only one of the
    # two markers would wrongly be kept as a forward blob.
    return blob.find('__m') < 0 and blob.find('grad') < 0


def _check_if_cpu(blob):
    '''
    Tell whether a blob name lacks the '_gpu' prefix.

    Args:
        blob: The blob to inspect

    Returns:
        False when the blob's name starts with '_gpu', True otherwise.
    '''
    if blob.startswith('_gpu'):
        return False
    return True


def _compute_in_out(ops):
    '''
    Split the blobs of a set of operators into inputs, intermediates and
    outputs.

    Args:
        ops: List of Caffe2 operators to look through

    Returns:
        input_blobs: List of blobs that are consumed but never produced
        inter_blobs: Set of blobs that are produced, never consumed, and whose
            name starts with '_'
        output_blobs: List of the remaining blobs that are produced but never
            consumed
    '''
    consumed = set()
    produced = set()
    for op in ops:
        consumed.update(op.input)
        produced.update(op.output)

    input_blobs = list(consumed - produced)
    unconsumed = list(produced - consumed)
    inter_blobs = {blob for blob in unconsumed if blob.startswith('_')}
    output_blobs = [blob for blob in unconsumed if blob not in inter_blobs]

    return input_blobs, inter_blobs, output_blobs


def _filter_ops(ops, filter_fn, perform_filter):
    '''
    Drop the blobs rejected by 'filter_fn', and the operators that end up
    without outputs.

    Args:
        ops: List of Caffe2 operators to filter
        filter_fn: Criteria function; an input/output blob is kept only when
            filter_fn(blob) is truthy.
        perform_filter: Boolean passed from _operators_to_graph_def specifying
            whether to filter operators. When False, 'ops' is returned as-is.

    Returns:
        new_ops: Subset of ops containing a subset of their inputs and outputs.
            Operators that are dropped are left with empty inputs and outputs.
    '''
    if not perform_filter:
        return ops

    kept_ops = []
    for op in ops:
        kept_inputs = [blob for blob in op.input if filter_fn(blob)]
        kept_outputs = [blob for blob in op.output if filter_fn(blob)]
        del op.input[:]
        del op.output[:]

        # Operators without any surviving output are discarded.
        if not kept_outputs:
            continue
        op.input.extend(kept_inputs)
        op.output.extend(kept_outputs)
        kept_ops.append(op)

    return kept_ops


def _operators_to_graph_def(
    shapes,
    ops,
    colon_replacement='$',
    with_ssa=True,
    with_gradient_scope=True,
    blob_name_tracker=None,
    show_simplified=False,
    custom_rename=None
):
    '''
    Main function to convert set of operators to a graph.

    Args:
        shapes: Dictionary mapping blob names to their shapes/dimensions.
            Modified in-place by the renaming passes.
        ops: List of Caffe2 operators, representing some computation graph.
            The operators are modified in-place.
        ### **kwargs (model_to_graph_def, nets_to_graph_def, protos_to_graph_def) ###
        colon_replacement: Symbol to replace ':' with. ':i' in TF has a special
            meaning, so we need to replace it with a non-conflicting symbol.
        with_ssa: Boolean; whether to rename blobs so that each blob is
            produced only once (see _convert_to_ssa).
        with_gradient_scope: Boolean; whether to put names containing '_grad'
            under a 'GRADIENTS/' scope (see _add_gradient_scope).
        blob_name_tracker: Dictionary tracking names of blobs (inputs/outputs
            from operators). When given it is cleared and refilled.
        show_simplified: Whether to show a simplified version of the model graph
            Sets all of the following values:
                clear_debug_info: Boolean representing whether to silence debug
                    info (which can be very verbose)
                show_forward_only: Boolean representing whether to only show
                    blobs involved in the forward pass
                show_cpu_only: Boolean representing whether to only show blobs
                    that are not associated with a gpu
                use_tensorflow_naming: Boolean representing whether to convert
                    some common Caffe2 naming conventions to their Tensorflow
                    counterparts
        custom_rename: Function string -> string that defines a custom
            renaming function to use.

    Returns:
        current_graph: GraphDef representing the computation graph formed by the
            set of operators. Every blob contributes at most one blob node.
    '''
    if blob_name_tracker is not None:
        blob_name_tracker.clear()
    else:
        blob_name_tracker = {}

    blob_name_tracker.update(_get_blob_names(ops))

    _clear_debug_info(ops, show_simplified)  # clear_debug_info
    ops = _filter_ops(ops, _check_if_forward,
                      show_simplified)  # show_forward_only
    ops = _filter_ops(ops, _check_if_cpu, show_simplified)  # show_cpu_only
    if custom_rename:
        _rename_all(shapes, blob_name_tracker, ops, custom_rename)
    if colon_replacement:
        _replace_colons(shapes, blob_name_tracker, ops, colon_replacement)
    if with_ssa:
        _convert_to_ssa(shapes, blob_name_tracker, ops)
    if with_gradient_scope:
        _add_gradient_scope(shapes, blob_name_tracker, ops)
    _fill_missing_operator_names(ops)
    if show_simplified:  # use_tensorflow_naming
        _rename_tensorflow_style(shapes, blob_name_tracker, ops)
    producing_ops = {}
    # Blobs in first-seen order; 'listed_blobs' guards against adding the same
    # blob twice, which would emit several graph nodes with the same name.
    blobs = []
    listed_blobs = set()
    input_blobs, inter_blobs, _ = _compute_in_out(ops)
    current_graph = GraphDef()
    seen = set(input_blobs)
    for op in ops:
        nodes_from_op = _operator_to_node_simp(op, inter_blobs, seen) if \
            show_simplified else \
            [_operator_to_node(shapes, op)]  # .extend() expects an iterable
        current_graph.node.extend(nodes_from_op)
        for input_blob in op.input:
            if input_blob not in listed_blobs:
                listed_blobs.add(input_blob)
                blobs.append(input_blob)
        for i, output_blob in enumerate(op.output):
            if output_blob not in listed_blobs:
                listed_blobs.add(output_blob)
                blobs.append(output_blob)
            producing_ops.setdefault(output_blob, []).append((op, i))

    if show_simplified:
        # Show a cleaner, easier-to-interpret version of the model graph
        blobs = input_blobs

    for blob in blobs:
        current_graph.node.extend([_blob_to_node(producing_ops, {}, blob)])

    return current_graph


def _propagate_device_option(net_def):
    '''
    Copy the net's device option onto every operator that has none.

    Args:
        net_def: A caffe2_pb2.NetDef representing a computation graph. The graph
            consists of Caffe2 operators.

    Returns:
        None. Does nothing when the net has no device_option. Otherwise each
            op without a device_option receives a copy of the net's
            device_option in-place; ops that already have one are left as-is.
    '''
    if net_def.HasField("device_option"):
        for op in net_def.op:
            if op.HasField("device_option"):
                continue
            op.device_option.CopyFrom(net_def.device_option)


def _try_get_shapes(nets):
    '''
    Best-effort lookup of blob shapes for the given nets.

    Args:
        nets: List of core.Net to extract blob shape information from.

    Returns:
        The shapes returned by workspace.InferShapesAndTypes(nets), i.e. a
            dictionary mapping blob names to their shape/dimensions. If that
            call fails for any reason, a warning is logged and an empty
            dictionary is returned instead.
    '''
    try:
        # Note: this will inspect the workspace for better or worse.
        # Only the shapes are of interest; the types are discarded.
        blob_shapes, _blob_types = workspace.InferShapesAndTypes(nets)
    except Exception as err:
        logging.warning('Failed to compute shapes: %s', err)
        return {}
    else:
        return blob_shapes


def model_to_graph_def(model, **kwargs):
    '''
    Convert a Caffe2 model to a Tensorflow graph. The model's
    'param_init_net' and 'net' are collected (in that order) and handed to
    nets_to_graph_def() for further processing.

    Args:
        model (cnn.CNNModelHelper, model_helper.ModelHelper): The model to
            extract the nets (instances of core.Net) from.

    Returns:
        Call to nets_to_graph_def() with extracted 'param_init_net', 'net' and
            **kwargs. See _operators_to_graph_def for detailed **kwargs.
    '''
    init_net = model.param_init_net
    main_net = model.net
    return nets_to_graph_def([init_net, main_net], **kwargs)


def nets_to_graph_def(nets, shapes=None, **kwargs):
    '''
    Convert a set of Caffe2 nets to a Tensorflow graph.

    Args:
        nets: List of core.Nets. core.Net is a wrapper around a NetDef protobuf.
            The corresponding protobuf can be extracted using .Proto().
        shapes: Optional dictionary mapping blob names to their
            shapes/dimensions. When omitted, no shape information is used
            (shapes are not inferred, see the note in the body).

    Returns:
        Call to protos_to_graph_def() with the extracted NetDef protobufs and
            **kwargs. See _operators_to_graph_def for detailed **kwargs.
    '''
    # Missing shapes used to be inferred with _try_get_shapes(nets), but that
    # depends on workspace.InferShapesAndTypes(nets), which is currently
    # broken (segfault). Inference is therefore skipped; shapes passed in
    # explicitly by the caller are still honoured instead of being discarded.
    if shapes is None:
        shapes = {}
    # Work on copies so neither the nets nor the caller's dict are mutated.
    net_protos = [copy.deepcopy(net.Proto()) for net in nets]
    return protos_to_graph_def(net_protos, copy.deepcopy(shapes), **kwargs)


def protos_to_graph_def(net_defs, shapes=None, **kwargs):
    '''
    Convert a set of Caffe2 net definitions to a Tensorflow graph.

    Args:
        net_defs: List of caffe2_pb2.NetDef protobufs representing computation
            graphs. Their operators are updated in-place with the net-level
            device option where they have none of their own.
        shapes: Dictionary mapping blob names to their shapes/dimensions.
            A copy is used; a missing or empty value means "no shapes".

    Returns:
        Call to _operators_to_graph_def() with the extracted operators from the
            NetDefs and **kwargs. See _operators_to_graph_def for detailed
            **kwargs.
    '''
    for net_def in net_defs:
        _propagate_device_option(net_def)

    shape_map = copy.deepcopy(shapes) if shapes else {}

    # Flatten the operators of all nets, preserving net order.
    operators = []
    for net_def in net_defs:
        operators.extend(net_def.op)

    return _operators_to_graph_def(shape_map, operators, **kwargs)


================================================
FILE: tensorboardX/tensorboardX/crc32c.py
================================================
# https://www.ietf.org/rfc/rfc3309.txt
import array
import os

# Use the optional native ``crc32c`` package when it can be imported;
# otherwise ``_crc32c_native`` is None and the pure-Python code below is used.
try:
    # Default CRC32C_SW_MODE to 'auto' unless the environment already sets it.
    if os.environ.get('CRC32C_SW_MODE', None) is None:
        os.environ['CRC32C_SW_MODE'] = 'auto'
    from crc32c import crc32 as _crc32c_native
except ImportError:
    _crc32c_native = None


# 256-entry lookup table used by crc_update() for its byte-at-a-time update;
# it is indexed by the low byte of (crc ^ input byte).
CRC_TABLE = (
    0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4,
    0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
    0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b,
    0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
    0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b,
    0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
    0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54,
    0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
    0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a,
    0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
    0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5,
    0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
    0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45,
    0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
    0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a,
    0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
    0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48,
    0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
    0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687,
    0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
    0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927,
    0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
    0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8,
    0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
    0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096,
    0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
    0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859,
    0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
    0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9,
    0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
    0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36,
    0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
    0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c,
    0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
    0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043,
    0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
    0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3,
    0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
    0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c,
    0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
    0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652,
    0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
    0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d,
    0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
    0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d,
    0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
    0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2,
    0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
    0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530,
    0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
    0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff,
    0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
    0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f,
    0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
    0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90,
    0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
    0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee,
    0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
    0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321,
    0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
    0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81,
    0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
    0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e,
    0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,
)

# Starting checksum value that _crc32c() feeds into crc_update().
CRC_INIT = 0

# Keeps intermediate and final checksum values within 32 bits.
_MASK = 0xFFFFFFFF


def crc_update(crc, data):
    """Fold `data` into a running CRC-32C checksum.

    Args:
      crc: 32-bit checksum to update as long.
      data: byte array, string or iterable over bytes. Anything that is not
        already an `array.array` with one-byte items is first converted to an
        unsigned-byte array.

    Returns:
      32-bit updated CRC-32C as long.
    """
    is_byte_array = type(data) == array.array and data.itemsize == 1
    octets = data if is_byte_array else array.array("B", data)

    # The register is kept bit-inverted while bytes are processed and is
    # inverted back before returning.
    state = crc ^ _MASK
    for octet in octets:
        state = (CRC_TABLE[(state ^ octet) & 0xff] ^ (state >> 8)) & _MASK
    return state ^ _MASK


def crc_finalize(crc):
    """Produce the final CRC-32C value from a running checksum.

    Call this as the last step of a crc calculation; it truncates the value
    to 32 bits.

    Args:
      crc: 32-bit checksum as long.

    Returns:
      finalized 32-bit checksum as long
    """
    finalized = crc & _MASK
    return finalized


def _crc32c(data):
    """Pure-Python CRC-32C of `data`, computed in one shot.

    Args:
      data: byte array, string or iterable over bytes.

    Returns:
      32-bit CRC-32C checksum of data as long.
    """
    running = crc_update(CRC_INIT, data)
    return crc_finalize(running)


# Public entry point: the native implementation when it was importable,
# otherwise the pure-Python fallback.
crc32c = _crc32c_native if _crc32c_native is not None else _crc32c


================================================
FILE: tensorboardX/tensorboardX/embedding.py
================================================
import os


def make_tsv(metadata, save_path, metadata_header=None):
    """Write `metadata` to 'metadata.tsv' inside `save_path`, one row per line.

    Without a header each metadata item is written as str(item). With a
    header, the header row comes first and every row's cells are joined with
    tabs; the header must have as many entries as the first metadata row.
    The file is UTF-8 encoded.
    """
    import sys

    if metadata_header:
        assert len(metadata_header) == len(metadata[0]), \
            'len of header must be equal to the number of columns in metadata'
        table = [metadata_header] + metadata
        rows = ['\t'.join(map(str, cells)) for cells in table]
    else:
        rows = list(map(str, metadata))

    target = os.path.join(save_path, 'metadata.tsv')
    if sys.version_info[0] == 3:
        with open(target, 'w', encoding='utf8') as out:
            for row in rows:
                out.write(row + '\n')
    else:
        # Python 2: encode explicitly and write bytes.
        with open(target, 'wb') as out:
            for row in rows:
                out.write((row + '\n').encode('utf-8'))


# https://github.com/tensorflow/tensorboard/issues/44 image label will be squared
def make_sprite(label_img, save_path):
    """Tile the label images into one square sprite saved as 'sprite.png'.

    Args:
      label_img: batch of images in NCHW layout; every image must be square
        (H == W), otherwise an AssertionError is raised.
      save_path: directory in which 'sprite.png' is written.
    """
    import math
    import numpy as np
    from .x2num import make_np
    from .utils import make_grid
    from PIL import Image
    # this ensures the sprite image has correct dimension as described in
    # https://www.tensorflow.org/get_started/embedding_viz
    # There are some constraints for the sprite image:
    # 1. The sprite image should be square.
    # 2. Each image patch in the sprite image should be square.
    # 3. The content is row major order, so we can padding the image on the
    #    bottom, but not on the right, otherwise, TB will treat some padded location
    #    as images to be shown.
    # args: label_img: tensor in NCHW

    assert label_img.shape[2] == label_img.shape[3], 'Image should be square, see tensorflow/tensorboard#670'
    total_pixels = label_img.shape[0] * label_img.shape[2] * label_img.shape[3]
    pixels_one_side = total_pixels ** 0.5
    number_of_images_per_row = int(math.ceil(pixels_one_side / label_img.shape[3]))
    arranged_img_CHW = make_grid(make_np(label_img), ncols=number_of_images_per_row)
    arranged_img_HWC = arranged_img_CHW.transpose(1, 2, 0)  # chw -> hwc

    # Pad the grid at the bottom to make it square. The canvas is created with
    # np.zeros so the padding rows have a defined (black) value; np.ndarray
    # would leave them as uninitialized memory and put garbage in the PNG.
    sprite_side = arranged_img_CHW.shape[2]
    arranged_augment_square_HWC = np.zeros((sprite_side, sprite_side, 3))
    arranged_augment_square_HWC[:arranged_img_HWC.shape[0], :, :] = arranged_img_HWC
    im = Image.fromarray(np.uint8((arranged_augment_square_HWC * 255).clip(0, 255)))
    im.save(os.path.join(save_path, 'sprite.png'))


def append_pbtxt(metadata, label_img, save_path, subdir, global_step, tag):
    """Append one `embeddings { ... }` entry to 'projector_config.pbtxt'.

    The entry names the tensor "<tag>:<global_step zero-padded to 5 digits>"
    and points at 'tensors.tsv' under `subdir`. A metadata path is added when
    `metadata` is not None, and a sprite section (image path plus the image
    width and height taken from `label_img.shape[3]` / `shape[2]`) when
    `label_img` is not None. Paths inside the file always use '/' separators.
    """
    from posixpath import join as posix_join

    config_path = os.path.join(save_path, 'projector_config.pbtxt')
    has_metadata = metadata is not None
    has_sprite = label_img is not None

    with open(config_path, 'a') as config:
        emit = config.write
        emit('embeddings {\n')
        emit('tensor_name: "{}:{}"\n'.format(
            tag, str(global_step).zfill(5)))
        emit('tensor_path: "{}"\n'.format(posix_join(subdir, 'tensors.tsv')))
        if has_metadata:
            emit('metadata_path: "{}"\n'.format(
                posix_join(subdir, 'metadata.tsv')))
        if has_sprite:
            emit('sprite {\n')
            emit('image_path: "{}"\n'.format(posix_join(subdir, 'sprite.png')))
            emit('single_image_dim: {}\n'.format(label_img.shape[3]))
            emit('single_image_dim: {}\n'.format(label_img.shape[2]))
            emit('}\n')
        emit('}\n')


def make_mat(matlist, save_path):
    """Write `matlist` to 'tensors.tsv' in `save_path`, one row per line.

    Each element of a row is converted with `.item()` and the resulting
    values are separated by tabs.
    """
    with open(os.path.join(save_path, 'tensors.tsv'), 'w') as out:
        for row in matlist:
            cells = (str(value.item()) for value in row)
            out.write('\t'.join(cells) + '\n')


================================================
FILE: tensorboardX/tensorboardX/event_file_writer.py
================================================
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Writes events to disk in a logdir."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import socket
import threading
import time

import six

from .proto import event_pb2
from .record_writer import RecordWriter, directory_check


class EventsWriter(object):
    '''Appends serialized `Event` protocol buffers to a single event file.'''

    def __init__(self, file_prefix, filename_suffix=''):
        '''
        Open the event file and write the initial file-version event.

        Events files have a name of the form
        '/some/file/path/events.out.tfevents.[timestamp].[hostname]',
        followed by `filename_suffix`.
        '''
        timestamp = str(time.time())[:10]
        self._file_name = (file_prefix + ".out.tfevents." + timestamp + "." +
                           socket.gethostname() + filename_suffix)
        self._num_outstanding_events = 0
        self._py_recordio_writer = RecordWriter(self._file_name)

        # The first record of the file is an event carrying the file version.
        header = event_pb2.Event()
        header.wall_time = time.time()
        header.file_version = 'brain.Event:2'
        self._event = header

        self._lock = threading.Lock()
        self.write_event(header)

    def write_event(self, event):
        '''Append "event" to the file; raises TypeError for non-Event input.'''
        if isinstance(event, event_pb2.Event):
            return self._write_serialized_event(event.SerializeToString())
        raise TypeError("Expected an event_pb2.Event proto, "
                        " but got %s" % type(event))

    def _write_serialized_event(self, event_str):
        '''Write an already-serialized event while holding the lock.'''
        with self._lock:
            self._num_outstanding_events += 1
            self._py_recordio_writer.write(event_str)

    def flush(self):
        '''Flushes the event file to disk and resets the outstanding count.'''
        with self._lock:
            self._num_outstanding_events = 0
            self._py_recordio_writer.flush()
        return True

    def close(self):
        '''Flush, then close the underlying record writer.'''
        flushed = self.flush()
        with self._lock:
            self._py_recordio_writer.close()
        return flushed


class EventFileWriter(object):
    """Writes `Event` protocol buffers to an event file.

    The `EventFileWriter` class creates an event file in the specified directory,
    and asynchronously writes Event protocol buffers to the file. The Event file
    is encoded using the tfrecord format, which is similar to RecordIO.

    Events are handed to a background `_EventLoggerThread` through a bounded
    queue; the thread writes them with an `EventsWriter`.
    """

    def __init__(self, logdir, max_queue_size=10, flush_secs=120, filename_suffix=''):
        """Creates a `EventFileWriter` and an event file to write to.

        On construction the summary writer creates a new event file in `logdir`.
        This event file will contain `Event` protocol buffers, which are written to
        disk via the add_event method.
        The other arguments to the constructor control the asynchronous writes to
        the event file:

        Args:
          logdir: A string. Directory where event file will be written.
          max_queue_size: Integer. Size of the queue for pending events and summaries.
          flush_secs: Number. How often, in seconds, to flush the
            pending events and summaries to disk.
          filename_suffix: A string appended to the name of every event file
            created by this writer.
        """
        self._logdir = logdir
        directory_check(self._logdir)
        self._event_queue = six.moves.queue.Queue(max_queue_size)
        # Remembered so reopen() can name its new event file the same way.
        self._filename_suffix = filename_suffix
        self._ev_writer = EventsWriter(os.path.join(
            self._logdir, "events"), filename_suffix)
        self._flush_secs = flush_secs
        self._closed = False
        self._worker = _EventLoggerThread(self._event_queue, self._ev_writer,
                                          flush_secs)

        self._worker.start()

    def get_logdir(self):
        """Returns the directory where event file will be written."""
        return self._logdir

    def reopen(self):
        """Reopens the EventFileWriter.
        Can be called after `close()` to add more events in the same directory.
        The events will go into a new events file and a new write/flush worker
        is created. Does nothing if the EventFileWriter was not closed.
        """
        if self._closed:
            # close() has closed the previous EventsWriter's file, so it cannot
            # be reused: create a new EventsWriter (and thus a new event file)
            # before starting the new worker.
            # NOTE: EventsWriter derives the file name from a timestamp
            # truncated to whole seconds, so reopening within the same second
            # in which the previous file was created yields the same name.
            self._ev_writer = EventsWriter(os.path.join(
                self._logdir, "events"), self._filename_suffix)
            self._worker = _EventLoggerThread(
                self._event_queue, self._ev_writer, self._flush_secs
            )
            self._worker.start()
            # Only mark the writer as open once everything is in place.
            self._closed = False

    def add_event(self, event):
        """Adds an event to the event file.

        Events added while the writer is closed are silently dropped.

        Args:
          event: An `Event` protocol buffer.
        """
        if not self._closed:
            self._event_queue.put(event)

    def flush(self):
        """Flushes the event file to disk.

        Call this method to make sure that all pending events have been written to
        disk. Does nothing if the writer is closed.
        """
        if not self._closed:
            # Wait until the worker has processed every queued event.
            self._event_queue.join()
            self._ev_writer.flush()

    def close(self):
        """Performs a final flush of the event file to disk, stops the
        write/flush worker and closes the file. Call this method when you do not
        need the summary writer anymore.
        """
        if not self._closed:
            self.flush()
            self._worker.stop()
            self._ev_writer.close()
            self._closed = True

class _EventLoggerThread(threading.Thread):
    """Thread that logs events.

    It drains a queue, hands every item to the record writer and flushes the
    writer at most once per `flush_secs`. A private sentinel object placed on
    the queue by stop() ends the loop.
    """

    def __init__(self, queue, record_writer, flush_secs):
        """Creates an _EventLoggerThread.
        Args:
          queue: A Queue from which to dequeue data.
          record_writer: An data writer. Used to log brain events for
           the visualizer. Must provide write_event() and flush().
          flush_secs: How often, in seconds, to flush the
            pending file to disk.
        """
        threading.Thread.__init__(self)
        self.daemon = True
        self._queue = queue
        self._record_writer = record_writer
        self._flush_secs = flush_secs
        # The first data will be flushed immediately.
        self._next_flush_time = 0
        self._has_pending_data = False
        self._shutdown_signal = object()

    def stop(self):
        """Ask the thread to exit after the queued items and wait for it."""
        self._queue.put(self._shutdown_signal)
        self.join()

    def run(self):
        # Here wait on the queue until an data appears, or till the next
        # time to flush the writer, whichever is earlier. If we have an
        # data, write it. If not, an empty queue exception will be raised
        # and we can proceed to flush the writer.
        while True:
            now = time.time()
            queue_wait_duration = self._next_flush_time - now
            dequeued = False
            try:
                if queue_wait_duration > 0:
                    data = self._queue.get(True, queue_wait_duration)
                else:
                    data = self._queue.get(False)
                dequeued = True

                # Identity check: only the private sentinel may stop the
                # thread, regardless of how queued items implement __eq__.
                if data is self._shutdown_signal:
                    return
                self._record_writer.write_event(data)
                self._has_pending_data = True
            except six.moves.queue.Empty:
                pass
            finally:
                # Every item taken off the queue must be marked done, even a
                # falsy one or one whose write failed; otherwise Queue.join()
                # (used by EventFileWriter.flush) would block forever.
                if dequeued:
                    self._queue.task_done()

            now = time.time()
            if now > self._next_flush_time:
                if self._has_pending_data:
                    # Small optimization - if there are no pending data,
                    # there's no need to flush, since each flush can be
                    # expensive (e.g. uploading a new file to a server).
                    self._record_writer.flush()
                    self._has_pending_data = False
                # Do it again in flush_secs.
                self._next_flush_time = now + self._flush_secs


================================================
FILE: tensorboardX/tensorboardX/onnx_graph.py
================================================
from .proto.graph_pb2 import GraphDef
from .proto.node_def_pb2 import NodeDef
from .proto.versions_pb2 import VersionDef
from .proto.attr_value_pb2 import AttrValue
from .proto.tensor_shape_pb2 import TensorShapeProto


def load_onnx_graph(fname):
    """Load the ONNX model stored at `fname` and convert its graph with parse()."""
    import onnx
    model = onnx.load(fname)
    return parse(model.graph)


def parse(graph):
    """Convert an ONNX graph into a `GraphDef`.

    The graph's inputs and outputs become 'Variable' nodes carrying their
    element type and shape; every ONNX operator becomes a node named after its
    first output, with its attributes rendered into a single 'parameters'
    string. The name of every converted node is printed as it is processed.

    Args:
        graph: the `graph` attribute of a loaded ONNX model.

    Returns:
        A GraphDef containing all generated nodes (producer version 22).
    """
    import itertools

    nodes = []

    # Graph-level inputs and outputs.
    for value_info in itertools.chain(graph.input, graph.output):
        # NOTE(review): looks like leftover debugging output; kept so the
        # function's visible behaviour is unchanged.
        print(value_info.name)
        tensor_type = value_info.type.tensor_type
        shapeproto = TensorShapeProto(
            dim=[TensorShapeProto.Dim(size=d.dim_value) for d in tensor_type.shape.dim])
        nodes.append(NodeDef(
            name=value_info.name.encode(encoding='utf_8'),
            op='Variable',
            input=[],
            attr={
                'dtype': AttrValue(type=tensor_type.elem_type),
                'shape': AttrValue(shape=shapeproto),
            })
        )

    # Operator nodes.
    for node in graph.node:
        # Each attribute is rendered by joining the values of its set fields
        # with ' = '; the attributes are then joined with ', '.
        attr = [' = '.join(str(f[1]) for f in s.ListFields())
                for s in node.attribute]
        attr = ', '.join(attr).encode(encoding='utf_8')
        print(node.output[0])
        nodes.append(NodeDef(
            name=node.output[0].encode(encoding='utf_8'),
            op=node.op_type,
            input=node.input,
            attr={'parameters': AttrValue(s=attr)},
        ))

    return GraphDef(node=nodes, versions=VersionDef(producer=22))


================================================
FILE: tensorboardX/tensorboardX/proto/__init__.py
================================================


================================================
FILE: tensorboardX/tensorboardX/proto/api.proto
================================================
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// Defines a proto3-based REST API that the HParams web-component of the plugin
// would use to read data from a hyperparameter-tuning experiment.
// This file defines the message types (resources) used
// to pass information into and out of the API methods. These messages will be
// transmitted using proto3 native JSON encoding. See http_api.md for a
// description of the actual HTTP API.

// General note: in what follows we use the field 'name' of a message to
// store its id. We avoid calling this field 'id' since it is the name of a
// built-in function in Python, as well as to be more compliant with the API
// style guide detailed in https://cloud.google.com/apis/design/.

// IMPORTANT: If you change any of the messages here, make sure to also update
// api.d.ts accordingly.

syntax = "proto3";

import "google/protobuf/struct.proto";

package tensorboardX.hparam;

// Represents a single experiment.
// An experiment consists of multiple "sessions". Typically, in each session
// a model is trained for a given set of hyperparameter values. In each session
// a training program may generate one or more series of real numbers--each
// containing the evaluation of some metric on the model at different training
// steps.
//
// Note that Sessions can consist of multiple Tensorboard "runs", since in
// a distributed Tensorflow deployment, training can be accomplished using
// several cooperating processes, each one emitting Summary data to a different
// log directory or run. For example, in a single session one process could
// periodically compute the loss on the validation set, and another could
// compute the loss on the training set.
// NEXT_TAG: 7
message Experiment {
  // -- Experiments are scoped by a global name.
  // Currently, Tensorboard supports displaying data for a single experiment.
  string name = 6;

  // A description. May contain markdown.
  string description = 1;

  // An id for the owning user or group.
  string user = 2;

  // The time the experiment was created. In seconds since the UNIX epoch.
  double time_created_secs = 3;

  // Information about each hyperparameter used in the experiment.
  repeated HParamInfo hparam_infos = 4;

  // Information about each metric used in the experiment.
  repeated MetricInfo metric_infos = 5;
}

// Describes a single hyperparameter used in an experiment
// (see Experiment.hparam_infos).
// NEXT_TAG: 7
message HParamInfo {
  // An id for the hyperparameter.
  string name = 1;

  // A string used to display the hyperparameter in the UI. If empty, the UI
  // will display the 'name' field.
  string display_name = 2;

  // A description. May contain markdown.
  string description = 3;

  // The data type of this hyperparameter.
  DataType type = 4;

  // Specifies the set of values this hyperparameter can hold. The UI assumes
  // every instance of this hyperparameter will hold a value from this set. It
  // is used by the UI to allow filtering so that only session groups (see
  // below) whose associated hyperparameter value "passes" the filter are
  // displayed. If this is not populated, the domain is assumed to be the
  // entire domain of the type of the hyperparameter.
  oneof domain {
    // A discrete set of the values this hyperparameter can hold.
    google.protobuf.ListValue domain_discrete = 5;
    // Numeric data type only. The (real) interval from which values of this
    // hyperparameter are taken.
    Interval domain_interval = 6;
  }
}

// The data type of a hyperparameter (see HParamInfo.type).
enum DataType {
  DATA_TYPE_UNSET = 0;
  DATA_TYPE_STRING = 1;
  DATA_TYPE_BOOL = 2;
  DATA_TYPE_FLOAT64 = 3;
}

// Represents the closed interval [min_value, max_value] of the real line.
// Used as the domain of a numeric hyperparameter
// (see HParamInfo.domain_interval).
// NEXT_TAG: 3
message Interval {
  // Lower end of the interval (included).
  double min_value = 1;
  // Upper end of the interval (included).
  double max_value = 2;
}

// Identifies a metric by a (group, tag) pair of strings.
// NEXT_TAG: 3
message MetricName {
  // An identifier for a metric. A metric is a real-valued function of the
  // model. The UI can plot metrics for a session evaluated on the model at
  // different training steps.
  //
  // We identify a metric by a (group, tag) pair of strings. The UI treats
  // both of these as opaque strings. The only requirement is that the pair
  // uniquely identifies a metric in the experiment.
  //
  // We use a pair so the UI could allow the user to group metrics for a
  // single session by either group or tag to be displayed in the same chart.
  // For instance, one can set the metric group to correspond to the dataset
  // on which the model was evaluated, and the UI can then display different
  // metrics describing the same underlying computation and using different
  // datasets, on the same chart.
  //
  // When exporting summaries from Tensorflow, in a typical setup, a
  // training session exports evaluations of metrics at different training steps
  // as Scalar-plugin summaries--each having a run of the form
  // "<session_base_log_dir>/<sub_dir>", and some associated tag. The same
  // metric for different sessions would use the same sub_dir and tag, but
  // would have a different session_base_log_dir. For example, a session
  // computing two metrics: model loss on the validation set and model loss on
  // the training set, can export these as scalar summaries with the tag "loss"
  // and runs session_base_log_dir/validation and session_base_log_dir/training,
  // respectively. In this setup, the 'group' field can be populated with
  // the "sub_dir" associated with the metric, and the 'tag' field can be
  // populated with the tag: "loss".
  string group = 1;
  string tag = 2;
}

// Describes a single metric used in an experiment
// (see Experiment.metric_infos).
// NEXT_TAG: 6
message MetricInfo {
  // The (group, tag) pair identifying the metric.
  MetricName name = 1;

  // A string used to display the metric in the UI. If empty, the UI
  // will display the 'name' field.
  string display_name = 3;

  // A description. May contain markdown.
  string description = 4;

  // The dataset type (validation, training) on which the metric is computed.
  DatasetType dataset_type = 5;
}

// The type of dataset on which a metric is computed
// (see MetricInfo.dataset_type).
enum DatasetType {
  DATASET_UNKNOWN = 0;
  DATASET_TRAINING = 1;
  DATASET_VALIDATION = 2;
}

// In some experiments, the user trains a model with the same set of
// hyperparameters multiple times to get the distribution of metric
// evaluations, when the computation (such as the training algorithm, or metric
// evaluation) is non-deterministic. To make the UI aware of this, sessions
// are partitioned into groups: each group consists of all training sessions
// which share the same values for the hyperparameters. In experiments with no
// repeated executions, each group consists of exactly one session.
// NEXT_TAG: 6
message SessionGroup {
  // An id for the session group.
  string name = 1;

  // Stores the hyperparameters for sessions within this group as a mapping
  // from the hyperparameter name to its value.
  map<string /* hparam name */, google.protobuf.Value> hparams = 2;

  // A list of pairs (metric, value)--one for each metric in the experiment.
  // The value denotes the evaluation of the corresponding metric on
  // the model aggregated across the sessions in this group. The exact method
  // of aggregation is specified in the comments of ListSessionGroupsRequest.
  // Unfortunately, we can't store these as a map, since proto maps can't have
  // message keys.
  repeated MetricValue metric_values = 3;

  // The sessions belonging to this group.
  repeated Session sessions = 4;

  // An optional link to a web page monitoring the session group.
  string monitor_url = 5;
}

// The value of one metric, together with the training step and wall time at
// which it was computed.
// NEXT_TAG: 5
message MetricValue {
  // The (group, tag) pair identifying the metric.
  MetricName name = 1;

  // The value of the metric.
  double value = 2;

  // The training step at which this value is computed.
  int32 training_step = 3;

  // The wall time in seconds since UNIX epoch at which this value is computed.
  double wall_time_secs = 4;
}

// A single session of an experiment (see the description of Experiment).
// NEXT_TAG: 8
message Session {
  // An id for the session. Unique within an experiment (not just the group).
  string name = 1;

  // In seconds since the UNIX epoch.
  double start_time_secs = 2;

  // In seconds since the UNIX epoch.
  // May be 0 if unavailable or the session has not finished yet.
  double end_time_secs = 3;

  // May be STATUS_UNKNOWN if unavailable.
  Status status = 4;

  // A URI for a resource that will allow the user to reconstruct the model for
  // this session. E.g., in Tensorflow this could point to a directory where the
  // checkpoints are stored. Currently, this is treated opaquely by the UI
  // and only displayed to the user as it is passed here.
  string model_uri = 5;

  // Stores each metric evaluation on the model at the current training step.
  // Unfortunately, we can't store these as a map, since proto maps can't have
  // message keys.
  repeated MetricValue metric_values = 6;

  // An optional link to a web page monitoring the session.
  string monitor_url = 7;
}

// Represents the status of a Session.
enum Status {
  // Used when the status is unavailable (see Session.status).
  STATUS_UNKNOWN = 0;
  STATUS_SUCCESS = 1;
  STATUS_FAILURE = 2;
  STATUS_RUNNING = 3;
}

// Parameters for a GetExperiment API call.
// Each experiment is scoped by a unique global id.
// NEXT_TAG: 2
message GetExperimentRequest {
  // REQUIRED. The global name (id) of the experiment.
  string experiment_name = 1;
}

// Parameters for a ListSessionGroups API call.
// Computes a list of the current session groups allowing for filtering and
// sorting by metrics and hyperparameter values. Returns a "slice" of
// that list specified by start_index and slice_size.
// NEXT_TAG: 8
message ListSessionGroupsRequest {
  // Selects the experiment whose session groups are listed.
  // NOTE(review): presumably the same identifier as
  // GetExperimentRequest.experiment_name -- confirm.
  string experiment_name = 6;

  // Filters the set of sessions (from which the session groups are formed) to
  // contain only these sessions whose status is contained in
  // 'allowed_statuses'.
  repeated Status allowed_statuses = 7;

  // A list of ColParams messages--one for each "column" of a session group. A
  // session group column contains either a metric evaluated at the current
  // reported computation step or a hyperparameter value. In addition to
  // 'regular' values, a column may take on a special 'missing-value' which
  // denotes that the hyperparameter or metric is not available
  // for the session group (for example, if the metric is not used in the
  // group).
  //
  // The ColParams messages in the repeated field below configure filtering and
  // sorting of the resulting collection of session groups. See the comments of
  // the fields in the ColParams message below for more details.
  repeated ColParams col_params = 1;

  // Fields controlling how to aggregate metrics across sessions within a
  // session group.
  // If aggregation_type is AGGREGATION_AVG, each metric value of the
  // session group is the average of the values of the metric across the
  // sessions.
  // Otherwise, the session group metric values are taken directly from a
  // "representative" session in the group, selected as a session for which
  // 'aggregation_metric' (below) takes on its minimum, maximum, or median
  // value, as specified by the choice of aggregation_type (for median, if the
  // number of sessions in the group is even, a session with a lower "middle"
  // value is chosen as the representative session).
  AggregationType aggregation_type = 2;

  // The metric used to pick the representative session.
  // See comment for 'aggregation_type' above.
  MetricName aggregation_metric = 3;

  // The next two parameters determine the "slice" of the full list of
  // session groups--sorted and filtered by the parameters above--to return.
  // The 0-based index of the first session group to return.
  int32 start_index = 4;

  // The number of session groups to return starting at the session group
  // indexed by 'start_index'. The actual number of session groups returned
  // is min{slice_size, total_size - start_index}, where
  // total_size is the number of session groups in the full list
  // sorted and filtered by the parameters above (if start_index > total_size
  // no session groups are returned).
  int32 slice_size = 5;
}

// Defines parameters for a ListSessionGroupsRequest for a specific column.
// See the comment for "ListSessionGroupsRequest" above for more details.
// NEXT_TAG: 9
message ColParams {
  // Identifies the column these parameters apply to: either a metric or a
  // hyperparameter (exactly one member of the oneof can be set).
  oneof name {
    MetricName metric = 1;
    string hparam = 2;
  }

  // Sorting.
  // The final order of session groups in the response is defined by the sub
  // collection of ColParams messages (out of the
  // ListSessionGroupsRequest.col_params repeated field) whose 'order' field
  // (below) is not ORDER_UNSPECIFIED. In each of the messages in this
  // sub-collection, the next two fields specify the ordering of the values
  // and missing_values in the associated column of the session group. The
  // order of the ColParams messages themselves within the sub-collection
  // determines the "significance" of the associated column as a sorting key:
  // with the first being the primary sorting key, the second being the
  // secondary sorting key, etc.
  // Note: The 'session group name' is added as a least significant sorting
  // key to the keys defined by the user, so the order in the response is always
  // deterministic.
  SortOrder order = 3;
  // This field is ignored if order is ORDER_UNSPECIFIED.
  // Otherwise, if true, missing values are ordered before every other value in
  // the column; if false they are ordered after every other value in the
  // column.
  bool missing_values_first = 4;

  // Filtering.
  // The 'filter' oneof specifies a subset of the domain of the values a column
  // may take. Only session groups with each of their column values belonging
  // to this subset are included in the response. If this field is not
  // specified, the subset is taken to be the entire column domain.
  oneof filter {
    // Only valid for string-valued hyperparameter columns. The subset is
    // the set of all strings matching the regular expression stored
    // in 'filter_regexp' as a partial match (use '^<regexp>$' to have a full
    // match against regexp).
    string filter_regexp = 5;

    // Only valid for numeric-valued columns. The subset is the given interval.
    Interval filter_interval = 6;

    // Valid for all data types. The subset is defined explicitly.
    google.protobuf.ListValue filter_discrete = 7;
  }
  // Specifies whether to exclude session groups whose column value is missing
  // from the response.
  bool exclude_missing_values = 8;
}

// Sort direction for a session group column (see ColParams.order).
enum SortOrder {
  // The column does not take part in sorting: only ColParams whose 'order'
  // is not ORDER_UNSPECIFIED define the response order.
  ORDER_UNSPECIFIED = 0;
  // NOTE(review): presumably ascending / descending order of the column
  // values -- the names are the only evidence in this file.
  ORDER_ASC = 1;
  ORDER_DESC = 2;
}

// How metric values are aggregated across the sessions of a session group.
// See the comment on ListSessionGroupsRequest.aggregation_type.
enum AggregationType {
  // NOTE(review): the behavior for the unset value is not documented in this
  // file -- confirm with the request handler.
  AGGREGATION_UNSET = 0;
  // Each session group metric value is the average of that metric across the
  // group's sessions.
  AGGREGATION_AVG = 1;
  // For the three values below, metric values are taken from a
  // "representative" session: the one for which
  // ListSessionGroupsRequest.aggregation_metric is the median, minimum or
  // maximum, respectively.
  AGGREGATION_MEDIAN = 2;
  AGGREGATION_MIN = 3;
  AGGREGATION_MAX = 4;
}

// Response of a ListSessionGroups API call.
// See ListSessionGroups in http_api.md.
// NEXT_TAG: 4
message ListSessionGroupsResponse {
  // The requested slice of session groups.
  repeated SessionGroup session_groups = 1;

  // Note: tag 2 is not used by any field of this message.

  // Denotes the total number of session groups in the full filtered list.
  // (Recall that this response may only be a slice).
  // It is used by the UI to calculate total number of pages and can be
  // set here to -1 to mean "unknown".
  int32 total_size = 3;
}

// Parameters for a ListMetricEvals API call: selects one metric of one
// session within one experiment.
// See ListMetricEvalsRequest in http_api.md.
// NEXT_TAG: 4
message ListMetricEvalsRequest {
  // The experiment containing the session.
  string experiment_name = 3;
  // The session to read from (presumably matches Session.name -- confirm).
  string session_name = 1;
  // The metric whose evaluations are requested.
  MetricName metric_name = 2;
}


================================================
FILE: tensorboardX/tensorboardX/proto/api_pb2.py
================================================
# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: tensorboardX/proto/api.proto

import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf.internal import enum_type_wrapper
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2


DESCRIPTOR = _descriptor.FileDescriptor(
  name='tensorboardX/proto/api.proto',
  package='tensorboardX.hparam',
  syntax='proto3',
  serialized_options=None,
  serialized_pb=_b('\n\x1ctensorboardX/proto/api.proto\x12\x13tensorboardX.hparam\x1a\x1cgoogle/protobuf/struct.proto\"\xc6\x01\n\nExperiment\x12\x0c\n\x04name\x18\x06 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x01 \x01(\t\x12\x0c\n\x04user\x18\x02 \x01(\t\x12\x19\n\x11time_created_secs\x18\x03 \x01(\x01\x12\x35\n\x0chparam_infos\x18\x04 \x03(\x0b\x32\x1f.tensorboardX.hparam.HParamInfo\x12\x35\n\x0cmetric_infos\x18\x05 \x03(\x0b\x32\x1f.tensorboardX.hparam.MetricInfo\"\xed\x01\n\nHParamInfo\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0c\x64isplay_name\x18\x02 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x03 \x01(\t\x12+\n\x04type\x18\x04 \x01(\x0e\x32\x1d.tensorboardX.hparam.DataType\x12\x35\n\x0f\x64omain_discrete\x18\x05 \x01(\x0b\x32\x1a.google.protobuf.ListValueH\x00\x12\x38\n\x0f\x64omain_interval\x18\x06 \x01(\x0b\x32\x1d.tensorboardX.hparam.IntervalH\x00\x42\x08\n\x06\x64omain\"0\n\x08Interval\x12\x11\n\tmin_value\x18\x01 \x01(\x01\x12\x11\n\tmax_value\x18\x02 \x01(\x01\"(\n\nMetricName\x12\r\n\x05group\x18\x01 \x01(\t\x12\x0b\n\x03tag\x18\x02 \x01(\t\"\x9e\x01\n\nMetricInfo\x12-\n\x04name\x18\x01 \x01(\x0b\x32\x1f.tensorboardX.hparam.MetricName\x12\x14\n\x0c\x64isplay_name\x18\x03 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x04 \x01(\t\x12\x36\n\x0c\x64\x61taset_type\x18\x05 \x01(\x0e\x32 .tensorboardX.hparam.DatasetType\"\xa3\x02\n\x0cSessionGroup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12?\n\x07hparams\x18\x02 \x03(\x0b\x32..tensorboardX.hparam.SessionGroup.HparamsEntry\x12\x37\n\rmetric_values\x18\x03 \x03(\x0b\x32 .tensorboardX.hparam.MetricValue\x12.\n\x08sessions\x18\x04 \x03(\x0b\x32\x1c.tensorboardX.hparam.Session\x12\x13\n\x0bmonitor_url\x18\x05 \x01(\t\x1a\x46\n\x0cHparamsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12%\n\x05value\x18\x02 \x01(\x0b\x32\x16.google.protobuf.Value:\x02\x38\x01\"z\n\x0bMetricValue\x12-\n\x04name\x18\x01 \x01(\x0b\x32\x1f.tensorboardX.hparam.MetricName\x12\r\n\x05value\x18\x02 \x01(\x01\x12\x15\n\rtraining_step\x18\x03 
\x01(\x05\x12\x16\n\x0ewall_time_secs\x18\x04 \x01(\x01\"\xd5\x01\n\x07Session\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x17\n\x0fstart_time_secs\x18\x02 \x01(\x01\x12\x15\n\rend_time_secs\x18\x03 \x01(\x01\x12+\n\x06status\x18\x04 \x01(\x0e\x32\x1b.tensorboardX.hparam.Status\x12\x11\n\tmodel_uri\x18\x05 \x01(\t\x12\x37\n\rmetric_values\x18\x06 \x03(\x0b\x32 .tensorboardX.hparam.MetricValue\x12\x13\n\x0bmonitor_url\x18\x07 \x01(\t\"/\n\x14GetExperimentRequest\x12\x17\n\x0f\x65xperiment_name\x18\x01 \x01(\t\"\xc4\x02\n\x18ListSessionGroupsRequest\x12\x17\n\x0f\x65xperiment_name\x18\x06 \x01(\t\x12\x35\n\x10\x61llowed_statuses\x18\x07 \x03(\x0e\x32\x1b.tensorboardX.hparam.Status\x12\x32\n\ncol_params\x18\x01 \x03(\x0b\x32\x1e.tensorboardX.hparam.ColParams\x12>\n\x10\x61ggregation_type\x18\x02 \x01(\x0e\x32$.tensorboardX.hparam.AggregationType\x12;\n\x12\x61ggregation_metric\x18\x03 \x01(\x0b\x32\x1f.tensorboardX.hparam.MetricName\x12\x13\n\x0bstart_index\x18\x04 \x01(\x05\x12\x12\n\nslice_size\x18\x05 \x01(\x05\"\xd9\x02\n\tColParams\x12\x31\n\x06metric\x18\x01 \x01(\x0b\x32\x1f.tensorboardX.hparam.MetricNameH\x00\x12\x10\n\x06hparam\x18\x02 \x01(\tH\x00\x12-\n\x05order\x18\x03 \x01(\x0e\x32\x1e.tensorboardX.hparam.SortOrder\x12\x1c\n\x14missing_values_first\x18\x04 \x01(\x08\x12\x17\n\rfilter_regexp\x18\x05 \x01(\tH\x01\x12\x38\n\x0f\x66ilter_interval\x18\x06 \x01(\x0b\x32\x1d.tensorboardX.hparam.IntervalH\x01\x12\x35\n\x0f\x66ilter_discrete\x18\x07 \x01(\x0b\x32\x1a.google.protobuf.ListValueH\x01\x12\x1e\n\x16\x65xclude_missing_values\x18\x08 \x01(\x08\x42\x06\n\x04nameB\x08\n\x06\x66ilter\"j\n\x19ListSessionGroupsResponse\x12\x39\n\x0esession_groups\x18\x01 \x03(\x0b\x32!.tensorboardX.hparam.SessionGroup\x12\x12\n\ntotal_size\x18\x03 \x01(\x05\"}\n\x16ListMetricEvalsRequest\x12\x17\n\x0f\x65xperiment_name\x18\x03 \x01(\t\x12\x14\n\x0csession_name\x18\x01 \x01(\t\x12\x34\n\x0bmetric_name\x18\x02 
\x01(\x0b\x32\x1f.tensorboardX.hparam.MetricName*`\n\x08\x44\x61taType\x12\x13\n\x0f\x44\x41TA_TYPE_UNSET\x10\x00\x12\x14\n\x10\x44\x41TA_TYPE_STRING\x10\x01\x12\x12\n\x0e\x44\x41TA_TYPE_BOOL\x10\x02\x12\x15\n\x11\x44\x41TA_TYPE_FLOAT64\x10\x03*P\n\x0b\x44\x61tasetType\x12\x13\n\x0f\x44\x41TASET_UNKNOWN\x10\x00\x12\x14\n\x10\x44\x41TASET_TRAINING\x10\x01\x12\x16\n\x12\x44\x41TASET_VALIDATION\x10\x02*X\n\x06Status\x12\x12\n\x0eSTATUS_UNKNOWN\x10\x00\x12\x12\n\x0eSTATUS_SUCCESS\x10\x01\x12\x12\n\x0eSTATUS_FAILURE\x10\x02\x12\x12\n\x0eSTATUS_RUNNING\x10\x03*A\n\tSortOrder\x12\x15\n\x11ORDER_UNSPECIFIED\x10\x00\x12\r\n\tORDER_ASC\x10\x01\x12\x0e\n\nORDER_DESC\x10\x02*\x7f\n\x0f\x41ggregationType\x12\x15\n\x11\x41GGREGATION_UNSET\x10\x00\x12\x13\n\x0f\x41GGREGATION_AVG\x10\x01\x12\x16\n\x12\x41GGREGATION_MEDIAN\x10\x02\x12\x13\n\x0f\x41GGREGATION_MIN\x10\x03\x12\x13\n\x0f\x41GGREGATION_MAX\x10\x04\x62\x06proto3')
  ,
  dependencies=[google_dot_protobuf_dot_struct__pb2.DESCRIPTOR,])

# Descriptor for the proto enum `tensorboardX.hparam.DataType`.
# serialized_start/serialized_end are the byte offsets of this enum's
# definition inside DESCRIPTOR's serialized_pb (96 bytes), so they must stay
# in sync with that blob -- regenerate with protoc rather than editing.
_DATATYPE = _descriptor.EnumDescriptor(
  name='DataType',
  full_name='tensorboardX.hparam.DataType',
  filename=None,
  file=DESCRIPTOR,
  values=[
    # `index` is the position in this list, `number` the wire value.
    _descriptor.EnumValueDescriptor(
      name='DATA_TYPE_UNSET', index=0, number=0,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DATA_TYPE_STRING', index=1, number=1,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DATA_TYPE_BOOL', index=2, number=2,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DATA_TYPE_FLOAT64', index=3, number=3,
      serialized_options=None,
      type=None),
  ],
  containing_type=None,
  serialized_options=None,
  serialized_start=2370,
  serialized_end=2466,
)
# Register the enum with the default symbol database (_sym_db).
_sym_db.RegisterEnumDescriptor(_DATATYPE)

# Public handle for the enum, built from the descriptor above.
DataType = enum_type_wrapper.EnumTypeWrapper(_DATATYPE)
# Descriptor for the proto enum `tensorboardX.hparam.DatasetType`.
# serialized_start/serialized_end are byte offsets into DESCRIPTOR's
# serialized_pb (80 bytes) and must stay in sync with that blob.
_DATASETTYPE = _descriptor.EnumDescriptor(
  name='DatasetType',
  full_name='tensorboardX.hparam.DatasetType',
  filename=None,
  file=DESCRIPTOR,
  values=[
    # `index` is the position in this list, `number` the wire value.
    _descriptor.EnumValueDescriptor(
      name='DATASET_UNKNOWN', index=0, number=0,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DATASET_TRAINING', index=1, number=1,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DATASET_VALIDATION', index=2, number=2,
      serialized_options=None,
      type=None),
  ],
  containing_type=None,
  serialized_options=None,
  serialized_start=2468,
  serialized_end=2548,
)
# Register the enum with the default symbol database (_sym_db).
_sym_db.RegisterEnumDescriptor(_DATASETTYPE)

# Public handle for the enum, built from the descriptor above.
DatasetType = enum_type_wrapper.EnumTypeWrapper(_DATASETTYPE)
# Descriptor for the proto enum `tensorboardX.hparam.Status` (the status of
# a Session in api.proto).
# serialized_start/serialized_end are byte offsets into DESCRIPTOR's
# serialized_pb (88 bytes) and must stay in sync with that blob.
_STATUS = _descriptor.EnumDescriptor(
  name='Status',
  full_name='tensorboardX.hparam.Status',
  filename=None,
  file=DESCRIPTOR,
  values=[
    # `index` is the position in this list, `number` the wire value.
    _descriptor.EnumValueDescriptor(
      name='STATUS_UNKNOWN', index=0, number=0,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='STATUS_SUCCESS', index=1, number=1,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='STATUS_FAILURE', index=2, number=2,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='STATUS_RUNNING', index=3, number=3,
      serialized_options=None,
      type=None),
  ],
  containing_type=None,
  serialized_options=None,
  serialized_start=2550,
  serialized_end=2638,
)
# Register the enum with the default symbol database (_sym_db).
_sym_db.RegisterEnumDescriptor(_STATUS)

# Public handle for the enum, built from the descriptor above.
Status = enum_type_wrapper.EnumTypeWrapper(_STATUS)
# Descriptor for the proto enum `tensorboardX.hparam.SortOrder` (used by
# ColParams.order in api.proto).
# serialized_start/serialized_end are byte offsets into DESCRIPTOR's
# serialized_pb (65 bytes) and must stay in sync with that blob.
_SORTORDER = _descriptor.EnumDescriptor(
  name='SortOrder',
  full_name='tensorboardX.hparam.SortOrder',
  filename=None,
  file=DESCRIPTOR,
  values=[
    # `index` is the position in this list, `number` the wire value.
    _descriptor.EnumValueDescriptor(
      name='ORDER_UNSPECIFIED', index=0, number=0,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='ORDER_ASC', index=1, number=1,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='ORDER_DESC', index=2, number=2,
      serialized_options=None,
      type=None),
  ],
  containing_type=None,
  serialized_options=None,
  serialized_start=2640,
  serialized_end=2705,
)
# Register the enum with the default symbol database (_sym_db).
_sym_db.RegisterEnumDescriptor(_SORTORDER)

# Public handle for the enum, built from the descriptor above.
SortOrder = enum_type_wrapper.EnumTypeWrapper(_SORTORDER)
# Descriptor for the proto enum `tensorboardX.hparam.AggregationType` (used
# by ListSessionGroupsRequest.aggregation_type in api.proto).
# serialized_start/serialized_end are byte offsets into DESCRIPTOR's
# serialized_pb (127 bytes) and must stay in sync with that blob.
_AGGREGATIONTYPE = _descriptor.EnumDescriptor(
  name='AggregationType',
  full_name='tensorboardX.hparam.AggregationType',
  filename=None,
  file=DESCRIPTOR,
  values=[
    # `index` is the position in this list, `number` the wire value.
    _descriptor.EnumValueDescriptor(
      name='AGGREGATION_UNSET', index=0, number=0,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='AGGREGATION_AVG', index=1, number=1,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='AGGREGATION_MEDIAN', index=2, number=2,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='AGGREGATION_MIN', index=3, number=3,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='AGGREGATION_MAX', index=4, number=4,
      serialized_options=None,
      type=None),
  ],
  containing_type=None,
  serialized_options=None,
  serialized_start=2707,
  serialized_end=2834,
)
# Register the enum with the default symbol database (_sym_db).
_sym_db.RegisterEnumDescriptor(_AGGREGATIONTYPE)

# Public handle for the enum, built from the descriptor above.
AggregationType = enum_type_wrapper.EnumTypeWrapper(_AGGREGATIONTYPE)
# Module-level aliases for every enum value declared in api.proto, grouped by
# enum. Each group is numbered consecutively from 0, matching the `number`
# of the corresponding EnumValueDescriptor.

# DataType
(DATA_TYPE_UNSET,
 DATA_TYPE_STRING,
 DATA_TYPE_BOOL,
 DATA_TYPE_FLOAT64) = range(4)

# DatasetType
(DATASET_UNKNOWN,
 DATASET_TRAINING,
 DATASET_VALIDATION) = range(3)

# Status
(STATUS_UNKNOWN,
 STATUS_SUCCESS,
 STATUS_FAILURE,
 STATUS_RUNNING) = range(4)

# SortOrder
(ORDER_UNSPECIFIED,
 ORDER_ASC,
 ORDER_DESC) = range(3)

# AggregationType
(AGGREGATION_UNSET,
 AGGREGATION_AVG,
 AGGREGATION_MEDIAN,
 AGGREGATION_MIN,
 AGGREGATION_MAX) = range(5)


# Descriptor for the proto message `tensorboardX.hparam.Experiment`.
#
# Reading the FieldDescriptor arguments (protobuf FieldDescriptor constants):
#   type=9/cpp_type=9   -> string        type=1/cpp_type=5 -> double
#   type=11/cpp_type=10 -> message       label=1 -> singular, label=3 -> repeated
# `index` is the field's position in this list; `number` is its proto tag.
# message_type/enum_type are None at construction time.
# NOTE(review): generated modules normally fill in these cross-references
# further down the file -- that part is outside this chunk.
# serialized_start/serialized_end are byte offsets into DESCRIPTOR's
# serialized_pb (198 bytes) and must stay in sync with that blob.
_EXPERIMENT = _descriptor.Descriptor(
  name='Experiment',
  full_name='tensorboardX.hparam.Experiment',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # string name = 6
    _descriptor.FieldDescriptor(
      name='name', full_name='tensorboardX.hparam.Experiment.name', index=0,
      number=6, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # string description = 1
    _descriptor.FieldDescriptor(
      name='description', full_name='tensorboardX.hparam.Experiment.description', index=1,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # string user = 2
    _descriptor.FieldDescriptor(
      name='user', full_name='tensorboardX.hparam.Experiment.user', index=2,
      number=2, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # double time_created_secs = 3
    _descriptor.FieldDescriptor(
      name='time_created_secs', full_name='tensorboardX.hparam.Experiment.time_created_secs', index=3,
      number=3, type=1, cpp_type=5, label=1,
      has_default_value=False, default_value=float(0),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # repeated HParamInfo hparam_infos = 4 (type name per serialized_pb)
    _descriptor.FieldDescriptor(
      name='hparam_infos', full_name='tensorboardX.hparam.Experiment.hparam_infos', index=4,
      number=4, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # repeated MetricInfo metric_infos = 5 (type name per serialized_pb)
    _descriptor.FieldDescriptor(
      name='metric_infos', full_name='tensorboardX.hparam.Experiment.metric_infos', index=5,
      number=5, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=84,
  serialized_end=282,
)


# Descriptor for the proto message `tensorboardX.hparam.HParamInfo`.
#
# Reading the FieldDescriptor arguments (protobuf FieldDescriptor constants):
#   type=9/cpp_type=9   -> string        type=14/cpp_type=8 -> enum
#   type=11/cpp_type=10 -> message       label=1 -> singular
# message_type/enum_type are None at construction time, and the `domain`
# oneof below starts with an empty field list.
# NOTE(review): generated modules normally wire up these cross-references
# (including which fields belong to the oneof) further down the file -- that
# part is outside this chunk.
# serialized_start/serialized_end are byte offsets into DESCRIPTOR's
# serialized_pb (237 bytes) and must stay in sync with that blob.
_HPARAMINFO = _descriptor.Descriptor(
  name='HParamInfo',
  full_name='tensorboardX.hparam.HParamInfo',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # string name = 1
    _descriptor.FieldDescriptor(
      name='name', full_name='tensorboardX.hparam.HParamInfo.name', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # string display_name = 2
    _descriptor.FieldDescriptor(
      name='display_name', full_name='tensorboardX.hparam.HParamInfo.display_name', index=1,
      number=2, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # string description = 3
    _descriptor.FieldDescriptor(
      name='description', full_name='tensorboardX.hparam.HParamInfo.description', index=2,
      number=3, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # DataType type = 4 (enum; type name per serialized_pb)
    _descriptor.FieldDescriptor(
      name='type', full_name='tensorboardX.hparam.HParamInfo.type', index=3,
      number=4, type=14, cpp_type=8, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # google.protobuf.ListValue domain_discrete = 5 (type name per serialized_pb)
    _descriptor.FieldDescriptor(
      name='domain_discrete', full_name='tensorboardX.hparam.HParamInfo.domain_discrete', index=4,
      number=5, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # Interval domain_interval = 6 (type name per serialized_pb)
    _descriptor.FieldDescriptor(
      name='domain_interval', full_name='tensorboardX.hparam.HParamInfo.domain_interval', index=5,
      number=6, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
    # The `domain` oneof; created empty here (fields=[]).
    _descriptor.OneofDescriptor(
      name='domain', full_name='tensorboardX.hparam.HParamInfo.domain',
      index=0, containing_type=None, fields=[]),
  ],
  serialized_start=285,
  serialized_end=522,
)


# Descriptor for the proto message `tensorboardX.hparam.Interval`: two
# singular double fields (type=1/cpp_type=5, label=1) defaulting to 0.0.
# serialized_start/serialized_end are byte offsets into DESCRIPTOR's
# serialized_pb (48 bytes) and must stay in sync with that blob.
_INTERVAL = _descriptor.Descriptor(
  name='Interval',
  full_name='tensorboardX.hparam.Interval',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # double min_value = 1
    _descriptor.FieldDescriptor(
      name='min_value', full_name='tensorboardX.hparam.Interval.min_value', index=0,
      number=1, type=1, cpp_type=5, label=1,
      has_default_value=False, default_value=float(0),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # double max_value = 2
    _descriptor.FieldDescriptor(
      name='max_value', full_name='tensorboardX.hparam.Interval.max_value', index=1,
      number=2, type=1, cpp_type=5, label=1,
      has_default_value=False, default_value=float(0),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=524,
  serialized_end=572,
)


# Descriptor for the proto message `tensorboardX.hparam.MetricName`: two
# singular string fields (type=9/cpp_type=9, label=1) defaulting to ''.
# serialized_start/serialized_end are byte offsets into DESCRIPTOR's
# serialized_pb (40 bytes) and must stay in sync with that blob.
_METRICNAME = _descriptor.Descriptor(
  name='MetricName',
  full_name='tensorboardX.hparam.MetricName',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # string group = 1
    _descriptor.FieldDescriptor(
      name='group', full_name='tensorboardX.hparam.MetricName.group', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # string tag = 2
    _descriptor.FieldDescriptor(
      name='tag', full_name='tensorboardX.hparam.MetricName.tag', index=1,
      number=2, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=574,
  serialized_end=614,
)


# Descriptor for the proto message `tensorboardX.hparam.MetricInfo`.
#
# Reading the FieldDescriptor arguments (protobuf FieldDescriptor constants):
#   type=9/cpp_type=9   -> string        type=14/cpp_type=8 -> enum
#   type=11/cpp_type=10 -> message       label=1 -> singular
# Field numbers are 1, 3, 4, 5: tag 2 is not used by this message.
# message_type/enum_type are None at construction time.
# NOTE(review): generated modules normally fill in these cross-references
# further down the file -- that part is outside this chunk.
# serialized_start/serialized_end are byte offsets into DESCRIPTOR's
# serialized_pb (158 bytes) and must stay in sync with that blob.
_METRICINFO = _descriptor.Descriptor(
  name='MetricInfo',
  full_name='tensorboardX.hparam.MetricInfo',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # MetricName name = 1 (type name per serialized_pb)
    _descriptor.FieldDescriptor(
      name='name', full_name='tensorboardX.hparam.MetricInfo.name', index=0,
      number=1, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # string display_name = 3
    _descriptor.FieldDescriptor(
      name='display_name', full_name='tensorboardX.hparam.MetricInfo.display_name', index=1,
      number=3, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # string description = 4
    _descriptor.FieldDescriptor(
      name='description', full_name='tensorboardX.hparam.MetricInfo.description', index=2,
      number=4, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # DatasetType dataset_type = 5 (enum; type name per serialized_pb)
    _descriptor.FieldDescriptor(
      name='dataset_type', full_name='tensorboardX.hparam.MetricInfo.dataset_type', index=3,
      number=5, type=14, cpp_type=8, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=617,
  serialized_end=775,
)


# Descriptor for the nested message
# `tensorboardX.hparam.SessionGroup.HparamsEntry`: a string `key` and a
# message-typed `value` (google.protobuf.Value per serialized_pb).
# It is listed in _SESSIONGROUP.nested_types; containing_type is None here.
# NOTE(review): generated modules normally set containing_type and the
# value's message_type further down the file -- outside this chunk.
# serialized_start/serialized_end are byte offsets into DESCRIPTOR's
# serialized_pb (70 bytes); the end offset coincides with SessionGroup's.
_SESSIONGROUP_HPARAMSENTRY = _descriptor.Descriptor(
  name='HparamsEntry',
  full_name='tensorboardX.hparam.SessionGroup.HparamsEntry',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # string key = 1
    _descriptor.FieldDescriptor(
      name='key', full_name='tensorboardX.hparam.SessionGroup.HparamsEntry.key', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # message-typed value = 2
    _descriptor.FieldDescriptor(
      name='value', full_name='tensorboardX.hparam.SessionGroup.HparamsEntry.value', index=1,
      number=2, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  # Serialized MessageOptions: bytes 0x38 0x01, i.e. field 7 (map_entry) set
  # to true -- this message is the entry type of a proto map field.
  serialized_options=_b('8\001'),
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=999,
  serialized_end=1069,
)

# Descriptor for the proto message `tensorboardX.hparam.SessionGroup`.
#
# Reading the FieldDescriptor arguments (protobuf FieldDescriptor constants):
#   type=9/cpp_type=9   -> string
#   type=11/cpp_type=10 -> message       label=1 -> singular, label=3 -> repeated
# message_type is None at construction time.
# NOTE(review): generated modules normally fill in these cross-references
# further down the file -- that part is outside this chunk.
# serialized_start/serialized_end are byte offsets into DESCRIPTOR's
# serialized_pb (291 bytes, including the nested HparamsEntry).
_SESSIONGROUP = _descriptor.Descriptor(
  name='SessionGroup',
  full_name='tensorboardX.hparam.SessionGroup',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # string name = 1
    _descriptor.FieldDescriptor(
      name='name', full_name='tensorboardX.hparam.SessionGroup.name', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # hparams = 2: repeated SessionGroup.HparamsEntry (per serialized_pb),
    # i.e. the wire form of a map keyed by string.
    _descriptor.FieldDescriptor(
      name='hparams', full_name='tensorboardX.hparam.SessionGroup.hparams', index=1,
      number=2, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # repeated MetricValue metric_values = 3 (type name per serialized_pb)
    _descriptor.FieldDescriptor(
      name='metric_values', full_name='tensorboardX.hparam.SessionGroup.metric_values', index=2,
      number=3, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # repeated Session sessions = 4
    _descriptor.FieldDescriptor(
      name='sessions', full_name='tensorboardX.hparam.SessionGroup.sessions', index=3,
      number=4, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # string monitor_url = 5
    _descriptor.FieldDescriptor(
      name='monitor_url', full_name='tensorboardX.hparam.SessionGroup.monitor_url', index=4,
      number=5, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  # The map-entry message for `hparams` is nested inside this message.
  nested_types=[_SESSIONGROUP_HPARAMSENTRY, ],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=778,
  serialized_end=1069,
)


# Descriptor for the proto message `tensorboardX.hparam.MetricValue`
# (api.proto: MetricName name = 1; double value = 2; int32 training_step = 3;
# double wall_time_secs = 4).
#
# Reading the FieldDescriptor arguments (protobuf FieldDescriptor constants):
#   type=11/cpp_type=10 -> message       type=1/cpp_type=5 -> double
#   type=5/cpp_type=1   -> int32         label=1 -> singular
# message_type is None at construction time.
# NOTE(review): generated modules normally fill in this cross-reference
# further down the file -- that part is outside this chunk.
# serialized_start/serialized_end are byte offsets into DESCRIPTOR's
# serialized_pb (122 bytes) and must stay in sync with that blob.
_METRICVALUE = _descriptor.Descriptor(
  name='MetricValue',
  full_name='tensorboardX.hparam.MetricValue',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # MetricName name = 1
    _descriptor.FieldDescriptor(
      name='name', full_name='tensorboardX.hparam.MetricValue.name', index=0,
      number=1, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # double value = 2
    _descriptor.FieldDescriptor(
      name='value', full_name='tensorboardX.hparam.MetricValue.value', index=1,
      number=2, type=1, cpp_type=5, label=1,
      has_default_value=False, default_value=float(0),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # int32 training_step = 3
    _descriptor.FieldDescriptor(
      name='training_step', full_name='tensorboardX.hparam.MetricValue.training_step', index=2,
      number=3, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # double wall_time_secs = 4
    _descriptor.FieldDescriptor(
      name='wall_time_secs', full_name='tensorboardX.hparam.MetricValue.wall_time_secs', index=3,
      number=4, type=1, cpp_type=5, label=1,
      has_default_value=False, default_value=float(0),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=1071,
  serialized_end=1193,
)


# Descriptor for the proto3 message `tensorboardX.hparam.Session`.
# protoc-generated: regenerate from the .proto instead of editing by hand.
#
# Fields (tag number: name, kind), as declared by the entries below:
#   1: name            (string)
#   2: start_time_secs (double)
#   3: end_time_secs   (double)
#   4: status          (enum)
#   5: model_uri       (string)
#   6: metric_values   (repeated message)
#   7: monitor_url     (string)
#
# `message_type` / `enum_type` are left as None here; `status` and
# `metric_values` are pointed at _STATUS and _METRICVALUE later in this module.
_SESSION = _descriptor.Descriptor(
  name='Session',
  full_name='tensorboardX.hparam.Session',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='name', full_name='tensorboardX.hparam.Session.name', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='start_time_secs', full_name='tensorboardX.hparam.Session.start_time_secs', index=1,
      number=2, type=1, cpp_type=5, label=1,
      has_default_value=False, default_value=float(0),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='end_time_secs', full_name='tensorboardX.hparam.Session.end_time_secs', index=2,
      number=3, type=1, cpp_type=5, label=1,
      has_default_value=False, default_value=float(0),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='status', full_name='tensorboardX.hparam.Session.status', index=3,
      number=4, type=14, cpp_type=8, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='model_uri', full_name='tensorboardX.hparam.Session.model_uri', index=4,
      number=5, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='metric_values', full_name='tensorboardX.hparam.Session.metric_values', index=5,
      number=6, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='monitor_url', full_name='tensorboardX.hparam.Session.monitor_url', index=6,
      number=7, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # NOTE(review): presumably the byte range of this message inside the file
  # descriptor's serialized_pb -- must stay in sync with that blob.
  serialized_start=1196,
  serialized_end=1409,
)


# Descriptor for the proto3 message `tensorboardX.hparam.GetExperimentRequest`.
# protoc-generated: regenerate from the .proto instead of editing by hand.
#
# Single field: tag 1, `experiment_name` (string, default "").
_GETEXPERIMENTREQUEST = _descriptor.Descriptor(
  name='GetExperimentRequest',
  full_name='tensorboardX.hparam.GetExperimentRequest',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='experiment_name', full_name='tensorboardX.hparam.GetExperimentRequest.experiment_name', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=1411,
  serialized_end=1458,
)


# Descriptor for the proto3 message `tensorboardX.hparam.ListSessionGroupsRequest`.
# protoc-generated: regenerate from the .proto instead of editing by hand.
#
# Fields are listed in `index` order, which differs from tag-number order here
# (index 0 carries tag 6, index 2 carries tag 1):
#   index 0 -> 6: experiment_name    (string)
#   index 1 -> 7: allowed_statuses   (repeated enum)
#   index 2 -> 1: col_params         (repeated message)
#   index 3 -> 2: aggregation_type   (enum)
#   index 4 -> 3: aggregation_metric (message)
#   index 5 -> 4: start_index        (int32)
#   index 6 -> 5: slice_size         (int32)
#
# Enum/message targets are None here and are linked later in this module
# (_STATUS, _COLPARAMS, _AGGREGATIONTYPE, _METRICNAME).
_LISTSESSIONGROUPSREQUEST = _descriptor.Descriptor(
  name='ListSessionGroupsRequest',
  full_name='tensorboardX.hparam.ListSessionGroupsRequest',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='experiment_name', full_name='tensorboardX.hparam.ListSessionGroupsRequest.experiment_name', index=0,
      number=6, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='allowed_statuses', full_name='tensorboardX.hparam.ListSessionGroupsRequest.allowed_statuses', index=1,
      number=7, type=14, cpp_type=8, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='col_params', full_name='tensorboardX.hparam.ListSessionGroupsRequest.col_params', index=2,
      number=1, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='aggregation_type', full_name='tensorboardX.hparam.ListSessionGroupsRequest.aggregation_type', index=3,
      number=2, type=14, cpp_type=8, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='aggregation_metric', full_name='tensorboardX.hparam.ListSessionGroupsRequest.aggregation_metric', index=4,
      number=3, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='start_index', full_name='tensorboardX.hparam.ListSessionGroupsRequest.start_index', index=5,
      number=4, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='slice_size', full_name='tensorboardX.hparam.ListSessionGroupsRequest.slice_size', index=6,
      number=5, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=1461,
  serialized_end=1785,
)


# Descriptor for the proto3 message `tensorboardX.hparam.ColParams`.
# protoc-generated: regenerate from the .proto instead of editing by hand.
#
# Fields (tag number: name, kind):
#   1: metric                 (message)
#   2: hparam                 (string)
#   3: order                  (enum)
#   4: missing_values_first   (bool)
#   5: filter_regexp          (string)
#   6: filter_interval        (message)
#   7: filter_discrete        (message)
#   8: exclude_missing_values (bool)
#
# Two oneofs are declared, `name` and `filter`, both with an empty member list
# at construction time. Later in this module `metric`/`hparam` are appended to
# `name` and `filter_regexp`/`filter_interval`/`filter_discrete` to `filter`.
_COLPARAMS = _descriptor.Descriptor(
  name='ColParams',
  full_name='tensorboardX.hparam.ColParams',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='metric', full_name='tensorboardX.hparam.ColParams.metric', index=0,
      number=1, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='hparam', full_name='tensorboardX.hparam.ColParams.hparam', index=1,
      number=2, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='order', full_name='tensorboardX.hparam.ColParams.order', index=2,
      number=3, type=14, cpp_type=8, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='missing_values_first', full_name='tensorboardX.hparam.ColParams.missing_values_first', index=3,
      number=4, type=8, cpp_type=7, label=1,
      has_default_value=False, default_value=False,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='filter_regexp', full_name='tensorboardX.hparam.ColParams.filter_regexp', index=4,
      number=5, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='filter_interval', full_name='tensorboardX.hparam.ColParams.filter_interval', index=5,
      number=6, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='filter_discrete', full_name='tensorboardX.hparam.ColParams.filter_discrete', index=6,
      number=7, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='exclude_missing_values', full_name='tensorboardX.hparam.ColParams.exclude_missing_values', index=7,
      number=8, type=8, cpp_type=7, label=1,
      has_default_value=False, default_value=False,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
    # Member lists start empty; they are filled in by the linking statements
    # that follow the descriptor definitions.
    _descriptor.OneofDescriptor(
      name='name', full_name='tensorboardX.hparam.ColParams.name',
      index=0, containing_type=None, fields=[]),
    _descriptor.OneofDescriptor(
      name='filter', full_name='tensorboardX.hparam.ColParams.filter',
      index=1, containing_type=None, fields=[]),
  ],
  serialized_start=1788,
  serialized_end=2133,
)


# Descriptor for the proto3 message `tensorboardX.hparam.ListSessionGroupsResponse`.
# protoc-generated: regenerate from the .proto instead of editing by hand.
#
# Fields (tag number: name, kind):
#   1: session_groups (repeated message; linked to _SESSIONGROUP later on)
#   3: total_size     (int32)
# Tag number 2 is not used by any field declared here.
_LISTSESSIONGROUPSRESPONSE = _descriptor.Descriptor(
  name='ListSessionGroupsResponse',
  full_name='tensorboardX.hparam.ListSessionGroupsResponse',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='session_groups', full_name='tensorboardX.hparam.ListSessionGroupsResponse.session_groups', index=0,
      number=1, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='total_size', full_name='tensorboardX.hparam.ListSessionGroupsResponse.total_size', index=1,
      number=3, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=2135,
  serialized_end=2241,
)


# Descriptor for the proto3 message `tensorboardX.hparam.ListMetricEvalsRequest`.
# protoc-generated: regenerate from the .proto instead of editing by hand.
#
# Fields in `index` order (tag numbers are not in declaration order):
#   index 0 -> 3: experiment_name (string)
#   index 1 -> 1: session_name    (string)
#   index 2 -> 2: metric_name     (message; linked to _METRICNAME later on)
_LISTMETRICEVALSREQUEST = _descriptor.Descriptor(
  name='ListMetricEvalsRequest',
  full_name='tensorboardX.hparam.ListMetricEvalsRequest',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='experiment_name', full_name='tensorboardX.hparam.ListMetricEvalsRequest.experiment_name', index=0,
      number=3, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='session_name', full_name='tensorboardX.hparam.ListMetricEvalsRequest.session_name', index=1,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='metric_name', full_name='tensorboardX.hparam.ListMetricEvalsRequest.metric_name', index=2,
      number=2, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=2243,
  serialized_end=2368,
)

# ---------------------------------------------------------------------------
# Cross-link the descriptors.
#
# The FieldDescriptors are created with message_type=None / enum_type=None.
# The statements below point every message- or enum-typed field at its target
# descriptor, and attach fields to the oneofs that contain them.
# For each oneof member, both directions are set: the field is appended to
# the oneof's `fields` list and the field's `containing_oneof` is set.
# ---------------------------------------------------------------------------
_EXPERIMENT.fields_by_name['hparam_infos'].message_type = _HPARAMINFO
_EXPERIMENT.fields_by_name['metric_infos'].message_type = _METRICINFO
_HPARAMINFO.fields_by_name['type'].enum_type = _DATATYPE
_HPARAMINFO.fields_by_name['domain_discrete'].message_type = google_dot_protobuf_dot_struct__pb2._LISTVALUE
_HPARAMINFO.fields_by_name['domain_interval'].message_type = _INTERVAL
# HParamInfo oneof `domain`: domain_discrete | domain_interval.
_HPARAMINFO.oneofs_by_name['domain'].fields.append(
  _HPARAMINFO.fields_by_name['domain_discrete'])
_HPARAMINFO.fields_by_name['domain_discrete'].containing_oneof = _HPARAMINFO.oneofs_by_name['domain']
_HPARAMINFO.oneofs_by_name['domain'].fields.append(
  _HPARAMINFO.fields_by_name['domain_interval'])
_HPARAMINFO.fields_by_name['domain_interval'].containing_oneof = _HPARAMINFO.oneofs_by_name['domain']
_METRICINFO.fields_by_name['name'].message_type = _METRICNAME
_METRICINFO.fields_by_name['dataset_type'].enum_type = _DATASETTYPE
# SessionGroup.HparamsEntry is nested inside SessionGroup (containing_type).
_SESSIONGROUP_HPARAMSENTRY.fields_by_name['value'].message_type = google_dot_protobuf_dot_struct__pb2._VALUE
_SESSIONGROUP_HPARAMSENTRY.containing_type = _SESSIONGROUP
_SESSIONGROUP.fields_by_name['hparams'].message_type = _SESSIONGROUP_HPARAMSENTRY
_SESSIONGROUP.fields_by_name['metric_values'].message_type = _METRICVALUE
_SESSIONGROUP.fields_by_name['sessions'].message_type = _SESSION
_METRICVALUE.fields_by_name['name'].message_type = _METRICNAME
_SESSION.fields_by_name['status'].enum_type = _STATUS
_SESSION.fields_by_name['metric_values'].message_type = _METRICVALUE
_LISTSESSIONGROUPSREQUEST.fields_by_name['allowed_statuses'].enum_type = _STATUS
_LISTSESSIONGROUPSREQUEST.fields_by_name['col_params'].message_type = _COLPARAMS
_LISTSESSIONGROUPSREQUEST.fields_by_name['aggregation_type'].enum_type = _AGGREGATIONTYPE
_LISTSESSIONGROUPSREQUEST.fields_by_name['aggregation_metric'].message_type = _METRICNAME
_COLPARAMS.fields_by_name['metric'].message_type = _METRICNAME
_COLPARAMS.fields_by_name['order'].enum_type = _SORTORDER
_COLPARAMS.fields_by_name['filter_interval'].message_type = _INTERVAL
_COLPARAMS.fields_by_name['filter_discrete'].message_type = google_dot_protobuf_dot_struct__pb2._LISTVALUE
# ColParams oneof `name`: metric | hparam.
_COLPARAMS.oneofs_by_name['name'].fields.append(
  _COLPARAMS.fields_by_name['metric'])
_COLPARAMS.fields_by_name['metric'].containing_oneof = _COLPARAMS.oneofs_by_name['name']
_COLPARAMS.oneofs_by_name['name'].fields.append(
  _COLPARAMS.fields_by_name['hparam'])
_COLPARAMS.fields_by_name['hparam'].containing_oneof = _COLPARAMS.oneofs_by_name['name']
# ColParams oneof `filter`: filter_regexp | filter_interval | filter_discrete.
_COLPARAMS.oneofs_by_name['filter'].fields.append(
  _COLPARAMS.fields_by_name['filter_regexp'])
_COLPARAMS.fields_by_name['filter_regexp'].containing_oneof = _COLPARAMS.oneofs_by_name['filter']
_COLPARAMS.oneofs_by_name['filter'].fields.append(
  _COLPARAMS.fields_by_name['filter_interval'])
_COLPARAMS.fields_by_name['filter_interval'].containing_oneof = _COLPARAMS.oneofs_by_name['filter']
_COLPARAMS.oneofs_by_name['filter'].fields.append(
  _COLPARAMS.fields_by_name['filter_discrete'])
_COLPARAMS.fields_by_name['filter_discrete'].containing_oneof = _COLPARAMS.oneofs_by_name['filter']
_LISTSESSIONGROUPSRESPONSE.fields_by_name['session_groups'].message_type = _SESSIONGROUP
_LISTMETRICEVALSREQUEST.fields_by_name['metric_name'].message_type = _METRICNAME
# Publish every top-level message descriptor on the file descriptor, keyed by
# its unqualified proto name. Order matches the original generated statements.
for _registered_name, _registered_descriptor in (
    ('Experiment', _EXPERIMENT),
    ('HParamInfo', _HPARAMINFO),
    ('Interval', _INTERVAL),
    ('MetricName', _METRICNAME),
    ('MetricInfo', _METRICINFO),
    ('SessionGroup', _SESSIONGROUP),
    ('MetricValue', _METRICVALUE),
    ('Session', _SESSION),
    ('GetExperimentRequest', _GETEXPERIMENTREQUEST),
    ('ListSessionGroupsRequest', _LISTSESSIONGROUPSREQUEST),
    ('ColParams', _COLPARAMS),
    ('ListSessionGroupsResponse', _LISTSESSIONGROUPSRESPONSE),
    ('ListMetricEvalsRequest', _LISTMETRICEVALSREQUEST),
):
    DESCRIPTOR.message_types_by_name[_registered_name] = _registered_descriptor

# Same for the top-level enum descriptors.
for _registered_name, _registered_descriptor in (
    ('DataType', _DATATYPE),
    ('DatasetType', _DATASETTYPE),
    ('Status', _STATUS),
    ('SortOrder', _SORTORDER),
    ('AggregationType', _AGGREGATIONTYPE),
):
    DESCRIPTOR.enum_types_by_name[_registered_name] = _registered_descriptor

# Do not leave the loop temporaries behind in the module namespace.
del _registered_name, _registered_descriptor

# Register the fully populated file descriptor with the symbol database.
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

# Concrete message classes.
#
# Each class is built by the protobuf reflection metaclass from its descriptor
# and then registered with the symbol database. The class namespace is passed
# as a dict literal; keys and their order are the same as in the keyword form.

Experiment = _reflection.GeneratedProtocolMessageType(
    'Experiment',
    (_message.Message,),
    {
        'DESCRIPTOR': _EXPERIMENT,
        '__module__': 'tensorboardX.proto.api_pb2',
        # @@protoc_insertion_point(class_scope:tensorboardX.hparam.Experiment)
    },
)
_sym_db.RegisterMessage(Experiment)

HParamInfo = _reflection.GeneratedProtocolMessageType(
    'HParamInfo',
    (_message.Message,),
    {
        'DESCRIPTOR': _HPARAMINFO,
        '__module__': 'tensorboardX.proto.api_pb2',
        # @@protoc_insertion_point(class_scope:tensorboardX.hparam.HParamInfo)
    },
)
_sym_db.RegisterMessage(HParamInfo)

Interval = _reflection.GeneratedProtocolMessageType(
    'Interval',
    (_message.Message,),
    {
        'DESCRIPTOR': _INTERVAL,
        '__module__': 'tensorboardX.proto.api_pb2',
        # @@protoc_insertion_point(class_scope:tensorboardX.hparam.Interval)
    },
)
_sym_db.RegisterMessage(Interval)

MetricName = _reflection.GeneratedProtocolMessageType(
    'MetricName',
    (_message.Message,),
    {
        'DESCRIPTOR': _METRICNAME,
        '__module__': 'tensorboardX.proto.api_pb2',
        # @@protoc_insertion_point(class_scope:tensorboardX.hparam.MetricName)
    },
)
_sym_db.RegisterMessage(MetricName)

MetricInfo = _reflection.GeneratedProtocolMessageType(
    'MetricInfo',
    (_message.Message,),
    {
        'DESCRIPTOR': _METRICINFO,
        '__module__': 'tensorboardX.proto.api_pb2',
        # @@protoc_insertion_point(class_scope:tensorboardX.hparam.MetricInfo)
    },
)
_sym_db.RegisterMessage(MetricInfo)

# SessionGroup carries a nested class, HparamsEntry, which is registered
# separately right after its parent.
SessionGroup = _reflection.GeneratedProtocolMessageType(
    'SessionGroup',
    (_message.Message,),
    {
        'HparamsEntry': _reflection.GeneratedProtocolMessageType(
            'HparamsEntry',
            (_message.Message,),
            {
                'DESCRIPTOR': _SESSIONGROUP_HPARAMSENTRY,
                '__module__': 'tensorboardX.proto.api_pb2',
                # @@protoc_insertion_point(class_scope:tensorboardX.hparam.SessionGroup.HparamsEntry)
            },
        ),
        'DESCRIPTOR': _SESSIONGROUP,
        '__module__': 'tensorboardX.proto.api_pb2',
        # @@protoc_insertion_point(class_scope:tensorboardX.hparam.SessionGroup)
    },
)
_sym_db.RegisterMessage(SessionGroup)
_sym_db.RegisterMessage(SessionGroup.HparamsEntry)

MetricValue = _reflection.GeneratedProtocolMessageType(
    'MetricValue',
    (_message.Message,),
    {
        'DESCRIPTOR': _METRICVALUE,
        '__module__': 'tensorboardX.proto.api_pb2',
        # @@protoc_insertion_point(class_scope:tensorboardX.hparam.MetricValue)
    },
)
_sym_db.RegisterMessage(MetricValue)

Session = _reflection.GeneratedProtocolMessageType(
    'Session',
    (_message.Message,),
    {
        'DESCRIPTOR': _SESSION,
        '__module__': 'tensorboardX.proto.api_pb2',
        # @@protoc_insertion_point(class_scope:tensorboardX.hparam.Session)
    },
)
_sym_db.RegisterMessage(Session)

GetExperimentRequest = _reflection.GeneratedProtocolMessageType(
    'GetExperimentRequest',
    (_message.Message,),
    {
        'DESCRIPTOR': _GETEXPERIMENTREQUEST,
        '__module__': 'tensorboardX.proto.api_pb2',
        # @@protoc_insertion_point(class_scope:tensorboardX.hparam.GetExperimentRequest)
    },
)
_sym_db.RegisterMessage(GetExperimentRequest)

ListSessionGroupsRequest = _reflection.GeneratedProtocolMessageType(
    'ListSessionGroupsRequest',
    (_message.Message,),
    {
        'DESCRIPTOR': _LISTSESSIONGROUPSREQUEST,
        '__module__': 'tensorboardX.proto.api_pb2',
        # @@protoc_insertion_point(class_scope:tensorboardX.hparam.ListSessionGroupsRequest)
    },
)
_sym_db.RegisterMessage(ListSessionGroupsRequest)

ColParams = _reflection.GeneratedProtocolMessageType(
    'ColParams',
    (_message.Message,),
    {
        'DESCRIPTOR': _COLPARAMS,
        '__module__': 'tensorboardX.proto.api_pb2',
        # @@protoc_insertion_point(class_scope:tensorboardX.hparam.ColParams)
    },
)
_sym_db.RegisterMessage(ColParams)

ListSessionGroupsResponse = _reflection.GeneratedProtocolMessageType(
    'ListSessionGroupsResponse',
    (_message.Message,),
    {
        'DESCRIPTOR': _LISTSESSIONGROUPSRESPONSE,
        '__module__': 'tensorboardX.proto.api_pb2',
        # @@protoc_insertion_point(class_scope:tensorboardX.hparam.ListSessionGroupsResponse)
    },
)
_sym_db.RegisterMessage(ListSessionGroupsResponse)

ListMetricEvalsRequest = _reflection.GeneratedProtocolMessageType(
    'ListMetricEvalsRequest',
    (_message.Message,),
    {
        'DESCRIPTOR': _LISTMETRICEVALSREQUEST,
        '__module__': 'tensorboardX.proto.api_pb2',
        # @@protoc_insertion_point(class_scope:tensorboardX.hparam.ListMetricEvalsRequest)
    },
)
_sym_db.RegisterMessage(ListMetricEvalsRequest)


# Reset the private `_options` attribute on the SessionGroup.HparamsEntry
# descriptor. NOTE(review): presumably this clears a cached options object so
# it is rebuilt from the serialized options on demand -- confirm against the
# protobuf runtime before relying on it. Emitted by protoc; do not hand-edit.
_SESSIONGROUP_HPARAMSENTRY._options = None
# @@protoc_insertion_point(module_scope)


================================================
FILE: tensorboardX/tensorboardX/proto/attr_value.proto
================================================
syntax = "proto3";

package tensorboardX;
option cc_enable_arenas = true;
option java_outer_classname = "AttrValueProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

import "tensorboardX/proto/tensor.proto";
import "tensorboardX/proto/tensor_shape.proto";
import "tensorboardX/proto/types.proto";

// Protocol buffer representing the value for an attr used to configure an Op.
// Comment indicates the corresponding attr type.  Only the field matching the
// attr type may be filled.
message AttrValue {
  // Payload of the `list` member of the `value` oneof below: one repeated
  // field per supported element type.  Field numbers 2-8 are the same as the
  // corresponding single-valued members of `value`; `func` is the exception
  // (9 here, 10 in the oneof, where 9 is taken by `placeholder`).
  // LINT.IfChange
  message ListValue {
    repeated bytes s = 2;                        // "list(string)"
    repeated int64 i = 3 [packed = true];        // "list(int)"
    repeated float f = 4 [packed = true];        // "list(float)"
    repeated bool b = 5 [packed = true];         // "list(bool)"
    repeated DataType type = 6 [packed = true];  // "list(type)"
    repeated TensorShapeProto shape = 7;         // "list(shape)"
    repeated TensorProto tensor = 8;             // "list(tensor)"
    repeated NameAttrList func = 9;              // "list(attr)"
  }
  // LINT.ThenChange(https://www.tensorflow.org/code/tensorflow/c/c_api.cc)

  // The attr payload.  Being a oneof, at most one member is set at a time;
  // the trailing comment on each member names the attr type it represents.
  oneof value {
    bytes s = 2;                 // "string"
    int64 i = 3;                 // "int"
    float f = 4;                 // "float"
    bool b = 5;                  // "bool"
    DataType type = 6;           // "type"
    TensorShapeProto shape = 7;  // "shape"
    TensorProto tensor = 8;      // "tensor"
    ListValue list = 1;          // any "list(...)"

    // "func" represents a function. func.name is a function's name or
    // a primitive op's name. func.attr.first is the name of an attr
    // defined for that function. func.attr.second is the value for
    // that attr in the instantiation.
    NameAttrList func = 10;

    // This is a placeholder only used in nodes defined inside a
    // function.  It indicates the attr value will be supplied when
    // the function is instantiated.  For example, let us suppose a
    // node "N" in function "FN". "N" has an attr "A" with value
    // placeholder = "foo". When FN is instantiated with attr "foo"
    // set to "bar", the instantiated node N's attr A will have been
    // given the value "bar".
    string placeholder = 9;
  }
}

// A list of attr names and their values. The whole list is attached
// with a string name.  E.g., MatMul[T=float].
message NameAttrList {
  // The string name the whole attr list is attached to.
  string name = 1;
  // Attr name -> attr value.
  map<string, AttrValue> attr = 2;
}


================================================
FILE: tensorboardX/tensorboardX/proto/attr_value_pb2.py
================================================
# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: tensorboardX/proto/attr_value.proto

import sys
# Py2/Py3 shim used to turn the str literals in this module into bytes.
# On Python 2 a str literal is already a byte string, so it is returned
# unchanged; on Python 3 it is encoded as latin1, which maps each code point
# below 256 to the byte of the same value.
# Written as a conditional expression rather than the fragile
# `cond and a or b` idiom; the selected callable is the same.
_b = (lambda x: x) if sys.version_info[0] < 3 else (lambda x: x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


from tensorboardX.proto import tensor_pb2 as tensorboardX_dot_proto_dot_tensor__pb2
from tensorboardX.proto import tensor_shape_pb2 as tensorboardX_dot_proto_dot_tensor__shape__pb2
from tensorboardX.proto import types_pb2 as tensorboardX_dot_proto_dot_types__pb2


# File-level descriptor for tensorboardX/proto/attr_value.proto.
# serialized_pb holds the serialized descriptor of the whole .proto file; the
# serialized_start/serialized_end values on the message descriptors below are
# byte offsets into this blob, so it must never be edited by hand.
# The three dependencies are the descriptors of the imported tensor,
# tensor_shape and types modules.
DESCRIPTOR = _descriptor.FileDescriptor(
  name='tensorboardX/proto/attr_value.proto',
  package='tensorboardX',
  syntax='proto3',
  serialized_options=_b('\n\030org.tensorflow.frameworkB\017AttrValueProtosP\001\370\001\001'),
  serialized_pb=_b('\n#tensorboardX/proto/attr_value.proto\x12\x0ctensorboardX\x1a\x1ftensorboardX/proto/tensor.proto\x1a%tensorboardX/proto/tensor_shape.proto\x1a\x1etensorboardX/proto/types.proto\"\xb8\x04\n\tAttrValue\x12\x0b\n\x01s\x18\x02 \x01(\x0cH\x00\x12\x0b\n\x01i\x18\x03 \x01(\x03H\x00\x12\x0b\n\x01\x66\x18\x04 \x01(\x02H\x00\x12\x0b\n\x01\x62\x18\x05 \x01(\x08H\x00\x12&\n\x04type\x18\x06 \x01(\x0e\x32\x16.tensorboardX.DataTypeH\x00\x12/\n\x05shape\x18\x07 \x01(\x0b\x32\x1e.tensorboardX.TensorShapeProtoH\x00\x12+\n\x06tensor\x18\x08 \x01(\x0b\x32\x19.tensorboardX.TensorProtoH\x00\x12\x31\n\x04list\x18\x01 \x01(\x0b\x32!.tensorboardX.AttrValue.ListValueH\x00\x12*\n\x04\x66unc\x18\n \x01(\x0b\x32\x1a.tensorboardX.NameAttrListH\x00\x12\x15\n\x0bplaceholder\x18\t \x01(\tH\x00\x1a\xf1\x01\n\tListValue\x12\t\n\x01s\x18\x02 \x03(\x0c\x12\r\n\x01i\x18\x03 \x03(\x03\x42\x02\x10\x01\x12\r\n\x01\x66\x18\x04 \x03(\x02\x42\x02\x10\x01\x12\r\n\x01\x62\x18\x05 \x03(\x08\x42\x02\x10\x01\x12(\n\x04type\x18\x06 \x03(\x0e\x32\x16.tensorboardX.DataTypeB\x02\x10\x01\x12-\n\x05shape\x18\x07 \x03(\x0b\x32\x1e.tensorboardX.TensorShapeProto\x12)\n\x06tensor\x18\x08 \x03(\x0b\x32\x19.tensorboardX.TensorProto\x12(\n\x04\x66unc\x18\t \x03(\x0b\x32\x1a.tensorboardX.NameAttrListB\x07\n\x05value\"\x96\x01\n\x0cNameAttrList\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x32\n\x04\x61ttr\x18\x02 \x03(\x0b\x32$.tensorboardX.NameAttrList.AttrEntry\x1a\x44\n\tAttrEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12&\n\x05value\x18\x02 \x01(\x0b\x32\x17.tensorboardX.AttrValue:\x02\x38\x01\x42\x30\n\x18org.tensorflow.frameworkB\x0f\x41ttrValueProtosP\x01\xf8\x01\x01\x62\x06proto3')
  ,
  dependencies=[tensorboardX_dot_proto_dot_tensor__pb2.DESCRIPTOR,tensorboardX_dot_proto_dot_tensor__shape__pb2.DESCRIPTOR,tensorboardX_dot_proto_dot_types__pb2.DESCRIPTOR,])


# Descriptor for the nested message tensorboardX.AttrValue.ListValue.
# All eight fields are repeated (label=3).  enum_type/message_type and
# containing_type are left as None here and are filled in once every
# descriptor in the module exists (see the assignments after the descriptor
# definitions).
# NOTE(review): serialized_options '\020\001' on i/f/b/type appears to encode
# the field option packed=true -- confirm against descriptor.proto.
_ATTRVALUE_LISTVALUE = _descriptor.Descriptor(
  name='ListValue',
  full_name='tensorboardX.AttrValue.ListValue',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='s', full_name='tensorboardX.AttrValue.ListValue.s', index=0,
      number=2, type=12, cpp_type=9, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='i', full_name='tensorboardX.AttrValue.ListValue.i', index=1,
      number=3, type=3, cpp_type=2, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=_b('\020\001'), file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='f', full_name='tensorboardX.AttrValue.ListValue.f', index=2,
      number=4, type=2, cpp_type=6, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=_b('\020\001'), file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='b', full_name='tensorboardX.AttrValue.ListValue.b', index=3,
      number=5, type=8, cpp_type=7, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=_b('\020\001'), file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='type', full_name='tensorboardX.AttrValue.ListValue.type', index=4,
      number=6, type=14, cpp_type=8, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=_b('\020\001'), file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='shape', full_name='tensorboardX.AttrValue.ListValue.shape', index=5,
      number=7, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='tensor', full_name='tensorboardX.AttrValue.ListValue.tensor', index=6,
      number=8, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='func', full_name='tensorboardX.AttrValue.ListValue.func', index=7,
      number=9, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=476,
  serialized_end=717,
)

# Descriptor for the top-level message tensorboardX.AttrValue.
# All ten fields are singular (label=1); ListValue is its only nested type.
# The 'value' oneof is created with an empty field list here; each field is
# appended to it, and message/enum types are resolved, after all descriptors
# in the module have been constructed.
_ATTRVALUE = _descriptor.Descriptor(
  name='AttrValue',
  full_name='tensorboardX.AttrValue',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='s', full_name='tensorboardX.AttrValue.s', index=0,
      number=2, type=12, cpp_type=9, label=1,
      has_default_value=False, default_value=_b(""),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='i', full_name='tensorboardX.AttrValue.i', index=1,
      number=3, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='f', full_name='tensorboardX.AttrValue.f', index=2,
      number=4, type=2, cpp_type=6, label=1,
      has_default_value=False, default_value=float(0),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='b', full_name='tensorboardX.AttrValue.b', index=3,
      number=5, type=8, cpp_type=7, label=1,
      has_default_value=False, default_value=False,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='type', full_name='tensorboardX.AttrValue.type', index=4,
      number=6, type=14, cpp_type=8, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='shape', full_name='tensorboardX.AttrValue.shape', index=5,
      number=7, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='tensor', full_name='tensorboardX.AttrValue.tensor', index=6,
      number=8, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='list', full_name='tensorboardX.AttrValue.list', index=7,
      number=1, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='func', full_name='tensorboardX.AttrValue.func', index=8,
      number=10, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='placeholder', full_name='tensorboardX.AttrValue.placeholder', index=9,
      number=9, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[_ATTRVALUE_LISTVALUE, ],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
    _descriptor.OneofDescriptor(
      name='value', full_name='tensorboardX.AttrValue.value',
      index=0, containing_type=None, fields=[]),
  ],
  serialized_start=158,
  serialized_end=726,
)


# Descriptor for tensorboardX.NameAttrList.AttrEntry, the entry message
# behind the `map<string, AttrValue> attr` field: a string key and a
# message-typed value whose message_type is resolved to AttrValue later.
# NOTE(review): serialized_options '8\001' appears to encode the message
# option map_entry=true -- confirm against descriptor.proto.
_NAMEATTRLIST_ATTRENTRY = _descriptor.Descriptor(
  name='AttrEntry',
  full_name='tensorboardX.NameAttrList.AttrEntry',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='key', full_name='tensorboardX.NameAttrList.AttrEntry.key', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='value', full_name='tensorboardX.NameAttrList.AttrEntry.value', index=1,
      number=2, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=_b('8\001'),
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=811,
  serialized_end=879,
)

# Descriptor for the top-level message tensorboardX.NameAttrList:
# a singular string `name` and a repeated (label=3) message field `attr`
# whose message_type is resolved to the nested AttrEntry descriptor later.
_NAMEATTRLIST = _descriptor.Descriptor(
  name='NameAttrList',
  full_name='tensorboardX.NameAttrList',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='name', full_name='tensorboardX.NameAttrList.name', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='attr', full_name='tensorboardX.NameAttrList.attr', index=1,
      number=2, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[_NAMEATTRLIST_ATTRENTRY, ],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=729,
  serialized_end=879,
)

# Resolve the cross-references that were left as None while the descriptors
# were being constructed, then publish the file descriptor.

# AttrValue.ListValue: element types and enclosing message.
_list_fields = _ATTRVALUE_LISTVALUE.fields_by_name
_list_fields['type'].enum_type = tensorboardX_dot_proto_dot_types__pb2._DATATYPE
_list_fields['shape'].message_type = tensorboardX_dot_proto_dot_tensor__shape__pb2._TENSORSHAPEPROTO
_list_fields['tensor'].message_type = tensorboardX_dot_proto_dot_tensor__pb2._TENSORPROTO
_list_fields['func'].message_type = _NAMEATTRLIST
_ATTRVALUE_LISTVALUE.containing_type = _ATTRVALUE

# AttrValue: field types.
_value_fields = _ATTRVALUE.fields_by_name
_value_fields['type'].enum_type = tensorboardX_dot_proto_dot_types__pb2._DATATYPE
_value_fields['shape'].message_type = tensorboardX_dot_proto_dot_tensor__shape__pb2._TENSORSHAPEPROTO
_value_fields['tensor'].message_type = tensorboardX_dot_proto_dot_tensor__pb2._TENSORPROTO
_value_fields['list'].message_type = _ATTRVALUE_LISTVALUE
_value_fields['func'].message_type = _NAMEATTRLIST

# AttrValue: every field belongs to the 'value' oneof.  The order below is
# the order in which members are appended to the oneof's field list.
_value_oneof = _ATTRVALUE.oneofs_by_name['value']
for _member in ('s', 'i', 'f', 'b', 'type', 'shape', 'tensor', 'list',
                'func', 'placeholder'):
  _value_oneof.fields.append(_value_fields[_member])
  _value_fields[_member].containing_oneof = _value_oneof

# NameAttrList and its map-entry message.
_NAMEATTRLIST_ATTRENTRY.fields_by_name['value'].message_type = _ATTRVALUE
_NAMEATTRLIST_ATTRENTRY.containing_type = _NAMEATTRLIST
_NAMEATTRLIST.fields_by_name['attr'].message_type = _NAMEATTRLIST_ATTRENTRY

# Keep the module namespace free of the temporaries used above.
del _list_fields, _value_fields, _value_oneof, _member

# Expose the top-level messages on the file descriptor and register it.
DESCRIPTOR.message_types_by_name['AttrValue'] = _ATTRVALUE
DESCRIPTOR.message_types_by_name['NameAttrList'] = _NAMEATTRLIST
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

# Message class for tensorboardX.AttrValue.  The nested ListValue class is
# created as part of the class namespace, so it is reachable as
# AttrValue.ListValue; both classes are then registered with the symbol
# database.
AttrValue = _reflection.GeneratedProtocolMessageType(
  'AttrValue',
  (_message.Message,),
  {
    'ListValue': _reflection.GeneratedProtocolMessageType(
      'ListValue',
      (_message.Message,),
      {
        'DESCRIPTOR': _ATTRVALUE_LISTVALUE,
        '__module__': 'tensorboardX.proto.attr_value_pb2',
        # @@protoc_insertion_point(class_scope:tensorboardX.AttrValue.ListValue)
      },
    ),
    'DESCRIPTOR': _ATTRVALUE,
    '__module__': 'tensorboardX.proto.attr_value_pb2',
    # @@protoc_insertion_point(class_scope:tensorboardX.AttrValue)
  },
)
_sym_db.RegisterMessage(AttrValue)
_sym_db.RegisterMessage(AttrValue.ListValue)

# Message class for tensorboardX.NameAttrList.  The nested AttrEntry class is
# created as part of the class namespace, so it is reachable as
# NameAttrList.AttrEntry; both classes are then registered with the symbol
# database.
NameAttrList = _reflection.GeneratedProtocolMessageType(
  'NameAttrList',
  (_message.Message,),
  {
    'AttrEntry': _reflection.GeneratedProtocolMessageType(
      'AttrEntry',
      (_message.Message,),
      {
        'DESCRIPTOR': _NAMEATTRLIST_ATTRENTRY,
        '__module__': 'tensorboardX.proto.attr_value_pb2',
        # @@protoc_insertion_point(class_scope:tensorboardX.NameAttrList.AttrEntry)
      },
    ),
    'DESCRIPTOR': _NAMEATTRLIST,
    '__module__': 'tensorboardX.proto.attr_value_pb2',
    # @@protoc_insertion_point(class_scope:tensorboardX.NameAttrList)
  },
)
_sym_db.RegisterMessage(NameAttrList)
_sym_db.RegisterMessage(NameAttrList.AttrEntry)


# Reset `_options` to None on every descriptor in this module that was
# created with non-None serialized_options: the file descriptor, the four
# ListValue fields i/f/b/type, and the AttrEntry message.
DESCRIPTOR._options = None
for _field_name in ('i', 'f', 'b', 'type'):
  _ATTRVALUE_LISTVALUE.fields_by_name[_field_name]._options = None
del _field_name
_NAMEATTRLIST_ATTRENTRY._options = None
# @@protoc_insertion_point(module_scope)


================================================
FILE: tensorboardX/tensorboardX/proto/event.proto
================================================
syntax = "proto3";

package tensorboardX;
option cc_enable_arenas = true;
option java_outer_classname = "EventProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.util";

import "tensorboardX/proto/summary.proto";

// Protocol buffer representing an event that happened during
// the execution of a Brain model.
message Event {
  // Timestamp of the event.
  double wall_time = 1;

  // Global step of the event.
  int64 step = 2;

  oneof what {
    // An event file was started, with the specified version.
    // This is used to identify the contents of the record IO files
    // easily.  Current version is "brain.Event:2".  All versions
    // start with "brain.Event:".
    string file_version = 3;
    // An encoded version of a GraphDef.
    bytes graph_def = 4;
    // A summary was generated.
    Summary summary = 5;
    // The user output a log message. Not all messages are logged, only ones
    // generated via the Python tensorboard_logging module.
    LogMessage log_message = 6;
    // The state of the session which can be used for restarting after crashes.
    SessionLog session_log = 7;
    // The metadata returned by running a session.run() call.
    TaggedRunMetadata tagged_run_metadata = 8;
    // An encoded version of a MetaGraphDef.
    bytes meta_graph_def = 9;
  }
}

// Protocol buffer used for logging messages to the events file.
message LogMessage {
  // Severity levels.
  // NOTE(review): the numeric values (10..50) look like they mirror the
  // levels of Python's `logging` module -- confirm before relying on it.
  enum Level {
    UNKNOWN = 0;
    DEBUG = 10;
    INFO = 20;
    WARN = 30;
    ERROR = 40;
    FATAL = 50;
  }
  // Severity of this message.
  Level level = 1;
  // The logged message.
  string message = 2;
}

// Protocol buffer used for logging session state.
message SessionLog {
  // Session states that can be recorded; STATUS_UNSPECIFIED (0) is the
  // proto3 default when no status is set.
  enum SessionStatus {
    STATUS_UNSPECIFIED = 0;
    START = 1;
    STOP = 2;
    CHECKPOINT = 3;
  }

  // State being logged.
  SessionStatus status = 1;
  // This checkpoint_path contains both the path and filename.
  string checkpoint_path = 2;
  // Free-form message.
  // NOTE(review): exact intended content is not documented here -- confirm.
  string msg = 3;
}

// For logging the metadata output for a single session.run() call.
message TaggedRunMetadata {
  // Tag name associated with this metadata.
  string tag = 1;
  // Serialized `RunMetadata` proto, kept as raw bytes so that it can be
  // deserialized lazily.
  bytes run_metadata = 2;
}


================================================
FILE: tensorboardX/tensorboardX/proto/event_pb2.py
================================================
# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: tensorboardX/proto/event.proto

import sys
# Converts the str literals used for descriptor blobs into bytes: identity on
# Python 2, latin-1 encoding on Python 3.
_b = (lambda x: x) if sys.version_info[0] < 3 else (lambda x: x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

# Default symbol database; the file descriptor, enum descriptors and message
# classes defined in this module are registered with it further down.
_sym_db = _symbol_database.Default()


from tensorboardX.proto import summary_pb2 as tensorboardX_dot_proto_dot_summary__pb2


# File-level descriptor for tensorboardX/proto/event.proto.
# serialized_pb holds the serialized descriptor of the whole .proto file; the
# serialized_start/serialized_end values on the descriptors below are byte
# offsets into this blob, so it must never be edited by hand.
# The single dependency is the descriptor of the imported summary module.
DESCRIPTOR = _descriptor.FileDescriptor(
  name='tensorboardX/proto/event.proto',
  package='tensorboardX',
  syntax='proto3',
  serialized_options=_b('\n\023org.tensorflow.utilB\013EventProtosP\001\370\001\001'),
  serialized_pb=_b('\n\x1etensorboardX/proto/event.proto\x12\x0ctensorboardX\x1a tensorboardX/proto/summary.proto\"\xc3\x02\n\x05\x45vent\x12\x11\n\twall_time\x18\x01 \x01(\x01\x12\x0c\n\x04step\x18\x02 \x01(\x03\x12\x16\n\x0c\x66ile_version\x18\x03 \x01(\tH\x00\x12\x13\n\tgraph_def\x18\x04 \x01(\x0cH\x00\x12(\n\x07summary\x18\x05 \x01(\x0b\x32\x15.tensorboardX.SummaryH\x00\x12/\n\x0blog_message\x18\x06 \x01(\x0b\x32\x18.tensorboardX.LogMessageH\x00\x12/\n\x0bsession_log\x18\x07 \x01(\x0b\x32\x18.tensorboardX.SessionLogH\x00\x12>\n\x13tagged_run_metadata\x18\x08 \x01(\x0b\x32\x1f.tensorboardX.TaggedRunMetadataH\x00\x12\x18\n\x0emeta_graph_def\x18\t \x01(\x0cH\x00\x42\x06\n\x04what\"\x97\x01\n\nLogMessage\x12-\n\x05level\x18\x01 \x01(\x0e\x32\x1e.tensorboardX.LogMessage.Level\x12\x0f\n\x07message\x18\x02 \x01(\t\"I\n\x05Level\x12\x0b\n\x07UNKNOWN\x10\x00\x12\t\n\x05\x44\x45\x42UG\x10\n\x12\x08\n\x04INFO\x10\x14\x12\x08\n\x04WARN\x10\x1e\x12\t\n\x05\x45RROR\x10(\x12\t\n\x05\x46\x41TAL\x10\x32\"\xb8\x01\n\nSessionLog\x12\x36\n\x06status\x18\x01 \x01(\x0e\x32&.tensorboardX.SessionLog.SessionStatus\x12\x17\n\x0f\x63heckpoint_path\x18\x02 \x01(\t\x12\x0b\n\x03msg\x18\x03 \x01(\t\"L\n\rSessionStatus\x12\x16\n\x12STATUS_UNSPECIFIED\x10\x00\x12\t\n\x05START\x10\x01\x12\x08\n\x04STOP\x10\x02\x12\x0e\n\nCHECKPOINT\x10\x03\"6\n\x11TaggedRunMetadata\x12\x0b\n\x03tag\x18\x01 \x01(\t\x12\x14\n\x0crun_metadata\x18\x02 \x01(\x0c\x42\'\n\x13org.tensorflow.utilB\x0b\x45ventProtosP\x01\xf8\x01\x01\x62\x06proto3')
  ,
  dependencies=[tensorboardX_dot_proto_dot_summary__pb2.DESCRIPTOR,])


# Enum descriptor for tensorboardX.LogMessage.Level (UNKNOWN=0 ... FATAL=50).
# containing_type is None here and is set to the LogMessage descriptor once
# that descriptor exists; the enum is registered with the symbol database
# immediately after construction.
_LOGMESSAGE_LEVEL = _descriptor.EnumDescriptor(
  name='Level',
  full_name='tensorboardX.LogMessage.Level',
  filename=None,
  file=DESCRIPTOR,
  values=[
    _descriptor.EnumValueDescriptor(
      name='UNKNOWN', index=0, number=0,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DEBUG', index=1, number=10,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='INFO', index=2, number=20,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='WARN', index=3, number=30,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='ERROR', index=4, number=40,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='FATAL', index=5, number=50,
      serialized_options=None,
      type=None),
  ],
  containing_type=None,
  serialized_options=None,
  serialized_start=487,
  serialized_end=560,
)
_sym_db.RegisterEnumDescriptor(_LOGMESSAGE_LEVEL)

# Enum descriptor for tensorboardX.SessionLog.SessionStatus
# (STATUS_UNSPECIFIED=0, START=1, STOP=2, CHECKPOINT=3).
# containing_type is None here and is set to the SessionLog descriptor once
# that descriptor exists; the enum is registered with the symbol database
# immediately after construction.
_SESSIONLOG_SESSIONSTATUS = _descriptor.EnumDescriptor(
  name='SessionStatus',
  full_name='tensorboardX.SessionLog.SessionStatus',
  filename=None,
  file=DESCRIPTOR,
  values=[
    _descriptor.EnumValueDescriptor(
      name='STATUS_UNSPECIFIED', index=0, number=0,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='START', index=1, number=1,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='STOP', index=2, number=2,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='CHECKPOINT', index=3, number=3,
      serialized_options=None,
      type=None),
  ],
  containing_type=None,
  serialized_options=None,
  serialized_start=671,
  serialized_end=747,
)
_sym_db.RegisterEnumDescriptor(_SESSIONLOG_SESSIONSTATUS)


# Descriptor for the top-level message tensorboardX.Event.
# wall_time and step are plain fields; the remaining seven fields
# (file_version .. meta_graph_def) are later attached to the 'what' oneof,
# which is created here with an empty field list.  message_type for the
# message-typed fields (type=11) is also resolved after construction.
_EVENT = _descriptor.Descriptor(
  name='Event',
  full_name='tensorboardX.Event',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='wall_time', full_name='tensorboardX.Event.wall_time', index=0,
      number=1, type=1, cpp_type=5, label=1,
      has_default_value=False, default_value=float(0),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='step', full_name='tensorboardX.Event.step', index=1,
      number=2, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='file_version', full_name='tensorboardX.Event.file_version', index=2,
      number=3, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='graph_def', full_name='tensorboardX.Event.graph_def', index=3,
      number=4, type=12, cpp_type=9, label=1,
      has_default_value=False, default_value=_b(""),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='summary', full_name='tensorboardX.Event.summary', index=4,
      number=5, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='log_message', full_name='tensorboardX.Event.log_message', index=5,
      number=6, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='session_log', full_name='tensorboardX.Event.session_log', index=6,
      number=7, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='tagged_run_metadata', full_name='tensorboardX.Event.tagged_run_metadata', index=7,
      number=8, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='meta_graph_def', full_name='tensorboardX.Event.meta_graph_def', index=8,
      number=9, type=12, cpp_type=9, label=1,
      has_default_value=False, default_value=_b(""),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
    _descriptor.OneofDescriptor(
      name='what', full_name='tensorboardX.Event.what',
      index=0, containing_type=None, fields=[]),
  ],
  serialized_start=83,
  serialized_end=406,
)


# Descriptor for the top-level message tensorboardX.LogMessage:
# an enum field `level` (its enum_type is resolved to the Level enum after
# construction) and a string field `message`.  Level is its only nested enum.
_LOGMESSAGE = _descriptor.Descriptor(
  name='LogMessage',
  full_name='tensorboardX.LogMessage',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='level', full_name='tensorboardX.LogMessage.level', index=0,
      number=1, type=14, cpp_type=8, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='message', full_name='tensorboardX.LogMessage.message', index=1,
      number=2, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
    _LOGMESSAGE_LEVEL,
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=409,
  serialized_end=560,
)


# Descriptor for the top-level message tensorboardX.SessionLog:
# an enum field `status` (its enum_type is resolved to the SessionStatus enum
# after construction) plus the string fields `checkpoint_path` and `msg`.
# SessionStatus is its only nested enum.
_SESSIONLOG = _descriptor.Descriptor(
  name='SessionLog',
  full_name='tensorboardX.SessionLog',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='status', full_name='tensorboardX.SessionLog.status', index=0,
      number=1, type=14, cpp_type=8, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='checkpoint_path', full_name='tensorboardX.SessionLog.checkpoint_path', index=1,
      number=2, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='msg', full_name='tensorboardX.SessionLog.msg', index=2,
      number=3, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
    _SESSIONLOG_SESSIONSTATUS,
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=563,
  serialized_end=747,
)


# Descriptor for the top-level message tensorboardX.TaggedRunMetadata:
# a string field `tag` and a bytes field `run_metadata`.  It has no nested
# types, enums or oneofs, so nothing needs to be patched in afterwards.
_TAGGEDRUNMETADATA = _descriptor.Descriptor(
  name='TaggedRunMetadata',
  full_name='tensorboardX.TaggedRunMetadata',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='tag', full_name='tensorboardX.TaggedRunMetadata.tag', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='run_metadata', full_name='tensorboardX.TaggedRunMetadata.run_metadata', index=1,
      number=2, type=12, cpp_type=9, label=1,
      has_default_value=False, default_value=_b(""),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=749,
  serialized_end=803,
)

# Resolve the cross-references that were left as None while the descriptors
# were being constructed, then publish the file descriptor.

# Event: types of the message-valued fields.
_event_fields = _EVENT.fields_by_name
_event_fields['summary'].message_type = tensorboardX_dot_proto_dot_summary__pb2._SUMMARY
_event_fields['log_message'].message_type = _LOGMESSAGE
_event_fields['session_log'].message_type = _SESSIONLOG
_event_fields['tagged_run_metadata'].message_type = _TAGGEDRUNMETADATA

# Event: members of the 'what' oneof.  The order below is the order in which
# members are appended to the oneof's field list.
_what_oneof = _EVENT.oneofs_by_name['what']
for _member in ('file_version', 'graph_def', 'summary', 'log_message',
                'session_log', 'tagged_run_metadata', 'meta_graph_def'):
  _what_oneof.fields.append(_event_fields[_member])
  _event_fields[_member].containing_oneof = _what_oneof

# Nested enums: link each enum to its field and to its enclosing message.
_LOGMESSAGE.fields_by_name['level'].enum_type = _LOGMESSAGE_LEVEL
_LOGMESSAGE_LEVEL.containing_type = _LOGMESSAGE
_SESSIONLOG.fields_by_name['status'].enum_type = _SESSIONLOG_SESSIONSTATUS
_SESSIONLOG_SESSIONSTATUS.containing_type = _SESSIONLOG

# Keep the module namespace free of the temporaries used above.
del _event_fields, _what_oneof, _member

# Expose the top-level messages on the file descriptor and register it.
DESCRIPTOR.message_types_by_name['Event'] = _EVENT
DESCRIPTOR.message_types_by_name['LogMessage'] = _LOGMESSAGE
DESCRIPTOR.message_types_by_name['SessionLog'] = _SESSIONLOG
DESCRIPTOR.message_types_by_name['TaggedRunMetadata'] = _TAGGEDRUNMETADATA
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

# Message class for tensorboardX.Event, registered with the symbol database.
Event = _reflection.GeneratedProtocolMessageType(
  'Event',
  (_message.Message,),
  {
    'DESCRIPTOR': _EVENT,
    '__module__': 'tensorboardX.proto.event_pb2',
    # @@protoc_insertion_point(class_scope:tensorboardX.Event)
  },
)
_sym_db.RegisterMessage(Event)

# Message class for tensorboardX.LogMessage, registered with the symbol
# database.
LogMessage = _reflection.GeneratedProtocolMessageType(
  'LogMessage',
  (_message.Message,),
  {
    'DESCRIPTOR': _LOGMESSAGE,
    '__module__': 'tensorboardX.proto.event_pb2',
    # @@protoc_insertion_point(class_scope:tensorboardX.LogMessage)
  },
)
_sym_db.RegisterMessage(LogMessage)

# Message class for tensorboardX.SessionLog, registered with the symbol
# database.
SessionLog = _reflection.GeneratedProtocolMessageType(
  'SessionLog',
  (_message.Message,),
  {
    'DESCRIPTOR': _SESSIONLOG,
    '__module__': 'tensorboardX.proto.event_pb2',
    # @@protoc_insertion_point(class_scope:tensorboardX.SessionLog)
  },
)
_sym_db.RegisterMessage(SessionLog)

# Message class for tensorboardX.TaggedRunMetadata, registered with the
# symbol database.
TaggedRunMetadata = _reflection.GeneratedProtocolMessageType(
  'TaggedRunMetadata',
  (_message.Message,),
  {
    'DESCRIPTOR': _TAGGEDRUNMETADATA,
    '__module__': 'tensorboardX.proto.event_pb2',
    # @@protoc_insertion_point(class_scope:tensorboardX.TaggedRunMetadata)
  },
)
_sym_db.RegisterMessage(TaggedRunMetadata)


# Reset `_options` on the file descriptor, the only descriptor in this module
# that was created with non-None serialized_options.
DESCRIPTOR._options = None
# @@protoc_insertion_point(module_scope)


================================================
FILE: tensorboardX/tensorboardX/proto/graph.proto
================================================
syntax = "proto3";

package tensorboardX;
option cc_enable_arenas = true;
option java_outer_classname = "GraphProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

import "tensorboardX/proto/node_def.proto";
//import "tensorflow/core/framework/function.proto";
import "tensorboardX/proto/versions.proto";

// Represents the graph of operations: a flat list of nodes plus version
// metadata.
message GraphDef {
  // The nodes (operations) that make up the graph.
  repeated NodeDef node = 1;

  // Compatibility versions of the graph.  See core/public/version.h for version
  // history.  The GraphDef version is distinct from the TensorFlow version, and
  // each release of TensorFlow will support a range of GraphDef versions.
  VersionDef versions = 4;

  // Deprecated single version field; use versions above instead.  Since all
  // GraphDef changes before "versions" was introduced were forward
  // compatible, this field is entirely ignored.
  int32 version = 3 [deprecated = true];

  // EXPERIMENTAL. DO NOT USE OR DEPEND ON THIS YET.
  //
  // "library" provides user-defined functions.
  //
  // Naming:
  //   * library.function.name are in a flat namespace.
  //     NOTE: We may need to change it to be hierarchical to support
  //     different orgs. E.g.,
  //     { "/google/nn", { ... }},
  //     { "/google/vision", { ... }}
  //     { "/org_foo/module_bar", { ... }}
  //     map<string, FunctionDefLib> named_lib;
  //   * If node[i].op is the name of one function in "library",
  //     node[i] is deemed as a function call. Otherwise, node[i].op
  //     must be a primitive operation supported by the runtime.
  //
  //
  // Function call semantics:
  //
  //   * The callee may start execution as soon as some of its inputs
  //     are ready. The caller may want to use Tuple() mechanism to
  //     ensure all inputs are ready at the same time.
  //
  //   * The consumer of return values may start executing as soon as
  //     the return values the consumer depends on are ready.  The
  //     consumer may want to use Tuple() mechanism to ensure the
  //     consumer does not start until all return values of the callee
  //     function are ready.
  //
  // The field is commented out in this copy: the function.proto import at the
  // top of this file is commented out as well, so field number 2 is not
  // declared here.
  //FunctionDefLibrary library = 2;
};


================================================
FILE: tensorboardX/tensorboardX/proto/graph_pb2.py
================================================
# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: tensorboardX/proto/graph.proto

import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


from tensorboardX.proto import node_def_pb2 as tensorboardX_dot_proto_dot_node__def__pb2
from tensorboardX.proto import versions_pb2 as tensorboardX_dot_proto_dot_versions__pb2


# File-level descriptor for tensorboardX/proto/graph.proto.
DESCRIPTOR = _descriptor.FileDescriptor(
  name='tensorboardX/proto/graph.proto',
  package='tensorboardX',
  syntax='proto3',
  # Encoded file options (java_package, java_outer_classname, ... from the .proto).
  serialized_options=_b('\n\030org.tensorflow.frameworkB\013GraphProtosP\001\370\001\001'),
  # Encoded description of the whole .proto file; the serialized_start/end
  # offsets on the message descriptors index into these bytes.
  serialized_pb=_b('\n\x1etensorboardX/proto/graph.proto\x12\x0ctensorboardX\x1a!tensorboardX/proto/node_def.proto\x1a!tensorboardX/proto/versions.proto\"p\n\x08GraphDef\x12#\n\x04node\x18\x01 \x03(\x0b\x32\x15.tensorboardX.NodeDef\x12*\n\x08versions\x18\x04 \x01(\x0b\x32\x18.tensorboardX.VersionDef\x12\x13\n\x07version\x18\x03 \x01(\x05\x42\x02\x18\x01\x42,\n\x18org.tensorflow.frameworkB\x0bGraphProtosP\x01\xf8\x01\x01\x62\x06proto3')
  ,
  # Descriptors of the two imported .proto files (node_def, versions).
  dependencies=[tensorboardX_dot_proto_dot_node__def__pb2.DESCRIPTOR,tensorboardX_dot_proto_dot_versions__pb2.DESCRIPTOR,])


# Descriptor for the `GraphDef` message declared in graph.proto.
# message_type is None for the two message-typed fields at construction time;
# it is assigned right after this call from the imported descriptor modules.
_GRAPHDEF = _descriptor.Descriptor(
  name='GraphDef',
  full_name='tensorboardX.GraphDef',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # repeated NodeDef node = 1;
    _descriptor.FieldDescriptor(
      name='node', full_name='tensorboardX.GraphDef.node', index=0,
      number=1, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # VersionDef versions = 4;
    _descriptor.FieldDescriptor(
      name='versions', full_name='tensorboardX.GraphDef.versions', index=1,
      number=4, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # int32 version = 3 [deprecated = true];
    # The field option is carried in serialized_options below.
    _descriptor.FieldDescriptor(
      name='version', full_name='tensorboardX.GraphDef.version', index=2,
      number=3, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=_b('\030\001'), file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range of this message's definition inside DESCRIPTOR's serialized_pb.
  serialized_start=118,
  serialized_end=230,
)

# Resolve the message-typed fields against the descriptors defined in the
# imported node_def / versions modules, then publish and register the file.
_GRAPHDEF.fields_by_name['node'].message_type = tensorboardX_dot_proto_dot_node__def__pb2._NODEDEF
_GRAPHDEF.fields_by_name['versions'].message_type = tensorboardX_dot_proto_dot_versions__pb2._VERSIONDEF
DESCRIPTOR.message_types_by_name['GraphDef'] = _GRAPHDEF
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

# Concrete message class for GraphDef, built from its descriptor and then
# registered with the default symbol database.
GraphDef = _reflection.GeneratedProtocolMessageType('GraphDef', (_message.Message,), {
  'DESCRIPTOR': _GRAPHDEF,
  '__module__': 'tensorboardX.proto.graph_pb2',
  # @@protoc_insertion_point(class_scope:tensorboardX.GraphDef)
})
_sym_db.RegisterMessage(GraphDef)


# Clear the cached options objects on the file and on the deprecated field.
DESCRIPTOR._options = None
_GRAPHDEF.fields_by_name['version']._options = None
# @@protoc_insertion_point(module_scope)


================================================
FILE: tensorboardX/tensorboardX/proto/layout.proto
================================================
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

syntax = "proto3";

package tensorboardX;


/**
 * Encapsulates information on a single chart. Many charts appear in a category.
 */
message Chart {
  // The title shown atop this chart. Optional. Defaults to 'untitled'.
  string title = 1;

  // The content of the chart. This depends on the type of the chart.
  // Declared as a oneof, so a chart carries either multiline or margin
  // content, not both.
  oneof content {
    MultilineChartContent multiline = 2;
    MarginChartContent margin = 3;
  }
}

/**
 * Encapsulates information on a single line chart. This line chart may have
 * lines associated with several tags.
 */
message MultilineChartContent {
  // A list of regular expressions for tags that should appear in this chart.
  // Tags are matched from beginning to end. Each regex captures a set of tags.
  // The field is repeated: one entry per regular expression.
  repeated string tag = 1;
}

/**
 * Encapsulates information on a single margin chart. A margin chart uses fill
 * area to visualize lower and upper bounds that surround a value.
 */
message MarginChartContent {
  /**
   * Encapsulates a tag of data for the chart: one value tag together with the
   * tags of its lower and upper bounds.
   */
  message Series {
    // The exact tag string associated with the scalar summaries making up the
    // main value between the bounds.
    string value = 1;

    // The exact tag string associated with the scalar summaries making up the
    // lower bound.
    string lower = 2;

    // The exact tag string associated with the scalar summaries making up the
    // upper bound.
    string upper = 3;
  }

  // A list of data series to include within this margin chart.
  repeated Series series = 1;
}

/**
 * A category contains a group of charts. Each category maps to a collapsible
 * within the dashboard.
 */
message Category {
  // This string appears atop each grouping of charts within the dashboard.
  string title = 1;

  // Encapsulates data on charts to be shown in the category.
  repeated Chart chart = 2;

  // Whether this category should be initially closed. False by default
  // (the proto3 default for an unset bool).
  bool closed = 3;
}

/**
 * A layout encapsulates how charts are laid out within the custom scalars
 * dashboard. It is the top-level message of this file: a Layout holds
 * categories, and each category holds charts.
 */
message Layout {
  // Version `0` is the only supported version.
  int32 version = 1;

  // The categories here are rendered from top to bottom.
  repeated Category category = 2;
}


================================================
FILE: tensorboardX/tensorboardX/proto/layout_pb2.py
================================================
# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: tensorboardX/proto/layout.proto

import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


# File-level descriptor for tensorboardX/proto/layout.proto.
# No `dependencies` argument is passed (layout.proto has no imports).
DESCRIPTOR = _descriptor.FileDescriptor(
  name='tensorboardX/proto/layout.proto',
  package='tensorboardX',
  syntax='proto3',
  serialized_options=None,
  # Encoded description of the whole .proto file; the serialized_start/end
  # offsets on the message descriptors index into these bytes.
  serialized_pb=_b('\n\x1ftensorboardX/proto/layout.proto\x12\x0ctensorboardX\"\x8f\x01\n\x05\x43hart\x12\r\n\x05title\x18\x01 \x01(\t\x12\x38\n\tmultiline\x18\x02 \x01(\x0b\x32#.tensorboardX.MultilineChartContentH\x00\x12\x32\n\x06margin\x18\x03 \x01(\x0b\x32 .tensorboardX.MarginChartContentH\x00\x42\t\n\x07\x63ontent\"$\n\x15MultilineChartContent\x12\x0b\n\x03tag\x18\x01 \x03(\t\"\x84\x01\n\x12MarginChartContent\x12\x37\n\x06series\x18\x01 \x03(\x0b\x32\'.tensorboardX.MarginChartContent.Series\x1a\x35\n\x06Series\x12\r\n\x05value\x18\x01 \x01(\t\x12\r\n\x05lower\x18\x02 \x01(\t\x12\r\n\x05upper\x18\x03 \x01(\t\"M\n\x08\x43\x61tegory\x12\r\n\x05title\x18\x01 \x01(\t\x12\"\n\x05\x63hart\x18\x02 \x03(\x0b\x32\x13.tensorboardX.Chart\x12\x0e\n\x06\x63losed\x18\x03 \x01(\x08\"C\n\x06Layout\x12\x0f\n\x07version\x18\x01 \x01(\x05\x12(\n\x08\x63\x61tegory\x18\x02 \x03(\x0b\x32\x16.tensorboardX.Categoryb\x06proto3')
)


# Descriptor for the `Chart` message declared in layout.proto.
# The message types of 'multiline' / 'margin' and the membership of the
# 'content' oneof are assigned after all descriptors in this module exist.
_CHART = _descriptor.Descriptor(
  name='Chart',
  full_name='tensorboardX.Chart',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # string title = 1;
    _descriptor.FieldDescriptor(
      name='title', full_name='tensorboardX.Chart.title', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # MultilineChartContent multiline = 2;  (member of oneof `content`)
    _descriptor.FieldDescriptor(
      name='multiline', full_name='tensorboardX.Chart.multiline', index=1,
      number=2, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # MarginChartContent margin = 3;  (member of oneof `content`)
    _descriptor.FieldDescriptor(
      name='margin', full_name='tensorboardX.Chart.margin', index=2,
      number=3, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
    # Created with an empty field list; the member fields are appended later.
    _descriptor.OneofDescriptor(
      name='content', full_name='tensorboardX.Chart.content',
      index=0, containing_type=None, fields=[]),
  ],
  # Byte range of this message's definition inside DESCRIPTOR's serialized_pb.
  serialized_start=50,
  serialized_end=193,
)


# Descriptor for the `MultilineChartContent` message declared in layout.proto.
_MULTILINECHARTCONTENT = _descriptor.Descriptor(
  name='MultilineChartContent',
  full_name='tensorboardX.MultilineChartContent',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # repeated string tag = 1;
    _descriptor.FieldDescriptor(
      name='tag', full_name='tensorboardX.MultilineChartContent.tag', index=0,
      number=1, type=9, cpp_type=9, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range of this message's definition inside DESCRIPTOR's serialized_pb.
  serialized_start=195,
  serialized_end=231,
)


# Descriptor for `Series`, the message nested inside `MarginChartContent`.
# containing_type is None here and is set to the parent descriptor once that
# descriptor has been created.
_MARGINCHARTCONTENT_SERIES = _descriptor.Descriptor(
  name='Series',
  full_name='tensorboardX.MarginChartContent.Series',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # string value = 1;
    _descriptor.FieldDescriptor(
      name='value', full_name='tensorboardX.MarginChartContent.Series.value', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # string lower = 2;
    _descriptor.FieldDescriptor(
      name='lower', full_name='tensorboardX.MarginChartContent.Series.lower', index=1,
      number=2, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # string upper = 3;
    _descriptor.FieldDescriptor(
      name='upper', full_name='tensorboardX.MarginChartContent.Series.upper', index=2,
      number=3, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range inside serialized_pb; it lies within the parent message's range.
  serialized_start=313,
  serialized_end=366,
)

# Descriptor for the `MarginChartContent` message declared in layout.proto.
# It lists the nested `Series` descriptor, which must already exist.
_MARGINCHARTCONTENT = _descriptor.Descriptor(
  name='MarginChartContent',
  full_name='tensorboardX.MarginChartContent',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # repeated Series series = 1;
    _descriptor.FieldDescriptor(
      name='series', full_name='tensorboardX.MarginChartContent.series', index=0,
      number=1, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[_MARGINCHARTCONTENT_SERIES, ],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range of this message's definition inside DESCRIPTOR's serialized_pb.
  serialized_start=234,
  serialized_end=366,
)


# Descriptor for the `Category` message declared in layout.proto.
_CATEGORY = _descriptor.Descriptor(
  name='Category',
  full_name='tensorboardX.Category',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # string title = 1;
    _descriptor.FieldDescriptor(
      name='title', full_name='tensorboardX.Category.title', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # repeated Chart chart = 2;
    _descriptor.FieldDescriptor(
      name='chart', full_name='tensorboardX.Category.chart', index=1,
      number=2, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # bool closed = 3;
    _descriptor.FieldDescriptor(
      name='closed', full_name='tensorboardX.Category.closed', index=2,
      number=3, type=8, cpp_type=7, label=1,
      has_default_value=False, default_value=False,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range of this message's definition inside DESCRIPTOR's serialized_pb.
  serialized_start=368,
  serialized_end=445,
)


# Descriptor for the `Layout` message declared in layout.proto.
_LAYOUT = _descriptor.Descriptor(
  name='Layout',
  full_name='tensorboardX.Layout',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # int32 version = 1;
    _descriptor.FieldDescriptor(
      name='version', full_name='tensorboardX.Layout.version', index=0,
      number=1, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # repeated Category category = 2;
    _descriptor.FieldDescriptor(
      name='category', full_name='tensorboardX.Layout.category', index=1,
      number=2, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range of this message's definition inside DESCRIPTOR's serialized_pb.
  serialized_start=447,
  serialized_end=514,
)

# Now that every descriptor exists, fill in the cross-references that were
# left as None / empty at construction time.
_CHART.fields_by_name['multiline'].message_type = _MULTILINECHARTCONTENT
_CHART.fields_by_name['margin'].message_type = _MARGINCHARTCONTENT
# Attach both fields to the 'content' oneof, linking in both directions.
_CHART.oneofs_by_name['content'].fields.append(
  _CHART.fields_by_name['multiline'])
_CHART.fields_by_name['multiline'].containing_oneof = _CHART.oneofs_by_name['content']
_CHART.oneofs_by_name['content'].fields.append(
  _CHART.fields_by_name['margin'])
_CHART.fields_by_name['margin'].containing_oneof = _CHART.oneofs_by_name['content']
# Link the nested Series message to its parent and to the field that uses it.
_MARGINCHARTCONTENT_SERIES.containing_type = _MARGINCHARTCONTENT
_MARGINCHARTCONTENT.fields_by_name['series'].message_type = _MARGINCHARTCONTENT_SERIES
_CATEGORY.fields_by_name['chart'].message_type = _CHART
_LAYOUT.fields_by_name['category'].message_type = _CATEGORY
# Expose the top-level messages on the file descriptor by name; the nested
# Series message is not listed here.
DESCRIPTOR.message_types_by_name['Chart'] = _CHART
DESCRIPTOR.message_types_by_name['MultilineChartContent'] = _MULTILINECHARTCONTENT
DESCRIPTOR.message_types_by_name['MarginChartContent'] = _MARGINCHARTCONTENT
DESCRIPTOR.message_types_by_name['Category'] = _CATEGORY
DESCRIPTOR.message_types_by_name['Layout'] = _LAYOUT
# Register the completed file descriptor with the default symbol database.
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

# Build one concrete message class per descriptor and register it with the
# default symbol database. Class namespaces are given as dict literals; the
# protoc insertion-point markers are kept inside each namespace.
Chart = _reflection.GeneratedProtocolMessageType('Chart', (_message.Message,), {
  'DESCRIPTOR': _CHART,
  '__module__': 'tensorboardX.proto.layout_pb2',
  # @@protoc_insertion_point(class_scope:tensorboardX.Chart)
})
_sym_db.RegisterMessage(Chart)

MultilineChartContent = _reflection.GeneratedProtocolMessageType('MultilineChartContent', (_message.Message,), {
  'DESCRIPTOR': _MULTILINECHARTCONTENT,
  '__module__': 'tensorboardX.proto.layout_pb2',
  # @@protoc_insertion_point(class_scope:tensorboardX.MultilineChartContent)
})
_sym_db.RegisterMessage(MultilineChartContent)

MarginChartContent = _reflection.GeneratedProtocolMessageType('MarginChartContent', (_message.Message,), {
  # Nested message class, reachable as MarginChartContent.Series.
  'Series': _reflection.GeneratedProtocolMessageType('Series', (_message.Message,), {
    'DESCRIPTOR': _MARGINCHARTCONTENT_SERIES,
    '__module__': 'tensorboardX.proto.layout_pb2',
    # @@protoc_insertion_point(class_scope:tensorboardX.MarginChartContent.Series)
  }),
  'DESCRIPTOR': _MARGINCHARTCONTENT,
  '__module__': 'tensorboardX.proto.layout_pb2',
  # @@protoc_insertion_point(class_scope:tensorboardX.MarginChartContent)
})
_sym_db.RegisterMessage(MarginChartContent)
_sym_db.RegisterMessage(MarginChartContent.Series)

Category = _reflection.GeneratedProtocolMessageType('Category', (_message.Message,), {
  'DESCRIPTOR': _CATEGORY,
  '__module__': 'tensorboardX.proto.layout_pb2',
  # @@protoc_insertion_point(class_scope:tensorboardX.Category)
})
_sym_db.RegisterMessage(Category)

Layout = _reflection.GeneratedProtocolMessageType('Layout', (_message.Message,), {
  'DESCRIPTOR': _LAYOUT,
  '__module__': 'tensorboardX.proto.layout_pb2',
  # @@protoc_insertion_point(class_scope:tensorboardX.Layout)
})
_sym_db.RegisterMessage(Layout)


# @@protoc_insertion_point(module_scope)


================================================
FILE: tensorboardX/tensorboardX/proto/node_def.proto
================================================
syntax = "proto3";

package tensorboardX;
option cc_enable_arenas = true;
option java_outer_classname = "NodeProto";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

import "tensorboardX/proto/attr_value.proto";

// A single node of a graph: a named operation together with its inputs,
// device placement hint and attributes.
message NodeDef {
  // The name given to this operator. Used for naming inputs,
  // logging, visualization, etc.  Unique within a single GraphDef.
  // Must match the regexp "[A-Za-z0-9.][A-Za-z0-9_./]*".
  string name = 1;

  // The operation name.  There may be custom parameters in attrs.
  // Op names starting with an underscore are reserved for internal use.
  string op = 2;

  // Each input is "node:src_output" with "node" being a string name and
  // "src_output" indicating which output tensor to use from "node". If
  // "src_output" is 0 the ":0" suffix can be omitted.  Regular inputs
  // may optionally be followed by control inputs that have the format
  // "^node".
  repeated string input = 3;

  // A (possibly partial) specification for the device on which this
  // node should be placed.
  // The expected syntax for this string is as follows:
  //
  // DEVICE_SPEC ::= PARTIAL_SPEC
  //
  // PARTIAL_SPEC ::= ("/" CONSTRAINT) *
  // CONSTRAINT ::= ("job:" JOB_NAME)
  //              | ("replica:" [1-9][0-9]*)
  //              | ("task:" [1-9][0-9]*)
  //              | ( ("gpu" | "cpu") ":" ([1-9][0-9]* | "*") )
  //
  // Valid values for this string include:
  // * "/job:worker/replica:0/task:1/gpu:3"  (full specification)
  // * "/job:worker/gpu:3"                   (partial specification)
  // * ""                                    (no specification)
  //
  // NOTE(review): the first example uses "replica:0", which the
  // [1-9][0-9]* pattern above does not accept as written -- confirm whether
  // index 0 is meant to be allowed.
  //
  // If the constraints do not resolve to a single device (or if this
  // field is empty or not present), the runtime will attempt to
  // choose a device automatically.
  string device = 4;

  // Operation-specific graph-construction-time configuration.
  // Note that this should include all attrs defined in the
  // corresponding OpDef, including those with a value matching
  // the default -- this allows the default to change and makes
  // NodeDefs easier to interpret on their own.  However, if
  // an attr with a default is not specified in this list, the
  // default will be used.
  // The "names" (keys) must match the regexp "[a-z][a-z0-9_]+" (and
  // one of the names from the corresponding OpDef's attr field).
  // The values must have a type matching the corresponding OpDef
  // attr's type field.
  // TODO(josh11b): Add some examples here showing best practices.
  map<string, AttrValue> attr = 5;
};


================================================
FILE: tensorboardX/tensorboardX/proto/node_def_pb2.py
================================================
# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: tensorboardX/proto/node_def.proto

import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


from tensorboardX.proto import attr_value_pb2 as tensorboardX_dot_proto_dot_attr__value__pb2


# File-level descriptor for tensorboardX/proto/node_def.proto.
DESCRIPTOR = _descriptor.FileDescriptor(
  name='tensorboardX/proto/node_def.proto',
  package='tensorboardX',
  syntax='proto3',
  # Encoded file options (java_package, java_outer_classname, ... from the .proto).
  serialized_options=_b('\n\030org.tensorflow.frameworkB\tNodeProtoP\001\370\001\001'),
  # Encoded description of the whole .proto file; the serialized_start/end
  # offsets on the message descriptors index into these bytes.
  serialized_pb=_b('\n!tensorboardX/proto/node_def.proto\x12\x0ctensorboardX\x1a#tensorboardX/proto/attr_value.proto\"\xb7\x01\n\x07NodeDef\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\n\n\x02op\x18\x02 \x01(\t\x12\r\n\x05input\x18\x03 \x03(\t\x12\x0e\n\x06\x64\x65vice\x18\x04 \x01(\t\x12-\n\x04\x61ttr\x18\x05 \x03(\x0b\x32\x1f.tensorboardX.NodeDef.AttrEntry\x1a\x44\n\tAttrEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12&\n\x05value\x18\x02 \x01(\x0b\x32\x17.tensorboardX.AttrValue:\x02\x38\x01\x42*\n\x18org.tensorflow.frameworkB\tNodeProtoP\x01\xf8\x01\x01\x62\x06proto3')
  ,
  # Descriptor of the single imported .proto file (attr_value).
  dependencies=[tensorboardX_dot_proto_dot_attr__value__pb2.DESCRIPTOR,])


# Descriptor for `NodeDef.AttrEntry`, the key/value entry message backing the
# `map<string, AttrValue> attr = 5;` field declared in node_def.proto.
# containing_type and the message type of 'value' are assigned after
# construction.
_NODEDEF_ATTRENTRY = _descriptor.Descriptor(
  name='AttrEntry',
  full_name='tensorboardX.NodeDef.AttrEntry',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # Map key: string.
    _descriptor.FieldDescriptor(
      name='key', full_name='tensorboardX.NodeDef.AttrEntry.key', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # Map value: AttrValue message.
    _descriptor.FieldDescriptor(
      name='value', full_name='tensorboardX.NodeDef.AttrEntry.value', index=1,
      number=2, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  # Encoded message options; presumably the map_entry flag that marks this
  # message as a map entry -- TODO confirm against descriptor.proto.
  serialized_options=_b('8\001'),
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range inside serialized_pb; it lies within the parent message's range.
  serialized_start=204,
  serialized_end=272,
)

# Descriptor for the `NodeDef` message declared in node_def.proto.
# It lists the nested AttrEntry descriptor, which must already exist.
_NODEDEF = _descriptor.Descriptor(
  name='NodeDef',
  full_name='tensorboardX.NodeDef',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # string name = 1;
    _descriptor.FieldDescriptor(
      name='name', full_name='tensorboardX.NodeDef.name', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # string op = 2;
    _descriptor.FieldDescriptor(
      name='op', full_name='tensorboardX.NodeDef.op', index=1,
      number=2, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # repeated string input = 3;
    _descriptor.FieldDescriptor(
      name='input', full_name='tensorboardX.NodeDef.input', index=2,
      number=3, type=9, cpp_type=9, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # string device = 4;
    _descriptor.FieldDescriptor(
      name='device', full_name='tensorboardX.NodeDef.device', index=3,
      number=4, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # map<string, AttrValue> attr = 5;  (described as a repeated AttrEntry)
    _descriptor.FieldDescriptor(
      name='attr', full_name='tensorboardX.NodeDef.attr', index=4,
      number=5, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[_NODEDEF_ATTRENTRY, ],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range of this message's definition inside DESCRIPTOR's serialized_pb.
  serialized_start=89,
  serialized_end=272,
)

# Fill in the cross-references left as None at construction time: the map
# value type comes from the imported attr_value module, and AttrEntry is
# linked to its parent message and to the 'attr' field that uses it.
_NODEDEF_ATTRENTRY.fields_by_name['value'].message_type = tensorboardX_dot_proto_dot_attr__value__pb2._ATTRVALUE
_NODEDEF_ATTRENTRY.containing_type = _NODEDEF
_NODEDEF.fields_by_name['attr'].message_type = _NODEDEF_ATTRENTRY
# Publish the top-level message on the file descriptor and register the file.
DESCRIPTOR.message_types_by_name['NodeDef'] = _NODEDEF
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

# Concrete message class for NodeDef. Its namespace also carries the class for
# the nested AttrEntry message; both are registered with the symbol database.
NodeDef = _reflection.GeneratedProtocolMessageType('NodeDef', (_message.Message,), {
  # Nested message class, reachable as NodeDef.AttrEntry.
  'AttrEntry': _reflection.GeneratedProtocolMessageType('AttrEntry', (_message.Message,), {
    'DESCRIPTOR': _NODEDEF_ATTRENTRY,
    '__module__': 'tensorboardX.proto.node_def_pb2',
    # @@protoc_insertion_point(class_scope:tensorboardX.NodeDef.AttrEntry)
  }),
  'DESCRIPTOR': _NODEDEF,
  '__module__': 'tensorboardX.proto.node_def_pb2',
  # @@protoc_insertion_point(class_scope:tensorboardX.NodeDef)
})
_sym_db.RegisterMessage(NodeDef)
_sym_db.RegisterMessage(NodeDef.AttrEntry)


# Clear the cached options objects on the file and on the map-entry message.
DESCRIPTOR._options = None
_NODEDEF_ATTRENTRY._options = None
# @@protoc_insertion_point(module_scope)


================================================
FILE: tensorboardX/tensorboardX/proto/plugin_hparams.proto
================================================
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// Defines protos for storing a hypertuning experiment data inside Summary tags.
//
// Hypertuning-experiment data consists of metadata that's constant
// throughout the experiment and evolving metric data for each training session
// in the experiment. The HParams plugin assumes the following organization of
// this entire data set. Experiment metadata is recorded in the empty run in a
// tag (named by the Python constant) metadata.EXPERIMENT_TAG. Within the
// experiment, for a session named by <session_name> its metadata is recorded
// in the run <session_name> in the tags metadata.SESSION_START_INFO and
// metadata.SESSION_END_INFO. Finally, the session's metric data for a metric
// with a (<group>, <tag>) name (see MetricName in api.proto), is recorded
// in a Scalar-plugin summary with tag <tag> in the run <session_name><group>.

syntax = "proto3";

import "tensorboardX/proto/api.proto";
import "google/protobuf/struct.proto";

package tensorboardX.hparam;

// HParam summaries created by `tensorboard.plugins.hparams.summary`
// module will include `SummaryMetadata` whose `plugin_data` field has
// as `content` a serialized HParamsPluginData message.
message HParamsPluginData {
  // The version of the plugin data schema.
  int32 version = 1;
  // The payload. Declared as a oneof, so a message carries experiment
  // metadata, session-start info or session-end info, never more than one.
  oneof data {
    Experiment experiment = 2;
    SessionStartInfo session_start_info = 3;
    SessionEndInfo session_end_info = 4;
  }
}

// Metadata describing a session at the time it starts: its hyperparameter
// values, where its artifacts live, which group it belongs to and when it
// began.
message SessionStartInfo {
  // A map describing the hyperparameter values for the session.
  // Maps each hyperparameter name to its value.
  // Currently only scalars are supported.
  // The value type comes from the imported google/protobuf/struct.proto.
  map<string, google.protobuf.Value> hparams = 1;

  // A URI for where checkpoints are saved.
  string model_uri = 2;

  // An optional URL to a website monitoring the session.
  string monitor_url = 3;

  // The name of the session group containing this session. If empty, the
  // group name is taken to be the session id (so this session is the only
  // member of its group).
  string group_name = 4;

  // The time the session started in seconds since epoch.
  double start_time_secs = 5;
}

// Metadata describing a session at the time it ends.
message SessionEndInfo {
  // Final status of the session. `Status` is an enum that is not declared in
  // this file; it comes from the imported tensorboardX/proto/api.proto.
  Status status = 1;

  // The time the session ended in seconds since epoch.
  double end_time_secs = 2;
}


================================================
FILE: tensorboardX/tensorboardX/proto/plugin_hparams_pb2.py
================================================
# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: tensorboardX/proto/plugin_hparams.proto

import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


from tensorboardX.proto import api_pb2 as tensorboardX_dot_proto_dot_api__pb2
from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2


# File-level descriptor for tensorboardX/proto/plugin_hparams.proto.
# `serialized_pb` is the compiled (binary) form of that .proto file. The
# `serialized_start` / `serialized_end` values on the message descriptors in
# this module are byte offsets into it, so it must not be edited by hand.
DESCRIPTOR = _descriptor.FileDescriptor(
  name='tensorboardX/proto/plugin_hparams.proto',
  package='tensorboardX.hparam',
  syntax='proto3',
  serialized_options=None,
  serialized_pb=_b('\n\'tensorboardX/proto/plugin_hparams.proto\x12\x13tensorboardX.hparam\x1a\x1ctensorboardX/proto/api.proto\x1a\x1cgoogle/protobuf/struct.proto\"\xe9\x01\n\x11HParamsPluginData\x12\x0f\n\x07version\x18\x01 \x01(\x05\x12\x35\n\nexperiment\x18\x02 \x01(\x0b\x32\x1f.tensorboardX.hparam.ExperimentH\x00\x12\x43\n\x12session_start_info\x18\x03 \x01(\x0b\x32%.tensorboardX.hparam.SessionStartInfoH\x00\x12?\n\x10session_end_info\x18\x04 \x01(\x0b\x32#.tensorboardX.hparam.SessionEndInfoH\x00\x42\x06\n\x04\x64\x61ta\"\xf4\x01\n\x10SessionStartInfo\x12\x43\n\x07hparams\x18\x01 \x03(\x0b\x32\x32.tensorboardX.hparam.SessionStartInfo.HparamsEntry\x12\x11\n\tmodel_uri\x18\x02 \x01(\t\x12\x13\n\x0bmonitor_url\x18\x03 \x01(\t\x12\x12\n\ngroup_name\x18\x04 \x01(\t\x12\x17\n\x0fstart_time_secs\x18\x05 \x01(\x01\x1a\x46\n\x0cHparamsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12%\n\x05value\x18\x02 \x01(\x0b\x32\x16.google.protobuf.Value:\x02\x38\x01\"T\n\x0eSessionEndInfo\x12+\n\x06status\x18\x01 \x01(\x0e\x32\x1b.tensorboardX.hparam.Status\x12\x15\n\rend_time_secs\x18\x02 \x01(\x01\x62\x06proto3')
  ,
  # Descriptors of the two imported .proto files (api.proto and struct.proto).
  dependencies=[tensorboardX_dot_proto_dot_api__pb2.DESCRIPTOR,google_dot_protobuf_dot_struct__pb2.DESCRIPTOR,])


# Descriptor for message `tensorboardX.hparam.HParamsPluginData`.
# The three message-typed fields are created with message_type=None; their
# types and their membership in the `data` oneof are filled in by the wiring
# statements that follow the descriptor definitions in this module.
_HPARAMSPLUGINDATA = _descriptor.Descriptor(
  name='HParamsPluginData',
  full_name='tensorboardX.hparam.HParamsPluginData',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # int32 version = 1;
    _descriptor.FieldDescriptor(
      name='version', full_name='tensorboardX.hparam.HParamsPluginData.version', index=0,
      number=1, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # Experiment experiment = 2;  (member of oneof `data`)
    _descriptor.FieldDescriptor(
      name='experiment', full_name='tensorboardX.hparam.HParamsPluginData.experiment', index=1,
      number=2, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # SessionStartInfo session_start_info = 3;  (member of oneof `data`)
    _descriptor.FieldDescriptor(
      name='session_start_info', full_name='tensorboardX.hparam.HParamsPluginData.session_start_info', index=2,
      number=3, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # SessionEndInfo session_end_info = 4;  (member of oneof `data`)
    _descriptor.FieldDescriptor(
      name='session_end_info', full_name='tensorboardX.hparam.HParamsPluginData.session_end_info', index=3,
      number=4, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
    # oneof data { ... } -- starts with an empty field list; the member
    # fields are appended after all descriptors have been constructed.
    _descriptor.OneofDescriptor(
      name='data', full_name='tensorboardX.hparam.HParamsPluginData.data',
      index=0, containing_type=None, fields=[]),
  ],
  # Byte range of this message inside DESCRIPTOR's serialized_pb.
  serialized_start=125,
  serialized_end=358,
)


# Descriptor for the nested `HparamsEntry` message that backs the
# `map<string, google.protobuf.Value> hparams` field of SessionStartInfo.
# Each map item is represented as a (key, value) message.
_SESSIONSTARTINFO_HPARAMSENTRY = _descriptor.Descriptor(
  name='HparamsEntry',
  full_name='tensorboardX.hparam.SessionStartInfo.HparamsEntry',
  filename=None,
  file=DESCRIPTOR,
  # Set to _SESSIONSTARTINFO later, once that descriptor exists.
  containing_type=None,
  fields=[
    # Map key: string (field number 1).
    _descriptor.FieldDescriptor(
      name='key', full_name='tensorboardX.hparam.SessionStartInfo.HparamsEntry.key', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # Map value: google.protobuf.Value message (field number 2); its
    # message_type is assigned later in this module.
    _descriptor.FieldDescriptor(
      name='value', full_name='tensorboardX.hparam.SessionStartInfo.HparamsEntry.value', index=1,
      number=2, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  # Encoded message options; the same two bytes (\x38\x01) appear for this
  # entry inside serialized_pb. Presumably this is the map_entry=true flag --
  # confirm against descriptor.proto (MessageOptions).
  serialized_options=_b('8\001'),
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range of this nested message inside DESCRIPTOR's serialized_pb; it
  # ends at the same offset as its parent SessionStartInfo.
  serialized_start=535,
  serialized_end=605,
)

# Descriptor for message `tensorboardX.hparam.SessionStartInfo`.
_SESSIONSTARTINFO = _descriptor.Descriptor(
  name='SessionStartInfo',
  full_name='tensorboardX.hparam.SessionStartInfo',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # map<string, google.protobuf.Value> hparams = 1;
    # Described here as a repeated (label=3) message field; its message_type
    # is set to the HparamsEntry descriptor later in this module.
    _descriptor.FieldDescriptor(
      name='hparams', full_name='tensorboardX.hparam.SessionStartInfo.hparams', index=0,
      number=1, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # string model_uri = 2;
    _descriptor.FieldDescriptor(
      name='model_uri', full_name='tensorboardX.hparam.SessionStartInfo.model_uri', index=1,
      number=2, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # string monitor_url = 3;
    _descriptor.FieldDescriptor(
      name='monitor_url', full_name='tensorboardX.hparam.SessionStartInfo.monitor_url', index=2,
      number=3, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # string group_name = 4;
    _descriptor.FieldDescriptor(
      name='group_name', full_name='tensorboardX.hparam.SessionStartInfo.group_name', index=3,
      number=4, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # double start_time_secs = 5;
    _descriptor.FieldDescriptor(
      name='start_time_secs', full_name='tensorboardX.hparam.SessionStartInfo.start_time_secs', index=4,
      number=5, type=1, cpp_type=5, label=1,
      has_default_value=False, default_value=float(0),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  # The map-entry message for `hparams` is nested inside this message.
  nested_types=[_SESSIONSTARTINFO_HPARAMSENTRY, ],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range of this message inside DESCRIPTOR's serialized_pb.
  serialized_start=361,
  serialized_end=605,
)


# Descriptor for message `tensorboardX.hparam.SessionEndInfo`.
_SESSIONENDINFO = _descriptor.Descriptor(
  name='SessionEndInfo',
  full_name='tensorboardX.hparam.SessionEndInfo',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # Status status = 1;  -- enum field; enum_type is assigned later in this
    # module from the api_pb2 descriptors.
    _descriptor.FieldDescriptor(
      name='status', full_name='tensorboardX.hparam.SessionEndInfo.status', index=0,
      number=1, type=14, cpp_type=8, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # double end_time_secs = 2;
    _descriptor.FieldDescriptor(
      name='end_time_secs', full_name='tensorboardX.hparam.SessionEndInfo.end_time_secs', index=1,
      number=2, type=1, cpp_type=5, label=1,
      has_default_value=False, default_value=float(0),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range of this message inside DESCRIPTOR's serialized_pb.
  serialized_start=607,
  serialized_end=691,
)

# --- Cross-reference wiring -------------------------------------------------
# The descriptors above were built with message_type / enum_type / oneof
# membership left empty; these statements fill them in now that every
# descriptor object exists.

# Resolve the message types of HParamsPluginData's three payload fields.
_HPARAMSPLUGINDATA.fields_by_name['experiment'].message_type = tensorboardX_dot_proto_dot_api__pb2._EXPERIMENT
_HPARAMSPLUGINDATA.fields_by_name['session_start_info'].message_type = _SESSIONSTARTINFO
_HPARAMSPLUGINDATA.fields_by_name['session_end_info'].message_type = _SESSIONENDINFO
# Register each payload field as a member of the `data` oneof, and point the
# field back at the oneof. Members are appended in field-number order.
_HPARAMSPLUGINDATA.oneofs_by_name['data'].fields.append(
  _HPARAMSPLUGINDATA.fields_by_name['experiment'])
_HPARAMSPLUGINDATA.fields_by_name['experiment'].containing_oneof = _HPARAMSPLUGINDATA.oneofs_by_name['data']
_HPARAMSPLUGINDATA.oneofs_by_name['data'].fields.append(
  _HPARAMSPLUGINDATA.fields_by_name['session_start_info'])
_HPARAMSPLUGINDATA.fields_by_name['session_start_info'].containing_oneof = _HPARAMSPLUGINDATA.oneofs_by_name['data']
_HPARAMSPLUGINDATA.oneofs_by_name['data'].fields.append(
  _HPARAMSPLUGINDATA.fields_by_name['session_end_info'])
_HPARAMSPLUGINDATA.fields_by_name['session_end_info'].containing_oneof = _HPARAMSPLUGINDATA.oneofs_by_name['data']
# Map entry: value type comes from struct_pb2, parent is SessionStartInfo.
_SESSIONSTARTINFO_HPARAMSENTRY.fields_by_name['value'].message_type = google_dot_protobuf_dot_struct__pb2._VALUE
_SESSIONSTARTINFO_HPARAMSENTRY.containing_type = _SESSIONSTARTINFO
_SESSIONSTARTINFO.fields_by_name['hparams'].message_type = _SESSIONSTARTINFO_HPARAMSENTRY
# SessionEndInfo.status uses the Status enum defined in api_pb2.
_SESSIONENDINFO.fields_by_name['status'].enum_type = tensorboardX_dot_proto_dot_api__pb2._STATUS
# Expose the top-level messages on the file descriptor and register the file
# with the symbol database.
DESCRIPTOR.message_types_by_name['HParamsPluginData'] = _HPARAMSPLUGINDATA
DESCRIPTOR.message_types_by_name['SessionStartInfo'] = _SESSIONSTARTINFO
DESCRIPTOR.message_types_by_name['SessionEndInfo'] = _SESSIONENDINFO
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

# --- Concrete message classes -----------------------------------------------
# Each class is created from its descriptor via GeneratedProtocolMessageType
# and then registered with the symbol database. The
# `@@protoc_insertion_point` comments are markers emitted by protoc and are
# kept verbatim.

# Python class for message tensorboardX.hparam.HParamsPluginData.
HParamsPluginData = _reflection.GeneratedProtocolMessageType('HParamsPluginData', (_message.Message,), dict(
  DESCRIPTOR = _HPARAMSPLUGINDATA,
  __module__ = 'tensorboardX.proto.plugin_hparams_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.hparam.HParamsPluginData)
  ))
_sym_db.RegisterMessage(HParamsPluginData)

# Python class for message tensorboardX.hparam.SessionStartInfo. The nested
# map-entry class is created inline and exposed as the class attribute
# `SessionStartInfo.HparamsEntry`.
SessionStartInfo = _reflection.GeneratedProtocolMessageType('SessionStartInfo', (_message.Message,), dict(

  HparamsEntry = _reflection.GeneratedProtocolMessageType('HparamsEntry', (_message.Message,), dict(
    DESCRIPTOR = _SESSIONSTARTINFO_HPARAMSENTRY,
    __module__ = 'tensorboardX.proto.plugin_hparams_pb2'
    # @@protoc_insertion_point(class_scope:tensorboardX.hparam.SessionStartInfo.HparamsEntry)
    ))
  ,
  DESCRIPTOR = _SESSIONSTARTINFO,
  __module__ = 'tensorboardX.proto.plugin_hparams_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.hparam.SessionStartInfo)
  ))
# Both the outer class and the nested entry class are registered.
_sym_db.RegisterMessage(SessionStartInfo)
_sym_db.RegisterMessage(SessionStartInfo.HparamsEntry)

# Python class for message tensorboardX.hparam.SessionEndInfo.
SessionEndInfo = _reflection.GeneratedProtocolMessageType('SessionEndInfo', (_message.Message,), dict(
  DESCRIPTOR = _SESSIONENDINFO,
  __module__ = 'tensorboardX.proto.plugin_hparams_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.hparam.SessionEndInfo)
  ))
_sym_db.RegisterMessage(SessionEndInfo)


# Resets the private `_options` attribute on the map-entry descriptor (the
# only descriptor in this module built with non-None serialized_options).
# NOTE(review): presumably this makes protobuf re-derive the options from
# serialized_options on demand -- confirm against the descriptor implementation.
_SESSIONSTARTINFO_HPARAMSENTRY._options = None
# @@protoc_insertion_point(module_scope)


================================================
FILE: tensorboardX/tensorboardX/proto/plugin_mesh.proto
================================================
syntax = "proto3";

package tensorboardX.mesh;

// A MeshPluginData encapsulates information on which plugins are able to make
// use of a certain summary value.
//
// Concretely, the fields below carry a schema version, the name of the mesh
// summary, the kind of content in the summary, a JSON configuration string
// and the shape of the data.
// NOTE: field number 4 is not used by this message.
message MeshPluginData {
  // Kind of data carried by a mesh summary.
  enum ContentType {
    UNDEFINED = 0;  // Zero value, i.e. the proto3 default.
    VERTEX = 1;
    FACE = 2;  // Triangle face.
    COLOR = 3;
  }

  // Version `0` is the only supported version.
  int32 version = 1;

  // The name of the mesh summary this particular summary belongs to.
  string name = 2;

  // Type of data in the summary.
  ContentType content_type = 3;

  // JSON-serialized dictionary of ThreeJS classes configuration.
  string json_config = 5;

  // Shape of underlying data. Cache it here for performance reasons.
  repeated int32 shape = 6;
}


================================================
FILE: tensorboardX/tensorboardX/proto/plugin_mesh_pb2.py
================================================
# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: tensorboardX/proto/plugin_mesh.proto

import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


# File-level descriptor for tensorboardX/proto/plugin_mesh.proto.
# `serialized_pb` is the compiled (binary) form of that .proto file; the
# serialized_start / serialized_end offsets used by the descriptors in this
# module index into it, so it must not be edited by hand.
DESCRIPTOR = _descriptor.FileDescriptor(
  name='tensorboardX/proto/plugin_mesh.proto',
  package='tensorboardX.mesh',
  syntax='proto3',
  serialized_options=None,
  serialized_pb=_b('\n$tensorboardX/proto/plugin_mesh.proto\x12\x11tensorboardX.mesh\"\xd7\x01\n\x0eMeshPluginData\x12\x0f\n\x07version\x18\x01 \x01(\x05\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x43\n\x0c\x63ontent_type\x18\x03 \x01(\x0e\x32-.tensorboardX.mesh.MeshPluginData.ContentType\x12\x13\n\x0bjson_config\x18\x05 \x01(\t\x12\r\n\x05shape\x18\x06 \x03(\x05\"=\n\x0b\x43ontentType\x12\r\n\tUNDEFINED\x10\x00\x12\n\n\x06VERTEX\x10\x01\x12\x08\n\x04\x46\x41\x43\x45\x10\x02\x12\t\n\x05\x43OLOR\x10\x03\x62\x06proto3')
)


# Descriptor for the nested enum `MeshPluginData.ContentType`
# (UNDEFINED=0, VERTEX=1, FACE=2, COLOR=3).
_MESHPLUGINDATA_CONTENTTYPE = _descriptor.EnumDescriptor(
  name='ContentType',
  full_name='tensorboardX.mesh.MeshPluginData.ContentType',
  filename=None,
  file=DESCRIPTOR,
  values=[
    _descriptor.EnumValueDescriptor(
      name='UNDEFINED', index=0, number=0,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='VERTEX', index=1, number=1,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='FACE', index=2, number=2,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='COLOR', index=3, number=3,
      serialized_options=None,
      type=None),
  ],
  # Set to _MESHPLUGINDATA later, once the message descriptor exists.
  containing_type=None,
  serialized_options=None,
  # Byte range of this enum inside DESCRIPTOR's serialized_pb.
  serialized_start=214,
  serialized_end=275,
)
# Make the enum discoverable through the symbol database.
_sym_db.RegisterEnumDescriptor(_MESHPLUGINDATA_CONTENTTYPE)


# Descriptor for message `tensorboardX.mesh.MeshPluginData`.
# Field numbers are 1, 2, 3, 5 and 6 (number 4 is not used), while `index`
# is the zero-based position of each field within the message.
_MESHPLUGINDATA = _descriptor.Descriptor(
  name='MeshPluginData',
  full_name='tensorboardX.mesh.MeshPluginData',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # int32 version = 1;
    _descriptor.FieldDescriptor(
      name='version', full_name='tensorboardX.mesh.MeshPluginData.version', index=0,
      number=1, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # string name = 2;
    _descriptor.FieldDescriptor(
      name='name', full_name='tensorboardX.mesh.MeshPluginData.name', index=1,
      number=2, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # ContentType content_type = 3;  -- enum_type is assigned later in this
    # module.
    _descriptor.FieldDescriptor(
      name='content_type', full_name='tensorboardX.mesh.MeshPluginData.content_type', index=2,
      number=3, type=14, cpp_type=8, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # string json_config = 5;
    _descriptor.FieldDescriptor(
      name='json_config', full_name='tensorboardX.mesh.MeshPluginData.json_config', index=3,
      number=5, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # repeated int32 shape = 6;
    _descriptor.FieldDescriptor(
      name='shape', full_name='tensorboardX.mesh.MeshPluginData.shape', index=4,
      number=6, type=5, cpp_type=1, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
    _MESHPLUGINDATA_CONTENTTYPE,
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range of this message inside DESCRIPTOR's serialized_pb; it ends at
  # the same offset as its nested ContentType enum.
  serialized_start=60,
  serialized_end=275,
)

# Link the enum and the message to each other now that both descriptors
# exist, then expose the message on the file descriptor and register the file.
_MESHPLUGINDATA.fields_by_name['content_type'].enum_type = _MESHPLUGINDATA_CONTENTTYPE
_MESHPLUGINDATA_CONTENTTYPE.containing_type = _MESHPLUGINDATA
DESCRIPTOR.message_types_by_name['MeshPluginData'] = _MESHPLUGINDATA
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

# Python class for message tensorboardX.mesh.MeshPluginData, created from its
# descriptor and registered with the symbol database.
MeshPluginData = _reflection.GeneratedProtocolMessageType('MeshPluginData', (_message.Message,), dict(
  DESCRIPTOR = _MESHPLUGINDATA,
  __module__ = 'tensorboardX.proto.plugin_mesh_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.mesh.MeshPluginData)
  ))
_sym_db.RegisterMessage(MeshPluginData)


# @@protoc_insertion_point(module_scope)


================================================
FILE: tensorboardX/tensorboardX/proto/plugin_pr_curve.proto
================================================
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

syntax = "proto3";

package tensorboardX;

// Plugin data for PR-curve summaries: a schema version plus a threshold
// count.
message PrCurvePluginData {
  // Version `0` is the only supported version.
  int32 version = 1;

  // Number of thresholds.
  // NOTE(review): presumably the number of thresholds the curve was computed
  // with -- confirm against the code that writes this message.
  uint32 num_thresholds = 2;
}


================================================
FILE: tensorboardX/tensorboardX/proto/plugin_pr_curve_pb2.py
================================================
# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: tensorboardX/proto/plugin_pr_curve.proto

import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


# File-level descriptor for tensorboardX/proto/plugin_pr_curve.proto.
# `serialized_pb` is the compiled (binary) form of that .proto file; the
# serialized_start / serialized_end offsets on the message descriptor in this
# module index into it, so it must not be edited by hand.
DESCRIPTOR = _descriptor.FileDescriptor(
  name='tensorboardX/proto/plugin_pr_curve.proto',
  package='tensorboardX',
  syntax='proto3',
  serialized_options=None,
  serialized_pb=_b('\n(tensorboardX/proto/plugin_pr_curve.proto\x12\x0ctensorboardX\"<\n\x11PrCurvePluginData\x12\x0f\n\x07version\x18\x01 \x01(\x05\x12\x16\n\x0enum_thresholds\x18\x02 \x01(\rb\x06proto3')
)


# Descriptor for message `tensorboardX.PrCurvePluginData`.
_PRCURVEPLUGINDATA = _descriptor.Descriptor(
  name='PrCurvePluginData',
  full_name='tensorboardX.PrCurvePluginData',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # int32 version = 1;
    _descriptor.FieldDescriptor(
      name='version', full_name='tensorboardX.PrCurvePluginData.version', index=0,
      number=1, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # uint32 num_thresholds = 2;
    _descriptor.FieldDescriptor(
      name='num_thresholds', full_name='tensorboardX.PrCurvePluginData.num_thresholds', index=1,
      number=2, type=13, cpp_type=3, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range of this message inside DESCRIPTOR's serialized_pb.
  serialized_start=58,
  serialized_end=118,
)

# Expose the message on the file descriptor and register the file with the
# symbol database.
DESCRIPTOR.message_types_by_name['PrCurvePluginData'] = _PRCURVEPLUGINDATA
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

# Python class for message tensorboardX.PrCurvePluginData, created from its
# descriptor and registered with the symbol database.
PrCurvePluginData = _reflection.GeneratedProtocolMessageType('PrCurvePluginData', (_message.Message,), dict(
  DESCRIPTOR = _PRCURVEPLUGINDATA,
  __module__ = 'tensorboardX.proto.plugin_pr_curve_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.PrCurvePluginData)
  ))
_sym_db.RegisterMessage(PrCurvePluginData)


# @@protoc_insertion_point(module_scope)


================================================
FILE: tensorboardX/tensorboardX/proto/plugin_text.proto
================================================
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

syntax = "proto3";

package tensorboardX;

// Text summaries created by the `tensorboard.plugins.text.summary`
// module will include `SummaryMetadata` whose `plugin_data` field has
// as `content` a binary string that is the encoding of a
// `TextPluginData` proto.
//
// The message currently carries only the schema version.
message TextPluginData {
  // Version `0` is the only supported version.
  int32 version = 1;
}


================================================
FILE: tensorboardX/tensorboardX/proto/plugin_text_pb2.py
================================================
# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: tensorboardX/proto/plugin_text.proto

import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


# File-level descriptor for tensorboardX/proto/plugin_text.proto.
# `serialized_pb` is the compiled (binary) form of that .proto file; the
# serialized_start / serialized_end offsets on the message descriptor in this
# module index into it, so it must not be edited by hand.
DESCRIPTOR = _descriptor.FileDescriptor(
  name='tensorboardX/proto/plugin_text.proto',
  package='tensorboardX',
  syntax='proto3',
  serialized_options=None,
  serialized_pb=_b('\n$tensorboardX/proto/plugin_text.proto\x12\x0ctensorboardX\"!\n\x0eTextPluginData\x12\x0f\n\x07version\x18\x01 \x01(\x05\x62\x06proto3')
)


# Descriptor for message `tensorboardX.TextPluginData` (single field).
_TEXTPLUGINDATA = _descriptor.Descriptor(
  name='TextPluginData',
  full_name='tensorboardX.TextPluginData',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # int32 version = 1;
    _descriptor.FieldDescriptor(
      name='version', full_name='tensorboardX.TextPluginData.version', index=0,
      number=1, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range of this message inside DESCRIPTOR's serialized_pb.
  serialized_start=54,
  serialized_end=87,
)

# Expose the message on the file descriptor and register the file with the
# symbol database.
DESCRIPTOR.message_types_by_name['TextPluginData'] = _TEXTPLUGINDATA
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

# Python class for message tensorboardX.TextPluginData, created from its
# descriptor and registered with the symbol database.
TextPluginData = _reflection.GeneratedProtocolMessageType('TextPluginData', (_message.Message,), dict(
  DESCRIPTOR = _TEXTPLUGINDATA,
  __module__ = 'tensorboardX.proto.plugin_text_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.TextPluginData)
  ))
_sym_db.RegisterMessage(TextPluginData)


# @@protoc_insertion_point(module_scope)


================================================
FILE: tensorboardX/tensorboardX/proto/resource_handle.proto
================================================
syntax = "proto3";

package tensorboardX;
option cc_enable_arenas = true;
option java_outer_classname = "ResourceHandle";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

// Protocol buffer representing a handle to a tensorflow resource. Handles are
// not valid across executions, but can be serialized back and forth from within
// a single run.
//
// A handle is described by where the resource lives (device, container), its
// name, a type hash, and an optional type name for debugging.
message ResourceHandleProto {
  // Unique name for the device containing the resource.
  string device = 1;

  // Container in which this resource is placed.
  string container = 2;

  // Unique name of this resource.
  string name = 3;

  // Hash code for the type of the resource. Is only valid in the same device
  // and in the same execution.
  uint64 hash_code = 4;

  // For debug-only, the name of the type pointed to by this handle, if
  // available.
  string maybe_type_name = 5;
};


================================================
FILE: tensorboardX/tensorboardX/proto/resource_handle_pb2.py
================================================
# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: tensorboardX/proto/resource_handle.proto

import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


# File-level descriptor for tensorboardX/proto/resource_handle.proto.
# `serialized_pb` is the compiled (binary) form of that .proto file; the
# serialized_start / serialized_end offsets on the message descriptor in this
# module index into it, so it must not be edited by hand.
DESCRIPTOR = _descriptor.FileDescriptor(
  name='tensorboardX/proto/resource_handle.proto',
  package='tensorboardX',
  syntax='proto3',
  # Encoded file-level options; the byte string contains the java_package
  # ("org.tensorflow.framework") and java_outer_classname ("ResourceHandle")
  # values from the .proto. The remaining bytes presumably encode the
  # java_multiple_files and cc_enable_arenas flags -- confirm against
  # descriptor.proto (FileOptions).
  serialized_options=_b('\n\030org.tensorflow.frameworkB\016ResourceHandleP\001\370\001\001'),
  serialized_pb=_b('\n(tensorboardX/proto/resource_handle.proto\x12\x0ctensorboardX\"r\n\x13ResourceHandleProto\x12\x0e\n\x06\x64\x65vice\x18\x01 \x01(\t\x12\x11\n\tcontainer\x18\x02 \x01(\t\x12\x0c\n\x04name\x18\x03 \x01(\t\x12\x11\n\thash_code\x18\x04 \x01(\x04\x12\x17\n\x0fmaybe_type_name\x18\x05 \x01(\tB/\n\x18org.tensorflow.frameworkB\x0eResourceHandleP\x01\xf8\x01\x01\x62\x06proto3')
)


# Descriptor for message `tensorboardX.ResourceHandleProto`.
_RESOURCEHANDLEPROTO = _descriptor.Descriptor(
  name='ResourceHandleProto',
  full_name='tensorboardX.ResourceHandleProto',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    # string device = 1;
    _descriptor.FieldDescriptor(
      name='device', full_name='tensorboardX.ResourceHandleProto.device', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # string container = 2;
    _descriptor.FieldDescriptor(
      name='container', full_name='tensorboardX.ResourceHandleProto.container', index=1,
      number=2, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # string name = 3;
    _descriptor.FieldDescriptor(
      name='name', full_name='tensorboardX.ResourceHandleProto.name', index=2,
      number=3, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # uint64 hash_code = 4;
    _descriptor.FieldDescriptor(
      name='hash_code', full_name='tensorboardX.ResourceHandleProto.hash_code', index=3,
      number=4, type=4, cpp_type=4, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # string maybe_type_name = 5;
    _descriptor.FieldDescriptor(
      name='maybe_type_name', full_name='tensorboardX.ResourceHandleProto.maybe_type_name', index=4,
      number=5, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range of this message inside DESCRIPTOR's serialized_pb.
  serialized_start=58,
  serialized_end=172,
)

# Expose the message on the file descriptor and register the file with the
# symbol database.
DESCRIPTOR.message_types_by_name['ResourceHandleProto'] = _RESOURCEHANDLEPROTO
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

# Python class for message tensorboardX.ResourceHandleProto, created from its
# descriptor and registered with the symbol database.
ResourceHandleProto = _reflection.GeneratedProtocolMessageType('ResourceHandleProto', (_message.Message,), dict(
  DESCRIPTOR = _RESOURCEHANDLEPROTO,
  __module__ = 'tensorboardX.proto.resource_handle_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.ResourceHandleProto)
  ))
_sym_db.RegisterMessage(ResourceHandleProto)


# Resets the private `_options` attribute on the file descriptor (which was
# built with non-None serialized_options).
# NOTE(review): presumably this makes protobuf re-derive the options from
# serialized_options on demand -- confirm against the descriptor implementation.
DESCRIPTOR._options = None
# @@protoc_insertion_point(module_scope)


================================================
FILE: tensorboardX/tensorboardX/proto/step_stats.proto
================================================
syntax = "proto3";

package tensorboardX;
option cc_enable_arenas = true;
option java_outer_classname = "StepStatsProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework";
//import "tensorflow/core/framework/allocation_description.proto";
//import "tensorflow/core/framework/tensor_description.proto";

// An allocation/de-allocation operation performed by the allocator.
// AllocatorMemoryUsed.allocation_records holds a repeated list of these.
message AllocationRecord {
  // The timestamp of the operation (presumably microseconds, going by the
  // `_micros` suffix -- confirm against the producer).
  int64 alloc_micros = 1;
  // Number of bytes allocated, or de-allocated if negative.
  int64 alloc_bytes = 2;
}

// Memory usage reported for one allocator. NodeExecStats carries a repeated
// list of these in its `memory` field.
message AllocatorMemoryUsed {
  // Name of the allocator the figures below belong to.
  string allocator_name = 1;
  // These are per-node allocator memory stats.
  int64 total_bytes = 2;
  int64 peak_bytes = 3;
  // The bytes that are not deallocated.
  int64 live_bytes = 4;
  // The allocation and deallocation timeline.
  // Note: field number 6 is declared before field number 5 below; the
  // numbers, not the declaration order, identify fields on the wire.
  repeated AllocationRecord allocation_records = 6;

  // These are snapshots of the overall allocator memory stats.
  // The number of live bytes currently allocated by the allocator.
  int64 allocator_bytes_in_use = 5;
}

// Output sizes recorded for a single execution of a graph node.
// In this copy only `slot` is active: the tensor_description field is
// commented out, matching the commented-out tensor_description.proto import
// at the top of this file.
message NodeOutput {
  // Presumably the index of the node output being described -- confirm.
  int32 slot = 1;
  // TensorDescription tensor_description = 3;
};

// For memory tracking. Referenced by NodeExecStats.memory_stats.
message MemoryStats {
  // Current fields (odd field numbers 1, 3, 5).
  int64 temp_memory_size = 1;
  int64 persistent_memory_size = 3;
  repeated int64 persistent_tensor_alloc_ids = 5;

  // Deprecated `device_*` counterparts (even field numbers 2, 4, 6). They
  // remain declared with [deprecated = true] rather than being removed.
  int64 device_temp_memory_size = 2 [deprecated = true];
  int64 device_persistent_memory_size = 4 [deprecated = true];
  repeated int64 device_persistent_tensor_alloc_ids = 6 [deprecated = true];
}

// Time/size stats recorded for a single execution of a graph node.
// DeviceStepStats.node_stats holds a repeated list of these.
message NodeExecStats {
  // TODO(tucker): Use some more compact form of node identity than
  // the full string name.  Either all processes should agree on a
  // global id (cost_id?) for each node, or we should use a hash of
  // the name.
  string node_name = 1;
  // Timing fields. NOTE(review): the `_rel_` fields are presumably offsets
  // relative to all_start_micros, in microseconds -- confirm with the
  // producer.
  int64 all_start_micros = 2;
  int64 op_start_rel_micros = 3;
  int64 op_end_rel_micros = 4;
  int64 all_end_rel_micros = 5;
  // Per-allocator memory usage for this execution.
  repeated AllocatorMemoryUsed memory = 6;
  // One entry per recorded node output.
  repeated NodeOutput output = 7;
  string timeline_label = 8;
  int64 scheduled_micros = 9;
  uint32 thread_id = 10;
  // Field 11 is intentionally absent in this copy: AllocationDescription's
  // import is commented out at the top of this file.
  // repeated AllocationDescription referenced_tensor = 11;
  MemoryStats memory_stats = 12;
};

// Groups the NodeExecStats entries recorded for one device.
// StepStats.dev_stats holds a repeated list of these.
message DeviceStepStats {
  // Device the node stats below belong to, as a string.
  string device = 1;
  repeated NodeExecStats node_stats = 2;
}

// Top-level container of per-device stats; embedded in
// RunMetadata.step_stats.
message StepStats {
  repeated DeviceStepStats dev_stats = 1;
};


// Note from lanpa: this message was copied from config.proto. Only
// `step_stats` is active here; fields 2 and 3 are kept as commented-out
// declarations.
// Metadata output (i.e., non-Tensor) for a single Run() call.
message RunMetadata {
  // Statistics traced for this step. Populated if tracing is turned on via the
  // "RunOptions" proto.
  // EXPERIMENTAL: The format and set of events may change in future versions.
  StepStats step_stats = 1;

  // The cost graph for the computation defined by the run call.
  // CostGraphDef cost_graph = 2;

  // Graphs of the partitions executed by executors.
  // repeated GraphDef partition_graphs = 3;
}


================================================
FILE: tensorboardX/tensorboardX/proto/step_stats_pb2.py
================================================
# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: tensorboardX/proto/step_stats.proto

import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


DESCRIPTOR = _descriptor.FileDescriptor(
  name='tensorboardX/proto/step_stats.proto',
  package='tensorboardX',
  syntax='proto3',
  serialized_options=_b('\n\030org.tensorflow.frameworkB\017StepStatsProtosP\001Z=github.com/tensorflow/tensorflow/tensorflow/go/core/framework\370\001\001'),
  serialized_pb=_b('\n#tensorboardX/proto/step_stats.proto\x12\x0ctensorboardX\"=\n\x10\x41llocationRecord\x12\x14\n\x0c\x61lloc_micros\x18\x01 \x01(\x03\x12\x13\n\x0b\x61lloc_bytes\x18\x02 \x01(\x03\"\xc6\x01\n\x13\x41llocatorMemoryUsed\x12\x16\n\x0e\x61llocator_name\x18\x01 \x01(\t\x12\x13\n\x0btotal_bytes\x18\x02 \x01(\x03\x12\x12\n\npeak_bytes\x18\x03 \x01(\x03\x12\x12\n\nlive_bytes\x18\x04 \x01(\x03\x12:\n\x12\x61llocation_records\x18\x06 \x03(\x0b\x32\x1e.tensorboardX.AllocationRecord\x12\x1e\n\x16\x61llocator_bytes_in_use\x18\x05 \x01(\x03\"\x1a\n\nNodeOutput\x12\x0c\n\x04slot\x18\x01 \x01(\x05\"\xec\x01\n\x0bMemoryStats\x12\x18\n\x10temp_memory_size\x18\x01 \x01(\x03\x12\x1e\n\x16persistent_memory_size\x18\x03 \x01(\x03\x12#\n\x1bpersistent_tensor_alloc_ids\x18\x05 \x03(\x03\x12#\n\x17\x64\x65vice_temp_memory_size\x18\x02 \x01(\x03\x42\x02\x18\x01\x12)\n\x1d\x64\x65vice_persistent_memory_size\x18\x04 \x01(\x03\x42\x02\x18\x01\x12.\n\"device_persistent_tensor_alloc_ids\x18\x06 \x03(\x03\x42\x02\x18\x01\"\xe3\x02\n\rNodeExecStats\x12\x11\n\tnode_name\x18\x01 \x01(\t\x12\x18\n\x10\x61ll_start_micros\x18\x02 \x01(\x03\x12\x1b\n\x13op_start_rel_micros\x18\x03 \x01(\x03\x12\x19\n\x11op_end_rel_micros\x18\x04 \x01(\x03\x12\x1a\n\x12\x61ll_end_rel_micros\x18\x05 \x01(\x03\x12\x31\n\x06memory\x18\x06 \x03(\x0b\x32!.tensorboardX.AllocatorMemoryUsed\x12(\n\x06output\x18\x07 \x03(\x0b\x32\x18.tensorboardX.NodeOutput\x12\x16\n\x0etimeline_label\x18\x08 \x01(\t\x12\x18\n\x10scheduled_micros\x18\t \x01(\x03\x12\x11\n\tthread_id\x18\n \x01(\r\x12/\n\x0cmemory_stats\x18\x0c \x01(\x0b\x32\x19.tensorboardX.MemoryStats\"R\n\x0f\x44\x65viceStepStats\x12\x0e\n\x06\x64\x65vice\x18\x01 \x01(\t\x12/\n\nnode_stats\x18\x02 \x03(\x0b\x32\x1b.tensorboardX.NodeExecStats\"=\n\tStepStats\x12\x30\n\tdev_stats\x18\x01 \x03(\x0b\x32\x1d.tensorboardX.DeviceStepStats\":\n\x0bRunMetadata\x12+\n\nstep_stats\x18\x01 
\x01(\x0b\x32\x17.tensorboardX.StepStatsBo\n\x18org.tensorflow.frameworkB\x0fStepStatsProtosP\x01Z=github.com/tensorflow/tensorflow/tensorflow/go/core/framework\xf8\x01\x01\x62\x06proto3')
)


# Descriptor for message `tensorboardX.AllocationRecord` (see
# step_stats.proto): two int64 fields, alloc_micros (1) and alloc_bytes (2).
# Generated code -- regenerate from the .proto instead of editing by hand.
_ALLOCATIONRECORD = _descriptor.Descriptor(
  name='AllocationRecord',
  full_name='tensorboardX.AllocationRecord',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='alloc_micros', full_name='tensorboardX.AllocationRecord.alloc_micros', index=0,
      number=1, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='alloc_bytes', full_name='tensorboardX.AllocationRecord.alloc_bytes', index=1,
      number=2, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range of this message inside DESCRIPTOR.serialized_pb; must stay in
  # sync with that literal.
  serialized_start=53,
  serialized_end=114,
)


# Descriptor for message `tensorboardX.AllocatorMemoryUsed` (see
# step_stats.proto). `index` follows declaration order while `number` is the
# proto field number, which is why allocation_records (number 6) sits at
# index 4, ahead of allocator_bytes_in_use (number 5).
# Generated code -- regenerate from the .proto instead of editing by hand.
_ALLOCATORMEMORYUSED = _descriptor.Descriptor(
  name='AllocatorMemoryUsed',
  full_name='tensorboardX.AllocatorMemoryUsed',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='allocator_name', full_name='tensorboardX.AllocatorMemoryUsed.allocator_name', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='total_bytes', full_name='tensorboardX.AllocatorMemoryUsed.total_bytes', index=1,
      number=2, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='peak_bytes', full_name='tensorboardX.AllocatorMemoryUsed.peak_bytes', index=2,
      number=3, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='live_bytes', full_name='tensorboardX.AllocatorMemoryUsed.live_bytes', index=3,
      number=4, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # Repeated message field; message_type is None here and is linked to
    # _ALLOCATIONRECORD after all descriptors have been created.
    _descriptor.FieldDescriptor(
      name='allocation_records', full_name='tensorboardX.AllocatorMemoryUsed.allocation_records', index=4,
      number=6, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='allocator_bytes_in_use', full_name='tensorboardX.AllocatorMemoryUsed.allocator_bytes_in_use', index=5,
      number=5, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range of this message inside DESCRIPTOR.serialized_pb; must stay in
  # sync with that literal.
  serialized_start=117,
  serialized_end=315,
)


# Descriptor for message `tensorboardX.NodeOutput` (see step_stats.proto).
# It has a single int32 field, `slot`; the tensor_description field is
# commented out in the .proto and therefore has no descriptor here.
# Generated code -- regenerate from the .proto instead of editing by hand.
_NODEOUTPUT = _descriptor.Descriptor(
  name='NodeOutput',
  full_name='tensorboardX.NodeOutput',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='slot', full_name='tensorboardX.NodeOutput.slot', index=0,
      number=1, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range of this message inside DESCRIPTOR.serialized_pb; must stay in
  # sync with that literal.
  serialized_start=317,
  serialized_end=343,
)


# Descriptor for message `tensorboardX.MemoryStats` (see step_stats.proto).
# The three `device_*` fields are the ones declared with
# [deprecated = true] in the .proto; they are the only fields here that carry
# non-None serialized_options.
# Generated code -- regenerate from the .proto instead of editing by hand.
_MEMORYSTATS = _descriptor.Descriptor(
  name='MemoryStats',
  full_name='tensorboardX.MemoryStats',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='temp_memory_size', full_name='tensorboardX.MemoryStats.temp_memory_size', index=0,
      number=1, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='persistent_memory_size', full_name='tensorboardX.MemoryStats.persistent_memory_size', index=1,
      number=3, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='persistent_tensor_alloc_ids', full_name='tensorboardX.MemoryStats.persistent_tensor_alloc_ids', index=2,
      number=5, type=3, cpp_type=2, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    # Deprecated fields follow (serialized_options is set on each).
    _descriptor.FieldDescriptor(
      name='device_temp_memory_size', full_name='tensorboardX.MemoryStats.device_temp_memory_size', index=3,
      number=2, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=_b('\030\001'), file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='device_persistent_memory_size', full_name='tensorboardX.MemoryStats.device_persistent_memory_size', index=4,
      number=4, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=_b('\030\001'), file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='device_persistent_tensor_alloc_ids', full_name='tensorboardX.MemoryStats.device_persistent_tensor_alloc_ids', index=5,
      number=6, type=3, cpp_type=2, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=_b('\030\001'), file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range of this message inside DESCRIPTOR.serialized_pb; must stay in
  # sync with that literal.
  serialized_start=346,
  serialized_end=582,
)


# Descriptor for message `tensorboardX.NodeExecStats` (see step_stats.proto).
# Field number 11 is skipped because `referenced_tensor` is commented out in
# the .proto, so the last field (memory_stats) has index 10 but number 12.
# The three message-typed fields (memory, output, memory_stats) are created
# with message_type=None and linked to their descriptors after all
# descriptors in this module exist.
# Generated code -- regenerate from the .proto instead of editing by hand.
_NODEEXECSTATS = _descriptor.Descriptor(
  name='NodeExecStats',
  full_name='tensorboardX.NodeExecStats',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='node_name', full_name='tensorboardX.NodeExecStats.node_name', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='all_start_micros', full_name='tensorboardX.NodeExecStats.all_start_micros', index=1,
      number=2, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='op_start_rel_micros', full_name='tensorboardX.NodeExecStats.op_start_rel_micros', index=2,
      number=3, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='op_end_rel_micros', full_name='tensorboardX.NodeExecStats.op_end_rel_micros', index=3,
      number=4, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='all_end_rel_micros', full_name='tensorboardX.NodeExecStats.all_end_rel_micros', index=4,
      number=5, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='memory', full_name='tensorboardX.NodeExecStats.memory', index=5,
      number=6, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='output', full_name='tensorboardX.NodeExecStats.output', index=6,
      number=7, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='timeline_label', full_name='tensorboardX.NodeExecStats.timeline_label', index=7,
      number=8, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='scheduled_micros', full_name='tensorboardX.NodeExecStats.scheduled_micros', index=8,
      number=9, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='thread_id', full_name='tensorboardX.NodeExecStats.thread_id', index=9,
      number=10, type=13, cpp_type=3, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='memory_stats', full_name='tensorboardX.NodeExecStats.memory_stats', index=10,
      number=12, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range of this message inside DESCRIPTOR.serialized_pb; must stay in
  # sync with that literal.
  serialized_start=585,
  serialized_end=940,
)


# Descriptor for message `tensorboardX.DeviceStepStats` (see
# step_stats.proto): a string `device` plus a repeated message field
# `node_stats`, whose message_type is linked to _NODEEXECSTATS after all
# descriptors in this module exist.
# Generated code -- regenerate from the .proto instead of editing by hand.
_DEVICESTEPSTATS = _descriptor.Descriptor(
  name='DeviceStepStats',
  full_name='tensorboardX.DeviceStepStats',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='device', full_name='tensorboardX.DeviceStepStats.device', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='node_stats', full_name='tensorboardX.DeviceStepStats.node_stats', index=1,
      number=2, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range of this message inside DESCRIPTOR.serialized_pb; must stay in
  # sync with that literal.
  serialized_start=942,
  serialized_end=1024,
)


# Descriptor for message `tensorboardX.StepStats` (see step_stats.proto): a
# single repeated message field `dev_stats`, whose message_type is linked to
# _DEVICESTEPSTATS after all descriptors in this module exist.
# Generated code -- regenerate from the .proto instead of editing by hand.
_STEPSTATS = _descriptor.Descriptor(
  name='StepStats',
  full_name='tensorboardX.StepStats',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='dev_stats', full_name='tensorboardX.StepStats.dev_stats', index=0,
      number=1, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range of this message inside DESCRIPTOR.serialized_pb; must stay in
  # sync with that literal.
  serialized_start=1026,
  serialized_end=1087,
)


# Descriptor for message `tensorboardX.RunMetadata` (see step_stats.proto).
# Only `step_stats` is described; cost_graph and partition_graphs are
# commented out in the .proto. The field's message_type is linked to
# _STEPSTATS after all descriptors in this module exist.
# Generated code -- regenerate from the .proto instead of editing by hand.
_RUNMETADATA = _descriptor.Descriptor(
  name='RunMetadata',
  full_name='tensorboardX.RunMetadata',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='step_stats', full_name='tensorboardX.RunMetadata.step_stats', index=0,
      number=1, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  # Byte range of this message inside DESCRIPTOR.serialized_pb; must stay in
  # sync with that literal.
  serialized_start=1089,
  serialized_end=1147,
)

# Resolve cross-references between messages: every message-typed field above
# was created with message_type=None, so point each one at the descriptor of
# the message it holds now that all descriptors exist.
_ALLOCATORMEMORYUSED.fields_by_name['allocation_records'].message_type = _ALLOCATIONRECORD
_NODEEXECSTATS.fields_by_name['memory'].message_type = _ALLOCATORMEMORYUSED
_NODEEXECSTATS.fields_by_name['output'].message_type = _NODEOUTPUT
_NODEEXECSTATS.fields_by_name['memory_stats'].message_type = _MEMORYSTATS
_DEVICESTEPSTATS.fields_by_name['node_stats'].message_type = _NODEEXECSTATS
_STEPSTATS.fields_by_name['dev_stats'].message_type = _DEVICESTEPSTATS
_RUNMETADATA.fields_by_name['step_stats'].message_type = _STEPSTATS
# Expose each message descriptor on the file descriptor under its name.
DESCRIPTOR.message_types_by_name['AllocationRecord'] = _ALLOCATIONRECORD
DESCRIPTOR.message_types_by_name['AllocatorMemoryUsed'] = _ALLOCATORMEMORYUSED
DESCRIPTOR.message_types_by_name['NodeOutput'] = _NODEOUTPUT
DESCRIPTOR.message_types_by_name['MemoryStats'] = _MEMORYSTATS
DESCRIPTOR.message_types_by_name['NodeExecStats'] = _NODEEXECSTATS
DESCRIPTOR.message_types_by_name['DeviceStepStats'] = _DEVICESTEPSTATS
DESCRIPTOR.message_types_by_name['StepStats'] = _STEPSTATS
DESCRIPTOR.message_types_by_name['RunMetadata'] = _RUNMETADATA
# Register the fully linked file descriptor with the default symbol database.
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

# Concrete message class for tensorboardX.AllocationRecord, built by
# reflection from _ALLOCATIONRECORD and registered with the symbol database.
AllocationRecord = _reflection.GeneratedProtocolMessageType('AllocationRecord', (_message.Message,), dict(
  DESCRIPTOR = _ALLOCATIONRECORD,
  __module__ = 'tensorboardX.proto.step_stats_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.AllocationRecord)
  ))
_sym_db.RegisterMessage(AllocationRecord)

# Concrete message class for tensorboardX.AllocatorMemoryUsed, built by
# reflection from _ALLOCATORMEMORYUSED and registered with the symbol database.
AllocatorMemoryUsed = _reflection.GeneratedProtocolMessageType('AllocatorMemoryUsed', (_message.Message,), dict(
  DESCRIPTOR = _ALLOCATORMEMORYUSED,
  __module__ = 'tensorboardX.proto.step_stats_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.AllocatorMemoryUsed)
  ))
_sym_db.RegisterMessage(AllocatorMemoryUsed)

# Concrete message class for tensorboardX.NodeOutput, built by reflection
# from _NODEOUTPUT and registered with the symbol database.
NodeOutput = _reflection.GeneratedProtocolMessageType('NodeOutput', (_message.Message,), dict(
  DESCRIPTOR = _NODEOUTPUT,
  __module__ = 'tensorboardX.proto.step_stats_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.NodeOutput)
  ))
_sym_db.RegisterMessage(NodeOutput)

# Concrete message class for tensorboardX.MemoryStats, built by reflection
# from _MEMORYSTATS and registered with the symbol database.
MemoryStats = _reflection.GeneratedProtocolMessageType('MemoryStats', (_message.Message,), dict(
  DESCRIPTOR = _MEMORYSTATS,
  __module__ = 'tensorboardX.proto.step_stats_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.MemoryStats)
  ))
_sym_db.RegisterMessage(MemoryStats)

# Concrete message class for tensorboardX.NodeExecStats, built by reflection
# from _NODEEXECSTATS and registered with the symbol database.
NodeExecStats = _reflection.GeneratedProtocolMessageType('NodeExecStats', (_message.Message,), dict(
  DESCRIPTOR = _NODEEXECSTATS,
  __module__ = 'tensorboardX.proto.step_stats_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.NodeExecStats)
  ))
_sym_db.RegisterMessage(NodeExecStats)

# Concrete message class for tensorboardX.DeviceStepStats, built by
# reflection from _DEVICESTEPSTATS and registered with the symbol database.
DeviceStepStats = _reflection.GeneratedProtocolMessageType('DeviceStepStats', (_message.Message,), dict(
  DESCRIPTOR = _DEVICESTEPSTATS,
  __module__ = 'tensorboardX.proto.step_stats_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.DeviceStepStats)
  ))
_sym_db.RegisterMessage(DeviceStepStats)

# Concrete message class for tensorboardX.StepStats, built by reflection
# from _STEPSTATS and registered with the symbol database.
StepStats = _reflection.GeneratedProtocolMessageType('StepStats', (_message.Message,), dict(
  DESCRIPTOR = _STEPSTATS,
  __module__ = 'tensorboardX.proto.step_stats_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.StepStats)
  ))
_sym_db.RegisterMessage(StepStats)

# Concrete message class for tensorboardX.RunMetadata, built by reflection
# from _RUNMETADATA and registered with the symbol database.
RunMetadata = _reflection.GeneratedProtocolMessageType('RunMetadata', (_message.Message,), dict(
  DESCRIPTOR = _RUNMETADATA,
  __module__ = 'tensorboardX.proto.step_stats_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.RunMetadata)
  ))
_sym_db.RegisterMessage(RunMetadata)


# Reset the private `_options` attribute on the file descriptor and on the
# three deprecated MemoryStats fields -- exactly the descriptors above that
# were created with a non-None serialized_options.
# NOTE(review): presumably this lets the protobuf runtime rebuild the options
# lazily from `serialized_options` -- confirm against the runtime in use.
DESCRIPTOR._options = None
_MEMORYSTATS.fields_by_name['device_temp_memory_size']._options = None
_MEMORYSTATS.fields_by_name['device_persistent_memory_size']._options = None
_MEMORYSTATS.fields_by_name['device_persistent_tensor_alloc_ids']._options = None
# @@protoc_insertion_point(module_scope)


================================================
FILE: tensorboardX/tensorboardX/proto/summary.proto
================================================
syntax = "proto3";

package tensorboardX;
option cc_enable_arenas = true;
option java_outer_classname = "SummaryProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

import "tensorboardX/proto/tensor.proto";

// Metadata associated with a series of Summary data.
message SummaryDescription {
  // Hint on how plugins should process the data in this series.
  // Supported values include "scalar", "histogram", "image", "audio".
  string type_hint = 1;
}

// Serialization format for histogram module in
// core/lib/histogram/histogram.h
// Used as the `histo` alternative of Summary.Value's `value` oneof.
message HistogramProto {
  // Aggregate statistics, all stored as doubles.
  // NOTE(review): presumably min/max of the observed values, their count
  // (num), their sum and their sum of squares -- confirm against the writer.
  double min = 1;
  double max = 2;
  double num = 3;
  double sum = 4;
  double sum_squares = 5;

  // Parallel arrays encoding the bucket boundaries and the bucket values.
  // bucket(i) is the count for the bucket i.  The range for
  // a bucket is:
  //   i == 0:  -DBL_MAX .. bucket_limit(0)
  //   i != 0:  bucket_limit(i-1) .. bucket_limit(i)
  repeated double bucket_limit = 6 [packed = true];
  repeated double bucket = 7 [packed = true];
};

// A SummaryMetadata encapsulates information on which plugins are able to make
// use of a certain summary value. It is attached to a summary value through
// Summary.Value.metadata.
message SummaryMetadata {
  // Plugin-specific payload: the plugin's name plus opaque content bytes.
  message PluginData {
    // The name of the plugin this data pertains to.
    string plugin_name = 1;

    // The content to store for the plugin. The best practice is for this to be
    // a binary serialized protocol buffer.
    bytes content = 2;
  }

  // Data that associates a summary with a certain plugin.
  PluginData plugin_data = 1;

  // Display name for viewing in TensorBoard.
  string display_name = 2;

  // Long-form, human-readable description of the summary sequence. Markdown
  // is supported.
  string summary_description = 3;
};

// A Summary is a set of named values to be displayed by the
// visualizer.
//
// Summaries are produced regularly during training, as controlled by
// the "summary_interval_secs" attribute of the training operation.
// Summaries are also produced at the end of an evaluation.
message Summary {
  // An encoded image plus its dimensions and colorspace.
  message Image {
    // Dimensions of the image.
    int32 height = 1;
    int32 width = 2;
    // Valid colorspace values are
    //   1 - grayscale
    //   2 - grayscale + alpha
    //   3 - RGB
    //   4 - RGBA
    //   5 - DIGITAL_YUV
    //   6 - BGRA
    int32 colorspace = 3;
    // Image data in encoded format.  All image formats supported by
    // image_codec::CoderUtil can be stored here.
    bytes encoded_image_string = 4;
  }

  // An encoded audio clip plus its sample rate, channel count and length.
  message Audio {
    // Sample rate of the audio in Hz.
    float sample_rate = 1;
    // Number of channels of audio.
    int64 num_channels = 2;
    // Length of the audio in frames (samples per channel).
    int64 length_frames = 3;
    // Encoded audio data and its associated RFC 2045 content type (e.g.
    // "audio/wav").
    bytes encoded_audio_string = 4;
    string content_type = 5;
  }

  // One tagged entry of a Summary: a tag, optional metadata, and exactly one
  // kind of payload chosen through the `value` oneof below.
  message Value {
    // This field is deprecated and will not be set.
    string node_name = 7;

    // Tag name for the data. Used by TensorBoard plugins to organize data. Tags
    // are often organized by scope (which contains slashes to convey
    // hierarchy). For example: foo/bar/0
    string tag = 1;

    // Contains metadata on the summary value such as which plugins may use it.
    // Take note that many summary values may lack a metadata field. This is
    // because the FileWriter only keeps a metadata object on the first summary
    // value with a certain tag for each tag. TensorBoard then remembers which
    // tags are associated with which plugins. This saves space.
    SummaryMetadata metadata = 9;

    // Value associated with the tag. As a oneof, at most one of these
    // alternatives is set at a time.
    oneof value {
      float simple_value = 2;
      bytes obsolete_old_style_histogram = 3;
      Image image = 4;
      HistogramProto histo = 5;
      Audio audio = 6;
      // TensorProto is not declared in this file; presumably it comes from
      // the imported tensorboardX/proto/tensor.proto -- confirm.
      TensorProto tensor = 8;
    }
  }

  // Set of values for the summary.
  repeated Value value = 1;
}


================================================
FILE: tensorboardX/tensorboardX/proto/summary_pb2.py
================================================
# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: tensorboardX/proto/summary.proto

import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


from tensorboardX.proto import tensor_pb2 as tensorboardX_dot_proto_dot_tensor__pb2


# File-level descriptor for tensorboardX/proto/summary.proto.
#
# `serialized_pb` is the serialized form of the .proto file as emitted by
# protoc; the serialized_start/serialized_end values on the message descriptors
# below are byte offsets into it. The literal must therefore stay on a single
# line, byte for byte: a raw line break inside the quotes is a syntax error
# (unterminated string literal).
DESCRIPTOR = _descriptor.FileDescriptor(
  name='tensorboardX/proto/summary.proto',
  package='tensorboardX',
  syntax='proto3',
  serialized_options=_b('\n\030org.tensorflow.frameworkB\rSummaryProtosP\001\370\001\001'),
  serialized_pb=_b('\n tensorboardX/proto/summary.proto\x12\x0ctensorboardX\x1a\x1ftensorboardX/proto/tensor.proto\"\'\n\x12SummaryDescription\x12\x11\n\ttype_hint\x18\x01 \x01(\t\"\x87\x01\n\x0eHistogramProto\x12\x0b\n\x03min\x18\x01 \x01(\x01\x12\x0b\n\x03max\x18\x02 \x01(\x01\x12\x0b\n\x03num\x18\x03 \x01(\x01\x12\x0b\n\x03sum\x18\x04 \x01(\x01\x12\x13\n\x0bsum_squares\x18\x05 \x01(\x01\x12\x18\n\x0c\x62ucket_limit\x18\x06 \x03(\x01\x42\x02\x10\x01\x12\x12\n\x06\x62ucket\x18\x07 \x03(\x01\x42\x02\x10\x01\"\xb7\x01\n\x0fSummaryMetadata\x12=\n\x0bplugin_data\x18\x01 \x01(\x0b\x32(.tensorboardX.SummaryMetadata.PluginData\x12\x14\n\x0c\x64isplay_name\x18\x02 \x01(\t\x12\x1b\n\x13summary_description\x18\x03 \x01(\t\x1a\x32\n\nPluginData\x12\x13\n\x0bplugin_name\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\x0c\"\xea\x04\n\x07Summary\x12*\n\x05value\x18\x01 \x03(\x0b\x32\x1b.tensorboardX.Summary.Value\x1aX\n\x05Image\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x12\n\ncolorspace\x18\x03 \x01(\x05\x12\x1c\n\x14\x65ncoded_image_string\x18\x04 \x01(\x0c\x1a}\n\x05\x41udio\x12\x13\n\x0bsample_rate\x18\x01 \x01(\x02\x12\x14\n\x0cnum_channels\x18\x02 \x01(\x03\x12\x15\n\rlength_frames\x18\x03 \x01(\x03\x12\x1c\n\x14\x65ncoded_audio_string\x18\x04 \x01(\x0c\x12\x14\n\x0c\x63ontent_type\x18\x05 \x01(\t\x1a\xd9\x02\n\x05Value\x12\x11\n\tnode_name\x18\x07 \x01(\t\x12\x0b\n\x03tag\x18\x01 \x01(\t\x12/\n\x08metadata\x18\t \x01(\x0b\x32\x1d.tensorboardX.SummaryMetadata\x12\x16\n\x0csimple_value\x18\x02 \x01(\x02H\x00\x12&\n\x1cobsolete_old_style_histogram\x18\x03 \x01(\x0cH\x00\x12,\n\x05image\x18\x04 \x01(\x0b\x32\x1b.tensorboardX.Summary.ImageH\x00\x12-\n\x05histo\x18\x05 \x01(\x0b\x32\x1c.tensorboardX.HistogramProtoH\x00\x12,\n\x05\x61udio\x18\x06 \x01(\x0b\x32\x1b.tensorboardX.Summary.AudioH\x00\x12+\n\x06tensor\x18\x08 \x01(\x0b\x32\x19.tensorboardX.TensorProtoH\x00\x42\x07\n\x05valueB.\n\x18org.tensorflow.frameworkB\rSummaryProtosP\x01\xf8\x01\x01\x62\x06proto3')
  ,
  dependencies=[tensorboardX_dot_proto_dot_tensor__pb2.DESCRIPTOR,])


# Descriptor for the top-level message tensorboardX.SummaryDescription, which
# has a single string field, `type_hint` (field number 1).
# serialized_start/serialized_end are the byte offsets of this message's
# definition inside DESCRIPTOR's serialized_pb.
_SUMMARYDESCRIPTION = _descriptor.Descriptor(
  name='SummaryDescription',
  full_name='tensorboardX.SummaryDescription',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='type_hint', full_name='tensorboardX.SummaryDescription.type_hint', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=83,
  serialized_end=122,
)


# Descriptor for the top-level message tensorboardX.HistogramProto.
# Fields 1-5 (min, max, num, sum, sum_squares) are singular doubles; fields 6-7
# (bucket_limit, bucket) are repeated doubles that carry the serialized field
# option '\020\001' (the same bytes the packed fields of TensorProto carry).
_HISTOGRAMPROTO = _descriptor.Descriptor(
  name='HistogramProto',
  full_name='tensorboardX.HistogramProto',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='min', full_name='tensorboardX.HistogramProto.min', index=0,
      number=1, type=1, cpp_type=5, label=1,
      has_default_value=False, default_value=float(0),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='max', full_name='tensorboardX.HistogramProto.max', index=1,
      number=2, type=1, cpp_type=5, label=1,
      has_default_value=False, default_value=float(0),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='num', full_name='tensorboardX.HistogramProto.num', index=2,
      number=3, type=1, cpp_type=5, label=1,
      has_default_value=False, default_value=float(0),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='sum', full_name='tensorboardX.HistogramProto.sum', index=3,
      number=4, type=1, cpp_type=5, label=1,
      has_default_value=False, default_value=float(0),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='sum_squares', full_name='tensorboardX.HistogramProto.sum_squares', index=4,
      number=5, type=1, cpp_type=5, label=1,
      has_default_value=False, default_value=float(0),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='bucket_limit', full_name='tensorboardX.HistogramProto.bucket_limit', index=5,
      number=6, type=1, cpp_type=5, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=_b('\020\001'), file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='bucket', full_name='tensorboardX.HistogramProto.bucket', index=6,
      number=7, type=1, cpp_type=5, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=_b('\020\001'), file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=125,
  serialized_end=260,
)


# Descriptor for the nested message tensorboardX.SummaryMetadata.PluginData:
# `plugin_name` (string, field 1) and `content` (bytes, field 2).
# containing_type is None here and is pointed at _SUMMARYMETADATA further down
# in this module, once that descriptor exists.
_SUMMARYMETADATA_PLUGINDATA = _descriptor.Descriptor(
  name='PluginData',
  full_name='tensorboardX.SummaryMetadata.PluginData',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='plugin_name', full_name='tensorboardX.SummaryMetadata.PluginData.plugin_name', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='content', full_name='tensorboardX.SummaryMetadata.PluginData.content', index=1,
      number=2, type=12, cpp_type=9, label=1,
      has_default_value=False, default_value=_b(""),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=396,
  serialized_end=446,
)

# Descriptor for the top-level message tensorboardX.SummaryMetadata:
# `plugin_data` (message, field 1), `display_name` (string, field 2) and
# `summary_description` (string, field 3). PluginData is registered as a nested
# type; the message_type of `plugin_data` is filled in further down in this
# module.
_SUMMARYMETADATA = _descriptor.Descriptor(
  name='SummaryMetadata',
  full_name='tensorboardX.SummaryMetadata',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='plugin_data', full_name='tensorboardX.SummaryMetadata.plugin_data', index=0,
      number=1, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='display_name', full_name='tensorboardX.SummaryMetadata.display_name', index=1,
      number=2, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='summary_description', full_name='tensorboardX.SummaryMetadata.summary_description', index=2,
      number=3, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[_SUMMARYMETADATA_PLUGINDATA, ],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=263,
  serialized_end=446,
)


# Descriptor for the nested message tensorboardX.Summary.Image: three int32
# fields (`height`, `width`, `colorspace`; fields 1-3) and the bytes field
# `encoded_image_string` (field 4). containing_type is set to _SUMMARY further
# down in this module.
_SUMMARY_IMAGE = _descriptor.Descriptor(
  name='Image',
  full_name='tensorboardX.Summary.Image',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='height', full_name='tensorboardX.Summary.Image.height', index=0,
      number=1, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='width', full_name='tensorboardX.Summary.Image.width', index=1,
      number=2, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='colorspace', full_name='tensorboardX.Summary.Image.colorspace', index=2,
      number=3, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='encoded_image_string', full_name='tensorboardX.Summary.Image.encoded_image_string', index=3,
      number=4, type=12, cpp_type=9, label=1,
      has_default_value=False, default_value=_b(""),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=504,
  serialized_end=592,
)

# Descriptor for the nested message tensorboardX.Summary.Audio:
# `sample_rate` (float, field 1), `num_channels` and `length_frames` (int64,
# fields 2-3), `encoded_audio_string` (bytes, field 4) and `content_type`
# (string, field 5). containing_type is set to _SUMMARY further down in this
# module.
_SUMMARY_AUDIO = _descriptor.Descriptor(
  name='Audio',
  full_name='tensorboardX.Summary.Audio',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='sample_rate', full_name='tensorboardX.Summary.Audio.sample_rate', index=0,
      number=1, type=2, cpp_type=6, label=1,
      has_default_value=False, default_value=float(0),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='num_channels', full_name='tensorboardX.Summary.Audio.num_channels', index=1,
      number=2, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='length_frames', full_name='tensorboardX.Summary.Audio.length_frames', index=2,
      number=3, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='encoded_audio_string', full_name='tensorboardX.Summary.Audio.encoded_audio_string', index=3,
      number=4, type=12, cpp_type=9, label=1,
      has_default_value=False, default_value=_b(""),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='content_type', full_name='tensorboardX.Summary.Audio.content_type', index=4,
      number=5, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=594,
  serialized_end=719,
)

# Descriptor for the nested message tensorboardX.Summary.Value.
# Note that `index` is the declaration order in the .proto file, not the field
# number (e.g. `node_name` is index 0 but field number 7).
# The message-typed fields (metadata, image, histo, audio, tensor) have
# message_type=None here, and the `value` oneof starts with an empty field
# list; both are filled in further down in this module.
_SUMMARY_VALUE = _descriptor.Descriptor(
  name='Value',
  full_name='tensorboardX.Summary.Value',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='node_name', full_name='tensorboardX.Summary.Value.node_name', index=0,
      number=7, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='tag', full_name='tensorboardX.Summary.Value.tag', index=1,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='metadata', full_name='tensorboardX.Summary.Value.metadata', index=2,
      number=9, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='simple_value', full_name='tensorboardX.Summary.Value.simple_value', index=3,
      number=2, type=2, cpp_type=6, label=1,
      has_default_value=False, default_value=float(0),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='obsolete_old_style_histogram', full_name='tensorboardX.Summary.Value.obsolete_old_style_histogram', index=4,
      number=3, type=12, cpp_type=9, label=1,
      has_default_value=False, default_value=_b(""),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='image', full_name='tensorboardX.Summary.Value.image', index=5,
      number=4, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='histo', full_name='tensorboardX.Summary.Value.histo', index=6,
      number=5, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='audio', full_name='tensorboardX.Summary.Value.audio', index=7,
      number=6, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='tensor', full_name='tensorboardX.Summary.Value.tensor', index=8,
      number=8, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
    _descriptor.OneofDescriptor(
      name='value', full_name='tensorboardX.Summary.Value.value',
      index=0, containing_type=None, fields=[]),
  ],
  serialized_start=722,
  serialized_end=1067,
)

# Descriptor for the top-level message tensorboardX.Summary: a single repeated
# message field `value` (field 1), with Image, Audio and Value registered as
# nested types. The message_type of `value` is set to _SUMMARY_VALUE further
# down in this module.
_SUMMARY = _descriptor.Descriptor(
  name='Summary',
  full_name='tensorboardX.Summary',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='value', full_name='tensorboardX.Summary.value', index=0,
      number=1, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[_SUMMARY_IMAGE, _SUMMARY_AUDIO, _SUMMARY_VALUE, ],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=449,
  serialized_end=1067,
)

# Resolve the cross-references that were left as None when the descriptors
# above were constructed: parent links for nested messages and the message
# types of message-typed fields.
_SUMMARYMETADATA_PLUGINDATA.containing_type = _SUMMARYMETADATA
_SUMMARYMETADATA.fields_by_name['plugin_data'].message_type = _SUMMARYMETADATA_PLUGINDATA
_SUMMARY_IMAGE.containing_type = _SUMMARY
_SUMMARY_AUDIO.containing_type = _SUMMARY
_SUMMARY_VALUE.fields_by_name['metadata'].message_type = _SUMMARYMETADATA
_SUMMARY_VALUE.fields_by_name['image'].message_type = _SUMMARY_IMAGE
_SUMMARY_VALUE.fields_by_name['histo'].message_type = _HISTOGRAMPROTO
_SUMMARY_VALUE.fields_by_name['audio'].message_type = _SUMMARY_AUDIO
# TensorProto lives in the imported tensor_pb2 module.
_SUMMARY_VALUE.fields_by_name['tensor'].message_type = tensorboardX_dot_proto_dot_tensor__pb2._TENSORPROTO
_SUMMARY_VALUE.containing_type = _SUMMARY
# Populate the `value` oneof: each member field is appended to the oneof's
# field list and, in turn, pointed back at the oneof via containing_oneof.
_SUMMARY_VALUE.oneofs_by_name['value'].fields.append(
  _SUMMARY_VALUE.fields_by_name['simple_value'])
_SUMMARY_VALUE.fields_by_name['simple_value'].containing_oneof = _SUMMARY_VALUE.oneofs_by_name['value']
_SUMMARY_VALUE.oneofs_by_name['value'].fields.append(
  _SUMMARY_VALUE.fields_by_name['obsolete_old_style_histogram'])
_SUMMARY_VALUE.fields_by_name['obsolete_old_style_histogram'].containing_oneof = _SUMMARY_VALUE.oneofs_by_name['value']
_SUMMARY_VALUE.oneofs_by_name['value'].fields.append(
  _SUMMARY_VALUE.fields_by_name['image'])
_SUMMARY_VALUE.fields_by_name['image'].containing_oneof = _SUMMARY_VALUE.oneofs_by_name['value']
_SUMMARY_VALUE.oneofs_by_name['value'].fields.append(
  _SUMMARY_VALUE.fields_by_name['histo'])
_SUMMARY_VALUE.fields_by_name['histo'].containing_oneof = _SUMMARY_VALUE.oneofs_by_name['value']
_SUMMARY_VALUE.oneofs_by_name['value'].fields.append(
  _SUMMARY_VALUE.fields_by_name['audio'])
_SUMMARY_VALUE.fields_by_name['audio'].containing_oneof = _SUMMARY_VALUE.oneofs_by_name['value']
_SUMMARY_VALUE.oneofs_by_name['value'].fields.append(
  _SUMMARY_VALUE.fields_by_name['tensor'])
_SUMMARY_VALUE.fields_by_name['tensor'].containing_oneof = _SUMMARY_VALUE.oneofs_by_name['value']
_SUMMARY.fields_by_name['value'].message_type = _SUMMARY_VALUE
# Expose the four top-level messages on the file descriptor and register the
# file with the default symbol database.
DESCRIPTOR.message_types_by_name['SummaryDescription'] = _SUMMARYDESCRIPTION
DESCRIPTOR.message_types_by_name['HistogramProto'] = _HISTOGRAMPROTO
DESCRIPTOR.message_types_by_name['SummaryMetadata'] = _SUMMARYMETADATA
DESCRIPTOR.message_types_by_name['Summary'] = _SUMMARY
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

# Build the SummaryDescription message class from its descriptor and register
# it with the default symbol database.
SummaryDescription = _reflection.GeneratedProtocolMessageType('SummaryDescription', (_message.Message,), dict(
  DESCRIPTOR = _SUMMARYDESCRIPTION,
  __module__ = 'tensorboardX.proto.summary_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.SummaryDescription)
  ))
_sym_db.RegisterMessage(SummaryDescription)

# Build the HistogramProto message class from its descriptor and register it
# with the default symbol database.
HistogramProto = _reflection.GeneratedProtocolMessageType('HistogramProto', (_message.Message,), dict(
  DESCRIPTOR = _HISTOGRAMPROTO,
  __module__ = 'tensorboardX.proto.summary_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.HistogramProto)
  ))
_sym_db.RegisterMessage(HistogramProto)

# Build the SummaryMetadata message class. The nested PluginData class is
# created inline and passed as a class attribute, so it is reachable as
# SummaryMetadata.PluginData; both classes are then registered with the default
# symbol database.
SummaryMetadata = _reflection.GeneratedProtocolMessageType('SummaryMetadata', (_message.Message,), dict(

  PluginData = _reflection.GeneratedProtocolMessageType('PluginData', (_message.Message,), dict(
    DESCRIPTOR = _SUMMARYMETADATA_PLUGINDATA,
    __module__ = 'tensorboardX.proto.summary_pb2'
    # @@protoc_insertion_point(class_scope:tensorboardX.SummaryMetadata.PluginData)
    ))
  ,
  DESCRIPTOR = _SUMMARYMETADATA,
  __module__ = 'tensorboardX.proto.summary_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.SummaryMetadata)
  ))
_sym_db.RegisterMessage(SummaryMetadata)
_sym_db.RegisterMessage(SummaryMetadata.PluginData)

# Build the Summary message class. The nested Image, Audio and Value classes
# are created inline and passed as class attributes, so they are reachable as
# Summary.Image, Summary.Audio and Summary.Value; all four classes are then
# registered with the default symbol database.
Summary = _reflection.GeneratedProtocolMessageType('Summary', (_message.Message,), dict(

  Image = _reflection.GeneratedProtocolMessageType('Image', (_message.Message,), dict(
    DESCRIPTOR = _SUMMARY_IMAGE,
    __module__ = 'tensorboardX.proto.summary_pb2'
    # @@protoc_insertion_point(class_scope:tensorboardX.Summary.Image)
    ))
  ,

  Audio = _reflection.GeneratedProtocolMessageType('Audio', (_message.Message,), dict(
    DESCRIPTOR = _SUMMARY_AUDIO,
    __module__ = 'tensorboardX.proto.summary_pb2'
    # @@protoc_insertion_point(class_scope:tensorboardX.Summary.Audio)
    ))
  ,

  Value = _reflection.GeneratedProtocolMessageType('Value', (_message.Message,), dict(
    DESCRIPTOR = _SUMMARY_VALUE,
    __module__ = 'tensorboardX.proto.summary_pb2'
    # @@protoc_insertion_point(class_scope:tensorboardX.Summary.Value)
    ))
  ,
  DESCRIPTOR = _SUMMARY,
  __module__ = 'tensorboardX.proto.summary_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.Summary)
  ))
_sym_db.RegisterMessage(Summary)
_sym_db.RegisterMessage(Summary.Image)
_sym_db.RegisterMessage(Summary.Audio)
_sym_db.RegisterMessage(Summary.Value)


# Clear the `_options` attribute on the file descriptor and on the two fields
# that were constructed with serialized_options.
# NOTE(review): presumably this makes the protobuf runtime derive the options
# from serialized_options on demand -- confirm against the installed runtime.
DESCRIPTOR._options = None
_HISTOGRAMPROTO.fields_by_name['bucket_limit']._options = None
_HISTOGRAMPROTO.fields_by_name['bucket']._options = None
# @@protoc_insertion_point(module_scope)


================================================
FILE: tensorboardX/tensorboardX/proto/tensor.proto
================================================
syntax = "proto3";

package tensorboardX;
option cc_enable_arenas = true;
option java_outer_classname = "TensorProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

import "tensorboardX/proto/resource_handle.proto";
import "tensorboardX/proto/tensor_shape.proto";
import "tensorboardX/proto/types.proto";

// Protocol buffer representing a tensor.
message TensorProto {
  // Data type of the tensor. Determines which of the type-specific "xxx_val"
  // fields below may be set.
  DataType dtype = 1;

  // Shape of the tensor.  TODO(touts): sort out the 0-rank issues.
  TensorShapeProto tensor_shape = 2;

  // Only one of the representations below is set: either "tensor_content" or
  // one of the "xxx_val" fields.  We are not using a oneof because oneofs
  // cannot contain repeated fields, so it would require an extra set of
  // wrapper messages.

  // Version number.
  //
  // In version 0, if the "repeated xxx" representations contain only one
  // element, that element is repeated to fill the shape.  This makes it easy
  // to represent a constant Tensor with a single value.
  int32 version_number = 3;

  // Serialized raw tensor content from either Tensor::AsProtoTensorContent or
  // memcpy in tensorflow::grpc::EncodeTensorToByteBuffer. This representation
  // can be used for all tensor types. The purpose of this representation is to
  // reduce serialization overhead during RPC calls by avoiding serialization
  // of many repeated small items.
  bytes tensor_content = 4;

  // Type-specific representations that make it easy to create tensor protos in
  // all languages.  Only the representation corresponding to "dtype" can
  // be set.  The values hold the flattened representation of the tensor in
  // row-major order.

  // DT_HALF. Note that since protobuf has no int16 type, we'll have some
  // pointless zero padding for each value here.
  repeated int32 half_val = 13 [packed = true];

  // DT_FLOAT.
  repeated float float_val = 5 [packed = true];

  // DT_DOUBLE.
  repeated double double_val = 6 [packed = true];

  // DT_INT32, DT_INT16, DT_INT8, DT_UINT8.
  repeated int32 int_val = 7 [packed = true];

  // DT_STRING.
  repeated bytes string_val = 8;

  // DT_COMPLEX64. scomplex_val(2*i) and scomplex_val(2*i+1) are the real
  // and imaginary parts of the i-th single-precision complex value.
  repeated float scomplex_val = 9 [packed = true];

  // DT_INT64.
  repeated int64 int64_val = 10 [packed = true];

  // DT_BOOL.
  repeated bool bool_val = 11 [packed = true];

  // DT_COMPLEX128. dcomplex_val(2*i) and dcomplex_val(2*i+1) are the real
  // and imaginary parts of the i-th double-precision complex value.
  repeated double dcomplex_val = 12 [packed = true];

  // DT_RESOURCE.
  repeated ResourceHandleProto resource_handle_val = 14;
};


================================================
FILE: tensorboardX/tensorboardX/proto/tensor_pb2.py
================================================
# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: tensorboardX/proto/tensor.proto

import sys
# Turn a descriptor string literal into bytes: identity on Python 2 (where the
# literal is already a byte string), latin-1 encoding on Python 3.
_b = (lambda x: x) if sys.version_info[0] < 3 else (lambda x: x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


from tensorboardX.proto import resource_handle_pb2 as tensorboardX_dot_proto_dot_resource__handle__pb2
from tensorboardX.proto import tensor_shape_pb2 as tensorboardX_dot_proto_dot_tensor__shape__pb2
from tensorboardX.proto import types_pb2 as tensorboardX_dot_proto_dot_types__pb2


# File-level descriptor for tensorboardX/proto/tensor.proto.
# `serialized_pb` is the serialized form of the .proto file as emitted by
# protoc; the serialized_start/serialized_end values on _TENSORPROTO below are
# byte offsets into it. `dependencies` lists the descriptors of the three
# imported .proto files (resource_handle, tensor_shape, types).
DESCRIPTOR = _descriptor.FileDescriptor(
  name='tensorboardX/proto/tensor.proto',
  package='tensorboardX',
  syntax='proto3',
  serialized_options=_b('\n\030org.tensorflow.frameworkB\014TensorProtosP\001\370\001\001'),
  serialized_pb=_b('\n\x1ftensorboardX/proto/tensor.proto\x12\x0ctensorboardX\x1a(tensorboardX/proto/resource_handle.proto\x1a%tensorboardX/proto/tensor_shape.proto\x1a\x1etensorboardX/proto/types.proto\"\xa9\x03\n\x0bTensorProto\x12%\n\x05\x64type\x18\x01 \x01(\x0e\x32\x16.tensorboardX.DataType\x12\x34\n\x0ctensor_shape\x18\x02 \x01(\x0b\x32\x1e.tensorboardX.TensorShapeProto\x12\x16\n\x0eversion_number\x18\x03 \x01(\x05\x12\x16\n\x0etensor_content\x18\x04 \x01(\x0c\x12\x14\n\x08half_val\x18\r \x03(\x05\x42\x02\x10\x01\x12\x15\n\tfloat_val\x18\x05 \x03(\x02\x42\x02\x10\x01\x12\x16\n\ndouble_val\x18\x06 \x03(\x01\x42\x02\x10\x01\x12\x13\n\x07int_val\x18\x07 \x03(\x05\x42\x02\x10\x01\x12\x12\n\nstring_val\x18\x08 \x03(\x0c\x12\x18\n\x0cscomplex_val\x18\t \x03(\x02\x42\x02\x10\x01\x12\x15\n\tint64_val\x18\n \x03(\x03\x42\x02\x10\x01\x12\x14\n\x08\x62ool_val\x18\x0b \x03(\x08\x42\x02\x10\x01\x12\x18\n\x0c\x64\x63omplex_val\x18\x0c \x03(\x01\x42\x02\x10\x01\x12>\n\x13resource_handle_val\x18\x0e \x03(\x0b\x32!.tensorboardX.ResourceHandleProtoB-\n\x18org.tensorflow.frameworkB\x0cTensorProtosP\x01\xf8\x01\x01\x62\x06proto3')
  ,
  dependencies=[tensorboardX_dot_proto_dot_resource__handle__pb2.DESCRIPTOR,tensorboardX_dot_proto_dot_tensor__shape__pb2.DESCRIPTOR,tensorboardX_dot_proto_dot_types__pb2.DESCRIPTOR,])


# Descriptor for the top-level message tensorboardX.TensorProto.
# `index` is the declaration order in tensor.proto, not the field number (e.g.
# `half_val` is index 4 but field number 13). The fields declared with
# [packed = true] in tensor.proto are the ones carrying
# serialized_options '\020\001'. The enum type of `dtype` and the message types
# of `tensor_shape` and `resource_handle_val` are None here and are filled in
# further down in this module.
_TENSORPROTO = _descriptor.Descriptor(
  name='TensorProto',
  full_name='tensorboardX.TensorProto',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='dtype', full_name='tensorboardX.TensorProto.dtype', index=0,
      number=1, type=14, cpp_type=8, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='tensor_shape', full_name='tensorboardX.TensorProto.tensor_shape', index=1,
      number=2, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='version_number', full_name='tensorboardX.TensorProto.version_number', index=2,
      number=3, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='tensor_content', full_name='tensorboardX.TensorProto.tensor_content', index=3,
      number=4, type=12, cpp_type=9, label=1,
      has_default_value=False, default_value=_b(""),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='half_val', full_name='tensorboardX.TensorProto.half_val', index=4,
      number=13, type=5, cpp_type=1, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=_b('\020\001'), file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='float_val', full_name='tensorboardX.TensorProto.float_val', index=5,
      number=5, type=2, cpp_type=6, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=_b('\020\001'), file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='double_val', full_name='tensorboardX.TensorProto.double_val', index=6,
      number=6, type=1, cpp_type=5, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=_b('\020\001'), file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='int_val', full_name='tensorboardX.TensorProto.int_val', index=7,
      number=7, type=5, cpp_type=1, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=_b('\020\001'), file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='string_val', full_name='tensorboardX.TensorProto.string_val', index=8,
      number=8, type=12, cpp_type=9, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='scomplex_val', full_name='tensorboardX.TensorProto.scomplex_val', index=9,
      number=9, type=2, cpp_type=6, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=_b('\020\001'), file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='int64_val', full_name='tensorboardX.TensorProto.int64_val', index=10,
      number=10, type=3, cpp_type=2, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=_b('\020\001'), file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='bool_val', full_name='tensorboardX.TensorProto.bool_val', index=11,
      number=11, type=8, cpp_type=7, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=_b('\020\001'), file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='dcomplex_val', full_name='tensorboardX.TensorProto.dcomplex_val', index=12,
      number=12, type=1, cpp_type=5, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=_b('\020\001'), file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='resource_handle_val', full_name='tensorboardX.TensorProto.resource_handle_val', index=13,
      number=14, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=163,
  serialized_end=588,
)

# Resolve the references to types defined in the imported *_pb2 modules, which
# were left as None when _TENSORPROTO was constructed, then expose TensorProto
# on the file descriptor and register the file with the default symbol database.
_TENSORPROTO.fields_by_name['dtype'].enum_type = tensorboardX_dot_proto_dot_types__pb2._DATATYPE
_TENSORPROTO.fields_by_name['tensor_shape'].message_type = tensorboardX_dot_proto_dot_tensor__shape__pb2._TENSORSHAPEPROTO
_TENSORPROTO.fields_by_name['resource_handle_val'].message_type = tensorboardX_dot_proto_dot_resource__handle__pb2._RESOURCEHANDLEPROTO
DESCRIPTOR.message_types_by_name['TensorProto'] = _TENSORPROTO
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

# Build the TensorProto message class from its descriptor and register it with
# the default symbol database.
TensorProto = _reflection.GeneratedProtocolMessageType('TensorProto', (_message.Message,), dict(
  DESCRIPTOR = _TENSORPROTO,
  __module__ = 'tensorboardX.proto.tensor_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.TensorProto)
  ))
_sym_db.RegisterMessage(TensorProto)


# Clear the `_options` attribute on the file descriptor and on every field that
# was constructed with serialized_options (the packed repeated fields).
# NOTE(review): presumably this makes the protobuf runtime derive the options
# from serialized_options on demand -- confirm against the installed runtime.
DESCRIPTOR._options = None
_TENSORPROTO.fields_by_name['half_val']._options = None
_TENSORPROTO.fields_by_name['float_val']._options = None
_TENSORPROTO.fields_by_name['double_val']._options = None
_TENSORPROTO.fields_by_name['int_val']._options = None
_TENSORPROTO.fields_by_name['scomplex_val']._options = None
_TENSORPROTO.fields_by_name['int64_val']._options = None
_TENSORPROTO.fields_by_name['bool_val']._options = None
_TENSORPROTO.fields_by_name['dcomplex_val']._options = None
# @@protoc_insertion_point(module_scope)


================================================
FILE: tensorboardX/tensorboardX/proto/tensor_shape.proto
================================================
// Protocol buffer representing the shape of tensors.

syntax = "proto3";
option cc_enable_arenas = true;
option java_outer_classname = "TensorShapeProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

package tensorboardX;

// Dimensions of a tensor.
//
// Note: field number 1 is not used at this level; `dim` is field 2 and
// `unknown_rank` is field 3.
message TensorShapeProto {
  // One dimension of the tensor.
  message Dim {
    // Size of the tensor in that dimension.
    // This value must be >= -1, but values of -1 are reserved for "unknown"
    // shapes (values of -1 mean "unknown" dimension).  Certain wrappers
    // that work with TensorShapeProto may fail at runtime when deserializing
    // a TensorShapeProto containing a dim value of -1.
    int64 size = 1;

    // Optional name of the tensor dimension.
    string name = 2;
  };

  // Dimensions of the tensor, such as {"input", 30}, {"output", 40}
  // for a 30 x 40 2D tensor.  If an entry has size -1, this
  // corresponds to a dimension of unknown size. The names are
  // optional.
  //
  // The order of entries in "dim" matters: It indicates the layout of the
  // values in the tensor in-memory representation.
  //
  // The first entry in "dim" is the outermost dimension used to layout the
  // values, the last entry is the innermost dimension.  This matches the
  // in-memory layout of RowMajor Eigen tensors.
  //
  // If "dim.size()" > 0, "unknown_rank" must be false.
  repeated Dim dim = 2;

  // If true, the number of dimensions in the shape is unknown.
  //
  // If true, "dim.size()" must be 0.
  // (Together with the rule on "dim" above: the two fields are mutually
  // exclusive — either dims are listed or the rank is flagged unknown.)
  bool unknown_rank = 3;
};


================================================
FILE: tensorboardX/tensorboardX/proto/tensor_shape_pb2.py
================================================
# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: tensorboardX/proto/tensor_shape.proto

import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


DESCRIPTOR = _descriptor.FileDescriptor(
  name='tensorboardX/proto/tensor_shape.proto',
  package='tensorboardX',
  syntax='proto3',
  serialized_options=_b('\n\030org.tensorflow.frameworkB\021TensorShapeProtosP\001\370\001\001'),
  serialized_pb=_b('\n%tensorboardX/proto/tensor_shape.proto\x12\x0ctensorboardX\"|\n\x10TensorShapeProto\x12/\n\x03\x64im\x18\x02 \x03(\x0b\x32\".tensorboardX.TensorShapeProto.Dim\x12\x14\n\x0cunknown_rank\x18\x03 \x01(\x08\x1a!\n\x03\x44im\x12\x0c\n\x04size\x18\x01 \x01(\x03\x12\x0c\n\x04name\x18\x02 \x01(\tB2\n\x18org.tensorflow.frameworkB\x11TensorShapeProtosP\x01\xf8\x01\x01\x62\x06proto3')
)


# Descriptor for the nested message tensorboardX.TensorShapeProto.Dim.
# It declares two singular fields: `size` (field number 1, default 0) and
# `name` (field number 2, default empty string).  `containing_type` is left
# as None here and is assigned after _TENSORSHAPEPROTO has been created.
_TENSORSHAPEPROTO_DIM = _descriptor.Descriptor(
  name='Dim',
  full_name='tensorboardX.TensorShapeProto.Dim',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='size', full_name='tensorboardX.TensorShapeProto.Dim.size', index=0,
      number=1, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='name', full_name='tensorboardX.TensorShapeProto.Dim.name', index=1,
      number=2, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=146,
  serialized_end=179,
)

# Descriptor for the top-level message tensorboardX.TensorShapeProto.
# It declares the repeated field `dim` (field number 2) and the singular
# field `unknown_rank` (field number 3, default False), and nests the Dim
# descriptor.  The message type of `dim` is left as None here and is linked
# to _TENSORSHAPEPROTO_DIM right after this definition.
_TENSORSHAPEPROTO = _descriptor.Descriptor(
  name='TensorShapeProto',
  full_name='tensorboardX.TensorShapeProto',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='dim', full_name='tensorboardX.TensorShapeProto.dim', index=0,
      number=2, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='unknown_rank', full_name='tensorboardX.TensorShapeProto.unknown_rank', index=1,
      number=3, type=8, cpp_type=7, label=1,
      has_default_value=False, default_value=False,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[_TENSORSHAPEPROTO_DIM, ],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=55,
  serialized_end=179,
)

# Wire up the references that were left as None in the descriptor literals:
# Dim's parent message and the message type of the `dim` field.  Then
# publish the top-level descriptor under its message name.
_TENSORSHAPEPROTO_DIM.containing_type = _TENSORSHAPEPROTO
_TENSORSHAPEPROTO.fields_by_name['dim'].message_type = _TENSORSHAPEPROTO_DIM
DESCRIPTOR.message_types_by_name['TensorShapeProto'] = _TENSORSHAPEPROTO
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

# Concrete Python message classes.  The nested `Dim` class is created inline
# and passed as a class attribute of `TensorShapeProto`.
TensorShapeProto = _reflection.GeneratedProtocolMessageType('TensorShapeProto', (_message.Message,), dict(

  Dim = _reflection.GeneratedProtocolMessageType('Dim', (_message.Message,), dict(
    DESCRIPTOR = _TENSORSHAPEPROTO_DIM,
    __module__ = 'tensorboardX.proto.tensor_shape_pb2'
    # @@protoc_insertion_point(class_scope:tensorboardX.TensorShapeProto.Dim)
    ))
  ,
  DESCRIPTOR = _TENSORSHAPEPROTO,
  __module__ = 'tensorboardX.proto.tensor_shape_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.TensorShapeProto)
  ))
# Both the outer and the nested class are registered with the symbol database.
_sym_db.RegisterMessage(TensorShapeProto)
_sym_db.RegisterMessage(TensorShapeProto.Dim)


DESCRIPTOR._options = None
# @@protoc_insertion_point(module_scope)


================================================
FILE: tensorboardX/tensorboardX/proto/types.proto
================================================
syntax = "proto3";

package tensorboardX;
option cc_enable_arenas = true;
option java_outer_classname = "TypesProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

// LINT.IfChange
// Element data types.  Values 1-20 are the base types; values 101-120 are
// the matching "_REF" variants (see the note above DT_FLOAT_REF).
enum DataType {
  // Not a legal value for DataType.  Used to indicate a DataType field
  // has not been set.
  DT_INVALID = 0;

  // Data types that all computation devices are expected to be
  // capable to support.
  DT_FLOAT = 1;
  DT_DOUBLE = 2;
  DT_INT32 = 3;
  DT_UINT8 = 4;
  DT_INT16 = 5;
  DT_INT8 = 6;
  DT_STRING = 7;
  DT_COMPLEX64 = 8;  // Single-precision complex
  DT_INT64 = 9;
  DT_BOOL = 10;
  DT_QINT8 = 11;     // Quantized int8
  DT_QUINT8 = 12;    // Quantized uint8
  DT_QINT32 = 13;    // Quantized int32
  DT_BFLOAT16 = 14;  // Float32 truncated to 16 bits.  Only for cast ops.
  DT_QINT16 = 15;    // Quantized int16
  DT_QUINT16 = 16;   // Quantized uint16
  DT_UINT16 = 17;
  DT_COMPLEX128 = 18;  // Double-precision complex
  DT_HALF = 19;
  DT_RESOURCE = 20;

  // TODO(josh11b): DT_GENERIC_PROTO = ??;
  // TODO(jeff,josh11b): DT_UINT64?  DT_UINT32?

  // Do not use!  These are only for parameters.  Every enum above
  // should have a corresponding value below (verified by types_test).
  // Each *_REF value equals the corresponding base value plus 100.
  DT_FLOAT_REF = 101;
  DT_DOUBLE_REF = 102;
  DT_INT32_REF = 103;
  DT_UINT8_REF = 104;
  DT_INT16_REF = 105;
  DT_INT8_REF = 106;
  DT_STRING_REF = 107;
  DT_COMPLEX64_REF = 108;
  DT_INT64_REF = 109;
  DT_BOOL_REF = 110;
  DT_QINT8_REF = 111;
  DT_QUINT8_REF = 112;
  DT_QINT32_REF = 113;
  DT_BFLOAT16_REF = 114;
  DT_QINT16_REF = 115;
  DT_QUINT16_REF = 116;
  DT_UINT16_REF = 117;
  DT_COMPLEX128_REF = 118;
  DT_HALF_REF = 119;
  DT_RESOURCE_REF = 120;
}
// LINT.ThenChange(https://www.tensorflow.org/code/tensorflow/c/c_api.h,https://www.tensorflow.org/code/tensorflow/go/tensor.go)


================================================
FILE: tensorboardX/tensorboardX/proto/types_pb2.py
================================================
# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: tensorboardX/proto/types.proto

import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf.internal import enum_type_wrapper
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


DESCRIPTOR = _descriptor.FileDescriptor(
  name='tensorboardX/proto/types.proto',
  package='tensorboardX',
  syntax='proto3',
  serialized_options=_b('\n\030org.tensorflow.frameworkB\013TypesProtosP\001\370\001\001'),
  serialized_pb=_b('\n\x1etensorboardX/proto/types.proto\x12\x0ctensorboardX*\xc2\x05\n\x08\x44\x61taType\x12\x0e\n\nDT_INVALID\x10\x00\x12\x0c\n\x08\x44T_FLOAT\x10\x01\x12\r\n\tDT_DOUBLE\x10\x02\x12\x0c\n\x08\x44T_INT32\x10\x03\x12\x0c\n\x08\x44T_UINT8\x10\x04\x12\x0c\n\x08\x44T_INT16\x10\x05\x12\x0b\n\x07\x44T_INT8\x10\x06\x12\r\n\tDT_STRING\x10\x07\x12\x10\n\x0c\x44T_COMPLEX64\x10\x08\x12\x0c\n\x08\x44T_INT64\x10\t\x12\x0b\n\x07\x44T_BOOL\x10\n\x12\x0c\n\x08\x44T_QINT8\x10\x0b\x12\r\n\tDT_QUINT8\x10\x0c\x12\r\n\tDT_QINT32\x10\r\x12\x0f\n\x0b\x44T_BFLOAT16\x10\x0e\x12\r\n\tDT_QINT16\x10\x0f\x12\x0e\n\nDT_QUINT16\x10\x10\x12\r\n\tDT_UINT16\x10\x11\x12\x11\n\rDT_COMPLEX128\x10\x12\x12\x0b\n\x07\x44T_HALF\x10\x13\x12\x0f\n\x0b\x44T_RESOURCE\x10\x14\x12\x10\n\x0c\x44T_FLOAT_REF\x10\x65\x12\x11\n\rDT_DOUBLE_REF\x10\x66\x12\x10\n\x0c\x44T_INT32_REF\x10g\x12\x10\n\x0c\x44T_UINT8_REF\x10h\x12\x10\n\x0c\x44T_INT16_REF\x10i\x12\x0f\n\x0b\x44T_INT8_REF\x10j\x12\x11\n\rDT_STRING_REF\x10k\x12\x14\n\x10\x44T_COMPLEX64_REF\x10l\x12\x10\n\x0c\x44T_INT64_REF\x10m\x12\x0f\n\x0b\x44T_BOOL_REF\x10n\x12\x10\n\x0c\x44T_QINT8_REF\x10o\x12\x11\n\rDT_QUINT8_REF\x10p\x12\x11\n\rDT_QINT32_REF\x10q\x12\x13\n\x0f\x44T_BFLOAT16_REF\x10r\x12\x11\n\rDT_QINT16_REF\x10s\x12\x12\n\x0e\x44T_QUINT16_REF\x10t\x12\x11\n\rDT_UINT16_REF\x10u\x12\x15\n\x11\x44T_COMPLEX128_REF\x10v\x12\x0f\n\x0b\x44T_HALF_REF\x10w\x12\x13\n\x0f\x44T_RESOURCE_REF\x10xB,\n\x18org.tensorflow.frameworkB\x0bTypesProtosP\x01\xf8\x01\x01\x62\x06proto3')
)

# Descriptor for the enum tensorboardX.DataType.
# `index` is the position of a value in this list (0-40); `number` is the
# enum value itself: 0-20 for the base types and 101-120 for the *_REF
# variants, so index and number diverge from DT_FLOAT_REF onwards.
_DATATYPE = _descriptor.EnumDescriptor(
  name='DataType',
  full_name='tensorboardX.DataType',
  filename=None,
  file=DESCRIPTOR,
  values=[
    _descriptor.EnumValueDescriptor(
      name='DT_INVALID', index=0, number=0,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_FLOAT', index=1, number=1,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_DOUBLE', index=2, number=2,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_INT32', index=3, number=3,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_UINT8', index=4, number=4,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_INT16', index=5, number=5,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_INT8', index=6, number=6,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_STRING', index=7, number=7,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_COMPLEX64', index=8, number=8,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_INT64', index=9, number=9,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_BOOL', index=10, number=10,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_QINT8', index=11, number=11,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_QUINT8', index=12, number=12,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_QINT32', index=13, number=13,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_BFLOAT16', index=14, number=14,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_QINT16', index=15, number=15,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_QUINT16', index=16, number=16,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_UINT16', index=17, number=17,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_COMPLEX128', index=18, number=18,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_HALF', index=19, number=19,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_RESOURCE', index=20, number=20,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_FLOAT_REF', index=21, number=101,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_DOUBLE_REF', index=22, number=102,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_INT32_REF', index=23, number=103,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_UINT8_REF', index=24, number=104,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_INT16_REF', index=25, number=105,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_INT8_REF', index=26, number=106,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_STRING_REF', index=27, number=107,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_COMPLEX64_REF', index=28, number=108,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_INT64_REF', index=29, number=109,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_BOOL_REF', index=30, number=110,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_QINT8_REF', index=31, number=111,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_QUINT8_REF', index=32, number=112,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_QINT32_REF', index=33, number=113,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_BFLOAT16_REF', index=34, number=114,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_QINT16_REF', index=35, number=115,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_QUINT16_REF', index=36, number=116,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_UINT16_REF', index=37, number=117,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_COMPLEX128_REF', index=38, number=118,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_HALF_REF', index=39, number=119,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='DT_RESOURCE_REF', index=40, number=120,
      serialized_options=None,
      type=None),
  ],
  containing_type=None,
  serialized_options=None,
  serialized_start=49,
  serialized_end=755,
)
_sym_db.RegisterEnumDescriptor(_DATATYPE)

# Wrapper object around the DataType enum descriptor.
DataType = enum_type_wrapper.EnumTypeWrapper(_DATATYPE)
# Module-level integer constants, one per enum value; the numbers match the
# `number` of the corresponding EnumValueDescriptor in _DATATYPE.
DT_INVALID = 0
DT_FLOAT = 1
DT_DOUBLE = 2
DT_INT32 = 3
DT_UINT8 = 4
DT_INT16 = 5
DT_INT8 = 6
DT_STRING = 7
DT_COMPLEX64 = 8
DT_INT64 = 9
DT_BOOL = 10
DT_QINT8 = 11
DT_QUINT8 = 12
DT_QINT32 = 13
DT_BFLOAT16 = 14
DT_QINT16 = 15
DT_QUINT16 = 16
DT_UINT16 = 17
DT_COMPLEX128 = 18
DT_HALF = 19
DT_RESOURCE = 20
# *_REF variants: base value + 100.
DT_FLOAT_REF = 101
DT_DOUBLE_REF = 102
DT_INT32_REF = 103
DT_UINT8_REF = 104
DT_INT16_REF = 105
DT_INT8_REF = 106
DT_STRING_REF = 107
DT_COMPLEX64_REF = 108
DT_INT64_REF = 109
DT_BOOL_REF = 110
DT_QINT8_REF = 111
DT_QUINT8_REF = 112
DT_QINT32_REF = 113
DT_BFLOAT16_REF = 114
DT_QINT16_REF = 115
DT_QUINT16_REF = 116
DT_UINT16_REF = 117
DT_COMPLEX128_REF = 118
DT_HALF_REF = 119
DT_RESOURCE_REF = 120


# Publish the enum descriptor under its name on the file descriptor and
# register the file with the symbol database.
DESCRIPTOR.enum_types_by_name['DataType'] = _DATATYPE
_sym_db.RegisterFileDescriptor(DESCRIPTOR)


DESCRIPTOR._options = None
# @@protoc_insertion_point(module_scope)


================================================
FILE: tensorboardX/tensorboardX/proto/versions.proto
================================================
syntax = "proto3";

package tensorboardX;
option cc_enable_arenas = true;
option java_outer_classname = "VersionsProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

// Version information for a piece of serialized data
//
// There are different types of versions for each type of data
// (GraphDef, etc.), but they all have the same common shape
// described here.
//
// Each consumer has "consumer" and "min_producer" versions (specified
// elsewhere).  A consumer is allowed to consume this data if
//
//   producer >= min_producer
//   consumer >= min_consumer
//   consumer not in bad_consumers
//
message VersionDef {
  // The version of the code that produced this data.
  // This is the "producer" compared against a consumer's min_producer in
  // the rule above.
  int32 producer = 1;

  // Any consumer below this version is not allowed to consume this data.
  int32 min_consumer = 2;

  // Specific consumer versions which are disallowed (e.g. due to bugs).
  // A consumer whose version appears here is rejected even when it
  // satisfies min_consumer.
  repeated int32 bad_consumers = 3;
};


================================================
FILE: tensorboardX/tensorboardX/proto/versions_pb2.py
================================================
# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: tensorboardX/proto/versions.proto

import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


DESCRIPTOR = _descriptor.FileDescriptor(
  name='tensorboardX/proto/versions.proto',
  package='tensorboardX',
  syntax='proto3',
  serialized_options=_b('\n\030org.tensorflow.frameworkB\016VersionsProtosP\001\370\001\001'),
  serialized_pb=_b('\n!tensorboardX/proto/versions.proto\x12\x0ctensorboardX\"K\n\nVersionDef\x12\x10\n\x08producer\x18\x01 \x01(\x05\x12\x14\n\x0cmin_consumer\x18\x02 \x01(\x05\x12\x15\n\rbad_consumers\x18\x03 \x03(\x05\x42/\n\x18org.tensorflow.frameworkB\x0eVersionsProtosP\x01\xf8\x01\x01\x62\x06proto3')
)


# Descriptor for the message tensorboardX.VersionDef.
# It declares three fields: `producer` (field number 1) and `min_consumer`
# (field number 2), both singular with default 0, and the repeated field
# `bad_consumers` (field number 3).
_VERSIONDEF = _descriptor.Descriptor(
  name='VersionDef',
  full_name='tensorboardX.VersionDef',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='producer', full_name='tensorboardX.VersionDef.producer', index=0,
      number=1, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='min_consumer', full_name='tensorboardX.VersionDef.min_consumer', index=1,
      number=2, type=5, cpp_type=1, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='bad_consumers', full_name='tensorboardX.VersionDef.bad_consumers', index=2,
      number=3, type=5, cpp_type=1, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=51,
  serialized_end=126,
)

# Publish the descriptor under its message name and register the file with
# the symbol database.
DESCRIPTOR.message_types_by_name['VersionDef'] = _VERSIONDEF
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

# Concrete Python message class built from the descriptor above.
VersionDef = _reflection.GeneratedProtocolMessageType('VersionDef', (_message.Message,), dict(
  DESCRIPTOR = _VERSIONDEF,
  __module__ = 'tensorboardX.proto.versions_pb2'
  # @@protoc_insertion_point(class_scope:tensorboardX.VersionDef)
  ))
_sym_db.RegisterMessage(VersionDef)


DESCRIPTOR._options = None
# @@protoc_insertion_point(module_scope)


================================================
FILE: tensorboardX/tensorboardX/proto_graph.py
================================================
from .proto.graph_pb2 import GraphDef
from .proto.node_def_pb2 import NodeDef
from .proto.versions_pb2 import VersionDef
from .proto.attr_value_pb2 import AttrValue
from .proto.tensor_shape_pb2 import TensorShapeProto


def attr_value_proto(dtype, shape, s):
    """Builds the attribute mapping attached to a NodeDef.

    The layout follows
    https://github.com/tensorflow/tensorboard/blob/master/tensorboard/compat/proto/attr_value.proto
    and the keys were reverse engineered from data logged by standard
    TensorBoard.

    Args:
      dtype: Accepted for interface compatibility; not used by this function.
      shape: Iterable of dimension sizes. When None, the
        ``_output_shapes`` entry is omitted.
      s: Text stored, UTF-8 encoded, under the ``attr`` key. When None, the
        ``attr`` entry is omitted.

    Returns:
      A dict mapping attribute names to ``AttrValue`` messages. It is empty
      when both ``shape`` and ``s`` are None.
    """
    result = {}
    if s is not None:
        encoded = s.encode(encoding='utf_8')
        result['attr'] = AttrValue(s=encoded)
    if shape is not None:
        # A single shape wrapped in a one-element list value.
        shape_list = AttrValue.ListValue(shape=[tensor_shape_proto(shape)])
        result['_output_shapes'] = AttrValue(list=shape_list)
    return result


def tensor_shape_proto(outputsize):
    """Converts an iterable of dimension sizes into a ``TensorShapeProto``.

    The resulting message matches
    https://github.com/tensorflow/tensorboard/blob/master/tensorboard/compat/proto/tensor_shape.proto

    Args:
      outputsize: Iterable of dimension sizes; one ``Dim`` is emitted per
        element, in iteration order.

    Returns:
      A ``TensorShapeProto`` whose ``dim`` entries carry the given sizes.
    """
    dims = []
    for extent in outputsize:
        dims.append(TensorShapeProto.Dim(size=extent))
    return TensorShapeProto(dim=dims)


def node_proto(name,
               op='UnSpecified',
               input=None,
               dtype=None,
               shape=None,  # type: tuple
               outputsize=None,
               attributes=''
               ):
    """Builds a ``NodeDef`` message for one graph node.

    The resulting message matches
    https://github.com/tensorflow/tensorboard/blob/master/tensorboard/compat/proto/node_def.proto

    Args:
      name: Node name; it is UTF-8 encoded before being stored.
      op: Operation name stored in the ``op`` field.
      input: Name(s) of the input nodes. None becomes an empty list and any
        other non-list value is wrapped in a one-element list.
      dtype: Forwarded to ``attr_value_proto``.
      shape: Not used by this function.
      outputsize: Output dimension sizes, forwarded to ``attr_value_proto``
        as the shape.
      attributes: Attribute text, forwarded to ``attr_value_proto``.

    Returns:
      The populated ``NodeDef``.
    """
    # Normalise ``input`` so NodeDef always receives a list.
    if input is None:
        input = []
    elif not isinstance(input, list):
        input = [input]

    encoded_name = name.encode(encoding='utf_8')
    node_attrs = attr_value_proto(dtype, outputsize, attributes)
    return NodeDef(name=encoded_name, op=op, input=input, attr=node_attrs)


================================================
FILE: tensorboardX/tensorboardX/pytorch_graph.py
================================================
import logging
import time
from collections import OrderedDict
from .proto.attr_value_pb2 import AttrValue
from .proto.graph_pb2 import GraphDef
from .proto.node_def_pb2 import NodeDef
from .proto.step_stats_pb2 import RunMetadata, StepStats, DeviceStepStats, NodeExecStats, AllocatorMemoryUsed
from .proto.tensor_shape_pb2 import TensorShapeProto
from .proto.versions_pb2 import VersionDef
from .proto_graph import node_proto

# Names of the zero-argument methods that NodePy calls on a C++ operator
# node (passed in by NodePyOP); each result is stored on the Python node
# under the same attribute name.
methods_OP = ['attributeNames', 'hasMultipleOutputs', 'hasUses', 'inputs',
              'kind', 'outputs', 'outputsSize', 'scopeName']
# Same idea for input/output value nodes (passed in by NodePyIO).
methods_IO = ['node', 'offset', 'debugName']  # 'unique' <int> , 'type' <Tensor<class 'torch._C.Type'>>

# Module-wide switch: parse() sets it to True when calling debugName() on a
# graph input fails; NodePy then reads names via uniqueName() instead.
backward_mode = False

class NodeBase(object):
    """Plain record holding the data recorded for one graph node.

    Attributes:
      debugName: Identifier of the node.
      inputs: Names of the nodes feeding this one.
      tensor_size: Size of the node's output, if known.
      kind: Operation type string (from ``op_type``).
      attributes: Attribute text for the node.
      scope: Scope name; the attribute only exists when a non-None ``scope``
        was passed to the constructor.
    """

    def __init__(self,
                 debugName=None,
                 inputs=None,
                 scope=None,
                 tensor_size=None,
                 op_type='UnSpecified',
                 attributes=''):
        # ``scope`` is deliberately left undefined when not supplied, so
        # callers can probe for it with hasattr().
        if scope is not None:
            self.scope = scope
        self.attributes = attributes
        self.kind = op_type
        self.tensor_size = tensor_size
        self.inputs = inputs
        self.debugName = debugName

    def __repr__(self):
        """Lists the type and every non-dunder attribute as ``name: value<type>``."""
        lines = [str(type(self))]
        for attr_name in dir(self):
            if '__' in attr_name:
                continue
            value = getattr(self, attr_name)
            lines.append(attr_name + ': ' + str(value) + str(type(value)))
        return '\n'.join(lines) + '\n\n'


class NodePy(NodeBase):
    """Python-side snapshot of a C++ graph node.

    For every name in ``valid_methods`` the same-named zero-argument method
    is called on ``node_cpp`` and the result is stored as an attribute.
    ``inputs`` and ``outputs`` are special-cased: they are stored as lists of
    value names, with the matching sizes stored under ``inputstensor_size`` /
    ``outputstensor_size`` (None where the value's type kind is not
    'CompleteTensorType').
    """

    def __init__(self, node_cpp, valid_methods):
        global backward_mode
        super(NodePy, self).__init__(node_cpp)
        self.inputs = []
        for method_name in valid_methods[:]:
            if method_name not in ('inputs', 'outputs'):
                # In backward mode the name is read via uniqueName() rather
                # than debugName(); all other methods are called as named.
                if method_name == 'debugName' and backward_mode:
                    source_name = 'uniqueName'
                else:
                    source_name = method_name
                setattr(self, method_name, getattr(node_cpp, source_name)())
                continue

            value_names = []
            value_sizes = []
            for value in list(getattr(node_cpp, method_name)()):
                value_names.append(value.uniqueName() if backward_mode else value.debugName())
                has_sizes = value.type().kind() == 'CompleteTensorType'
                value_sizes.append(value.type().sizes() if has_sizes else None)

            setattr(self, method_name, value_names)
            setattr(self, method_name + 'tensor_size', value_sizes)


class NodePyIO(NodePy):
    """Node for a graph input, output or parameter value.

    Args:
      node_cpp: The C++ value node to mirror.
      input_or_output: Optional label for the node. When truthy it is stored
        as ``input_or_output`` and the node kind becomes 'IO Node';
        otherwise the kind is 'Parameter'.
    """

    def __init__(self, node_cpp, input_or_output=None):
        super(NodePyIO, self).__init__(node_cpp, methods_IO)
        try:
            self.tensor_size = node_cpp.type().sizes()
        except RuntimeError:
            # fail when constant model is used.
            self.tensor_size = [1, ]

        # The kind string is purely descriptive; it is shown in the detailed
        # information for the node in TensorBoard's graph plugin. NodePyOP
        # nodes take theirs from the kind() method instead.
        if input_or_output:
            self.input_or_output = input_or_output
            self.kind = 'IO Node'
        else:
            self.kind = 'Parameter'


class NodePyOP(NodePy):
    """Node for an operator in the graph.

    Besides the values collected by NodePy, it stores the operator's
    attributes as text in ``attributes`` and its ``kind()`` in ``kind``.
    """

    def __init__(self, node_cpp):
        super(NodePyOP, self).__init__(node_cpp, methods_OP)
        attr_map = {}
        for attr_name in node_cpp.attributeNames():
            attr_map[attr_name] = node_cpp[attr_name]
        # Replace single quote which causes strange behavior in TensorBoard
        # TODO: See if we can remove this in the future
        self.attributes = str(attr_map).replace("'", ' ')
        self.kind = node_cpp.kind()


class GraphPy(object):
    """Collects parsed graph nodes and converts them to TensorBoard protos.

    The conversion works in two passes.

    Pass one (``append``) sorts nodes into two containers: ``nodes_io``
    holds input/output/parameter nodes plus one ``NodeBase`` entry per
    operator output, keyed by name; ``nodes_op`` holds the operator nodes.
    Every operator scope name seen is also recorded in
    ``scope_name_appeared``.

    Pass two (``find_common_root`` then ``populate_namespace_from_OP_to_IO``)
    applies scope names. ``unique_name_to_scoped_name`` maps a node's id to
    its scoped name, e.g. Net1/Linear[0]/1. torch.jit does not report fully
    correct scopes, so names are assigned heuristically from the connections
    between nodes, with ``shallowest_scope_name`` used as the fallback prefix
    for nodes whose scope is empty.
    """

    def __init__(self):
        self.scope_name_appeared = []
        self.shallowest_scope_name = 'default'
        self.unique_name_to_scoped_name = {}
        self.nodes_io = OrderedDict()
        self.nodes_op = []

    def append(self, x):
        """Files ``x`` under nodes_io (NodePyIO) or nodes_op (NodePyOP).

        For an operator node, one NodeBase per output is also added to
        nodes_io and the operator's scope name is recorded once per output.
        """
        if isinstance(x, NodePyIO):
            self.nodes_io[x.debugName] = x
        if not isinstance(x, NodePyOP):
            return

        self.nodes_op.append(x)
        for out_name, out_size in zip(x.outputs, x.outputstensor_size):
            self.scope_name_appeared.append(x.scopeName)
            self.nodes_io[out_name] = NodeBase(debugName=out_name,
                                               inputs=x.inputs,
                                               scope=x.scopeName,
                                               tensor_size=out_size,
                                               op_type=x.kind,
                                               attributes=x.attributes)

    def printall(self):
        """Prints every operator node followed by every nodes_io entry."""
        print('all nodes')
        for op_node in self.nodes_op:
            print(op_node)
        for io_node in self.nodes_io.values():
            print(io_node)

    def find_common_root(self):
        """Sets shallowest_scope_name to the first path component of the
        last non-empty scope name that was recorded (unchanged if none)."""
        for scope in self.scope_name_appeared:
            if not scope:
                continue
            self.shallowest_scope_name = scope.partition('/')[0]

    def populate_namespace_from_OP_to_IO(self):
        """Computes scoped names and rewrites node names/inputs to use them."""
        scoped = self.unique_name_to_scoped_name

        # Every operator input is first placed in the consuming operator's
        # scope; later assignments below may override this.
        for op_node in self.nodes_op:
            for input_id in op_node.inputs:
                scoped[input_id] = op_node.scopeName + '/' + input_id

        for key, node in self.nodes_io.items():
            # Exact type check on purpose: NodePyIO is a NodeBase subclass
            # and must not take this branch.
            if type(node) is NodeBase:
                scoped[key] = node.scope + '/' + node.debugName
            if hasattr(node, 'input_or_output'):
                scoped[key] = node.input_or_output + '/' + node.debugName
            if hasattr(node, 'scope') and node.scope == '' and self.shallowest_scope_name:
                scoped[node.debugName] = self.shallowest_scope_name + '/' + node.debugName

        # replace name
        for node in self.nodes_io.values():
            node.inputs = [scoped[input_id] for input_id in node.inputs]
            if node.debugName in scoped:
                node.debugName = scoped[node.debugName]

    def to_proto(self):
        """
        Converts graph representation of GraphPy object to TensorBoard
        required format.

        Returns:
          A ``(nodes, node_stats)`` tuple: one NodeDef per nodes_io entry
          and one NodeExecStats per entry that has a non-empty tensor_size.
        """
        # TODO: compute correct memory usage and CPU time once
        # PyTorch supports it
        import numpy as np
        node_defs = []
        exec_stats = []
        for entry in self.nodes_io.values():
            node_defs.append(node_proto(entry.debugName,
                                        input=entry.inputs,
                                        outputsize=entry.tensor_size,
                                        op=entry.kind,
                                        attributes=entry.attributes))

            size = entry.tensor_size
            if not size or not len(size) > 0:
                continue

            # assume data is float32, only parameter is counted
            # NOTE(review): the field is named *_micros but the factor is
            # 1e7 rather than 1e6 -- confirm whether this is intended.
            stats = NodeExecStats(node_name=entry.debugName,
                                  all_start_micros=int(time.time() * 1e7),
                                  all_end_rel_micros=42,
                                  memory=[AllocatorMemoryUsed(allocator_name="cpu",
                                                              total_bytes=int(np.prod(size)) * 4)])
            exec_stats.append(stats)

        return node_defs, exec_stats


# one argument: 'hasAttribute', 'hasAttributes',
def parse(graph, args=None, omit_useless_nodes=True):
    """Parse a traced PyTorch graph into TensorBoard nodes and node stats.

    Args:
      graph: The traced graph to be parsed. It must provide ``inputs()``,
        ``nodes()`` and ``outputs()``.
      args (tuple): input tensor[s] for the model. A tuple/list contributes
        one input per element, any other non-None value counts as a single
        input, and ``None`` means no inputs.
      omit_useless_nodes (boolean): Whether to skip graph inputs that have
        no users.

    Returns:
      The ``(nodes, node_stats)`` pair produced by ``GraphPy.to_proto()``.
    """
    # Only a tuple/list has a length equal to the number of model inputs;
    # len() of a lone tensor would be the size of its first dimension, and
    # len(None) (the default) raises TypeError.
    if args is None:
        n_inputs = 0
    elif isinstance(args, (tuple, list)):
        n_inputs = len(args)
    else:
        n_inputs = 1

    global backward_mode
    nodes_py = GraphPy()
    for i, node in enumerate(graph.inputs()):
        if not backward_mode:
            # Probe for the debugName() accessor; if it is unusable, switch
            # the module-wide flag. Only ordinary exceptions are treated as
            # "unsupported" so KeyboardInterrupt/SystemExit still propagate.
            try:
                node.debugName()
            except Exception:
                backward_mode = True
        if omit_useless_nodes and len(node.uses()) == 0:
            # number of user of the node (= number of outputs/ fanout)
            continue

        if i < n_inputs:
            nodes_py.append(NodePyIO(node, 'input'))
        else:
            nodes_py.append(NodePyIO(node))  # parameter

    for node in graph.nodes():
        nodes_py.append(NodePyOP(node))

    # NOTE(review): the output wrappers are constructed but never appended to
    # nodes_py; kept as in the original - confirm whether that is intended.
    for node in graph.outputs():  # must place last.
        NodePyIO(node, 'output')
    nodes_py.find_common_root()
    nodes_py.populate_namespace_from_OP_to_IO()
    return nodes_py.to_proto()


def graph(model, args, verbose=False, **kwargs):
    """
    Trace a PyTorch model and build the protos TensorBoard needs to show
    its graph.

    Args:
      model (PyTorch module): The model to be parsed.
      args (tuple): input tensor[s] for the model.
      verbose (bool): Whether to print the traced graph while processing.
      **kwargs: Accepted but not used by this function.

    Returns:
      A ``(GraphDef, RunMetadata)`` pair.

    Raises:
      RuntimeError: re-raised (after printing it) when tracing fails.
    """
    import torch

    with torch.onnx.set_training(model, False):  # TODO: move outside of torch.onnx
        try:
            traced_graph = torch.jit.trace(model, args).graph
        except RuntimeError as e:
            print(e)
            print('Error occurs, No graph saved')
            raise e

    if verbose:
        print(traced_graph)
    list_of_nodes, node_stats = parse(traced_graph, args)

    # We are hardcoding that this was run on CPU even though it might have actually
    # run on GPU. Note this is what is shown in TensorBoard and has no bearing
    # on actual execution.
    # TODO: See if we can extract GPU vs CPU information from the PyTorch model
    # and pass it correctly to TensorBoard.
    #
    # Definition of StepStats and DeviceStepStats can be found at
    # https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/graph/tf_graph_common/test/graph-test.ts
    # and
    # https://github.com/tensorflow/tensorboard/blob/master/tensorboard/compat/proto/step_stats.proto
    device_stats = DeviceStepStats(device="/device:CPU:0", node_stats=node_stats)
    stepstats = RunMetadata(step_stats=StepStats(dev_stats=[device_stats]))

    # Create an object matching
    # https://github.com/tensorflow/tensorboard/blob/master/tensorboard/compat/proto/graph.proto
    # The producer version has been reverse engineered from standard
    # TensorBoard logged data.
    graph_def = GraphDef(node=list_of_nodes, versions=VersionDef(producer=22))
    return graph_def, stepstats


================================================
FILE: tensorboardX/tensorboardX/record_writer.py
================================================
"""
Write TFRecord-formatted records to a file. Here it is used for writing TensorBoard event files.
The code was borrowed from https://github.com/TeamHG-Memex/tensorboard_logger
"""

import copy
import io
import os.path
import re
import struct
try:
    import boto3
    S3_ENABLED = True
except ImportError:
    S3_ENABLED = False

from .crc32c import crc32c


# Used by make_valid_tf_name: a name must start with a letter, digit or '.'.
_VALID_OP_NAME_START = re.compile('^[A-Za-z0-9.]')
# Used by make_valid_tf_name: runs of characters that are kept as-is
# (letters, digits, '_', '.', '-', '/').
_VALID_OP_NAME_PART = re.compile('[A-Za-z0-9_.\\-/]+')

# Registry of writer factories by prefix backends.
#
# Currently supports "s3://" URLs for S3 based on boto and falls
# back to local filesystem.
#
# Keys are path prefixes (the text before the first ':' of a path); values
# are factory objects providing open(path) and directory_check(path).
REGISTERED_FACTORIES = {}


def register_writer_factory(prefix, factory):
    """Associate `factory` with paths whose scheme is `prefix`.

    Args:
      prefix: Scheme name without the colon (for example ``"s3"``).
      factory: Object stored in ``REGISTERED_FACTORIES`` under `prefix`.

    Raises:
      ValueError: If `prefix` contains a ``:``.
    """
    colon_free = ':' not in prefix
    if colon_free:
        REGISTERED_FACTORIES[prefix] = factory
        return
    raise ValueError('prefix cannot contain a :')


def directory_check(path):
    '''Initialize the directory for log files.

    If the text before the first ':' of `path` names a registered factory,
    the check is delegated to it and its result is returned. Otherwise
    `path` is treated as a local directory and created when missing.

    Args:
      path: Log directory, either a local path or a "<prefix>:..." URL.

    Returns:
      Whatever the factory's ``directory_check`` returns, or None for local
      paths.
    '''
    prefix = path.split(':')[0]
    if prefix in REGISTERED_FACTORIES:
        # Delegate outside of any try/except: a KeyError raised inside the
        # factory must not be mistaken for "no factory registered".
        return REGISTERED_FACTORIES[prefix].directory_check(path)

    if not os.path.exists(path):
        try:
            os.makedirs(path)
        except OSError:
            # Another process may have created the directory between the
            # existence test and makedirs; only fail if it is still missing.
            if not os.path.isdir(path):
                raise


def open_file(path):
    '''Open a writer for outputting event files.

    If the text before the first ':' of `path` names a registered factory,
    that factory's ``open(path)`` result is returned. Otherwise `path` is
    opened as a local file in binary write mode.

    Args:
      path: Destination, either a local path or a "<prefix>:..." URL.

    Returns:
      A writer object (factory-specific, or a local binary file object).
    '''
    prefix = path.split(':')[0]
    if prefix in REGISTERED_FACTORIES:
        # Delegate outside of any try/except: a KeyError raised while the
        # factory opens its writer must surface instead of silently falling
        # back to a local file.
        return REGISTERED_FACTORIES[prefix].open(path)
    return open(path, 'wb')


class S3RecordWriter(object):
    """Writes tensorboard protocol buffer files to S3.

    Data is accumulated in an in-memory buffer; every `flush` uploads the
    whole buffer to the object named by `path`.
    """

    def __init__(self, path):
        """Create a writer for the S3 URL `path`.

        Raises:
          ImportError: If boto3 could not be imported.
        """
        # Set before the dependency check so that close()/__del__ are safe on
        # an instance whose construction failed part-way.
        self._pending_upload = False
        if not S3_ENABLED:
            raise ImportError("boto3 must be installed for S3 support.")
        self.path = path
        self.buffer = io.BytesIO()
        # A freshly created writer still uploads (an empty object) on close.
        self._pending_upload = True

    def __del__(self):
        self.close()

    def bucket_and_path(self):
        """Split `self.path` into ``(bucket, key)``, dropping a leading "s3://"."""
        path = self.path
        if path.startswith("s3://"):
            path = path[len("s3://"):]
        bp = path.split("/")
        bucket = bp[0]
        path = path[1 + len(bucket):]
        return bucket, path

    def write(self, val):
        """Append `val` to the in-memory buffer."""
        self.buffer.write(val)
        self._pending_upload = True

    def flush(self):
        """Upload the complete buffer contents to S3."""
        s3 = boto3.client('s3')
        bucket, path = self.bucket_and_path()
        # Upload a copy so the live buffer and its write position are left
        # untouched by the upload.
        upload_buffer = copy.copy(self.buffer)
        upload_buffer.seek(0)
        s3.upload_fileobj(upload_buffer, bucket, path)
        self._pending_upload = False

    def close(self):
        """Upload anything written since the last flush.

        Does nothing when there is nothing new to upload, so calling it
        repeatedly (or from `__del__` after an explicit close) is cheap.
        """
        if getattr(self, '_pending_upload', False):
            self.flush()


class S3RecordWriterFactory(object):
    """Factory for event protocol buffer files to S3."""

    def open(self, path):
        """Return a new ``S3RecordWriter`` bound to `path`."""
        writer = S3RecordWriter(path)
        return writer

    def directory_check(self, path):
        """Do nothing and return None.

        S3 doesn't need directories created before files are added, so
        there is nothing to check.
        """
        return None


# Registered at import time: paths whose text before the first ':' is "s3"
# are handled by the S3 factory in directory_check() and open_file().
register_writer_factory("s3", S3RecordWriterFactory())


class RecordWriter(object):
    """Frames byte strings as length/CRC-delimited records and writes them
    to the writer returned by ``open_file(path)``."""

    def __init__(self, path):
        """Open the underlying writer for `path`."""
        self._name_to_tf_name = {}
        self._tf_names = set()
        self.path = path
        self._writer = None
        self._writer = open_file(path)

    def write(self, data):
        """Write one record containing `data` (bytes).

        Layout: 8-byte length, 4-byte masked CRC of the length field, the
        payload, 4-byte masked CRC of the payload.
        """
        w = self._writer.write
        # Explicit little-endian, standard-size formats: the on-disk layout
        # must not depend on the byte order of the machine writing it.
        header = struct.pack('<Q', len(data))
        w(header)
        w(struct.pack('<I', masked_crc32c(header)))
        w(data)
        w(struct.pack('<I', masked_crc32c(data)))

    def flush(self):
        """Flush the underlying writer."""
        self._writer.flush()

    def close(self):
        """Close the underlying writer."""
        self._writer.close()


def masked_crc32c(data):
    """Return the masked CRC32C of `data` as an unsigned 32-bit integer.

    The raw checksum is rotated right by 15 bits and a fixed constant is
    added, everything modulo 2**32.
    """
    checksum = u32(crc32c(data))
    low_part = checksum >> 15
    high_part = u32(checksum << 17)
    rotated = low_part | high_part
    return u32(rotated + 0xa282ead8)


def u32(x):
    """Keep only the low 32 bits of the integer `x`."""
    low_32_bits = 0xffffffff
    return x & low_32_bits


def make_valid_tf_name(name):
    """Return `name` rewritten so that it only uses permitted characters.

    A '.' is prepended when the first character is not permitted as a
    start; every run of non-permitted characters then becomes a single '_'
    separator between the permitted runs.
    """
    starts_ok = _VALID_OP_NAME_START.match(name)
    # Must make it valid somehow, but don't want to remove stuff
    candidate = name if starts_ok else '.' + name
    kept_runs = _VALID_OP_NAME_PART.findall(candidate)
    return '_'.join(kept_runs)


================================================
FILE: tensorboardX/tensorboardX/summary.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import numpy as np
import os
import re as _re

# pylint: disable=unused-import
from six.moves import range

from .proto.summary_pb2 import Summary
from .proto.summary_pb2 import HistogramProto
from .proto.summary_pb2 import SummaryMetadata
from .proto.tensor_pb2 import TensorProto
from .proto.tensor_shape_pb2 import TensorShapeProto
from .proto.plugin_pr_curve_pb2 import PrCurvePluginData
from .proto.plugin_text_pb2 import TextPluginData
from .proto.plugin_mesh_pb2 import MeshPluginData
from .proto import layout_pb2
from .x2num import make_np
from .utils import _prepare_video, convert_to_HWC

# Matches any single character that is not a word character, '-', '/' or '.';
# _clean_tag replaces each match with '_'.
_INVALID_TAG_CHARACTERS = _re.compile(r'[^-/\w\.]')


def _clean_tag(name):
    """Sanitize a summary tag so that it is usable as a node name.

    Every character matched by ``_INVALID_TAG_CHARACTERS`` is replaced with
    '_' and leading slashes are stripped. When this changes the name, the
    substitution is reported through ``logging.info``. ``None`` is returned
    unchanged.
    """
    # In the past, the first argument to summary ops was a tag, which allowed
    # arbitrary characters. Now the first argument is the node name, for
    # which a much smaller set of characters is allowed; illegal characters
    # are therefore rewritten here rather than rejected.
    if name is None:
        return name

    sanitized = _INVALID_TAG_CHARACTERS.sub('_', name).lstrip('/')
    if sanitized == name:
        return name

    logging.info(
        'Summary name %s is illegal; using %s instead.' % (name, sanitized))
    return sanitized


def _draw_single_box(image, xmin, ymin, xmax, ymax, display_str, color='black', color_text='black', thickness=2):
    """Draw one rectangle, optionally with a label, onto a PIL image.

    Args:
      image: PIL image to draw on (modified in place).
      xmin, ymin, xmax, ymax: Box corners in pixel coordinates.
      display_str: Label text; nothing is written when it is empty or None.
      color: Colour of the box outline and of the label background.
      color_text: Colour of the label text.
      thickness: Line width of the outline.

    Returns:
      The same `image` object.
    """
    from PIL import ImageDraw, ImageFont
    font = ImageFont.load_default()
    draw = ImageDraw.Draw(image)
    (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
    draw.line([(left, top), (left, bottom), (right, bottom),
               (right, top), (left, top)], width=thickness, fill=color)
    if display_str:
        text_bottom = bottom
        # Font.getsize() was removed in Pillow 10; measure with getbbox()
        # when the old method is not available.
        if hasattr(font, 'getsize'):
            text_width, text_height = font.getsize(display_str)
        else:
            bbox_left, bbox_top, bbox_right, bbox_bottom = font.getbbox(display_str)
            text_width = bbox_right - bbox_left
            text_height = bbox_bottom - bbox_top
        margin = np.ceil(0.05 * text_height)
        # The label sits inside the box, anchored to its bottom-left corner.
        draw.rectangle(
            [(left, text_bottom - text_height - 2 * margin),
             (left + text_width, text_bottom)], fill=color
        )
        draw.text(
            (left + margin, text_bottom - text_height - margin),
            display_str, fill=color_text, font=font
        )
    return image


def hparams(hparam_dict=None, metric_dict=None):
    """Build the three summaries used by the 'hparams' plugin.

    Args:
      hparam_dict: Mapping of hyperparameter name to value. Strings and
        bools are stored as such; every other value is converted with
        ``make_np`` and its first element stored as a number. ``None`` is
        treated as an empty mapping.
      metric_dict: Mapping whose keys are metric tags (the values are not
        read here). ``None`` is treated as an empty mapping.

    Returns:
      A tuple ``(experiment, session_start_info, session_end_info)`` of
      ``Summary`` protos.
    """
    from tensorboardX.proto.plugin_hparams_pb2 import HParamsPluginData, SessionEndInfo, SessionStartInfo
    from tensorboardX.proto.api_pb2 import Experiment, HParamInfo, MetricInfo, MetricName, Status
    from six import string_types

    PLUGIN_NAME = 'hparams'
    PLUGIN_DATA_VERSION = 0

    EXPERIMENT_TAG = '_hparams_/experiment'
    SESSION_START_INFO_TAG = '_hparams_/session_start_info'
    SESSION_END_INFO_TAG = '_hparams_/session_end_info'

    # The signature defaults to None; without this the function failed with
    # AttributeError on `.keys()` when an argument was omitted.
    hparam_dict = {} if hparam_dict is None else hparam_dict
    metric_dict = {} if metric_dict is None else metric_dict

    def _plugin_summary(tag, content):
        # Wrap serialized plugin data into a single-value Summary.
        smd = SummaryMetadata(plugin_data=SummaryMetadata.PluginData(plugin_name=PLUGIN_NAME,
                                                                     content=content.SerializeToString()))
        return Summary(value=[Summary.Value(tag=tag, metadata=smd)])

    # TODO: expose other parameters in the future.
    # hp = HParamInfo(name='lr',display_name='learning rate', type=DataType.DATA_TYPE_FLOAT64, domain_interval=Interval(min_value=10, max_value=100))  # noqa E501
    # mt = MetricInfo(name=MetricName(tag='accuracy'), display_name='accuracy', description='', dataset_type=DatasetType.DATASET_VALIDATION)  # noqa E501
    # exp = Experiment(name='123', description='456', time_created_secs=100.0, hparam_infos=[hp], metric_infos=[mt], user='tw')  # noqa E501

    hps = [HParamInfo(name=k) for k in hparam_dict]
    mts = [MetricInfo(name=MetricName(tag=k)) for k in metric_dict]
    experiment = Experiment(hparam_infos=hps, metric_infos=mts)
    exp = _plugin_summary(
        EXPERIMENT_TAG,
        HParamsPluginData(experiment=experiment, version=PLUGIN_DATA_VERSION))

    start_info = SessionStartInfo()
    for k, v in hparam_dict.items():
        if isinstance(v, string_types):
            start_info.hparams[k].string_value = v
        elif isinstance(v, bool):
            # bool is tested separately so True/False are not stored as numbers.
            start_info.hparams[k].bool_value = v
        else:
            # The original guard here (`not isinstance(v, int) or not
            # isinstance(v, float)`) was true for every value, so every
            # remaining value has always been converted this way.
            start_info.hparams[k].number_value = make_np(v)[0]
    ssi = _plugin_summary(
        SESSION_START_INFO_TAG,
        HParamsPluginData(session_start_info=start_info, version=PLUGIN_DATA_VERSION))

    end_info = SessionEndInfo(status=Status.STATUS_SUCCESS)
    sei = _plugin_summary(
        SESSION_END_INFO_TAG,
        HParamsPluginData(session_end_info=end_info, version=PLUGIN_DATA_VERSION))

    return exp, ssi, sei


def scalar(name, scalar, collections=None):
    """Outputs a `Summary` protocol buffer containing a single scalar value.

    Args:
      name: A name for the generated node. Will also serve as the series name in
        TensorBoard.
      scalar: A value holding exactly one number (anything ``make_np``
        accepts, as long as the result has a single element).
      collections: Not used by this function.
    Returns:
      A `Summary` protobuf with one ``simple_value`` entry.
    Raises:
      AssertionError: If the converted value does not squeeze to 0 dimensions.
    """
    name = _clean_tag(name)
    squeezed = make_np(scalar).squeeze()
    assert(squeezed.ndim == 0), 'scalar should be 0D'
    # Convert the 0-d view: float() on an array with ndim > 0 is deprecated
    # in recent NumPy releases even when it holds a single element.
    scalar = float(squeezed)
    return Summary(value=[Summary.Value(tag=name, simple_value=scalar)])


def histogram_raw(name, min, max, num, sum, sum_squares, bucket_limits, bucket_counts):
    # pylint: disable=line-too-long
    """Wrap precomputed histogram statistics into a `Summary`.

    The generated
    [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
    has one summary value containing the histogram described by the
    arguments; nothing is computed here.
    Args:
      name: Tag of the summary value. Will also serve as a series name in
        TensorBoard. Used as given.
      min: A float or int min value
      max: A float or int max value
      num: Int number of values
      sum: Float or int sum of all values
      sum_squares: Float or int sum of squares for all values
      bucket_limits: Upper value per bucket
      bucket_counts: Number of values per bucket
    Returns:
      A `Summary` protocol buffer.
    """
    histogram_fields = dict(
        min=min,
        max=max,
        num=num,
        sum=sum,
        sum_squares=sum_squares,
        bucket_limit=bucket_limits,
        bucket=bucket_counts,
    )
    entry = Summary.Value(tag=name, histo=HistogramProto(**histogram_fields))
    return Summary(value=[entry])


def histogram(name, values, bins, max_bins=None):
    # pylint: disable=line-too-long
    """Compute a histogram of `values` and wrap it into a `Summary`.

    The generated
    [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
    has one summary value containing a histogram for `values`.
    Args:
      name: A name for the generated node. Will also serve as a series name in
        TensorBoard. It is sanitized with ``_clean_tag``.
      values: Numeric data of any shape; converted with ``make_np`` and cast
        to float before binning.
      bins: Passed to ``make_histogram``.
      max_bins: Passed to ``make_histogram``.
    Returns:
      A `Summary` protocol buffer.
    """
    tag = _clean_tag(name)
    samples = make_np(values).astype(float)
    proto = make_histogram(samples, bins, max_bins)
    entry = Summary.Value(tag=tag, histo=proto)
    return Summary(value=[entry])


def make_histogram(values, bins, max_bins=None):
    """Convert values into a histogram proto using logic from histogram.cc.

    Args:
      values: Non-empty numpy array; it is flattened before binning.
      bins: Forwarded to ``np.histogram`` as its ``bins`` argument.
      max_bins: When given and the histogram has more bins than this,
        neighbouring bins are merged in groups of ``num_bins // max_bins``.

    Returns:
      A ``HistogramProto`` holding min/max/num/sum/sum_squares of `values`
      and the trimmed buckets.

    Raises:
      ValueError: If `values` is empty, or if trimming leaves no buckets.
    """
    if values.size == 0:
        raise ValueError('The input has no element.')
    values = values.reshape(-1)
    counts, limits = np.histogram(values, bins=bins)
    num_bins = len(counts)
    if max_bins is not None and num_bins > max_bins:
        subsampling = num_bins // max_bins
        subsampling_remainder = num_bins % subsampling
        if subsampling_remainder != 0:
            # Pad with empty bins so the bins split evenly into groups.
            counts = np.pad(counts, pad_width=[[0, subsampling - subsampling_remainder]],
                            mode="constant", constant_values=0)
        counts = counts.reshape(-1, subsampling).sum(axis=-1)
        new_limits = np.empty((counts.size + 1,), limits.dtype)
        new_limits[:-1] = limits[:-1:subsampling]
        new_limits[-1] = limits[-1]
        limits = new_limits

    # Find the first and the last bin defining the support of the histogram:
    # Summing the boolean mask directly yields the same running count as the
    # former `np.greater(..., dtype=np.int32)`, which recent NumPy releases
    # reject because comparison ufuncs only produce booleans.
    cum_counts = np.cumsum(np.greater(counts, 0))
    start, end = np.searchsorted(cum_counts, [0, cum_counts[-1] - 1], side="right")
    start = int(start)
    end = int(end) + 1
    del cum_counts

    # TensorBoard only includes the right bin limits. To still have the leftmost limit
    # included, we include an empty bin left.
    # If start == 0, we need to add an empty one left, otherwise we can just include the bin left to the
    # first nonzero-count bin:
    counts = counts[start - 1:end] if start > 0 else np.concatenate([[0], counts[:end]])
    limits = limits[start:end + 1]

    if counts.size == 0 or limits.size == 0:
        raise ValueError('The histogram is empty, please file a bug report.')

    sum_sq = values.dot(values)
    return HistogramProto(min=values.min(),
                          max=values.max(),
                          num=len(values),
                          sum=values.sum(),
                          sum_squares=sum_sq,
                          bucket_limit=limits.tolist(),
                          bucket=counts.tolist())


def image(tag, tensor, rescale=1, dataformats='CHW'):
    """Outputs a `Summary` protocol buffer with one image.

    Args:
      tag: A name for the generated node. Will also serve as a series name in
        TensorBoard.
      tensor: Image data laid out as described by `dataformats`. ``uint8``
        data is used as-is; any other dtype is multiplied by 255 (i.e. it
        is expected to hold values in [0, 1]) and converted to ``uint8``.
      rescale: Scale factor forwarded to ``make_image``.
      dataformats: Layout of `tensor`, forwarded to ``convert_to_HWC``
        (default 'CHW').
    Returns:
      A `Summary` protocol buffer with a single image value.
    """
    tag = _clean_tag(tag)
    tensor = make_np(tensor)
    tensor = convert_to_HWC(tensor, dataformats)
    # Do not assume that user passes in values in [0, 255], use data type to detect
    if tensor.dtype != np.uint8:
        # Clip before casting: values slightly outside [0, 1] would otherwise
        # overflow the uint8 cast and show up as wrapped-around pixels.
        tensor = (tensor * 255.0).clip(0, 255).astype(np.uint8)

    image = make_image(tensor, rescale=rescale)
    return Summary(value=[Summary.Value(tag=tag, image=image)])


def image_boxes(tag, tensor_image, tensor_boxes, rescale=1, dataformats='CHW', labels=None):
    '''Outputs a `Summary` protocol buffer with one image with boxes drawn on it.

    Args:
      tag: Tag of the summary value (used as given).
      tensor_image: Image data laid out as described by `dataformats`.
        ``uint8`` data is used as-is; any other dtype is multiplied by 255
        and converted to ``uint8``.
      tensor_boxes: Boxes forwarded to ``make_image`` as ``rois``.
      rescale: Scale factor forwarded to ``make_image``.
      dataformats: Layout of `tensor_image`, forwarded to ``convert_to_HWC``.
      labels: Optional per-box labels forwarded to ``make_image``.
    Returns:
      A `Summary` protocol buffer with a single image value.
    '''
    tensor_image = make_np(tensor_image)
    tensor_image = convert_to_HWC(tensor_image, dataformats)
    tensor_boxes = make_np(tensor_boxes)

    if tensor_image.dtype != np.uint8:
        # Clip before casting: values slightly outside [0, 1] would otherwise
        # overflow the uint8 cast and show up as wrapped-around pixels.
        tensor_image = (tensor_image * 255.0).clip(0, 255).astype(np.uint8)

    image = make_image(tensor_image,
                       rescale=rescale,
                       rois=tensor_boxes, labels=labels)
    return Summary(value=[Summary.Value(tag=tag, image=image)])


def draw_boxes(disp_image, boxes, labels=None):
    """Draw every row of `boxes` onto `disp_image` in red.

    Args:
      disp_image: Image handed to ``_draw_single_box`` for each box.
      boxes: 2-D array; columns 0..3 of each row are passed on as
        xmin, ymin, xmax, ymax (xyxy format).
      labels: Optional sequence with one label per box.

    Returns:
      The image returned by the last ``_draw_single_box`` call, or
      `disp_image` itself when there are no boxes.
    """
    box_index = 0
    box_total = boxes.shape[0]
    while box_index < box_total:
        caption = labels[box_index] if labels is not None else None
        disp_image = _draw_single_box(disp_image,
                                      boxes[box_index, 0],
                                      boxes[box_index, 1],
                                      boxes[box_index, 2],
                                      boxes[box_index, 3],
                                      display_str=caption,
                                      color='Red')
        box_index += 1
    return disp_image


def make_image(tensor, rescale=1, rois=None, labels=None):
    """Convert a numpy image into a PNG-encoded ``Summary.Image`` protobuf.

    Args:
      tensor: 3-D array unpacked as (height, width, channel).
      rescale: Factor applied to height and width before encoding.
      rois: Optional boxes drawn onto the image before resizing.
      labels: Optional labels for `rois`.

    Returns:
      A ``Summary.Image`` whose payload is the PNG-encoded image.
    """
    from PIL import Image
    import io
    height, width, channel = tensor.shape
    scaled_height = int(height * rescale)
    scaled_width = int(width * rescale)
    image = Image.fromarray(tensor)
    if rois is not None:
        image = draw_boxes(image, rois, labels=labels)
    # Image.ANTIALIAS was an alias of LANCZOS and was removed in Pillow 10;
    # prefer LANCZOS and only fall back to the old name when it is missing.
    resample = getattr(Image, 'LANCZOS', None)
    if resample is None:
        resample = Image.ANTIALIAS
    image = image.resize((scaled_width, scaled_height), resample)
    with io.BytesIO() as output:
        image.save(output, format='PNG')
        image_string = output.getvalue()
    # NOTE(review): height/width report the size before rescaling, while the
    # encoded PNG has the rescaled size - kept as in the original, confirm.
    return Summary.Image(height=height,
                         width=width,
                         colorspace=channel,
                         encoded_image_string=image_string)


def video(tag, tensor, fps=4):
    """Outputs a `Summary` protocol buffer holding an animated GIF.

    Args:
      tag: Name of the summary value; sanitized with ``_clean_tag``.
      tensor: Video data accepted by ``_prepare_video``.
      fps: Frames per second forwarded to ``make_video``.

    Returns:
      A `Summary` whose image value is the result of ``make_video``.
    """
    tag = _clean_tag(tag)
    tensor = make_np(tensor)
    tensor = _prepare_video(tensor)
    # Only non-uint8 data is scaled from [0, 1] to [0, 255]; uint8 data is
    # used unchanged.
    if tensor.dtype != np.uint8:
        # Clip before casting: values slightly outside [0, 1] would otherwise
        # overflow the uint8 cast and show up as wrapped-around pixels.
        tensor = (tensor * 255.0).clip(0, 255).astype(np.uint8)

    video = make_video(tensor, fps)
    return Summary(value=[Summary.Value(tag=tag, image=video)])


def make_video(tensor, fps):
    """Encode a stack of frames as a GIF wrapped in ``Summary.Image``.

    Args:
      tensor: 4-D array unpacked as (frames, height, width, channels).
      fps: Frames per second of the resulting GIF.

    Returns:
      A ``Summary.Image`` with the GIF bytes, or None (after printing a
      hint) when moviepy cannot be imported.
    """
    try:
        import moviepy  # noqa: F401
    except ImportError:
        print('add_video needs package moviepy')
        return
    try:
        from moviepy import editor as mpy
    except ImportError:
        print("moviepy is installed, but can't import moviepy.editor.",
              "Some packages could be missing [imageio, requests]")
        return
    import tempfile

    _, h, w, c = tensor.shape

    # encode sequence of images into gif string
    clip = mpy.ImageSequenceClip(list(tensor), fps=fps)

    # Only the name is needed: close our handle right away so moviepy can
    # write to the file and no descriptor is leaked.
    with tempfile.NamedTemporaryFile(suffix='.gif', delete=False) as tmp:
        filename = tmp.name
    try:
        # The keyword that silences progress output differs between moviepy
        # releases; try the newest spelling first and fall back on TypeError.
        try:
            clip.write_gif(filename, verbose=False, logger=None)
        except TypeError:
            try:  # older version of moviepy does not support progress_bar argument.
                clip.write_gif(filename, verbose=False, progress_bar=False)
            except TypeError:
                clip.write_gif(filename, verbose=False)

        with open(filename, 'rb') as f:
            tensor_string = f.read()
    finally:
        # Remove the temporary file even when encoding or reading failed.
        try:
            os.remove(filename)
        except OSError:
            logging.warning('The temporary file used by moviepy cannot be deleted.')

    return Summary.Image(height=h, width=w, colorspace=c, encoded_image_string=tensor_string)


def audio(tag, tensor, sample_rate=44100):
    """Outputs a `Summary` protocol buffer holding WAV-encoded audio.

    Args:
      tag: Tag of the summary value (used as given).
      tensor: 2-D data unpacked as (length_frames, num_channels) with 1 or
        2 channels. Values whose magnitude exceeds 1 are clipped to
        [-1, 1] after a warning is printed.
      sample_rate: Sample rate written into the WAV data and the summary.

    Returns:
      A `Summary` protocol buffer with a single audio value.
    """
    import soundfile
    import io

    samples = make_np(tensor)
    peak = abs(samples).max()
    if peak > 1:
        print('warning: audio amplitude out of range, auto clipped.')
        samples = samples.clip(-1, 1)
    assert(samples.ndim == 2), 'input tensor should be 2 dimensional.'
    length_frames, num_channels = samples.shape
    assert num_channels == 1 or num_channels == 2, f'Expected 1/2 channels, got {num_channels}'

    wav_buffer = io.BytesIO()
    try:
        soundfile.write(wav_buffer, samples, samplerate=sample_rate, format='wav')
        audio_string = wav_buffer.getvalue()
    finally:
        wav_buffer.close()

    payload = Summary.Audio(sample_rate=sample_rate,
                            num_channels=num_channels,
                            length_frames=length_frames,
                            encoded_audio_string=audio_string,
                            content_type='audio/wav')
    return Summary(value=[Summary.Value(tag=tag, audio=payload)])

def custom_scalars(layout):
    """Build the configuration `Summary` for the 'custom_scalars' plugin.

    Args:
      layout: Mapping of category title to a mapping of chart title to a
        pair ``(chart_type, tags)``. For chart type ``'Margin'`` exactly
        three tags (value, lower, upper) are required; any other chart type
        produces a multiline chart over `tags`.

    Returns:
      A `Summary` with a single string tensor holding the serialized layout.
    """
    def _build_chart(chart_title, chart_spec):
        tags = chart_spec[1]
        if chart_spec[0] != 'Margin':
            multiline = layout_pb2.MultilineChartContent(tag=tags)
            return layout_pb2.Chart(title=chart_title, multiline=multiline)
        assert len(tags) == 3
        series = layout_pb2.MarginChartContent.Series(value=tags[0],
                                                      lower=tags[1],
                                                      upper=tags[2])
        margin = layout_pb2.MarginChartContent(series=[series])
        return layout_pb2.Chart(title=chart_title, margin=margin)

    categories = []
    for category_title, chart_specs in layout.items():
        charts = [_build_chart(title, spec) for title, spec in chart_specs.items()]
        categories.append(layout_pb2.Category(title=category_title, chart=charts))

    layout_proto = layout_pb2.Layout(category=categories)
    plugin_data = SummaryMetadata.PluginData(plugin_name='custom_scalars')
    smd = SummaryMetadata(plugin_data=plugin_data)
    tensor = TensorProto(dtype='DT_STRING',
                         string_val=[layout_proto.SerializeToString()],
                         tensor_shape=TensorShapeProto())
    return Summary(value=[Summary.Value(tag='custom_scalars__config__', tensor=tensor, metadata=smd)])


def text(tag, text):
    """Build a `Summary` carrying `text` for the 'text' plugin.

    Args:
      tag: Base tag; ``'/text_summary'`` is appended to it.
      text: String to log; it is stored UTF-8 encoded.

    Returns:
      A `Summary` with a single one-element string tensor.
    """
    plugin_content = TextPluginData(version=0).SerializeToString()
    smd = SummaryMetadata(
        plugin_data=SummaryMetadata.PluginData(plugin_name='text', content=plugin_content))
    one_element_shape = TensorShapeProto(dim=[TensorShapeProto.Dim(size=1)])
    tensor = TensorProto(dtype='DT_STRING',
                         string_val=[text.encode(encoding='utf_8')],
                         tensor_shape=one_element_shape)
    return Summary(value=[Summary.Value(tag=tag + '/text_summary', metadata=smd, tensor=tensor)])


def pr_curve_raw(tag, tp, fp, tn, fn, precision, recall, num_thresholds=127, weights=None):
    """Build a 'pr_curves' plugin `Summary` from precomputed curve data.

    Args:
      tag: Tag of the summary value.
      tp, fp, tn, fn, precision, recall: Equal-length sequences; they are
        stacked, in this order, into the rows of the logged tensor.
      num_thresholds: Threshold count recorded in the plugin metadata;
        capped at 127.
      weights: Not used by this function.

    Returns:
      A `Summary` with a single float tensor value.
    """
    # weird, value > 127 breaks protobuf
    num_thresholds = 127 if num_thresholds > 127 else num_thresholds
    data = np.stack((tp, fp, tn, fn, precision, recall))

    plugin_content = PrCurvePluginData(
        version=0, num_thresholds=num_thresholds).SerializeToString()
    smd = SummaryMetadata(
        plugin_data=SummaryMetadata.PluginData(plugin_name='pr_curves', content=plugin_content))

    row_dim = TensorShapeProto.Dim(size=data.shape[0])
    col_dim = TensorShapeProto.Dim(size=data.shape[1])
    tensor = TensorProto(dtype='DT_FLOAT',
                         float_val=data.reshape(-1).tolist(),
                         tensor_shape=TensorShapeProto(dim=[row_dim, col_dim]))
    return Summary(value=[Summary.Value(tag=tag, metadata=smd, tensor=tensor)])


def pr_curve(tag, labels, predictions, num_thresholds=127, weights=None):
    """Build a 'pr_curves' plugin `Summary` from labels and predictions.

    Args:
      tag: Tag of the summary value.
      labels: Ground-truth labels forwarded to ``compute_curve``.
      predictions: Predicted scores forwarded to ``compute_curve``.
      num_thresholds: Number of thresholds; capped at 127.
      weights: Optional weights forwarded to ``compute_curve``.

    Returns:
      A `Summary` with a single float tensor value.
    """
    # weird, value > 127 breaks protobuf
    if num_thresholds > 127:
        num_thresholds = 127
    data = compute_curve(labels, predictions,
                         num_thresholds=num_thresholds, weights=weights)

    plugin_content = PrCurvePluginData(
        version=0, num_thresholds=num_thresholds).SerializeToString()
    smd = SummaryMetadata(
        plugin_data=SummaryMetadata.PluginData(plugin_name='pr_curves', content=plugin_content))

    row_dim = TensorShapeProto.Dim(size=data.shape[0])
    col_dim = TensorShapeProto.Dim(size=data.shape[1])
    tensor = TensorProto(dtype='DT_FLOAT',
                         float_val=data.reshape(-1).tolist(),
                         tensor_shape=TensorShapeProto(dim=[row_dim, col_dim]))
    return Summary(value=[Summary.Value(tag=tag, metadata=smd, tensor=tensor)])


# https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/pr_curve/summary.py
def compute_curve(labels, predictions, num_thresholds=None, weights=None):
    """Compute the data behind a precision-recall curve.

    Args:
      labels: numpy array of ground-truth labels (0/1 or booleans).
      predictions: numpy array of scores, same shape as `labels`; they are
        bucketed as ``floor(prediction * (num_thresholds - 1))``.
      num_thresholds: Number of thresholds; ``None`` selects 127, the
        default used by ``pr_curve``.
      weights: Optional multiplier applied to every sample (default 1.0).

    Returns:
      Array of shape ``(6, num_thresholds)`` whose rows are, in order:
      tp, fp, tn, fn, precision, recall.
    """
    _MINIMUM_COUNT = 1e-7

    if weights is None:
        weights = 1.0
    if num_thresholds is None:
        # The signature default used to fail on `None - 1` below.
        num_thresholds = 127

    # Compute bins of true positives and false positives.
    bucket_indices = np.int32(np.floor(predictions * (num_thresholds - 1)))
    # np.float (an alias of the builtin float) was removed in NumPy 1.24;
    # np.float64 is the dtype it resolved to.
    float_labels = labels.astype(np.float64)
    histogram_range = (0, num_thresholds - 1)
    tp_buckets, _ = np.histogram(
        bucket_indices,
        bins=num_thresholds,
        range=histogram_range,
        weights=float_labels * weights)
    fp_buckets, _ = np.histogram(
        bucket_indices,
        bins=num_thresholds,
        range=histogram_range,
        weights=(1.0 - float_labels) * weights)

    # Obtain the reverse cumulative sum.
    tp = np.cumsum(tp_buckets[::-1])[::-1]
    fp = np.cumsum(fp_buckets[::-1])[::-1]
    tn = fp[0] - fp
    fn = tp[0] - tp
    # _MINIMUM_COUNT keeps the divisions defined when a denominator is zero.
    precision = tp / np.maximum(_MINIMUM_COUNT, tp + fp)
    recall = tp / np.maximum(_MINIMUM_COUNT, tp + fn)
    return np.stack((tp, fp, tn, fn, precision, recall))


def _get_tensor_summary(tag, tensor, content_type, json_config):
    """Build one ``Summary.Value`` of the 'mesh' plugin.

    Args:
      tag: Base tag; the value is tagged ``'<tag>_<content_type>'``.
      tensor: numpy array; its first three shape entries are recorded as
        the tensor shape.
      content_type: Content type id stored in the plugin metadata.
      json_config: JSON string stored in the plugin metadata.

    Returns:
      A ``Summary.Value`` holding the flattened tensor as floats.
    """
    plugin_content = MeshPluginData(
        version=0,
        name=tag,
        content_type=content_type,
        json_config=json_config,
        shape=tensor.shape,
    ).SerializeToString()
    plugin_data = SummaryMetadata.PluginData(plugin_name='mesh', content=plugin_content)
    smd = SummaryMetadata(plugin_data=plugin_data)

    flat_values = tensor.reshape(-1).tolist()
    dims = [TensorShapeProto.Dim(size=tensor.shape[axis]) for axis in (0, 1, 2)]
    tensor_proto = TensorProto(dtype='DT_FLOAT',
                               float_val=flat_values,
                               tensor_shape=TensorShapeProto(dim=dims))
    return Summary.Value(
        tag='{}_{}'.format(tag, content_type),
        tensor=tensor_proto,
        metadata=smd,
    )


def mesh(tag, vertices, colors, faces, config_dict=None):
    """Build a 'mesh' plugin `Summary` from vertices, faces and colors.

    Args:
      tag: Base tag shared by all generated values.
      vertices: Vertex data (content type 1), or None to omit it.
      colors: Color data (content type 3), or None to omit it.
      faces: Face data (content type 2), or None to omit it.
      config_dict: Configuration serialized to JSON (keys sorted) and
        attached to every value.

    Returns:
      A `Summary` with one value per non-None input, ordered vertices,
      faces, colors.
    """
    import json

    typed_inputs = ((vertices, 1), (faces, 2), (colors, 3))
    summaries = [
        _get_tensor_summary(tag, make_np(data), content_type, json.dumps(config_dict, sort_keys=True))
        for data, content_type in typed_inputs
        if data is not None
    ]
    return Summary(value=summaries)


================================================
FILE: tensorboardX/tensorboardX/torchvis.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import gc
import six
import time

from functools import wraps
from .writer import SummaryWriter
from .visdom_writer import VisdomWriter


# Supports both TensorBoard and Visdom (no embedding or graph visualization with Visdom)
# Maps each target name accepted by TorchVis.register to the writer class
# that is instantiated for it.
vis_formats = {'tensorboard': SummaryWriter, 'visdom': VisdomWriter}


class TorchVis:
    """Forwards writer calls to every registered visualization backend.

    Any attribute that is not defined on this class resolves to a function
    that calls the method of that name on each subscriber providing it.
    """

    def __init__(self, *args, **init_kwargs):
        """
        Args:
            args (list of strings): The name of the visualization target(s).
              Accepted targets are 'tensorboard' and 'visdom'.
            init_kwargs: Per-target keyword parameters, keyed by target name
              (for example ``visdom={...}`` with the server IP).
              See https://github.com/facebookresearch/visdom/blob/master/README.md#visdom-arguments-python-only
              for more.
        """
        self.subscribers = {}
        self.register(*args, **init_kwargs)

    def register(self, *args, **init_kwargs):
        """Create a writer for each named target that is known and not yet registered."""
        # Sets tensorboard as the default visualization format if not specified
        targets = ['tensorboard'] if not args else args
        for target in targets:
            if self.subscribers.get(target) is None and target in vis_formats:
                self.subscribers[target] = vis_formats[target](**init_kwargs.get(target, {}))

    def unregister(self, *args):
        """Close and drop the writers of the named targets.

        Raises:
            KeyError: If a named target is not registered.
        """
        for target in args:
            self.subscribers[target].close()
            del self.subscribers[target]
            gc.collect()

    def __getattr__(self, attr):
        # Special-method probes (copy, pickle, ...) must see a missing
        # attribute rather than a do-nothing forwarding function.
        if attr.startswith('__') and attr.endswith('__'):
            raise AttributeError(attr)
        # Read through __dict__: evaluating `self.subscribers` while it is
        # not set would re-enter __getattr__ and recurse without end.
        subscribers = self.__dict__.get('subscribers')
        if not subscribers:
            raise AttributeError(attr)

        def wrapper(*args, **kwargs):
            # Subscribers are looked up at call time; those that lack the
            # method are skipped silently.
            for subscriber in list(self.subscribers.values()):
                if hasattr(subscriber, attr):
                    getattr(subscriber, attr)(*args, **kwargs)
        return wrapper

    # Handle writer management (open/close) for the user
    def __del__(self):
        # Tolerate instances on which __init__ never ran or failed early.
        for subscriber in list(self.__dict__.get('subscribers', {}).values()):
            subscriber.close()


================================================
FILE: tensorboardX/tensorboardX/utils.py
================================================
# Functions for converting
def figure_to_image(figures, close=True):
    """Render matplotlib figure to numpy format.

    Note that this requires the ``matplotlib`` package.

    Args:
        figures (matplotlib.pyplot.figure) or list/tuple of figures: figure or
          a sequence of figures
        close (bool): Flag to automatically close the figure

    Returns:
        numpy.array: image in [CHW] order for a single figure, or the
        per-figure images stacked along a new leading axis for a sequence.

    Raises:
        ImportError: if ``matplotlib`` is not installed.
    """
    import numpy as np
    try:
        import matplotlib.pyplot as plt
        import matplotlib.backends.backend_agg as plt_backend_agg
    except ImportError:
        # Fail here with a clear message instead of printing and crashing
        # later with a NameError inside render_to_rgb.
        raise ImportError('figure_to_image requires matplotlib; please install matplotlib')

    def render_to_rgb(figure):
        # Draw on an Agg canvas and read the RGBA buffer back as bytes.
        canvas = plt_backend_agg.FigureCanvasAgg(figure)
        canvas.draw()
        data = np.frombuffer(canvas.buffer_rgba(), dtype=np.uint8)
        w, h = figure.canvas.get_width_height()
        # Drop the alpha channel, then move channels first (HWC -> CHW).
        image_hwc = data.reshape([h, w, 4])[:, :, 0:3]
        image_chw = np.moveaxis(image_hwc, source=2, destination=0)
        if close:
            plt.close(figure)
        return image_chw

    if isinstance(figures, (list, tuple)):
        return np.stack([render_to_rgb(figure) for figure in figures])
    return render_to_rgb(figures)


def graphviz_to_image():
    """Placeholder with no implementation; always returns ``None``."""
    return None


def _prepare_video(V):
    import numpy as np
    b, t, c, h, w = V.shape

    if V.dtype == np.uint8:
        V = np.float32(V) / 255.

    def is_power2(num):
        return num != 0 and ((num & (num - 1)) == 0)

    # pad to nearest power of 2, all at once
    if not is_power2(V.shape[0]):
        len_addition = int(2**V.shape[0].bit_length() - V.shape[0])
        V = np.concatenate(
            (V, np.zeros(shape=(len_addition, t, c, h, w))), axis=0)

    n_rows = 2**((b.bit_length() - 1) // 2)
    n_cols = V.shape[0] // n_rows

    V = np.reshape(V, newshape=(n_rows, n_cols, t, c, h, w))
    V = np.transpose(V, axes=(2, 0, 4, 1, 5, 3))
    V = np.reshape(V, newshape=(t, n_rows * h, n_cols * w, c))

    return V


def make_grid(I, ncols=8):
    """Arrange a batch of images into a single grid image.

    Args:
        I (numpy.ndarray): image batch laid out as N1HW, N3HW or N4HW.
          Single-channel input is replicated to three channels.
        ncols (int): maximum number of images per grid row.

    Returns:
        numpy.ndarray: array of shape (C, H * nrows, W * ncols) with the same
        dtype as ``I``; cells without an image are zero.

    Raises:
        AssertionError: if ``I`` is not a 4-D numpy array with 1, 3 or 4
          channels.
    """
    import numpy as np
    assert isinstance(
        I, np.ndarray), 'plugin error, should pass numpy array here'
    # Check the rank before indexing shape[1] so malformed input fails the
    # assertion rather than raising IndexError.
    assert I.ndim == 4
    if I.shape[1] == 1:
        I = np.concatenate([I, I, I], 1)
    assert I.shape[1] == 3 or I.shape[1] == 4
    nimg, nchannels, H, W = I.shape
    ncols = min(nimg, ncols)
    nrows = int(np.ceil(float(nimg) / ncols))
    # Keep the input dtype: a float64 canvas would silently turn uint8
    # images into floats in the 0-255 range.
    canvas = np.zeros((nchannels, H * nrows, W * ncols), dtype=I.dtype)
    for i in range(nimg):
        y, x = divmod(i, ncols)
        canvas[:, y * H:(y + 1) * H, x * W:(x + 1) * W] = I[i]
    return canvas


def convert_to_HWC(tensor, input_format):  # tensor: numpy array
    """Reorder an image array into HWC layout.

    Args:
        tensor (numpy.ndarray): image data with 2, 3 or 4 dimensions.
        input_format (str): one letter per dimension of ``tensor``, in any
          case: a permutation of 'HW', 'HWC' or 'NCHW'.

    Returns:
        numpy.ndarray: HWC array. 2-D input is stacked into three channels,
        3-D single-channel input is replicated to three channels, and 4-D
        input is tiled into one grid image with ``make_grid`` first.

    Raises:
        AssertionError: if a letter is repeated or the number of letters does
          not match the number of dimensions of ``tensor``.
        ValueError: if ``input_format`` is not a permutation of 'HW', 'HWC'
          or 'NCHW'.
    """
    import numpy as np
    assert(len(set(input_format)) == len(input_format)), "You can not use the same dimension shordhand twice. \
        input_format: {}".format(input_format)
    assert(len(tensor.shape) == len(input_format)), "size of input tensor and input format are different. \
        tensor shape: {}, input_format: {}".format(tensor.shape, input_format)
    input_format = input_format.upper()

    layouts = {4: 'NCHW', 3: 'HWC', 2: 'HW'}
    layout = layouts.get(len(input_format))
    # str.find returns -1 for a missing letter, which transpose would accept
    # as "last axis" and silently scramble the data, so validate up front.
    if layout is None or sorted(input_format) != sorted(layout):
        raise ValueError(
            "input_format must be a permutation of 'HW', 'HWC' or 'NCHW', "
            "got: {}".format(input_format))

    index = [input_format.find(c) for c in layout]
    tensor = tensor.transpose(index)

    if layout == 'NCHW':
        tensor_CHW = make_grid(tensor)
        return tensor_CHW.transpose(1, 2, 0)

    if layout == 'HWC':
        if tensor.shape[2] == 1:
            tensor = np.concatenate([tensor, tensor, tensor], 2)
        return tensor

    # layout == 'HW'
    return np.stack([tensor, tensor, tensor], 2)


================================================
FILE: tensorboardX/tensorboardX/visdom_writer.py
================================================
import gc
import numpy as np
import math
import json
import time

from .summary import compute_curve
from .utils import figure_to_image
from .x2num import make_np


# Decorator that checks if there is a Visdom connection
def _check_connection(fn):
    def wrapper(self, *args, **kwargs):
        if not self.server_connected:
            print('ERROR: No Visdom server currently connected')
            self._try_connect()
            return
        fn(self, *args, **kwargs)
    return wrapper


class VisdomWriter:
    """Writer with ``add_*`` methods that send their data to a Visdom server.

    Attributes set in ``__init__``:
        scalar_dict: nested dict ``{main_tag: {tag: [values...]}}`` holding
          every scalar logged so far.
        windows: maps a plot name to the Visdom window returned when the plot
          was first created.
        server_connected: result of the last connection check.
        vis: the ``visdom.Visdom`` client.
    """

    def __init__(self, *args, **kwargs):
        """Create the Visdom client and wait briefly for a connection.

        Args:
            *args, **kwargs: forwarded unchanged to ``visdom.Visdom``.

        Raises:
            ImportError: if the ``visdom`` package is not installed.
            AssertionError: if no connection could be formed quickly.
        """
        try:
            from visdom import Visdom
        except ImportError:
            raise ImportError(
                "Visdom visualization requires installation of Visdom")

        self.scalar_dict = {}
        self.server_connected = False
        self.vis = Visdom(*args, **kwargs)
        self.windows = {}

        self._try_connect()

    def _try_connect(self):
        """Poll the connection every 0.1 s for about one second.

        Raises:
            AssertionError: if the server is still not connected afterwards.
        """
        startup_sec = 1
        self.server_connected = self.vis.check_connection()
        while not self.server_connected and startup_sec > 0:
            time.sleep(0.1)
            startup_sec -= 0.1
            self.server_connected = self.vis.check_connection()
        assert self.server_connected, 'No connection could be formed quickly'

    @_check_connection
    def add_scalar(self, tag, scalar_value, global_step=None, main_tag='default'):
        """Add scalar data to Visdom. Plots the values in a plot titled
           {main_tag}-{tag}.

        Args:
            tag (string): Data identifier
            scalar_value (float or string/blobname): Value to save
            global_step (int): Global step value to record. When ``None``, the
              number of values logged so far for this tag is used instead.
            main_tag (string): Data group identifier
        """
        history = self.scalar_dict.setdefault(main_tag, {})
        exists = history.get(tag) is not None
        if exists:
            history[tag].append(scalar_value)
        else:
            history[tag] = [scalar_value]
        plot_name = '{}-{}'.format(main_tag, tag)
        # If there is no global_step provided, follow sequential order.
        # Compare against None so that an explicit step of 0 is honoured.
        x_val = len(history[tag]) if global_step is None else global_step
        if exists:
            # Update our existing Visdom window
            self.vis.line(
                X=make_np(x_val),
                Y=make_np(scalar_value),
                name=plot_name,
                update='append',
                win=self.windows[plot_name],
            )
        else:
            # Save the window if we are creating this graph for the first time
            self.windows[plot_name] = self.vis.line(
                X=make_np(x_val),
                Y=make_np(scalar_value),
                name=plot_name,
                opts={
                    'title': plot_name,
                    'xlabel': 'timestep',
                    'ylabel': tag,
                },
            )

    @_check_connection
    def add_scalars(self, main_tag, tag_scalar_dict, global_step=None):
        """Adds many scalar data to summary.

        Note that this function also keeps logged scalars in memory. In extreme case it explodes your RAM.

        Args:
            main_tag (string): Data group identifier
            tag_scalar_dict (dict): Key-value pair storing the tag and corresponding values
            global_step (int): Global step value to record

        Examples::

            writer.add_scalars('run_14h',{'xsinx':i*np.sin(i/r),
                                          'xcosx':i*np.cos(i/r),
                                          'arctanx': numsteps*np.arctan(i/r)}, i)
            This function adds three plots:
                'run_14h-xsinx',
                'run_14h-xcosx',
                'run_14h-arctanx'
            with the corresponding values.
        """
        for key in tag_scalar_dict.keys():
            self.add_scalar(key, tag_scalar_dict[key], global_step, main_tag)

    @_check_connection
    def export_scalars_to_json(self, path):
        """Exports to the given 'path' a JSON file containing all the scalars
        logged so far by this instance, with the following format:
        {main_tag : {tag : [value, ...], ...}, ...}

        The in-memory scalar history is cleared after export.
        """
        with open(path, "w") as f:
            json.dump(self.scalar_dict, f)
        self.scalar_dict = {}

    @_check_connection
    def add_histogram(self, tag, values, global_step=None, bins='tensorflow'):
        """Add histogram to summary.

        Args:
            tag (string): Data identifier, used as the plot title
            values (torch.Tensor, numpy.array, or string/blobname): Values to build histogram
            global_step (int): Accepted for API compatibility; not used here
            bins (string): Accepted for API compatibility; not used here
        """
        values = make_np(values)
        self.vis.histogram(values, opts={'title': tag})

    @_check_connection
    def add_image(self, tag, img_tensor, global_step=None, caption=None):
        """Add image data to summary.

        Note that this requires the ``pillow`` package.

        Args:
            tag (string): Data identifier, used as the image title
            img_tensor (torch.Tensor, numpy.array, or string/blobname): Image data
            global_step (int): Accepted for API compatibility; not used here
            caption (string): Caption passed to Visdom
        Shape:
            img_tensor: :math:`(C, H, W)`. Use ``torchvision.utils.make_grid()`` to prepare it is a good idea.
            C = colors (can be 1 - grayscale, 3 - RGB, 4 - RGBA)
        """
        img_tensor = make_np(img_tensor)
        self.vis.image(img_tensor, opts={'title': tag, 'caption': caption})

    @_check_connection
    def add_figure(self, tag, figure, global_step=None, close=True):
        """Render matplotlib figure into an image and add it to summary.

        Note that this requires the ``matplotlib`` package.

        Args:
            tag (string): Data identifier
            figure (matplotlib.pyplot.figure) or list of figures: figure or a list of figures
            global_step (int): Global step value to record
            close (bool): Flag to automatically close the figure
        """
        self.add_image(tag, figure_to_image(figure, close), global_step)

    @_check_connection
    def add_video(self, tag, vid_tensor, global_step=None, fps=4):
        """Add video data to summary.

        Note that this requires the ``moviepy`` package.

        Args:
            tag (string): Data identifier
            vid_tensor (torch.Tensor or numpy.array): Video data
            global_step (int): Global step value to record
            fps (float or int): Frames per second
        Shape:
            vid_tensor: :math:`(B, C, T, H, W)`. (if following tensorboardX format)
            vid_tensor: :math:`(T, H, W, C)`. (if following visdom format)
            B = batches, C = colors (1, 3, or 4), T = time frames, H = height, W = width
        """
        shape = vid_tensor.shape
        # A batch of videos (tensorboardX format) is a 5D tensor
        if len(shape) > 4:
            for i in range(shape[0]):
                # Reshape each video to Visdom's (T x H x W x C) and write each video.
                # Visdom looks for the .ndim attr, which raw Tensors don't have,
                # so everything is handled as a numpy array from here on.
                if isinstance(vid_tensor, np.ndarray):
                    ind_vid = np.transpose(vid_tensor[i], (1, 2, 3, 0))
                else:
                    ind_vid = vid_tensor[i].permute(1, 2, 3, 0).numpy()
                # Any value strictly between 0 and 1 is taken to mean the
                # data is in [0, 1] and must be scaled up before the uint8 cast.
                scale_factor = 255 if np.any(
                    (ind_vid > 0) & (ind_vid < 1)) else 1
                ind_vid = (ind_vid * scale_factor).astype(np.uint8)
                assert ind_vid.shape[3] in [1, 3, 4], \
                    'Visdom requires the last dimension to be color, which can be 1 (grayscale), 3 (RGB) or 4 (RGBA)'
                self.vis.video(tensor=ind_vid, opts={'fps': fps})
        else:
            self.vis.video(tensor=vid_tensor, opts={'fps': fps})

    @_check_connection
    def add_audio(self, tag, snd_tensor, global_step=None, sample_rate=44100):
        """Add audio data to summary.

        Args:
            tag (string): Data identifier
            snd_tensor (torch.Tensor, numpy.array, or string/blobname): Sound data
            global_step (int): Global step value to record
            sample_rate (int): sample rate in Hz

        Shape:
            snd_tensor: :math:`(1, L)`. The values should lie between [-1, 1].
        """
        snd_tensor = make_np(snd_tensor)
        self.vis.audio(tensor=snd_tensor, opts={
                       'sample_frequency': sample_rate})

    @_check_connection
    def add_text(self, tag, text_string, global_step=None):
        """Add text data to summary.

        Args:
            tag (string): Data identifier; written as the text itself when
              ``text_string`` is ``None``
            text_string (string): String to save
            global_step (int): Global step value to record
        Examples::
            writer.add_text('lstm', 'This is an lstm', 0)
            writer.add_text('rnn', 'This is an rnn', 10)
        """
        if text_string is None:
            # Visdom doesn't support tags, write the tag as the text_string
            text_string = tag
        self.vis.text(text_string)

    @_check_connection
    def add_onnx_graph(self, prototxt):
        """No-op kept for API compatibility."""
        # TODO: Visdom doesn't support graph visualization yet, so this is a no-op
        return

    @_check_connection
    def add_graph(self, model, input_to_model=None, verbose=False, **kwargs):
        """No-op kept for API compatibility."""
        # TODO: Visdom doesn't support graph visualization yet, so this is a no-op
        return

    @_check_connection
    def add_embedding(self, mat, metadata=None, label_img=None, global_step=None, tag='default', metadata_header=None):
        """No-op kept for API compatibility."""
        # TODO: Visdom doesn't support embeddings yet, so this is a no-op
        return

    @_check_connection
    def add_pr_curve(self, tag, labels, predictions, global_step=None, num_thresholds=127, weights=None):
        """Adds precision recall curve.

        Args:
            tag (string): Data identifier
            labels (torch.Tensor, numpy.array, or string/blobname): Ground truth data. Binary label for each element.
            predictions (torch.Tensor, numpy.array, or string/blobname):
            The probability that an element be classified as true. Value should in [0, 1]
            global_step (int): Global step value to record
            num_thresholds (int): Number of thresholds used to draw the curve.
            weights: Forwarded to ``compute_curve``.

        """
        labels, predictions = make_np(labels), make_np(predictions)
        raw_data = compute_curve(labels, predictions, num_thresholds, weights)

        # compute_curve returns np.stack((tp, fp, tn, fn, precision, recall))
        # We want to access 'precision' and 'recall'
        precision, recall = raw_data[4, :], raw_data[5, :]

        self.vis.line(
            X=recall,
            Y=precision,
            name=tag,
            opts={
                'title': 'PR Curve for {}'.format(tag),
                'xlabel': 'recall',
                'ylabel': 'precision',
            },
        )

    @_check_connection
    def add_pr_curve_raw(self, tag, true_positive_counts,
                         false_positive_counts,
                         true_negative_counts,
                         false_negative_counts,
                         precision,
                         recall, global_step=None, num_thresholds=127, weights=None):
        """Adds precision recall curve with raw data.

        Only ``precision`` and ``recall`` are plotted; the count arguments are
        accepted for API compatibility and not used here.

        Args:
            tag (string): Data identifier
            true_positive_counts (torch.Tensor, numpy.array, or string/blobname): true positive counts
            false_positive_counts (torch.Tensor, numpy.array, or string/blobname): false positive counts
            true_negative_counts (torch.Tensor, numpy.array, or string/blobname): true negative counts
            false_negative_counts (torch.Tensor, numpy.array, or string/blobname): false negative counts
            precision (torch.Tensor, numpy.array, or string/blobname): precision
            recall (torch.Tensor, numpy.array, or string/blobname): recall
            global_step (int): Global step value to record
            num_thresholds (int): Number of thresholds used to draw the curve.
            see: https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/pr_curve/README.md
        """
        precision, recall = make_np(precision), make_np(recall)
        self.vis.line(
            X=recall,
            Y=precision,
            name=tag,
            opts={
                'title': 'PR Curve for {}'.format(tag),
                'xlabel': 'recall',
                'ylabel': 'precision',
            },
        )

    def close(self):
        """Drop the Visdom client and the scalar history.

        Safe to call more than once: attributes that are already gone are
        ignored instead of raising AttributeError.
        """
        self.__dict__.pop('vis', None)
        self.__dict__.pop('scalar_dict', None)
        gc.collect()


================================================
FILE: tensorboardX/tensorboardX/writer.py
================================================
"""Provides an API for writing protocol buffers to event files to be
consumed by TensorBoard for visualization."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
import os
import six
import time
import logging

from .embedding import make_mat, make_sprite, make_tsv, append_pbtxt
from .event_file_writer import EventFileWriter
from .onnx_graph import load_onnx_graph
from .pytorch_graph import graph
from .proto import event_pb2
from .proto import summary_pb2
from .proto.event_pb2 import SessionLog, Event
from .utils import figure_to_image
from .summary import (
    scalar, histogram, histogram_raw, image, audio, text,
    pr_curve, pr_curve_raw, video, custom_scalars, image_boxes, mesh, hparams
)


class DummyFileWriter(object):
    """No-op file writer: it remembers the log directory and discards
    everything handed to it, so nothing is written to disk.
    """

    def __init__(self, logdir):
        self._logdir = logdir

    def get_logdir(self):
        """Return the log directory given at construction."""
        return self._logdir

    def add_event(self, event, step=None, walltime=None):
        """Ignore the event."""
        return None

    def add_summary(self, summary, global_step=None, walltime=None):
        """Ignore the summary."""
        return None

    def add_graph(self, graph_profile, walltime=None):
        """Ignore the graph profile."""
        return None

    def add_onnx_graph(self, graph, walltime=None):
        """Ignore the ONNX graph."""
        return None

    def flush(self):
        """Nothing is buffered, so there is nothing to flush."""
        return None

    def close(self):
        """Nothing is open, so there is nothing to close."""
        return None

    def reopen(self):
        """Nothing to reopen."""
        return None


class FileWriter(object):
    """Writes protocol buffers to event files to be consumed by TensorBoard.

    A `FileWriter` creates an event file in a given directory and adds
    summaries and events to it through an `EventFileWriter`. The file
    contents are updated asynchronously, so a training program can add data
    directly from the training loop without slowing down training.
    """

    def __init__(self, logdir, max_queue=10, flush_secs=120, filename_suffix=''):
        """Creates a `FileWriter` and an event file in `logdir`.

        The remaining arguments control the asynchronous writes to the
        event file.

        Args:
          logdir: A string. Directory where event file will be written.
          max_queue: Integer. Size of the queue for pending events and
            summaries before one of the 'add' calls forces a flush to disk.
            Default is ten items.
          flush_secs: Number. How often, in seconds, to flush the
            pending events and summaries to disk. Default is every two minutes.
          filename_suffix: A string. Suffix added to all event filenames
            in the logdir directory. More details on filename construction in
            tensorboard.summary.writer.event_file_writer.EventFileWriter.
        """
        # A PosixPath is sometimes passed in, so always coerce to a string.
        # TODO: See if we can remove this in the future if we are
        # actually the ones passing in a PosixPath
        self.event_writer = EventFileWriter(
            str(logdir), max_queue, flush_secs, filename_suffix)

    def get_logdir(self):
        """Returns the directory where event file will be written."""
        return self.event_writer.get_logdir()

    def add_event(self, event, step=None, walltime=None):
        """Stamps an event with wall time (and step) and queues it.

        Args:
          event: An `Event` protocol buffer.
          step: Number. Optional global step value for training process
            to record with the event.
          walltime: float. Optional walltime to override the default (current)
            walltime (from time.time())
        """
        if walltime is None:
            walltime = time.time()
        event.wall_time = walltime
        if step is not None:
            # Steps may arrive as numpy or other numeric types; protobuf
            # might not convert them depending on version.
            event.step = int(step)
        self.event_writer.add_event(event)

    def add_summary(self, summary, global_step=None, walltime=None):
        """Wraps a `Summary` protocol buffer in an `Event` and adds it to
        the event file.

        Args:
          summary: A `Summary` protocol buffer.
          global_step: Number. Optional global step value for training process
            to record with the summary.
          walltime: float. Optional walltime to override the default (current)
            walltime (from time.time())
        """
        self.add_event(event_pb2.Event(summary=summary), global_step, walltime)

    def add_graph(self, graph_profile, walltime=None):
        """Adds a `Graph` and step stats protocol buffer to the event file.

        Two events are written: one carrying the serialized graph and one
        carrying the serialized step stats tagged 'step1'.

        Args:
          graph_profile: A `Graph` and step stats protocol buffer.
          walltime: float. Optional walltime to override the default (current)
            walltime (from time.time()) seconds after epoch
        """
        graph_def = graph_profile[0]
        run_stats = graph_profile[1]
        graph_event = event_pb2.Event(graph_def=graph_def.SerializeToString())
        self.add_event(graph_event, None, walltime)

        tagged_metadata = event_pb2.TaggedRunMetadata(
            tag='step1', run_metadata=run_stats.SerializeToString())
        metadata_event = event_pb2.Event(tagged_run_metadata=tagged_metadata)
        self.add_event(metadata_event, None, walltime)

    def add_onnx_graph(self, graph, walltime=None):
        """Adds a `Graph` protocol buffer to the event file.

        Args:
          graph: A `Graph` protocol buffer.
          walltime: float. Optional walltime to override the default (current)
            walltime (from time.time())
        """
        self.add_event(
            event_pb2.Event(graph_def=graph.SerializeToString()), None, walltime)

    def flush(self):
        """Flushes the event file to disk.

        Call this method to make sure that all pending events have been
        written to disk.
        """
        self.event_writer.flush()

    def close(self):
        """Flushes the event file to disk and closes the file.

        Call this method when you do not need the summary writer anymore.
        """
        self.event_writer.close()

    def reopen(self):
        """Reopens the EventFileWriter.

        Can be called after `close()` to add more events in the same
        directory. The events will go into a new events file.
        Does nothing if the EventFileWriter was not closed.
        """
        self.event_writer.reopen()


class SummaryWriter(object):
    """Writes entries directly to event files in the logdir to be
    consumed by TensorBoard.

    The `SummaryWriter` class provides a high-level API to create an event file
    in a given directory and add summaries and events to it. The class updates the
    file contents asynchronously. This allows a training program to call methods
    to add data to the file directly from the training loop, without slowing down
    training.
    """

    def __init__(self, logdir=None, comment='', purge_step=None, max_queue=10,
                 flush_secs=120, filename_suffix='', write_to_disk=True, log_dir=None, **kwargs):
        """Creates a `SummaryWriter` that will write out events and summaries
        to the event file.

        Args:
            logdir (string): Save directory location. Default is
              runs/**CURRENT_DATETIME_HOSTNAME**, which changes after each run.
              Use hierarchical folder structure to compare
              between runs easily. e.g. pass in 'runs/exp1', 'runs/exp2', etc.
              for each new experiment to compare across them.
            comment (string): Comment logdir suffix appended to the default
              ``logdir``. If ``logdir`` is assigned, this argument has no effect.
            purge_step (int):
              When logging crashes at step :math:`T+X` and restarts at step :math:`T`,
              any events whose global_step larger or equal to :math:`T` will be
              purged and hidden from TensorBoard.
              Note that crashed and resumed experiments should have the same ``logdir``.
            max_queue (int): Size of the queue for pending events and
              summaries before one of the 'add' calls forces a flush to disk.
              Default is ten items.
            flush_secs (int): How often, in seconds, to flush the
              pending events and summaries to disk. Default is every two minutes.
            filename_suffix (string): Suffix added to all event filenames in
              the logdir directory. More details on filename construction in
              tensorboard.summary.writer.event_file_writer.EventFileWriter.
            write_to_disk (boolean):
              If pass `False`, SummaryWriter will not write to disk.
            log_dir (string): Alternative spelling of ``logdir``; only used
              when ``logdir`` itself is ``None``.
            **kwargs: Kept on the instance and forwarded to the ``FileWriter``
              when it is created.

        Examples::

            from tensorboardX import SummaryWriter

            # create a summary writer with automatically generated folder name.
            writer = SummaryWriter()
            # folder location: runs/May04_22-14-54_s-MacBook-Pro.local/

            # create a summary writer using the specified folder name.
            writer = SummaryWriter("my_experiment")
            # folder location: my_experiment

            # create a summary writer with comment appended.
            writer = SummaryWriter(comment="LR_0.1_BATCH_16")
            # folder location: runs/May04_22-14-54_s-MacBook-Pro.localLR_0.1_BATCH_16/

        """
        # `log_dir` only acts as a fallback for an unspecified `logdir`.
        if logdir is None and log_dir is not None:
            logdir = log_dir
        if not logdir:
            # No directory given: derive a per-run name from time and host.
            import socket
            from datetime import datetime
            timestamp = datetime.now().strftime('%b%d_%H-%M-%S')
            run_name = '_'.join((timestamp, socket.gethostname())) + comment
            logdir = os.path.join('runs', run_name)

        self.logdir = logdir
        self.purge_step = purge_step
        self._max_queue = max_queue
        self._flush_secs = flush_secs
        self._filename_suffix = filename_suffix
        self._write_to_disk = write_to_disk
        self.kwargs = kwargs

        # Initialize the file writers, but they can be cleared out on close
        # and recreated later as needed.
        self.file_writer = None
        self.all_writers = None
        self._get_file_writer()

        # Create default bins for histograms, see generate_testdata.py in tensorflow/tensorboard
        # Edges grow geometrically (x1.1) from 1e-12 up to 1e20 and are
        # mirrored around zero.
        positive_edges = []
        edge = 1E-12
        while edge < 1E20:
            positive_edges.append(edge)
            edge *= 1.1
        negative_edges = [-value for value in reversed(positive_edges)]
        self.default_bins = negative_edges + [0] + positive_edges

        self.scalar_dict = {}

    def __append_to_scalar_dict(self, tag, scalar_value, global_step,
                                timestamp):
        """Records one scalar sample in ``self.scalar_dict``.

        The dictionary has the format
        {writer_id : [[timestamp, step, value], ...], ...}; ``tag`` is used as
        the writer id and the value is converted to a Python float.
        """
        from .x2num import make_np
        history = self.scalar_dict.setdefault(tag, [])
        history.append(
            [timestamp, global_step, float(make_np(scalar_value))])

    def _check_caffe2_blob(self, item):
        """Tells whether ``item`` is a string, i.e. a Caffe2 blob name.

        Caffe2 users may pass the name of a blob in the workspace instead of
        the actual Tensor/array holding the numeric values. Callers use this
        check to decide whether the value must first be fetched from the
        workspace, which can be done with:

        from caffe2.python import workspace (if not already imported)
        workspace.FetchBlob(blob_name)
        workspace.FetchBlobs([blob_name1, blob_name2, ...])

        Returns:
            bool: ``True`` when ``item`` is a string, ``False`` otherwise.
        """
        is_blob_name = isinstance(item, six.string_types)
        return is_blob_name

    def _get_file_writer(self):
        """Returns the default FileWriter instance. Recreates it if closed.

        When writing to disk is disabled, a fresh ``DummyFileWriter`` is
        installed and returned on every call.

        Otherwise a ``FileWriter`` is created only if none is currently held.
        If a purge step was requested, two marker events (a file-version
        event and a ``SessionLog.START`` event) stamped with that step are
        written right after creation. The purge step is consumed, so a later
        re-creation of the writer does not purge again.

        Returns:
            The file writer for ``self.logdir``.
        """
        if not self._write_to_disk:
            self.file_writer = DummyFileWriter(logdir=self.logdir)
            self.all_writers = {self.file_writer.get_logdir(): self.file_writer}
            return self.file_writer

        if self.all_writers is None or self.file_writer is None:
            # `purge_step` is a named constructor argument, so it is stored on
            # the instance and never reaches `self.kwargs`. Still pop any
            # entry from kwargs so it is not forwarded to FileWriter.
            most_recent_step = self.kwargs.pop('purge_step', None)
            if self.purge_step is not None:
                most_recent_step = self.purge_step
                self.purge_step = None

            self.file_writer = FileWriter(logdir=self.logdir,
                                          max_queue=self._max_queue,
                                          flush_secs=self._flush_secs,
                                          filename_suffix=self._filename_suffix,
                                          **self.kwargs)
            if most_recent_step is not None:
                self.file_writer.add_event(
                    Event(step=most_recent_step, file_version='brain.Event:2'))
                self.file_writer.add_event(
                    Event(step=most_recent_step, session_log=SessionLog(status=SessionLog.START)))
            self.all_writers = {self.file_writer.get_logdir(): self.file_writer}
        return self.file_writer

    def add_hparams(self, hparam_dict=None, metric_dict=None):
        """Add a set of hyperparameters to be compared in tensorboard.

        Args:
            hparam_dict (dictionary): Each key-value pair in the dictionary is the
              name of the hyper parameter and it's corresponding value.
            metric_dict (dictionary): Each key-value pair in the dictionary is the
              name of the metric and it's corresponding value. Note that the key used
              here should be unique in the tensorboard record. Otherwise the value
              you added by `add_scalar` will be displayed in hparam plugin. In most
              cases, this is unwanted.

        Examples::

            from tensorboardX import SummaryWriter
            with SummaryWriter() as w:
                for i in range(5):
                    w.add_hparams({'lr': 0.1*i, 'bsize': i},
                                  {'hparam/accuracy': 10*i, 'hparam/loss': 10*i})

        Expected result:

        .. image:: _static/img/tensorboard/add_hparam.png
           :scale: 50 %
        """
        if type(hparam_dict) is not dict or type(metric_dict) is not dict:
            raise TypeError('hparam_dict and metric_dict should be dictionary.')
        exp, ssi, sei = hparams(hparam_dict, metric_dict)

        with SummaryWriter(logdir=os.path.join(self.file_writer.get_logdir(), str(time.time()))) as w_hp:
            w_hp.file_writer.add_summary(exp)
            w_hp.file_writer.add_summary(ssi)
            w_hp.file_writer.add_summary(sei)
            for k, v in metric_dict.items():
                w_hp.add_scalar(k, v)

    def add_scalar(self, tag, scalar_value, global_step=None, walltime=None):
        """Add scalar data to summary.

        A string ``scalar_value`` is treated as a Caffe2 blob name and is
        fetched from the workspace before being logged.

        Args:
            tag (string): Data identifier
            scalar_value (float or string/blobname): Value to save
            global_step (int): Global step value to record
            walltime (float): Optional override default walltime (time.time()) of event

        Examples::

            from tensorboardX import SummaryWriter
            writer = SummaryWriter()
            x = range(100)
            for i in x:
                writer.add_scalar('y=2x', i * 2, i)
            writer.close()

        Expected result:

        .. image:: _static/img/tensorboard/add_scalar.png
           :scale: 50 %

        """
        value = scalar_value
        if self._check_caffe2_blob(value):
            value = workspace.FetchBlob(value)
        writer = self._get_file_writer()
        summary = scalar(tag, value)
        writer.add_summary(summary, global_step, walltime)

    def add_scalars(self, main_tag, tag_scalar_dict, global_step=None, walltime=None):
        """Adds many scalar data to summary.

        Each entry of ``tag_scalar_dict`` is written by its own file writer
        located at ``<logdir>/<main_tag>/<tag>``; writers are created on first
        use and cached in ``self.all_writers``. New writers use this
        instance's queue size, flush interval and filename suffix, and are
        dummy writers when ``write_to_disk`` is ``False``.

        Note that this function also keeps logged scalars in memory. In extreme case it explodes your RAM.

        Args:
            main_tag (string): The parent name for the tags
            tag_scalar_dict (dict): Key-value pair storing the tag and corresponding values
            global_step (int): Global step value to record
            walltime (float): Optional override default walltime (time.time()) of event

        Examples::

            from tensorboardX import SummaryWriter
            writer = SummaryWriter()
            r = 5
            for i in range(100):
                writer.add_scalars('run_14h', {'xsinx':i*np.sin(i/r),
                                                'xcosx':i*np.cos(i/r),
                                                'tanx': np.tan(i/r)}, i)
            writer.close()
            # This call adds three values to the same scalar plot with the tag
            # 'run_14h' in TensorBoard's scalar section.

        Expected result:

        .. image:: _static/img/tensorboard/add_scalars.png
           :scale: 50 %

        """
        # Use one timestamp for every scalar logged by this call.
        walltime = time.time() if walltime is None else walltime
        fw_logdir = self._get_file_writer().get_logdir()
        for tag, scalar_value in tag_scalar_dict.items():
            fw_tag = fw_logdir + "/" + main_tag + "/" + tag
            fw = self.all_writers.get(fw_tag)
            if fw is None:
                if self._write_to_disk:
                    # Per-tag writers follow the same settings as the default one.
                    fw = FileWriter(logdir=fw_tag,
                                    max_queue=self._max_queue,
                                    flush_secs=self._flush_secs,
                                    filename_suffix=self._filename_suffix)
                else:
                    # Honour write_to_disk=False for the per-tag writers too.
                    fw = DummyFileWriter(logdir=fw_tag)
                self.all_writers[fw_tag] = fw
            if self._check_caffe2_blob(scalar_value):
                scalar_value = workspace.FetchBlob(scalar_value)
            fw.add_summary(scalar(main_tag, scalar_value),
                           global_step, walltime)
            self.__append_to_scalar_dict(
                fw_tag, scalar_value, global_step, walltime)

    def export_scalars_to_json(self, path):
        """Exports to the given path an ASCII file containing all the scalars written
        so far by this instance, with the following format:
        {writer_id : [[timestamp, step, value], ...], ...}

        The scalars saved by ``add_scalars()`` will be flushed after export.
        """
        with open(path, "w") as f:
            json.dump(self.scalar_dict, f)
        self.scalar_dict = {}

    def add_histogram(self, tag, values, global_step=None, bins='tensorflow', walltime=None, max_bins=None):
        """Add histogram to summary.

        A string ``values`` is treated as a Caffe2 blob name and is fetched
        from the workspace first. The special ``bins`` value ``'tensorflow'``
        selects this writer's precomputed default bin edges.

        Args:
            tag (string): Data identifier
            values (torch.Tensor, numpy.array, or string/blobname): Values to build histogram
            global_step (int): Global step value to record
            bins (string): One of {'tensorflow','auto', 'fd', ...}. This determines how the bins are made. You can find
              other options in: https://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram.html
            walltime (float): Optional override default walltime (time.time()) of event
            max_bins: Passed through to the histogram summary builder.

        Examples::

            from tensorboardX import SummaryWriter
            import numpy as np
            writer = SummaryWriter()
            for i in range(10):
                x = np.random.random(1000)
                writer.add_histogram('distribution centers', x + i, i)
            writer.close()

        Expected result:

        .. image:: _static/img/tensorboard/add_histogram.png
           :scale: 50 %

        """
        if self._check_caffe2_blob(values):
            values = workspace.FetchBlob(values)
        wants_default_bins = isinstance(bins, six.string_types) and bins == 'tensorflow'
        bin_spec = self.default_bins if wants_default_bins else bins
        writer = self._get_file_writer()
        summary = histogram(tag, values, bin_spec, max_bins=max_bins)
        writer.add_summary(summary, global_step, walltime)

    def add_histogram_raw(self, tag, min, max, num, sum, sum_squares,
                          bucket_limits, bucket_counts, global_step=None,
                          walltime=None):
        """Adds histogram with raw data.

        Args:
            tag (string): Data identifier
            min (float or int): Min value
            max (float or int): Max value
            num (int): Number of values
            sum (float or int): Sum of all values
            sum_squares (float or int): Sum of squares for all values
            bucket_limits (torch.Tensor, numpy.array): Upper value per
              bucket, note that the bucket_limits returned from `np.histogram`
              has one more element. See the comment in the following example.
            bucket_counts (torch.Tensor, numpy.array): Number of values per bucket
            global_step (int): Global step value to record
            walltime (float): Optional override default walltime (time.time()) of event

        Examples::

            import numpy as np
            dummy_data = []
            for idx, value in enumerate(range(30)):
                dummy_data += [idx + 0.001] * value
            values = np.array(dummy_data).astype(float).reshape(-1)
            counts, limits = np.histogram(values)
            sum_sq = values.dot(values)
            with SummaryWriter() as summary_writer:
                summary_writer.add_histogram_raw(
                        tag='hist_dummy_data',
                        min=values.min(),
                        max=values.max(),
                        num=len(values),
                        sum=values.sum(),
                        sum_squares=sum_sq,
                        bucket_limits=limits[1:].tolist(),  # <- note here.
                        bucket_counts=counts.tolist(),
                        global_step=0)

        """
        if len(bucket_limits) != len(bucket_counts):
            raise ValueError('len(bucket_limits) != len(bucket_counts), see the document.')
        self._get_file_writer().add_summary(
            histogram_raw(tag,
                          min,
                          max,
                          num,
                          sum,
                          sum_squares,
                          bucket_limits,
                          bucket_counts),
            global_step,
            walltime)

    def add_image(self, tag, img_tensor, global_step=None, walltime=None, dataformats='CHW'):
        """Add image data to summary.

        Note that this requires the ``pillow`` package.

        A string ``img_tensor`` is treated as a Caffe2 blob name and is
        fetched from the workspace first.

        Args:
            tag (string): Data identifier
            img_tensor (torch.Tensor, numpy.array, or string/blobname): An `uint8` or `float`
                Tensor of shape `[channel, height, width]` where `channel` is 1, 3, or 4.
                The elements in img_tensor can either have values in [0, 1] (float32) or [0, 255] (uint8).
                Users are responsible to scale the data in the correct range/type.
            global_step (int): Global step value to record
            walltime (float): Optional override default walltime (time.time()) of event.
            dataformats (string): This parameter specifies the meaning of each dimension of the input tensor.
        Shape:
            img_tensor: Default is :math:`(3, H, W)`. You can use ``torchvision.utils.make_grid()`` to
            convert a batch of tensor into 3xHxW format or use ``add_images()`` and let us do the job.
            Tensor with :math:`(1, H, W)`, :math:`(H, W)`, :math:`(H, W, 3)` is also suitable as long as
            corresponding ``dataformats`` argument is passed. e.g. CHW, HWC, HW.

        Examples::

            from tensorboardX import SummaryWriter
            import numpy as np
            img = np.zeros((3, 100, 100))
            img[0] = np.arange(0, 10000).reshape(100, 100) / 10000
            img[1] = 1 - np.arange(0, 10000).reshape(100, 100) / 10000

            img_HWC = np.zeros((100, 100, 3))
            img_HWC[:, :, 0] = np.arange(0, 10000).reshape(100, 100) / 10000
            img_HWC[:, :, 1] = 1 - np.arange(0, 10000).reshape(100, 100) / 10000

            writer = SummaryWriter()
            writer.add_image('my_image', img, 0)

            # If you have non-default dimension setting, set the dataformats argument.
            writer.add_image('my_image_HWC', img_HWC, 0, dataformats='HWC')
            writer.close()

        Expected result:

        .. image:: _static/img/tensorboard/add_image.png
           :scale: 50 %

        """
        pixels = img_tensor
        if self._check_caffe2_blob(pixels):
            pixels = workspace.FetchBlob(pixels)
        writer = self._get_file_writer()
        summary = image(tag, pixels, dataformats=dataformats)
        writer.add_summary(summary, global_step, walltime)

    def add_images(self, tag, img_tensor, global_step=None, walltime=None, dataformats='NCHW'):
        """Add batched (4D) image data to summary.
        Besides passing 4D (NCHW) tensor, you can also pass a list of tensors of the same size.
        In this case, the ``dataformats`` should be `CHW` or `HWC`.
        Note that this requires the ``pillow`` package.

        A list input is stacked along a new leading axis, with
        ``torch.stack`` when that works and ``numpy.stack`` otherwise, and
        ``dataformats`` gets an ``'N'`` prefix. If a list is passed with any
        other ``dataformats``, a message is printed and nothing is written.

        Args:
            tag (string): Data identifier
            img_tensor (torch.Tensor, numpy.array, or string/blobname): Image data
                The elements in img_tensor can either have values in [0, 1] (float32) or [0, 255] (uint8).
                Users are responsible to scale the data in the correct range/type.
            global_step (int): Global step value to record
            walltime (float): Optional override default walltime (time.time()) of event
            dataformats (string): Meaning of each dimension of the input tensor.
        Shape:
            img_tensor: Default is :math:`(N, 3, H, W)`. If ``dataformats`` is specified, other shape will be
            accepted. e.g. NCHW or NHWC.

        Examples::

            from tensorboardX import SummaryWriter
            import numpy as np

            img_batch = np.zeros((16, 3, 100, 100))
            for i in range(16):
                img_batch[i, 0] = np.arange(0, 10000).reshape(100, 100) / 10000 / 16 * i
                img_batch[i, 1] = (1 - np.arange(0, 10000).reshape(100, 100) / 10000) / 16 * i

            writer = SummaryWriter()
            writer.add_images('my_image_batch', img_batch, 0)
            writer.close()

        Expected result:

        .. image:: _static/img/tensorboard/add_images.png
           :scale: 30 %

        """
        if self._check_caffe2_blob(img_tensor):
            img_tensor = workspace.FetchBlob(img_tensor)
        if isinstance(img_tensor, list):  # a list of tensors in CHW or HWC
            if dataformats.upper() not in ('CHW', 'HWC'):
                print('A list of image is passed, but the dataformat is neither CHW nor HWC.')
                print('Nothing is written.')
                return
            try:
                # torch is optional: a list of numpy arrays must still work
                # when PyTorch is not installed.
                import torch
                img_tensor = torch.stack(img_tensor, 0)
            except (ImportError, TypeError):
                # TypeError: the list elements are not torch tensors.
                import numpy as np
                img_tensor = np.stack(img_tensor, 0)

            dataformats = 'N' + dataformats

        self._get_file_writer().add_summary(
            image(tag, img_tensor, dataformats=dataformats), global_step, walltime)

    def add_image_with_boxes(self, tag, img_tensor, box_tensor, global_step=None,
                             walltime=None, dataformats='CHW', labels=None, **kwargs):
        """Add image and draw bounding boxes on the image.

        String ``img_tensor`` / ``box_tensor`` values are treated as Caffe2
        blob names and fetched from the workspace. A single string passed as
        ``labels`` is wrapped into a one-element list. If the number of labels
        does not match the number of boxes, a warning is logged and the
        labels are dropped.

        Args:
            tag (string): Data identifier
            img_tensor (torch.Tensor, numpy.array, or string/blobname): Image data
            box_tensor (torch.Tensor, numpy.array, or string/blobname): Box data (for detected objects)
              box should be represented as [x1, y1, x2, y2].
            global_step (int): Global step value to record
            walltime (float): Optional override default walltime (time.time()) of event
            labels (list of string): The strings to be show on each bounding box.
            **kwargs: Forwarded to the box-image summary builder.
        Shape:
            img_tensor: Default is :math:`(3, H, W)`. It can be specified with ``dataformats`` argument.
            e.g. CHW or HWC

            box_tensor: (torch.Tensor, numpy.array, or string/blobname): NX4,  where N is the number of
            boxes and each 4 elements in a row represents (xmin, ymin, xmax, ymax).
        """
        if self._check_caffe2_blob(img_tensor):
            img_tensor = workspace.FetchBlob(img_tensor)
        if self._check_caffe2_blob(box_tensor):
            box_tensor = workspace.FetchBlob(box_tensor)

        box_labels = labels
        if box_labels is not None:
            box_labels = [box_labels] if isinstance(box_labels, str) else box_labels
            if len(box_labels) != box_tensor.shape[0]:
                logging.warning('Number of labels do not equal to number of box, skip the labels.')
                box_labels = None

        writer = self._get_file_writer()
        summary = image_boxes(
            tag, img_tensor, box_tensor, dataformats=dataformats, labels=box_labels, **kwargs)
        writer.add_summary(summary, global_step, walltime)

    def add_figure(self, tag, figure, global_step=None, close=True, walltime=None):
        """Render matplotlib figure into an image and add it to summary.

        Note that this requires the ``matplotlib`` package.

        The rendered result is logged through ``add_image`` with the
        ``'NCHW'`` layout for a list of figures and ``'CHW'`` for a single one.

        Args:
            tag (string): Data identifier
            figure (matplotlib.pyplot.figure) or list of figures: Figure or a list of figures
            global_step (int): Global step value to record
            close (bool): Flag to automatically close the figure
            walltime (float): Optional override default walltime (time.time()) of event
        """
        layout = 'NCHW' if isinstance(figure, list) else 'CHW'
        rendered = figure_to_image(figure, close)
        self.add_image(tag, rendered, global_step, walltime, dataformats=layout)

    def add_video(self, tag, vid_tensor, global_step=None, fps=4, walltime=None):
        """Add video data to summary.

        Note that this requires the ``moviepy`` package.

        Args:
            tag (string): Data identifier
            vid_tensor (torch.Tensor): Video data
            global_step (int): Global step value to record
            fps (float or int): Frames per second
            walltime (float): Optional override default walltime (time.time()) of event
        Shape:
            vid_tensor: :math:`(N, T, C, H, W)`. The values should lie in [0, 255] for type
              `uint8` or [0, 1] for type `float`.
        """
        writer = self._get_file_writer()
        summary = video(tag, vid_tensor, fps)
        writer.add_summary(summary, global_step, walltime)

    def add_audio(self, tag, snd_tensor, global_step=None, sample_rate=44100, walltime=None):
        """Add audio data to summary.

        A string ``snd_tensor`` is treated as a Caffe2 blob name and is
        fetched from the workspace first.

        Args:
            tag (string): Data identifier
            snd_tensor (torch.Tensor): Sound data
            global_step (int): Global step value to record
            sample_rate (int): sample rate in Hz
            walltime (float): Optional override default walltime (time.time()) of event
        Shape:
            snd_tensor: :math:`(L, c)`. The values should lie between [-1, 1].
        """
        samples = snd_tensor
        if self._check_caffe2_blob(samples):
            samples = workspace.FetchBlob(samples)
        writer = self._get_file_writer()
        summary = audio(tag, samples, sample_rate=sample_rate)
        writer.add_summary(summary, global_step, walltime)

    def add_text(self, tag, text_string, global_step=None, walltime=None):
        """Add text data to summary.

        Args:
            tag (string): Data identifier
            text_string (string): String to save
            global_step (int): Global step value to record
            walltime (float): Optional override default walltime (time.time()) of event
        Examples::

            writer.add_text('lstm', 'This is an lstm', 0)
            writer.add_text('rnn', 'This is an rnn', 10)
        """
        writer = self._get_file_writer()
        summary = text(tag, text_string)
        writer.add_summary(summary, global_step, walltime)

    def add_onnx_graph(self, prototxt):
        """Load an ONNX graph from ``prototxt`` and add it to the event file.

        Args:
            prototxt: Passed as-is to ``load_onnx_graph``; the result is
              handed to the file writer's ``add_onnx_graph``.
        """
        writer = self._get_file_writer()
        onnx_graph = load_onnx_graph(prototxt)
        writer.add_onnx_graph(onnx_graph)

    def add_graph(self, model, input_to_model=None, verbose=False, **kwargs):
        # prohibit second call?
        # no, let tensorboard handle it and show its warning message.
        """Add graph data to summary.

        Objects with a ``forward`` attribute are handled as PyTorch models;
        anything else is handled as a Caffe2 model (a list of ``core.Net``, a
        list of ``caffe2_pb2.NetDef``, or a model helper object).

        Args:
            model (torch.nn.Module): Model to draw.
            input_to_model (torch.Tensor or list of torch.Tensor): A variable or a tuple of
                variables to be fed.
            verbose (bool): Whether to print graph structure in console.
            omit_useless_nodes (bool): Default to ``true``, which eliminates unused nodes.
            operator_export_type (string): One of: ``"ONNX"``, ``"RAW"``. This determines
                the optimization level of the graph. If error happens during exporting
                the graph, using ``"RAW"`` might help.
            **kwargs: Forwarded to the graph builder that is used.

        Raises:
            TypeError: If ``model`` is a list whose first element is neither a
                ``core.Net`` nor a ``caffe2_pb2.NetDef``.

        """
        if hasattr(model, 'forward'):
            # A valid PyTorch model should have a 'forward' method
            import re
            import torch
            # Parse the leading "major.minor[.patch]" by hand:
            # distutils.version was removed from the standard library in
            # Python 3.12. An unparsable version string is assumed to be
            # a modern release.
            parsed = re.match(r'(\d+)\.(\d+)(?:\.(\d+))?', torch.__version__)
            if parsed is not None:
                torch_version = tuple(int(part or 0) for part in parsed.groups())
                if (0, 3, 0) <= torch_version < (0, 3, 1):
                    print('You are using PyTorch==0.3.0, use add_onnx_graph()')
                    return
                if torch_version < (0, 3, 0) and not hasattr(torch.autograd.Variable, 'grad_fn'):
                    print('add_graph() only supports PyTorch v0.2.')
                    return
            self._get_file_writer().add_graph(graph(model, input_to_model, verbose, **kwargs))
        else:
            # Caffe2 models do not have the 'forward' method
            from caffe2.proto import caffe2_pb2
            from caffe2.python import core
            from .caffe2_graph import (
                model_to_graph_def, nets_to_graph_def, protos_to_graph_def
            )
            if isinstance(model, list):
                if isinstance(model[0], core.Net):
                    current_graph = nets_to_graph_def(
                        model, **kwargs)
                elif isinstance(model[0], caffe2_pb2.NetDef):
                    current_graph = protos_to_graph_def(
                        model, **kwargs)
                else:
                    # Without this branch `current_graph` would be unbound
                    # and the code below would raise UnboundLocalError.
                    raise TypeError(
                        'add_graph() expects a list of caffe2 core.Net or '
                        'caffe2_pb2.NetDef, got a list of %s'
                        % type(model[0]).__name__)
            else:
                # Handles cnn.CNNModelHelper, model_helper.ModelHelper
                current_graph = model_to_graph_def(
                    model, **kwargs)
            event = event_pb2.Event(
                graph_def=current_graph.SerializeToString())
            self._get_file_writer().add_event(event)

    @staticmethod
    def _encode(rawstr):
        # I'd use urllib but, I'm unsure about the differences from python3 to python2, etc.
        retval = rawstr
        retval = retval.replace("%", "%%%02x" % (ord("%")))
        retval = retval.replace("/", "%%%02x" % (ord("/")))
        retval = retval.replace("\\", "%%%02x" % (ord("\\")))
        return retval

    def add_embedding(self, mat, metadata=None, label_img=None, global_step=None, tag='default', metadata_header=None):
        """Add embedding projector data to summary.

        The data is written into the sub-directory
        ``<global_step zero-padded to 5 digits>/<encoded tag>`` of the logdir,
        and the projector config is then extended with the new embedding.
        All shape checks run before anything is written to disk.

        Args:
            mat (torch.Tensor or numpy.array): A matrix which each row is the feature vector of the data point
            metadata (list): A list of labels, each element will be convert to string
            label_img (torch.Tensor or numpy.array): Images correspond to each data point. Each image should be square.
            global_step (int): Global step value to record. ``None`` is treated as 0.
            tag (string): Name for the embedding
            metadata_header: Passed through to the metadata TSV writer.
        Shape:
            mat: :math:`(N, D)`, where N is number of data and D is feature dimension

            label_img: :math:`(N, C, H, W)`, where `Height` should be equal to `Width`.

        Raises:
            AssertionError: If ``mat`` is not 2D, if the number of labels or
                images differs from the number of rows of ``mat``, or if the
                images are not square.
            OSError: If the embedding directory cannot be created for a reason
                other than already existing.

        Examples::

            import keyword
            import torch
            meta = []
            while len(meta)<100:
                meta = meta+keyword.kwlist # get some strings
            meta = meta[:100]

            for i, v in enumerate(meta):
                meta[i] = v+str(i)

            label_img = torch.rand(100, 3, 32, 32)
            for i in range(100):
                label_img[i]*=i/100.0

            writer.add_embedding(torch.randn(100, 5), metadata=meta, label_img=label_img)
            writer.add_embedding(torch.randn(100, 5), label_img=label_img)
            writer.add_embedding(torch.randn(100, 5), metadata=meta)
        """
        from .x2num import make_np
        mat = make_np(mat)

        # Validate every input up front so a bad call leaves no partial
        # output (directory, metadata or sprite files) behind.
        assert mat.ndim == 2, 'mat should be 2D, where mat.size(0) is the number of data points'
        if metadata is not None:
            assert mat.shape[0] == len(
                metadata), '#labels should equal with #data points'
        if label_img is not None:
            assert mat.shape[0] == label_img.shape[0], '#images should equal with #data points'
            assert label_img.shape[2] == label_img.shape[3], 'Image should be square, see tensorflow/tensorboard#670'

        if global_step is None:
            global_step = 0
            # clear pbtxt?
        # Maybe we should encode the tag so slashes don't trip us up?
        # I don't think this will mess us up, but better safe than sorry.
        subdir = "%s/%s" % (str(global_step).zfill(5), self._encode(tag))
        logdir = self._get_file_writer().get_logdir()
        save_path = os.path.join(logdir, subdir)
        try:
            os.makedirs(save_path)
        except OSError:
            # Only an already-existing directory is tolerated; anything else
            # (permissions, read-only filesystem, ...) is a real failure.
            if not os.path.isdir(save_path):
                raise
            print(
                'warning: Embedding dir exists, did you set global_step for add_embedding()?')
        if metadata is not None:
            make_tsv(metadata, save_path, metadata_header=metadata_header)
        if label_img is not None:
            make_sprite(label_img, save_path)
        make_mat(mat, save_path)
        # new function to append to the config file a new embedding
        append_pbtxt(metadata, label_img, logdir, subdir, global_step, tag)

    def add_pr_curve(self, tag, labels, predictions, global_step=None,
                     num_thresholds=127, weights=None, walltime=None):
        """Adds precision recall curve.
        Plotting a precision-recall curve lets you understand your model's
        performance under different threshold settings. With this function,
        you provide the ground truth labeling (T/F) and prediction confidence
        (usually the output of your model) for each target. The TensorBoard UI
        will let you choose the threshold interactively.

        ``labels`` and ``predictions`` are converted to numpy arrays before
        the curve summary is built.

        Args:
            tag (string): Data identifier
            labels (torch.Tensor, numpy.array, or string/blobname):
              Ground truth data. Binary label for each element.
            predictions (torch.Tensor, numpy.array, or string/blobname):
              The probability that an element be classified as true.
              Value should in [0, 1]
            global_step (int): Global step value to record
            num_thresholds (int): Number of thresholds used to draw the curve.
            weights: Passed through to the PR-curve summary builder.
            walltime (float): Optional override default walltime (time.time()) of event

        Examples::

            from tensorboardX import SummaryWriter
            import numpy as np
            labels = np.random.randint(2, size=100)  # binary label
            predictions = np.random.rand(100)
            writer = SummaryWriter()
            writer.add_pr_curve('pr_curve', labels, predictions, 0)
            writer.close()

        """
        from .x2num import make_np
        labels = make_np(labels)
        predictions = make_np(predictions)
        writer = self._get_file_writer()
        curve = pr_curve(tag, labels, predictions, num_thresholds, weights)
        writer.add_summary(curve, global_step, walltime)

    def add_pr_curve_raw(self, tag, true_positive_counts,
                         false_positive_counts,
                         true_negative_counts,
                         false_negative_counts,
                         precision,
                         recall,
                         global_step=None,
                         num_thresholds=127,
                         weights=None,
                         walltime=None):
        """Record a precision-recall curve from pre-computed raw data.

        The arguments are handed to ``pr_curve_raw`` as given; no conversion
        is performed in this method.

        Args:
            tag (string): Data identifier
            true_positive_counts (torch.Tensor, numpy.array, or string/blobname): true positive counts
            false_positive_counts (torch.Tensor, numpy.array, or string/blobname): false positive counts
            true_negative_counts (torch.Tensor, numpy.array, or string/blobname): true negative counts
            false_negative_counts (torch.Tensor, numpy.array, or string/blobname): false negative counts
            precision (torch.Tensor, numpy.array, or string/blobname): precision
            recall (torch.Tensor, numpy.array, or string/blobname): recall
            global_step (int): Global step value to record
            num_thresholds (int): Number of thresholds used to draw the curve.
            weights: Forwarded unchanged to ``pr_curve_raw``.
            walltime (float): Optional override default walltime (time.time()) of event
            see: https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/pr_curve/README.md
        """
        file_writer = self._get_file_writer()
        raw_summary = pr_curve_raw(
            tag,
            true_positive_counts, false_positive_counts,
            true_negative_counts, false_negative_counts,
            precision, recall,
            num_thresholds, weights)
        file_writer.add_summary(raw_summary, global_step, walltime)

    def add_custom_scalars_multilinechart(self, tags, category='default', title='untitled'):
        """Convenience wrapper that registers a single multiline chart.

        Works like ``add_custom_scalars()`` but only *tags* is required; the
        layout dictionary is assembled here.

        Args:
            tags (list): list of tags that have been used in ``add_scalar()``
            category (string): name of the category the chart is placed in
            title (string): name of the chart

        Examples::

            writer.add_custom_scalars_multilinechart(['twse/0050', 'twse/2330'])
        """
        chart_properties = ['Multiline', tags]
        charts = {title: chart_properties}
        layout = {category: charts}
        file_writer = self._get_file_writer()
        file_writer.add_summary(custom_scalars(layout))

    def add_custom_scalars_marginchart(self, tags, category='default', title='untitled'):
        """Convenience wrapper that registers a single margin chart.

        Works like ``add_custom_scalars()`` but only *tags* is required, and
        it must contain exactly 3 elements (checked with an ``assert``).

        Args:
            tags (list): list of tags that have been used in ``add_scalar()``
            category (string): name of the category the chart is placed in
            title (string): name of the chart

        Examples::

            writer.add_custom_scalars_marginchart(['twse/0050', 'twse/2330', 'twse/2006'])
        """
        tag_count = len(tags)
        assert tag_count == 3
        chart_properties = ['Margin', tags]
        charts = {title: chart_properties}
        layout = {category: charts}
        file_writer = self._get_file_writer()
        file_writer.add_summary(custom_scalars(layout))

    def add_custom_scalars(self, layout):
        """Build a special chart by grouping chart tags from 'scalars'.

        This may be called only once per SummaryWriter() object. Since it just
        supplies metadata to tensorboard, it can be invoked either before or
        after the training loop. See ``examples/demo_custom_scalars.py`` for more.

        Args:
            layout (dict): {categoryName: *charts*}, where *charts* is also a dictionary
              {chartName: *ListOfProperties*}. The first element in *ListOfProperties* is the chart's type
              (one of **Multiline** or **Margin**) and the second element should be a list containing the tags
              you have used in add_scalar function, which will be collected into the new chart.

        Examples::

            layout = {'Taiwan':{'twse':['Multiline',['twse/0050', 'twse/2330']]},
                         'USA':{ 'dow':['Margin',   ['dow/aaa', 'dow/bbb', 'dow/ccc']],
                              'nasdaq':['Margin',   ['nasdaq/aaa', 'nasdaq/bbb', 'nasdaq/ccc']]}}

            writer.add_custom_scalars(layout)
        """
        file_writer = self._get_file_writer()
        file_writer.add_summary(custom_scalars(layout))

    def add_mesh(self, tag, vertices, colors=None, faces=None, config_dict=None, global_step=None, walltime=None):
        """Log a mesh or 3D point cloud for TensorBoard.

        Rendering is based on Three.js, so the displayed object is interactive.
        On top of the basic definitions (vertices, faces) callers may supply
        camera parameters, lighting conditions, etc. through ``config_dict``.
        See https://threejs.org/docs/index.html#manual/en/introduction/Creating-a-scene
        for advanced usage. Note that currently this depends on tb-nightly to show.

        Args:
            tag (string): Data identifier
            vertices (torch.Tensor): List of the 3D coordinates of vertices.
            colors (torch.Tensor): Colors for each vertex
            faces (torch.Tensor): Indices of vertices within each triangle. (Optional)
            config_dict: Dictionary with ThreeJS classes names and configuration.
            global_step (int): Global step value to record
            walltime (float): Optional override default walltime (time.time())
              seconds after epoch of event

        Shape:
            vertices: :math:`(B, N, 3)`. (batch, number_of_vertices, channels). If you see nothing on
              tensorboard, try normalizing the values to [-1, 1].

            colors: :math:`(B, N, 3)`. The values should lie in [0, 255].

            faces: :math:`(B, N, 3)`. The values should lie in [0, number_of_vertices] for type `uint8`.

        Examples::

            import numpy as np
            from tensorboardX import SummaryWriter
            vertices_tensor = np.array([[
                [1, 1, 1],
                [-1, -1, 1],
                [1, -1, -1],
                [-1, 1, -1],
            ]], dtype=float)
            colors_tensor = np.array([[
                [255, 0, 0],
                [0, 255, 0],
                [0, 0, 255],
                [255, 0, 255],
            ]], dtype=int)
            faces_tensor = np.array([[
                [0, 2, 3],
                [0, 3, 1],
                [0, 1, 2],
                [1, 3, 2],
            ]], dtype=int)

            writer = SummaryWriter()
            writer.add_mesh('my_mesh', vertices=vertices_tensor, colors=colors_tensor, faces=faces_tensor)

            writer.close()
        """
        file_writer = self._get_file_writer()
        mesh_summary = mesh(tag, vertices, colors, faces, config_dict)
        file_writer.add_summary(mesh_summary, global_step, walltime)

    def close(self):
        """Flush and close every writer in ``all_writers``, then drop the references.

        Calling this on an already-closed instance is a no-op.
        """
        writers = self.all_writers
        if writers is None:
            # Already closed: a second close() is silently ignored.
            return
        for event_writer in writers.values():
            event_writer.flush()
            event_writer.close()
        # Resetting both attributes marks the instance as closed.
        self.file_writer = None
        self.all_writers = None

    def flush(self):
        """Flush every writer in ``all_writers``; does nothing once closed."""
        writers = self.all_writers
        if writers is not None:
            for event_writer in writers.values():
                event_writer.flush()
        # writers is None after close(): there is nothing left to flush.

    def __enter__(self):
        """Enter a ``with`` block; the writer itself is the bound value."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Leave a ``with`` block by calling ``close()``.

        The exception arguments are ignored and nothing is returned, so an
        exception raised inside the block is not suppressed.
        """
        self.close()


================================================
FILE: tensorboardX/tensorboardX/x2num.py
================================================
# DO NOT alter/destruct/free input object !
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import numpy as np
import six


def check_nan(array):
    """Log a warning if *array* contains NaN or Inf, then return it unchanged.

    The check is element-wise. Probing the sum of the array instead would
    report a false positive whenever finite values add up past the dtype's
    range (for example ``[1e308, 1e308]`` sums to ``inf``).

    Args:
        array: numpy array (or array-like) to inspect. It is never modified.

    Returns:
        The very same *array* object that was passed in.
    """
    try:
        has_non_finite = not np.all(np.isfinite(array))
    except TypeError:
        # np.isfinite is not defined for every dtype (e.g. object arrays).
        # Fall back to the sum-based probe so such inputs behave as before.
        total = np.sum(array)
        has_non_finite = bool(np.isnan(total) or np.isinf(total))
    if has_non_finite:
        logging.warning('NaN or Inf found in input tensor.')
    return array


def make_np(x):
    """Convert *x* to a numpy array and run it through ``check_nan``.

    Inputs are tried in this order: Python list, numpy array, string (taken
    as the name of a Caffe2 blob to fetch), scalar, then objects whose type
    name contains 'torch', 'chainer' or 'mxnet'.

    Raises:
        NotImplementedError: if *x* matches none of the cases above.
    """
    if isinstance(x, list):
        converted = np.array(x)
    elif isinstance(x, np.ndarray):
        converted = x
    elif isinstance(x, six.string_types):
        # Caffe2 will pass name of blob(s) to fetch
        converted = prepare_caffe2(x)
    elif np.isscalar(x):
        converted = np.array([x])
    else:
        # Frameworks are recognised by type name so none of them has to be
        # imported just to perform the check.
        type_repr = str(type(x))
        if 'torch' in type_repr:
            converted = prepare_pytorch(x)
        elif 'chainer' in type_repr:
            converted = prepare_chainer(x)
        elif 'mxnet' in type_repr:
            converted = prepare_mxnet(x)
        else:
            raise NotImplementedError(
                'Got {}, but expected numpy array or torch tensor.'.format(type(x)))
    return check_nan(converted)


def prepare_pytorch(x):
    """Return the contents of the torch object *x* as a numpy array.

    If *x* is a ``torch.autograd.Variable`` its ``.data`` is used; the result
    is moved to the CPU before conversion.
    """
    import torch
    tensor = x.data if isinstance(x, torch.autograd.Variable) else x
    return tensor.cpu().numpy()


def prepare_theano(x):
    import theano
    pass


def prepare_caffe2(x):
    """Fetch the blob named *x* from the Caffe2 workspace and return it."""
    from caffe2.python import workspace
    return workspace.FetchBlob(x)


def prepare_mxnet(x):
    """Return the result of calling ``asnumpy()`` on the MXNet object *x*."""
    return x.asnumpy()


def prepare_chainer(x):
    """Return ``x.data`` after passing it through ``chainer.cuda.to_cpu``."""
    import chainer
    return chainer.cuda.to_cpu(x.data)


================================================
FILE: tensorboardX/tensorboardX.patch
================================================
diff --git a/tensorboardX/summary.py b/tensorboardX/summary.py
index 27d99ea..f5bf234 100644
--- a/tensorboardX/summary.py
+++ b/tensorboardX/summary.py
@@ -373,36 +373,24 @@ def make_video(tensor, fps):
 
 def audio(tag, tensor, sample_rate=44100):
     tensor = make_np(tensor)
-    tensor = tensor.squeeze()
     if abs(tensor).max() > 1:
         print('warning: audio amplitude out of range, auto clipped.')
         tensor = tensor.clip(-1, 1)
-    assert(tensor.ndim == 1), 'input tensor should be 1 dimensional.'
-
-    tensor_list = [int(32767.0 * x) for x in tensor]
+    assert(tensor.ndim == 2), 'input tensor should be 2 dimensional.'
+    length_frames, num_channels = tensor.shape
+    assert num_channels == 1 or num_channels == 2, f'Expected 1/2 channels, got {num_channels}'
+    import soundfile
     import io
-    import wave
-    import struct
-    fio = io.BytesIO()
-    Wave_write = wave.open(fio, 'wb')
-    Wave_write.setnchannels(1)
-    Wave_write.setsampwidth(2)
-    Wave_write.setframerate(sample_rate)
-    tensor_enc = b''
-    tensor_enc += struct.pack("<" + "h" * len(tensor_list), *tensor_list)
-
-    Wave_write.writeframes(tensor_enc)
-    Wave_write.close()
-    audio_string = fio.getvalue()
-    fio.close()
+    with io.BytesIO() as fio:
+        soundfile.write(fio, tensor, samplerate=sample_rate, format='wav')
+        audio_string = fio.getvalue()
     audio = Summary.Audio(sample_rate=sample_rate,
-                          num_channels=1,
-                          length_frames=len(tensor_list),
+                          num_channels=num_channels,
+                          length_frames=length_frames,
                           encoded_audio_string=audio_string,
                           content_type='audio/wav')
     return Summary(value=[Summary.Value(tag=tag, audio=audio)])
 
-
 def custom_scalars(layout):
     categoriesnames = layout.keys()
     categories = []
diff --git a/tensorboardX/writer.py b/tensorboardX/writer.py
index 06337a7..58d57a1 100644
--- a/tensorboardX/writer.py
+++ b/tensorboardX/writer.py
@@ -716,7 +716,7 @@ class SummaryWriter(object):
             sample_rate (int): sample rate in Hz
             walltime (float): Optional override default walltime (time.time()) of event
         Shape:
-            snd_tensor: :math:`(1, L)`. The values should lie between [-1, 1].
+            snd_tensor: :math:`(L, c)`. The values should lie between [-1, 1].
         """
         if self._check_caffe2_blob(snd_tensor):
             snd_tensor = workspace.FetchBlob(snd_tensor)


================================================
FILE: tensorboardX/tests/__init__.py
================================================
import torch
import tensorboardX.proto


================================================
FILE: tensorboardX/tests/event_file_writer_test.py
================================================
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# """Tests for EventFileWriter and _AsyncWriter"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import glob
import os
from tensorboardX.event_file_writer import EventFileWriter
from tensorboardX.event_file_writer import EventFileWriter as _AsyncWriter


from tensorboardX.proto import event_pb2
from tensorboardX.proto.summary_pb2 import Summary

from tensorboard.compat.tensorflow_stub.pywrap_tensorflow import PyRecordReader_New
import unittest


class EventFileWriterTest(unittest.TestCase):
  """Tests that write event files with ``EventFileWriter`` and read them back."""

  def get_temp_dir(self):
    """Create a fresh temporary directory that is deleted when the test ends.

    Returns:
      Path of the newly created directory.
    """
    import shutil
    import tempfile
    temp_dir = tempfile.mkdtemp()
    # mkdtemp() never removes what it creates; without this every test run
    # would leave a directory behind. ignore_errors keeps cleanup best-effort
    # (e.g. if a reader still holds a file open on some platforms).
    self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
    return temp_dir

  def test_event_file_writer_roundtrip(self):
    """An event added to the writer can be read back byte-for-byte."""
    _TAGNAME = 'dummy'
    _DUMMY_VALUE = 42
    logdir = self.get_temp_dir()
    w = EventFileWriter(logdir)
    summary = Summary(value=[Summary.Value(tag=_TAGNAME, simple_value=_DUMMY_VALUE)])
    fakeevent = event_pb2.Event(summary=summary)
    w.add_event(fakeevent)
    w.close()
    event_files = sorted(glob.glob(os.path.join(logdir, '*')))
    self.assertEqual(len(event_files), 1)
    r = PyRecordReader_New(event_files[0])
    r.GetNext()  # meta data, so skip
    r.GetNext()
    self.assertEqual(fakeevent.SerializeToString(), r.record())

  def test_setting_filename_suffix_works(self):
    """The ``filename_suffix`` argument ends up at the end of the file name."""
    logdir = self.get_temp_dir()

    w = EventFileWriter(logdir, filename_suffix='.event_horizon')
    w.close()
    event_files = sorted(glob.glob(os.path.join(logdir, '*')))
    self.assertEqual(event_files[0].split('.')[-1], 'event_horizon')

  def test_async_writer_without_write(self):
    """A writer closed without any event still emits the file-version record."""
    logdir = self.get_temp_dir()
    w = EventFileWriter(logdir)
    w.close()
    event_files = sorted(glob.glob(os.path.join(logdir, '*')))
    r = PyRecordReader_New(event_files[0])
    r.GetNext()
    s = event_pb2.Event.FromString(r.record())
    self.assertEqual(s.file_version, "brain.Event:2")


# Skip these tests, because tensorboard's implementation of the file writer
# writes raw data while the one in tensorboardX writes event protobufs.
class AsyncWriterTest(): #unittest.TestCase):
  """Disabled test suite for the writer aliased as ``_AsyncWriter``.

  The ``unittest.TestCase`` base is commented out in the class header, so the
  class does not inherit from it. The bodies are kept for reference only: they
  rely on ``self.assertEqual`` / ``self.assertRaises``, which this class does
  not define.
  NOTE(review): most tests call ``w.write(...)`` with raw bytes; whether the
  aliased ``EventFileWriter`` offers that method is not visible here - confirm
  before re-enabling.
  """

  def get_temp_dir(self):
    """Return the path of a newly created temporary directory (never removed here)."""
    import tempfile
    return tempfile.mkdtemp()

  def test_async_writer_write_once(self):
    """Write one payload and expect the file to contain exactly those bytes."""
    foldername = os.path.join(self.get_temp_dir(), "async_writer_write_once")
    w = _AsyncWriter(foldername)
    # Reads the target file name from the writer's private attributes.
    filename = w._ev_writer._file_name
    bytes_to_write = b"hello world"
    w.add_event(bytes_to_write)
    w.close()
    with open(filename, 'rb') as f:
      self.assertEqual(f.read(), bytes_to_write)

  def test_async_writer_write_queue_full(self):
    """Write the payload 100 times and expect all copies in the file, in order."""
    filename = os.path.join(self.get_temp_dir(), "async_writer_write_queue_full")
    w = _AsyncWriter(filename)
    bytes_to_write = b"hello world"
    repeat = 100
    for i in range(repeat):
      w.write(bytes_to_write)
    w.close()
    with open(filename, 'rb') as f:
      self.assertEqual(f.read(), bytes_to_write * repeat)

  def test_async_writer_write_one_slot_queue(self):
    """Same as the queue-full test but with ``max_queue_size=1`` and 10 writes."""
    filename = os.path.join(self.get_temp_dir(), "async_writer_write_one_slot_queue")
    w = _AsyncWriter(filename, max_queue_size=1)
    bytes_to_write = b"hello world"
    repeat = 10  # faster
    for i in range(repeat):
      w.write(bytes_to_write)
    w.close()
    with open(filename, 'rb') as f:
      self.assertEqual(f.read(), bytes_to_write * repeat)

  def test_async_writer_close_triggers_flush(self):
    """After ``close()`` the written bytes are expected to be in the file."""
    filename = os.path.join(self.get_temp_dir(), "async_writer_close_triggers_flush")
    w = _AsyncWriter(filename)
    bytes_to_write = b"x" * 64
    w.write(bytes_to_write)
    w.close()
    with open(filename, 'rb') as f:
      self.assertEqual(f.read(), bytes_to_write)

  def test_write_after_async_writer_closed(self):
    """Writing after ``close()`` is expected to raise ``IOError`` and add nothing."""
    filename = os.path.join(self.get_temp_dir(), "write_after_async_writer_closed")
    w = _AsyncWriter(filename)
    bytes_to_write = b"x" * 64
    w.write(bytes_to_write)
    w.close()

    with self.assertRaises(IOError):
      w.write(bytes_to_write)
    # nothing is written to the file after close
    with open(filename, 'rb') as f:
      self.assertEqual(f.read(), bytes_to_write)


# When this file is executed as a script, hand control to unittest's runner.
if __name__ == '__main__':
  unittest.main()


================================================
FILE: tensorboardX/tests/expect/caffe_mnist.expect
================================================
node {
  name: "conv1/XavierFill"
  op: "XavierFill"
  device: "/gpu:0"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 20
          }
          dim {
            size: 1
          }
          dim {
            size: 5
          }
          dim {
            size: 5
          }
        }
      }
    }
  }
}
node {
  name: "conv1/ConstantFill"
  op: "ConstantFill"
  device: "/gpu:0"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 20
          }
        }
      }
    }
  }
}
node {
  name: "conv1/XavierFill_1"
  op: "XavierFill"
  device: "/gpu:0"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 100
          }
          dim {
            size: 20
          }
          dim {
            size: 5
          }
          dim {
            size: 5
          }
        }
      }
    }
  }
}
node {
  name: "conv1/ConstantFill_1"
  op: "ConstantFill"
  device: "/gpu:0"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 100
          }
        }
      }
    }
  }
}
node {
  name: "classifier/XavierFill"
  op: "XavierFill"
  device: "/gpu:0"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 500
          }
          dim {
            size: 1600
          }
        }
      }
    }
  }
}
node {
  name: "classifier/ConstantFill"
  op: "ConstantFill"
  device: "/gpu:0"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 500
          }
        }
      }
    }
  }
}
node {
  name: "classifier/XavierFill_1"
  op: "XavierFill"
  device: "/gpu:0"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 10
          }
          dim {
            size: 500
          }
        }
      }
    }
  }
}
node {
  name: "classifier/ConstantFill_1"
  op: "ConstantFill"
  device: "/gpu:0"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 10
          }
        }
      }
    }
  }
}
node {
  name: "ImageInput"
  op: "ImageInput"
  input: "db"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "is_test"
    value {
      i: 0
    }
  }
  attr {
    key: "use_cudnn"
    value {
      i: 1
    }
  }
}
node {
  name: "NHWC2NCHW"
  op: "NHWC2NCHW"
  input: "data_nhwc"
  device: "/gpu:0"
}
node {
  name: "conv1/Conv"
  op: "Conv"
  input: "data"
  input: "conv1/conv1_w"
  input: "conv1/conv1_b"
  device: "/gpu:0"
  attr {
    key: "exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "kernel"
    value {
      i: 5
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
}
node {
  name: "conv1/MaxPool"
  op: "MaxPool"
  input: "conv1/conv1"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "kernel"
    value {
      i: 2
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "stride"
    value {
      i: 2
    }
  }
}
node {
  name: "conv1/Conv_1"
  op: "Conv"
  input: "conv1/pool1"
  input: "conv1/conv2_w"
  input: "conv1/conv2_b"
  device: "/gpu:0"
  attr {
    key: "exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "kernel"
    value {
      i: 5
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
}
node {
  name: "conv1/MaxPool_1"
  op: "MaxPool"
  input: "conv1/conv2"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "kernel"
    value {
      i: 2
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "stride"
    value {
      i: 2
    }
  }
}
node {
  name: "classifier/FC"
  op: "FC"
  input: "conv1/pool2"
  input: "classifier/fc3_w"
  input: "classifier/fc3_b"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "use_cudnn"
    value {
      i: 1
    }
  }
}
node {
  name: "classifier/Relu"
  op: "Relu"
  input: "classifier/fc3"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
}
node {
  name: "classifier/FC_1"
  op: "FC"
  input: "classifier/fc3_1"
  input: "classifier/pred_w"
  input: "classifier/pred_b"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "use_cudnn"
    value {
      i: 1
    }
  }
}
node {
  name: "classifier/Softmax"
  op: "Softmax"
  input: "classifier/pred"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
}
node {
  name: "classifier/LabelCrossEntropy"
  op: "LabelCrossEntropy"
  input: "classifier/softmax"
  input: "label"
  device: "/gpu:0"
}
node {
  name: "classifier/AveragedLoss"
  op: "AveragedLoss"
  input: "classifier/xent"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/ConstantFill"
  op: "ConstantFill"
  input: "classifier/loss"
  device: "/gpu:0"
  attr {
    key: "value"
    value {
      f: 1.0
    }
  }
}
node {
  name: "GRADIENTS/classifier/AveragedLossGradient"
  op: "AveragedLossGradient"
  input: "classifier/xent"
  input: "GRADIENTS/classifier/loss_autogen_grad"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/LabelCrossEntropyGradient"
  op: "LabelCrossEntropyGradient"
  input: "classifier/softmax"
  input: "label"
  input: "GRADIENTS/classifier/xent_grad"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/SoftmaxGradient"
  op: "SoftmaxGradient"
  input: "classifier/softmax"
  input: "GRADIENTS/classifier/softmax_grad"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
}
node {
  name: "GRADIENTS/classifier/FCGradient"
  op: "FCGradient"
  input: "classifier/fc3_1"
  input: "classifier/pred_w"
  input: "GRADIENTS/classifier/pred_grad"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "use_cudnn"
    value {
      i: 1
    }
  }
}
node {
  name: "GRADIENTS/classifier/ReluGradient"
  op: "ReluGradient"
  input: "classifier/fc3_1"
  input: "GRADIENTS/classifier/fc3_grad"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
}
node {
  name: "GRADIENTS/c/FCGradient"
  op: "FCGradient"
  input: "conv1/pool2"
  input: "classifier/fc3_w"
  input: "GRADIENTS/classifier/fc3_grad_1"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "use_cudnn"
    value {
      i: 1
    }
  }
}
node {
  name: "GRADIENTS/conv1/MaxPoolGradient"
  op: "MaxPoolGradient"
  input: "conv1/conv2"
  input: "conv1/pool2"
  input: "GRADIENTS/conv1/pool2_grad"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "kernel"
    value {
      i: 2
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "stride"
    value {
      i: 2
    }
  }
}
node {
  name: "GRADIENTS/conv1/ConvGradient"
  op: "ConvGradient"
  input: "conv1/pool1"
  input: "conv1/conv2_w"
  input: "GRADIENTS/conv1/conv2_grad"
  device: "/gpu:0"
  attr {
    key: "exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "kernel"
    value {
      i: 5
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
}
node {
  name: "GRADIENTS/conv1/MaxPoolGradient_1"
  op: "MaxPoolGradient"
  input: "conv1/conv1"
  input: "conv1/pool1"
  input: "GRADIENTS/conv1/pool1_grad"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "kernel"
    value {
      i: 2
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "stride"
    value {
      i: 2
    }
  }
}
node {
  name: "GRADIENTS/ConvGradient"
  op: "ConvGradient"
  input: "data"
  input: "conv1/conv1_w"
  input: "GRADIENTS/conv1/conv1_grad"
  device: "/gpu:0"
  attr {
    key: "exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "kernel"
    value {
      i: 5
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
}
node {
  name: "GRADIENTS/NCHW2NHWC"
  op: "NCHW2NHWC"
  input: "GRADIENTS/data_grad"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/fc3_grad_1"
  op: "Blob"
  input: "GRADIENTS/classifier/ReluGradient:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/xent_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/AveragedLossGradient:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/pred_w_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/FCGradient:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/data_nhwc_grad"
  op: "Blob"
  input: "GRADIENTS/NCHW2NHWC:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/fc3_w_grad"
  op: "Blob"
  input: "GRADIENTS/c/FCGradient:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/conv1/conv1_grad"
  op: "Blob"
  input: "GRADIENTS/conv1/MaxPoolGradient_1:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/conv1/conv1_b_grad"
  op: "Blob"
  input: "GRADIENTS/ConvGradient:1"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/conv1/conv2_w_grad"
  op: "Blob"
  input: "GRADIENTS/conv1/ConvGradient:0"
  device: "/gpu:0"
}
node {
  name: "classifier/pred"
  op: "Blob"
  input: "classifier/FC_1:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/conv1/pool2_grad"
  op: "Blob"
  input: "GRADIENTS/c/FCGradient:2"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/conv1/conv1_w_grad"
  op: "Blob"
  input: "GRADIENTS/ConvGradient:0"
  device: "/gpu:0"
}
node {
  name: "data"
  op: "Blob"
  input: "NHWC2NCHW:0"
  device: "/gpu:0"
}
node {
  name: "classifier/xent"
  op: "Blob"
  input: "classifier/LabelCrossEntropy:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/conv1/pool1_grad"
  op: "Blob"
  input: "GRADIENTS/conv1/ConvGradient:2"
  device: "/gpu:0"
}
node {
  name: "db"
  op: "Placeholder"
}
node {
  name: "classifier/fc3_b"
  op: "Blob"
  input: "classifier/ConstantFill:0"
  device: "/gpu:0"
}
node {
  name: "classifier/pred_b"
  op: "Blob"
  input: "classifier/ConstantFill_1:0"
  device: "/gpu:0"
}
node {
  name: "classifier/softmax"
  op: "Blob"
  input: "classifier/Softmax:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/data_grad"
  op: "Blob"
  input: "GRADIENTS/ConvGradient:2"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/pred_b_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/FCGradient:1"
  device: "/gpu:0"
}
node {
  name: "label"
  op: "Blob"
  input: "ImageInput:1"
  device: "/gpu:0"
}
node {
  name: "conv1/pool1"
  op: "Blob"
  input: "conv1/MaxPool:0"
  device: "/gpu:0"
}
node {
  name: "data_nhwc"
  op: "Blob"
  input: "ImageInput:0"
  device: "/gpu:0"
}
node {
  name: "conv1/conv2"
  op: "Blob"
  input: "conv1/Conv_1:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/conv1/conv2_grad"
  op: "Blob"
  input: "GRADIENTS/conv1/MaxPoolGradient:0"
  device: "/gpu:0"
}
node {
  name: "conv1/conv2_b"
  op: "Blob"
  input: "conv1/ConstantFill_1:0"
  device: "/gpu:0"
}
node {
  name: "conv1/conv1_b"
  op: "Blob"
  input: "conv1/ConstantFill:0"
  device: "/gpu:0"
}
node {
  name: "classifier/fc3_w"
  op: "Blob"
  input: "classifier/XavierFill:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/fc3_b_grad"
  op: "Blob"
  input: "GRADIENTS/c/FCGradient:1"
  device: "/gpu:0"
}
node {
  name: "classifier/pred_w"
  op: "Blob"
  input: "classifier/XavierFill_1:0"
  device: "/gpu:0"
}
node {
  name: "conv1/pool2"
  op: "Blob"
  input: "conv1/MaxPool_1:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/conv1/conv2_b_grad"
  op: "Blob"
  input: "GRADIENTS/conv1/ConvGradient:1"
  device: "/gpu:0"
}
node {
  name: "classifier/fc3_1"
  op: "Blob"
  input: "classifier/Relu:0"
  device: "/gpu:0"
}
node {
  name: "classifier/loss"
  op: "Blob"
  input: "classifier/AveragedLoss:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/fc3_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/FCGradient:2"
  device: "/gpu:0"
}
node {
  name: "conv1/conv1_w"
  op: "Blob"
  input: "conv1/XavierFill:0"
  device: "/gpu:0"
}
node {
  name: "conv1/conv1"
  op: "Blob"
  input: "conv1/Conv:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/loss_autogen_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/ConstantFill:0"
  device: "/gpu:0"
}
node {
  name: "classifier/fc3"
  op: "Blob"
  input: "classifier/FC:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/pred_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/SoftmaxGradient:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/softmax_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/LabelCrossEntropyGradient:0"
  device: "/gpu:0"
}
node {
  name: "conv1/conv2_w"
  op: "Blob"
  input: "conv1/XavierFill_1:0"
  device: "/gpu:0"
}

================================================
FILE: tensorboardX/tests/expect/caffe_overfeat.expect
================================================
node {
  name: "conv1/XavierFill"
  op: "XavierFill"
  device: "/gpu:0"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 96
          }
          dim {
            size: 3
          }
          dim {
            size: 11
          }
          dim {
            size: 11
          }
        }
      }
    }
  }
}
node {
  name: "conv1/ConstantFill"
  op: "ConstantFill"
  device: "/gpu:0"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 96
          }
        }
      }
    }
  }
}
node {
  name: "classifier/XavierFill"
  op: "XavierFill"
  device: "/gpu:0"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 1000
          }
          dim {
            size: 4096
          }
        }
      }
    }
  }
}
node {
  name: "classifier/ConstantFill"
  op: "ConstantFill"
  device: "/gpu:0"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 1000
          }
        }
      }
    }
  }
}
node {
  name: "ImageInput"
  op: "ImageInput"
  input: "db"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "is_test"
    value {
      i: 0
    }
  }
  attr {
    key: "use_cudnn"
    value {
      i: 1
    }
  }
}
node {
  name: "NHWC2NCHW"
  op: "NHWC2NCHW"
  input: "data_nhwc"
  device: "/gpu:0"
}
node {
  name: "conv1/Conv"
  op: "Conv"
  input: "data"
  input: "conv1/conv1_w"
  input: "conv1/conv1_b"
  device: "/gpu:0"
  attr {
    key: "exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "kernel"
    value {
      i: 11
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "stride"
    value {
      i: 4
    }
  }
}
node {
  name: "conv1/Relu"
  op: "Relu"
  input: "conv1/conv1"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
}
node {
  name: "conv1/MaxPool"
  op: "MaxPool"
  input: "conv1/conv1_1"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "kernel"
    value {
      i: 2
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "stride"
    value {
      i: 2
    }
  }
}
node {
  name: "classifier/FC"
  op: "FC"
  input: "conv1/pool1"
  input: "classifier/fc_w"
  input: "classifier/fc_b"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "use_cudnn"
    value {
      i: 1
    }
  }
}
node {
  name: "classifier/Softmax"
  op: "Softmax"
  input: "classifier/fc"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
}
node {
  name: "classifier/LabelCrossEntropy"
  op: "LabelCrossEntropy"
  input: "classifier/pred"
  input: "label"
  device: "/gpu:0"
}
node {
  name: "classifier/AveragedLoss"
  op: "AveragedLoss"
  input: "classifier/xent"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/ConstantFill"
  op: "ConstantFill"
  input: "classifier/loss"
  device: "/gpu:0"
  attr {
    key: "value"
    value {
      f: 1.0
    }
  }
}
node {
  name: "GRADIENTS/classifier/AveragedLossGradient"
  op: "AveragedLossGradient"
  input: "classifier/xent"
  input: "GRADIENTS/classifier/loss_autogen_grad"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/LabelCrossEntropyGradient"
  op: "LabelCrossEntropyGradient"
  input: "classifier/pred"
  input: "label"
  input: "GRADIENTS/classifier/xent_grad"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/SoftmaxGradient"
  op: "SoftmaxGradient"
  input: "classifier/pred"
  input: "GRADIENTS/classifier/pred_grad"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
}
node {
  name: "GRADIENTS/c/FCGradient"
  op: "FCGradient"
  input: "conv1/pool1"
  input: "classifier/fc_w"
  input: "GRADIENTS/classifier/fc_grad"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "use_cudnn"
    value {
      i: 1
    }
  }
}
node {
  name: "GRADIENTS/conv1/MaxPoolGradient"
  op: "MaxPoolGradient"
  input: "conv1/conv1_1"
  input: "conv1/pool1"
  input: "GRADIENTS/conv1/pool1_grad"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "kernel"
    value {
      i: 2
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "stride"
    value {
      i: 2
    }
  }
}
node {
  name: "GRADIENTS/conv1/ReluGradient"
  op: "ReluGradient"
  input: "conv1/conv1_1"
  input: "GRADIENTS/conv1/conv1_grad"
  device: "/gpu:0"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
}
node {
  name: "GRADIENTS/ConvGradient"
  op: "ConvGradient"
  input: "data"
  input: "conv1/conv1_w"
  input: "GRADIENTS/conv1/conv1_grad_1"
  device: "/gpu:0"
  attr {
    key: "exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "kernel"
    value {
      i: 11
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "stride"
    value {
      i: 4
    }
  }
}
node {
  name: "GRADIENTS/NCHW2NHWC"
  op: "NCHW2NHWC"
  input: "GRADIENTS/data_grad"
  device: "/gpu:0"
}
node {
  name: "conv1/conv1_w"
  op: "Blob"
  input: "conv1/XavierFill:0"
  device: "/gpu:0"
}
node {
  name: "classifier/fc"
  op: "Blob"
  input: "classifier/FC:0"
  device: "/gpu:0"
}
node {
  name: "data_nhwc"
  op: "Blob"
  input: "ImageInput:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/conv1/conv1_b_grad"
  op: "Blob"
  input: "GRADIENTS/ConvGradient:1"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/pred_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/LabelCrossEntropyGradient:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/fc_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/SoftmaxGradient:0"
  device: "/gpu:0"
}
node {
  name: "conv1/conv1_b"
  op: "Blob"
  input: "conv1/ConstantFill:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/fc_b_grad"
  op: "Blob"
  input: "GRADIENTS/c/FCGradient:1"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/fc_w_grad"
  op: "Blob"
  input: "GRADIENTS/c/FCGradient:0"
  device: "/gpu:0"
}
node {
  name: "label"
  op: "Blob"
  input: "ImageInput:1"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/data_grad"
  op: "Blob"
  input: "GRADIENTS/ConvGradient:2"
  device: "/gpu:0"
}
node {
  name: "classifier/loss"
  op: "Blob"
  input: "classifier/AveragedLoss:0"
  device: "/gpu:0"
}
node {
  name: "conv1/conv1"
  op: "Blob"
  input: "conv1/Conv:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/conv1/conv1_grad"
  op: "Blob"
  input: "GRADIENTS/conv1/MaxPoolGradient:0"
  device: "/gpu:0"
}
node {
  name: "classifier/xent"
  op: "Blob"
  input: "classifier/LabelCrossEntropy:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/loss_autogen_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/ConstantFill:0"
  device: "/gpu:0"
}
node {
  name: "classifier/fc_w"
  op: "Blob"
  input: "classifier/XavierFill:0"
  device: "/gpu:0"
}
node {
  name: "conv1/conv1_1"
  op: "Blob"
  input: "conv1/Relu:0"
  device: "/gpu:0"
}
node {
  name: "db"
  op: "Placeholder"
}
node {
  name: "classifier/pred"
  op: "Blob"
  input: "classifier/Softmax:0"
  device: "/gpu:0"
}
node {
  name: "classifier/fc_b"
  op: "Blob"
  input: "classifier/ConstantFill:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/classifier/xent_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/AveragedLossGradient:0"
  device: "/gpu:0"
}
node {
  name: "data"
  op: "Blob"
  input: "NHWC2NCHW:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/conv1/conv1_w_grad"
  op: "Blob"
  input: "GRADIENTS/ConvGradient:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/conv1/conv1_grad_1"
  op: "Blob"
  input: "GRADIENTS/conv1/ReluGradient:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/data_nhwc_grad"
  op: "Blob"
  input: "GRADIENTS/NCHW2NHWC:0"
  device: "/gpu:0"
}
node {
  name: "GRADIENTS/conv1/pool1_grad"
  op: "Blob"
  input: "GRADIENTS/c/FCGradient:2"
  device: "/gpu:0"
}
node {
  name: "conv1/pool1"
  op: "Blob"
  input: "conv1/MaxPool:0"
  device: "/gpu:0"
}
"""


================================================
FILE: tensorboardX/tests/expect/test_caffe2.test_simple_cnnmodel.expect
================================================
node {
  name: "conv1/XavierFill"
  op: "XavierFill"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 96
          }
          dim {
            size: 3
          }
          dim {
            size: 11
          }
          dim {
            size: 11
          }
        }
      }
    }
  }
}
node {
  name: "conv1/ConstantFill"
  op: "ConstantFill"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 96
          }
        }
      }
    }
  }
}
node {
  name: "classifier/XavierFill"
  op: "XavierFill"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 1000
          }
          dim {
            size: 4096
          }
        }
      }
    }
  }
}
node {
  name: "classifier/ConstantFill"
  op: "ConstantFill"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 1000
          }
        }
      }
    }
  }
}
node {
  name: "conv1/Conv"
  op: "Conv"
  input: "conv1/data"
  input: "conv1/conv1_w"
  input: "conv1/conv1_b"
  attr {
    key: "exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "kernel"
    value {
      i: 11
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "stride"
    value {
      i: 4
    }
  }
}
node {
  name: "conv1/Relu"
  op: "Relu"
  input: "conv1/conv1"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
}
node {
  name: "conv1/MaxPool"
  op: "MaxPool"
  input: "conv1/conv1_1"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "kernel"
    value {
      i: 2
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "stride"
    value {
      i: 2
    }
  }
}
node {
  name: "classifier/FC"
  op: "FC"
  input: "conv1/pool1"
  input: "classifier/fc_w"
  input: "classifier/fc_b"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "use_cudnn"
    value {
      i: 1
    }
  }
}
node {
  name: "classifier/Softmax"
  op: "Softmax"
  input: "classifier/fc"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
}
node {
  name: "classifier/LabelCrossEntropy"
  op: "LabelCrossEntropy"
  input: "classifier/pred"
  input: "classifier/label"
}
node {
  name: "classifier/AveragedLoss"
  op: "AveragedLoss"
  input: "classifier/xent"
}
node {
  name: "conv1/conv1_w"
  op: "Blob"
  input: "conv1/XavierFill:0"
}
node {
  name: "conv1/conv1_b"
  op: "Blob"
  input: "conv1/ConstantFill:0"
}
node {
  name: "classifier/fc_w"
  op: "Blob"
  input: "classifier/XavierFill:0"
}
node {
  name: "classifier/fc_b"
  op: "Blob"
  input: "classifier/ConstantFill:0"
}
node {
  name: "conv1/data"
  op: "Placeholder"
}
node {
  name: "conv1/conv1_w"
  op: "Blob"
  input: "conv1/XavierFill:0"
}
node {
  name: "conv1/conv1_b"
  op: "Blob"
  input: "conv1/ConstantFill:0"
}
node {
  name: "conv1/conv1"
  op: "Blob"
  input: "conv1/Conv:0"
}
node {
  name: "conv1/conv1"
  op: "Blob"
  input: "conv1/Conv:0"
}
node {
  name: "conv1/conv1_1"
  op: "Blob"
  input: "conv1/Relu:0"
}
node {
  name: "conv1/conv1_1"
  op: "Blob"
  input: "conv1/Relu:0"
}
node {
  name: "conv1/pool1"
  op: "Blob"
  input: "conv1/MaxPool:0"
}
node {
  name: "conv1/pool1"
  op: "Blob"
  input: "conv1/MaxPool:0"
}
node {
  name: "classifier/fc_w"
  op: "Blob"
  input: "classifier/XavierFill:0"
}
node {
  name: "classifier/fc_b"
  op: "Blob"
  input: "classifier/ConstantFill:0"
}
node {
  name: "classifier/fc"
  op: "Blob"
  input: "classifier/FC:0"
}
node {
  name: "classifier/fc"
  op: "Blob"
  input: "classifier/FC:0"
}
node {
  name: "classifier/pred"
  op: "Blob"
  input: "classifier/Softmax:0"
}
node {
  name: "classifier/pred"
  op: "Blob"
  input: "classifier/Softmax:0"
}
node {
  name: "classifier/label"
  op: "Placeholder"
}
node {
  name: "classifier/xent"
  op: "Blob"
  input: "classifier/LabelCrossEntropy:0"
}
node {
  name: "classifier/xent"
  op: "Blob"
  input: "classifier/LabelCrossEntropy:0"
}
node {
  name: "classifier/loss"
  op: "Blob"
  input: "classifier/AveragedLoss:0"
}


================================================
FILE: tensorboardX/tests/expect/test_caffe2.test_simple_model.expect
================================================
node {
  name: "conv1/XavierFill"
  op: "XavierFill"
  device: "/cpu:*"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 20
          }
          dim {
            size: 1
          }
          dim {
            size: 5
          }
          dim {
            size: 5
          }
        }
      }
    }
  }
}
node {
  name: "conv1/ConstantFill"
  op: "ConstantFill"
  device: "/cpu:*"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 20
          }
        }
      }
    }
  }
}
node {
  name: "conv1/XavierFill_1"
  op: "XavierFill"
  device: "/cpu:*"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 100
          }
          dim {
            size: 20
          }
          dim {
            size: 5
          }
          dim {
            size: 5
          }
        }
      }
    }
  }
}
node {
  name: "conv1/ConstantFill_1"
  op: "ConstantFill"
  device: "/cpu:*"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 100
          }
        }
      }
    }
  }
}
node {
  name: "classifier/XavierFill"
  op: "XavierFill"
  device: "/cpu:*"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 500
          }
          dim {
            size: 1600
          }
        }
      }
    }
  }
}
node {
  name: "classifier/ConstantFill"
  op: "ConstantFill"
  device: "/cpu:*"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 500
          }
        }
      }
    }
  }
}
node {
  name: "classifier/XavierFill_1"
  op: "XavierFill"
  device: "/cpu:*"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 10
          }
          dim {
            size: 500
          }
        }
      }
    }
  }
}
node {
  name: "classifier/ConstantFill_1"
  op: "ConstantFill"
  device: "/cpu:*"
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: 10
          }
        }
      }
    }
  }
}
node {
  name: "conv1/Conv"
  op: "Conv"
  input: "conv1/data"
  input: "conv1/conv1_w"
  input: "conv1/conv1_b"
  device: "/cpu:*"
  attr {
    key: "exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "kernel"
    value {
      i: 5
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
}
node {
  name: "conv1/MaxPool"
  op: "MaxPool"
  input: "conv1/conv1"
  device: "/cpu:*"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "kernel"
    value {
      i: 2
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "stride"
    value {
      i: 2
    }
  }
}
node {
  name: "conv1/Conv_1"
  op: "Conv"
  input: "conv1/pool1"
  input: "conv1/conv2_w"
  input: "conv1/conv2_b"
  device: "/cpu:*"
  attr {
    key: "exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "kernel"
    value {
      i: 5
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
}
node {
  name: "conv1/MaxPool_1"
  op: "MaxPool"
  input: "conv1/conv2"
  device: "/cpu:*"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "kernel"
    value {
      i: 2
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "stride"
    value {
      i: 2
    }
  }
}
node {
  name: "classifier/FC"
  op: "FC"
  input: "conv1/pool2"
  input: "classifier/fc3_w"
  input: "classifier/fc3_b"
  device: "/cpu:*"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "use_cudnn"
    value {
      i: 1
    }
  }
}
node {
  name: "classifier/Relu"
  op: "Relu"
  input: "classifier/fc3"
  device: "/cpu:*"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
}
node {
  name: "classifier/FC_1"
  op: "FC"
  input: "classifier/fc3_1"
  input: "classifier/pred_w"
  input: "classifier/pred_b"
  device: "/cpu:*"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "use_cudnn"
    value {
      i: 1
    }
  }
}
node {
  name: "classifier/Softmax"
  op: "Softmax"
  input: "classifier/pred"
  device: "/cpu:*"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
}
node {
  name: "classifier/LabelCrossEntropy"
  op: "LabelCrossEntropy"
  input: "classifier/softmax"
  input: "classifier/label"
  device: "/cpu:*"
}
node {
  name: "classifier/AveragedLoss"
  op: "AveragedLoss"
  input: "classifier/xent"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/classifier/ConstantFill"
  op: "ConstantFill"
  input: "classifier/loss"
  device: "/cpu:*"
  attr {
    key: "value"
    value {
      f: 1.0
    }
  }
}
node {
  name: "GRADIENTS/classifier/AveragedLossGradient"
  op: "AveragedLossGradient"
  input: "classifier/xent"
  input: "GRADIENTS/classifier/loss_autogen_grad"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/classifier/LabelCrossEntropyGradient"
  op: "LabelCrossEntropyGradient"
  input: "classifier/softmax"
  input: "classifier/label"
  input: "GRADIENTS/classifier/xent_grad"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/classifier/SoftmaxGradient"
  op: "SoftmaxGradient"
  input: "classifier/softmax"
  input: "GRADIENTS/classifier/softmax_grad"
  device: "/cpu:*"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
}
node {
  name: "GRADIENTS/classifier/FCGradient"
  op: "FCGradient"
  input: "classifier/fc3_1"
  input: "classifier/pred_w"
  input: "GRADIENTS/classifier/pred_grad"
  device: "/cpu:*"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "use_cudnn"
    value {
      i: 1
    }
  }
}
node {
  name: "GRADIENTS/classifier/ReluGradient"
  op: "ReluGradient"
  input: "classifier/fc3_1"
  input: "GRADIENTS/classifier/fc3_grad"
  device: "/cpu:*"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
}
node {
  name: "GRADIENTS/c/FCGradient"
  op: "FCGradient"
  input: "conv1/pool2"
  input: "classifier/fc3_w"
  input: "GRADIENTS/classifier/fc3_grad_1"
  device: "/cpu:*"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "use_cudnn"
    value {
      i: 1
    }
  }
}
node {
  name: "GRADIENTS/conv1/MaxPoolGradient"
  op: "MaxPoolGradient"
  input: "conv1/conv2"
  input: "conv1/pool2"
  input: "GRADIENTS/conv1/pool2_grad"
  device: "/cpu:*"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "kernel"
    value {
      i: 2
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "stride"
    value {
      i: 2
    }
  }
}
node {
  name: "GRADIENTS/conv1/ConvGradient"
  op: "ConvGradient"
  input: "conv1/pool1"
  input: "conv1/conv2_w"
  input: "GRADIENTS/conv1/conv2_grad"
  device: "/cpu:*"
  attr {
    key: "exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "kernel"
    value {
      i: 5
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
}
node {
  name: "GRADIENTS/conv1/MaxPoolGradient_1"
  op: "MaxPoolGradient"
  input: "conv1/conv1"
  input: "conv1/pool1"
  input: "GRADIENTS/conv1/pool1_grad"
  device: "/cpu:*"
  attr {
    key: "cudnn_exhaustive_search"
    value {
      i: 0
    }
  }
  attr {
    key: "kernel"
    value {
      i: 2
    }
  }
  attr {
    key: "order"
    value {
      s: "NCHW"
    }
  }
  attr {
    key: "stride"
    value {
      i: 2
    }
  }
}
node {
  name: "conv1/conv1_w"
  op: "Blob"
  input: "conv1/XavierFill:0"
  device: "/cpu:*"
}
node {
  name: "conv1/conv1_b"
  op: "Blob"
  input: "conv1/ConstantFill:0"
  device: "/cpu:*"
}
node {
  name: "conv1/conv2_w"
  op: "Blob"
  input: "conv1/XavierFill_1:0"
  device: "/cpu:*"
}
node {
  name: "conv1/conv2_b"
  op: "Blob"
  input: "conv1/ConstantFill_1:0"
  device: "/cpu:*"
}
node {
  name: "classifier/fc3_w"
  op: "Blob"
  input: "classifier/XavierFill:0"
  device: "/cpu:*"
}
node {
  name: "classifier/fc3_b"
  op: "Blob"
  input: "classifier/ConstantFill:0"
  device: "/cpu:*"
}
node {
  name: "classifier/pred_w"
  op: "Blob"
  input: "classifier/XavierFill_1:0"
  device: "/cpu:*"
}
node {
  name: "classifier/pred_b"
  op: "Blob"
  input: "classifier/ConstantFill_1:0"
  device: "/cpu:*"
}
node {
  name: "conv1/data"
  op: "Placeholder"
}
node {
  name: "conv1/conv1_w"
  op: "Blob"
  input: "conv1/XavierFill:0"
  device: "/cpu:*"
}
node {
  name: "conv1/conv1_b"
  op: "Blob"
  input: "conv1/ConstantFill:0"
  device: "/cpu:*"
}
node {
  name: "conv1/conv1"
  op: "Blob"
  input: "conv1/Conv:0"
  device: "/cpu:*"
}
node {
  name: "conv1/conv1"
  op: "Blob"
  input: "conv1/Conv:0"
  device: "/cpu:*"
}
node {
  name: "conv1/pool1"
  op: "Blob"
  input: "conv1/MaxPool:0"
  device: "/cpu:*"
}
node {
  name: "conv1/pool1"
  op: "Blob"
  input: "conv1/MaxPool:0"
  device: "/cpu:*"
}
node {
  name: "conv1/conv2_w"
  op: "Blob"
  input: "conv1/XavierFill_1:0"
  device: "/cpu:*"
}
node {
  name: "conv1/conv2_b"
  op: "Blob"
  input: "conv1/ConstantFill_1:0"
  device: "/cpu:*"
}
node {
  name: "conv1/conv2"
  op: "Blob"
  input: "conv1/Conv_1:0"
  device: "/cpu:*"
}
node {
  name: "conv1/conv2"
  op: "Blob"
  input: "conv1/Conv_1:0"
  device: "/cpu:*"
}
node {
  name: "conv1/pool2"
  op: "Blob"
  input: "conv1/MaxPool_1:0"
  device: "/cpu:*"
}
node {
  name: "conv1/pool2"
  op: "Blob"
  input: "conv1/MaxPool_1:0"
  device: "/cpu:*"
}
node {
  name: "classifier/fc3_w"
  op: "Blob"
  input: "classifier/XavierFill:0"
  device: "/cpu:*"
}
node {
  name: "classifier/fc3_b"
  op: "Blob"
  input: "classifier/ConstantFill:0"
  device: "/cpu:*"
}
node {
  name: "classifier/fc3"
  op: "Blob"
  input: "classifier/FC:0"
  device: "/cpu:*"
}
node {
  name: "classifier/fc3"
  op: "Blob"
  input: "classifier/FC:0"
  device: "/cpu:*"
}
node {
  name: "classifier/fc3_1"
  op: "Blob"
  input: "classifier/Relu:0"
  device: "/cpu:*"
}
node {
  name: "classifier/fc3_1"
  op: "Blob"
  input: "classifier/Relu:0"
  device: "/cpu:*"
}
node {
  name: "classifier/pred_w"
  op: "Blob"
  input: "classifier/XavierFill_1:0"
  device: "/cpu:*"
}
node {
  name: "classifier/pred_b"
  op: "Blob"
  input: "classifier/ConstantFill_1:0"
  device: "/cpu:*"
}
node {
  name: "classifier/pred"
  op: "Blob"
  input: "classifier/FC_1:0"
  device: "/cpu:*"
}
node {
  name: "classifier/pred"
  op: "Blob"
  input: "classifier/FC_1:0"
  device: "/cpu:*"
}
node {
  name: "classifier/softmax"
  op: "Blob"
  input: "classifier/Softmax:0"
  device: "/cpu:*"
}
node {
  name: "classifier/softmax"
  op: "Blob"
  input: "classifier/Softmax:0"
  device: "/cpu:*"
}
node {
  name: "classifier/label"
  op: "Placeholder"
}
node {
  name: "classifier/xent"
  op: "Blob"
  input: "classifier/LabelCrossEntropy:0"
  device: "/cpu:*"
}
node {
  name: "classifier/xent"
  op: "Blob"
  input: "classifier/LabelCrossEntropy:0"
  device: "/cpu:*"
}
node {
  name: "classifier/loss"
  op: "Blob"
  input: "classifier/AveragedLoss:0"
  device: "/cpu:*"
}
node {
  name: "classifier/loss"
  op: "Blob"
  input: "classifier/AveragedLoss:0"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/classifier/loss_autogen_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/ConstantFill:0"
  device: "/cpu:*"
}
node {
  name: "classifier/xent"
  op: "Blob"
  input: "classifier/LabelCrossEntropy:0"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/classifier/loss_autogen_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/ConstantFill:0"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/classifier/xent_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/AveragedLossGradient:0"
  device: "/cpu:*"
}
node {
  name: "classifier/softmax"
  op: "Blob"
  input: "classifier/Softmax:0"
  device: "/cpu:*"
}
node {
  name: "classifier/label"
  op: "Placeholder"
}
node {
  name: "GRADIENTS/classifier/xent_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/AveragedLossGradient:0"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/classifier/softmax_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/LabelCrossEntropyGradient:0"
  device: "/cpu:*"
}
node {
  name: "classifier/softmax"
  op: "Blob"
  input: "classifier/Softmax:0"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/classifier/softmax_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/LabelCrossEntropyGradient:0"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/classifier/pred_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/SoftmaxGradient:0"
  device: "/cpu:*"
}
node {
  name: "classifier/fc3_1"
  op: "Blob"
  input: "classifier/Relu:0"
  device: "/cpu:*"
}
node {
  name: "classifier/pred_w"
  op: "Blob"
  input: "classifier/XavierFill_1:0"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/classifier/pred_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/SoftmaxGradient:0"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/classifier/pred_w_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/FCGradient:0"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/classifier/pred_b_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/FCGradient:1"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/classifier/fc3_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/FCGradient:2"
  device: "/cpu:*"
}
node {
  name: "classifier/fc3_1"
  op: "Blob"
  input: "classifier/Relu:0"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/classifier/fc3_grad"
  op: "Blob"
  input: "GRADIENTS/classifier/FCGradient:2"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/classifier/fc3_grad_1"
  op: "Blob"
  input: "GRADIENTS/classifier/ReluGradient:0"
  device: "/cpu:*"
}
node {
  name: "conv1/pool2"
  op: "Blob"
  input: "conv1/MaxPool_1:0"
  device: "/cpu:*"
}
node {
  name: "classifier/fc3_w"
  op: "Blob"
  input: "classifier/XavierFill:0"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/classifier/fc3_grad_1"
  op: "Blob"
  input: "GRADIENTS/classifier/ReluGradient:0"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/classifier/fc3_w_grad"
  op: "Blob"
  input: "GRADIENTS/c/FCGradient:0"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/classifier/fc3_b_grad"
  op: "Blob"
  input: "GRADIENTS/c/FCGradient:1"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/conv1/pool2_grad"
  op: "Blob"
  input: "GRADIENTS/c/FCGradient:2"
  device: "/cpu:*"
}
node {
  name: "conv1/conv2"
  op: "Blob"
  input: "conv1/Conv_1:0"
  device: "/cpu:*"
}
node {
  name: "conv1/pool2"
  op: "Blob"
  input: "conv1/MaxPool_1:0"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/conv1/pool2_grad"
  op: "Blob"
  input: "GRADIENTS/c/FCGradient:2"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/conv1/conv2_grad"
  op: "Blob"
  input: "GRADIENTS/conv1/MaxPoolGradient:0"
  device: "/cpu:*"
}
node {
  name: "conv1/pool1"
  op: "Blob"
  input: "conv1/MaxPool:0"
  device: "/cpu:*"
}
node {
  name: "conv1/conv2_w"
  op: "Blob"
  input: "conv1/XavierFill_1:0"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/conv1/conv2_grad"
  op: "Blob"
  input: "GRADIENTS/conv1/MaxPoolGradient:0"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/conv1/conv2_w_grad"
  op: "Blob"
  input: "GRADIENTS/conv1/ConvGradient:0"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/conv1/conv2_b_grad"
  op: "Blob"
  input: "GRADIENTS/conv1/ConvGradient:1"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/conv1/pool1_grad"
  op: "Blob"
  input: "GRADIENTS/conv1/ConvGradient:2"
  device: "/cpu:*"
}
node {
  name: "conv1/conv1"
  op: "Blob"
  input: "conv1/Conv:0"
  device: "/cpu:*"
}
node {
  name: "conv1/pool1"
  op: "Blob"
  input: "conv1/MaxPool:0"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/conv1/pool1_grad"
  op: "Blob"
  input: "GRADIENTS/conv1/ConvGradient:2"
  device: "/cpu:*"
}
node {
  name: "GRADIENTS/conv1/conv1_grad"
  op: "Blob"
  input: "GRADIENTS/conv1/MaxPoolGradient_1:0"
  device: "/cpu:*"
}


================================================
FILE: tensorboardX/tests/expect/test_pr_curve.test_pr_purve.expect
================================================
value {
  tag: "tag"
  tensor {
    dtype: DT_FLOAT
    tensor_shape {
      dim {
        size: 6
      }
      dim {
        size: 1
      }
    }
    float_val: 57.0
    float_val: 43.0
    float_val: 0.0
    float_val: 0.0
    float_val: 0.57
    float_val: 1.0
  }
  metadata {
    plugin_data {
      plugin_name: "pr_curves"
      content: "\020\001"
    }
  }
}


================================================
FILE: tensorboardX/tests/expect/test_pr_curve.test_pr_purve_raw.expect
================================================
value {
  tag: "prcurve with raw data"
  tensor {
    dtype: DT_FLOAT
    tensor_shape {
      dim {
        size: 6
      }
      dim {
        size: 5
      }
    }
    float_val: 75.0
    float_val: 64.0
    float_val: 21.0
    float_val: 5.0
    float_val: 0.0
    float_val: 150.0
    float_val: 105.0
    float_val: 18.0
    float_val: 0.0
    float_val: 0.0
    float_val: 0.0
    float_val: 45.0
    float_val: 132.0
    float_val: 150.0
    float_val: 150.0
    float_val: 0.0
    float_val: 11.0
    float_val: 54.0
    float_val: 70.0
    float_val: 75.0
    float_val: 0.3333333
    float_val: 0.3786982
    float_val: 0.5384616
    float_val: 1.0
    float_val: 0.0
    float_val: 1.0
    float_val: 0.8533334
    float_val: 0.28
    float_val: 0.0666667
    float_val: 0.0
  }
  metadata {
    plugin_data {
      plugin_name: "pr_curves"
      content: "\020\001"
    }
  }
}


================================================
FILE: tensorboardX/tests/expect/test_summary.test_audio.expect
================================================
value {
  tag: "dummy"
  audio {
    sample_rate: 44100.0
    num_channels: 1
    length_frames: 42
    encoded_audio_string: "RIFFx\000\000\000WAVEfmt \020\000\000\000\001\000\001\000D\254\000\000\210X\001\000\002\000\020\000dataT\000\000\000\000\000\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177\377\177"
    content_type: "audio/wav"
  }
}


================================================
FILE: tensorboardX/tests/expect/test_summary.test_custom_scalars.expect
================================================
value {
  tag: "custom_scalars__config__"
  tensor {
    dtype: DT_STRING
    tensor_shape {
    }
    string_val: "\022(\n\006Taiwan\022\036\n\004twse\022\026\n\ttwse/0050\n\ttwse/2330\022]\n\003USA\022$\n\003dow\032\035\n\033\n\007dow/aaa\022\007dow/bbb\032\007dow/ccc\0220\n\006nasdaq\032&\n$\n\nnasdaq/aaa\022\nnasdaq/bbb\032\nnasdaq/ccc"
  }
  metadata {
    plugin_data {
      plugin_name: "custom_scalars"
    }
  }
}


================================================
FILE: tensorboardX/tests/expect/test_summary.test_float32_image.expect
================================================
value {
  tag: "dummy"
  image {
    height: 32
    width: 32
    colorspace: 3
    encoded_image_string: "\211PNG\r\n\032\n\000\000\000\rIHDR\000\000\000 \000\000\000 \010\002\000\000\000\374\030\355\243\000\000\000DIDATx\234cd``\370OK\300\370\340\301\003\232Z\3002j\301\360\267\200QAA\201\266\026\214\346\203Q\013\006\277\005\243\371\200 \030\372\221<j\001A0\232\017\010\202\241\037\311\243\026\020\0044\317\007\000]7\325\342\027k\025c\000\000\000\000IEND\256B`\202"
  }
}


================================================
FILE: tensorboardX/tests/expect/test_summary.test_histogram_auto.expect
================================================
value {
  tag: "dummy"
  histo {
    max: 1023.0
    num: 1024.0
    sum: 523776.0
    sum_squares: 357389824.0
    bucket_limit: 0.0
    bucket_limit: 186.0
    bucket_limit: 372.0
    bucket_limit: 558.0
    bucket_limit: 744.0
    bucket_limit: 930.0
    bucket_limit: 1023.0
    bucket: 0.0
    bucket: 186.0
    bucket: 186.0
    bucket: 186.0
    bucket: 186.0
    bucket: 186.0
    bucket: 94.0
  }
}


================================================
FILE: tensorboardX/tests/expect/test_summary.test_histogram_doane.expect
================================================
value {
  tag: "dummy"
  histo {
    max: 1023.0
    num: 1024.0
    sum: 523776.0
    sum_squares: 357389824.0
    bucket_limit: 0.0
    bucket_limit: 186.0
    bucket_limit: 372.0
    bucket_limit: 558.0
    bucket_limit: 744.0
    bucket_limit: 930.0
    bucket_limit: 1023.0
    bucket: 0.0
    bucket: 186.0
    bucket: 186.0
    bucket: 186.0
    bucket: 186.0
    bucket: 186.0
    bucket: 94.0
  }
}


================================================
FILE: tensorboardX/tests/expect/test_summary.test_histogram_fd.expect
================================================
value {
  tag: "dummy"
  histo {
    max: 1023.0
    num: 1024.0
    sum: 523776.0
    sum_squares: 357389824.0
    bucket_limit: 0.0
    bucket_limit: 186.0
    bucket_limit: 372.0
    bucket_limit: 558.0
    bucket_limit: 744.0
    bucket_limit: 930.0
    bucket_limit: 1023.0
    bucket: 0.0
    bucket: 186.0
    bucket: 186.0
    bucket: 186.0
    bucket: 186.0
    bucket: 186.0
    bucket: 94.0
  }
}


================================================
FILE: tensorboardX/tests/expect/test_summary.test_hparams.expect
================================================
(value {
  tag: "_hparams_/experiment"
  metadata {
    plugin_data {
      plugin_name: "hparams"
      content: "\022\024\"\004\n\002lr*\014\n\n\022\010accuracy"
    }
  }
}
, value {
  tag: "_hparams_/session_start_info"
  metadata {
    plugin_data {
      plugin_name: "hparams"
      content: "\032\021\n\017\n\002lr\022\t\021\232\231\231\231\231\231\271?"
    }
  }
}
, value {
  tag: "_hparams_/session_end_info"
  metadata {
    plugin_data {
      plugin_name: "hparams"
      content: "\"\002\010\001"
    }
  }
}
)

================================================
FILE: tensorboardX/tests/expect/test_summary.test_image_with_3_channel_batched.expect
================================================
value {
  tag: "dummy"
  image {
    height: 8
    width: 16
    colorspace: 3
    encoded_image_string: "\211PNG\r\n\032\n\000\000\000\rIHDR\000\000\000\020\000\000\000\010\010\002\000\000\000\177\024\350\300\000\000\000+IDATx\234cd8\320\360\037\033pww\307*\316\362\343\307\217\037\330$~\374\370\361\037\233\004\013\016\365\377q\211\217H\r\000d\305y\224,\220Z\033\000\000\000\000IEND\256B`\202"
  }
}


================================================
FILE: tensorboardX/tests/expect/test_summary.test_image_with_boxes.expect
================================================
value {
  tag: "dummy"
  image {
    height: 32
    width: 32
    colorspace: 3
    encoded_image_string: "\211PNG\r\n\032\n\000\000\000\rIHDR\000\000\000 \000\000\000 \010\002\000\000\000\374\030\355\243\000\000\000sIDATx\234\355\323=\n\300 \014\005\340\027p\250\267p\324\373\332\373\345\020vn\007\367>0\204b\311\233\305/\344G\000\334\236\021Uu\005R\000\377\007\244\224\342\013||\007\2655\330BfP\215\337S`>:{_l\020\335\242\tX6-\000\032r\007G\316\000\2561\226\201\244\252/\005V\357\026\271\003\033\0149\000\232\270\003+\260\301\220\003\240y\000T\221\324V\250_v\320\000\000\000\000IEND\256B`\202"
  }
}


================================================
FILE: tensorboardX/tests/expect/test_summary.test_image_with_four_channel.expect
================================================
value {
  tag: "dummy"
  image {
    height: 8
    width: 8
    colorspace: 4
    encoded_image_string: "\211PNG\r\n\032\n\000\000\000\rIHDR\000\000\000\010\000\000\000\010\010\006\000\000\000\304\017\276\213\000\000\000\036IDATx\234cd8\320\340\360\037\017`\371\361\343\307\217\037\204\024\0204a\260+\000\000\240\302\373\327\246\231O\'\000\000\000\000IEND\256B`\202"
  }
}


================================================
FILE: tensorboardX/tests/expect/test_summary.test_image_with_four_channel_batched.expect
================================================
value {
  tag: "dummy"
  image {
    height: 8
    width: 16
    colorspace: 4
    encoded_image_string: "\211PNG\r\n\032\n\000\000\000\rIHDR\000\000\000\020\000\000\000\010\010\006\000\000\000\360v\177\227\000\000\000-IDATx\234cd8\320\340\360\037\017`ggg\307\'\317\362\343\307\217\037?\360(\370\001\305x\r\300g\003!0j\000\025\014\000\000\356b\366\370\366\336\316\301\000\000\000\000IEND\256B`\202"
  }
}


================================================
FILE: tensorboardX/tests/expect/test_summary.test_image_with_one_channel.expect
================================================
value {
  tag: "dummy"
  image {
    height: 8
    width: 8
    colorspace: 3
    encoded_image_string: "\211PNG\r\n\032\n\000\000\000\rIHDR\000\000\000\010\000\000\000\010\010\002\000\000\000Km)\334\000\000\000\031IDATx\234cd``\370\217\r0\376\370\361\003\253\004\313\240\224\000\000;\267\273\313%\020=\255\000\000\000\000IEND\256B`\202"
  }
}


================================================
FILE: tensorboardX/tests/expect/test_summary.test_image_with_one_channel_batched.expect
================================================
value {
  tag: "dummy"
  image {
    height: 8
    width: 16
    colorspace: 3
    encoded_image_string: "\211PNG\r\n\032\n\000\000\000\rIHDR\000\000\000\020\000\000\000\010\010\002\000\000\000\177\024\350\300\000\000\000(IDATx\234cd``\370\217\r\034?~\034\2538\313\217\037?~\374\370\201)\201U\020\252\001\253\304\250\006$\000\000\230\346y\315\204l;t\000\000\000\000IEND\256B`\202"
  }
}


================================================
FILE: tensorboardX/tests/expect/test_summary.test_image_without_channel.expect
================================================
value {
  tag: "dummy"
  image {
    height: 8
    width: 8
    colorspace: 3
    encoded_image_string: "\211PNG\r\n\032\n\000\000\000\rIHDR\000\000\000\010\000\000\000\010\010\002\000\000\000Km)\334\000\000\000\031IDATx\234cd``\370\217\r0\376\370\361\003\253\004\313\240\224\000\000;\267\273\313%\020=\255\000\000\000\000IEND\256B`\202"
  }
}


================================================
FILE: tensorboardX/tests/expect/test_summary.test_mesh.expect
================================================
value {
  tag: "my_mesh_1"
  tensor {
    dtype: DT_FLOAT
    tensor_shape {
      dim {
        size: 1
      }
      dim {
        size: 4
      }
      dim {
        size: 3
      }
    }
    float_val: 1.0
    float_val: 1.0
    float_val: 1.0
    float_val: -1.0
    float_val: -1.0
    float_val: 1.0
    float_val: 1.0
    float_val: -1.0
    float_val: -1.0
    float_val: -1.0
    float_val: 1.0
    float_val: -1.0
  }
  metadata {
    plugin_data {
      plugin_name: "mesh"
      content: "\022\007my_mesh\030\001*\004null2\003\001\004\003"
    }
  }
}
value {
  tag: "my_mesh_2"
  tensor {
    dtype: DT_FLOAT
    tensor_shape {
      dim {
        size: 1
      }
      dim {
        size: 4
      }
      dim {
        size: 3
      }
    }
    float_val: 0.0
    float_val: 2.0
    float_val: 3.0
    float_val: 0.0
    float_val: 3.0
    float_val: 1.0
    float_val: 0.0
    float_val: 1.0
    float_val: 2.0
    float_val: 1.0
    float_val: 3.0
    float_val: 2.0
  }
  metadata {
    plugin_data {
      plugin_name: "mesh"
      content: "\022\007my_mesh\030\002*\004null2\003\001\004\003"
    }
  }
}
value {
  tag: "my_mesh_3"
  tensor {
    dtype: DT_FLOAT
    tensor_shape {
      dim {
        size: 1
      }
      dim {
        size: 4
      }
      dim {
        size: 3
      }
    }
    float_val: 255.0
    float_val: 0.0
    float_val: 0.0
    float_val: 0.0
    float_val: 255.0
    float_val: 0.0
    float_val: 0.0
    float_val: 0.0
    float_val: 255.0
    float_val: 255.0
    float_val: 0.0
    float_val: 255.0
  }
  metadata {
    plugin_data {
      plugin_name: "mesh"
      content: "\022\007my_mesh\030\003*\004null2\003\001\004\003"
    }
  }
}


================================================
FILE: tensorboardX/tests/expect/test_summary.test_text.expect
================================================
value {
  tag: "dummy/text_summary"
  tensor {
    dtype: DT_STRING
    tensor_shape {
      dim {
        size: 1
      }
    }
    string_val: "text 123"
  }
  metadata {
    plugin_data {
      plugin_name: "text"
    }
  }
}


================================================
FILE: tensorboardX/tests/expect/test_summary.test_uint8_image.expect
================================================
value {
  tag: "dummy"
  image {
    height: 32
    width: 32
    colorspace: 3
    encoded_image_string: "\211PNG\r\n\032\n\000\000\000\rIHDR\000\000\000 \000\000\000 \010\002\000\000\000\374\030\355\243\000\000\000CIDATx\234cd```\244)PPP\240\251\371,\243\026\014\177\013\030\037<x@[\013F\363\301\250\005\203\337\202\321|@\020\014\375H\036\265\2000\030\315\007\204\300\320\217\344Q\013\010\003Z\347\003\000\211\014\037}z\035\001}\000\000\000\000IEND\256B`\202"
  }
}


================================================
FILE: tensorboardX/tests/expect/test_summary.test_video.expect
================================================
value {
  tag: "dummy"
  image {
    height: 16
    width: 16
    colorspace: 1
    encoded_image_string: "GIF89a\020\000\020\000\207\000\000\377\377\377\376\376\376\375\375\375\374\374\374\373\373\373\372\372\372\371\371\371\370\370\370\367\367\367\366\366\366\365\365\365\364\364\364\363\363\363\362\362\362\361\361\361\360\360\360\357\357\357\356\356\356\355\355\355\354\354\354\353\353\353\352\352\352\351\351\351\350\350\350\347\347\347\346\346\346\345\345\345\344\344\344\343\343\343\342\342\342\341\341\341\340\340\340\337\337\337\336\336\336\335\335\335\334\334\334\333\333\333\332\332\332\331\331\331\330\330\330\327\327\327\326\326\326\325\325\325\324\324\324\323\323\323\322\322\322\321\321\321\320\320\320\317\317\317\316\316\316\315\315\315\314\314\314\313\313\313\312\312\312\311\311\311\310\310\310\307\307\307\306\306\306\305\305\305\304\304\304\303\303\303\302\302\302\301\301\301\300\300\300\277\277\277\276\276\276\275\275\275\274\274\274\273\273\273\272\272\272\271\271\271\270\270\270\267\267\267\266\266\266\265\265\265\264\264\264\263\263\263\262\262\262\261\261\261\260\260\260\257\257\257\256\256\256\255\255\255\254\254\254\253\253\253\252\252\252\251\251\251\250\250\250\247\247\247\246\246\246\245\245\245\244\244\244\243\243\243\242\242\242\241\241\241\240\240\240\237\237\237\236\236\236\235\235\235\234\234\234\233\233\233\232\232\232\231\231\231\230\230\230\227\227\227\226\226\226\225\225\225\224\224\224\223\223\223\222\222\222\221\221\221\220\220\220\217\217\217\216\216\216\215\215\215\214\214\214\213\213\213\212\212\212\211\211\211\210\210\210\207\207\207\206\206\206\205\205\205\204\204\204\203\203\203\202\202\202\201\201\201\200\200\200\177\177\177~~~}}}|||{{{zzzyyyxxxwwwvvvuuutttsssrrrqqqpppooonnnmmmlllkkkjjjiiihhhgggfffeeedddcccbbbaaa```___^^^]]]\\\\\\[[[ZZZYYYXXXWWWVVVUUUTTTSSSRRRQQQPPPOOONNNMMMLLLKKKJJJIIIHHHGGGFFFEEEDDDCCCBBBAAA@@@???>>>===<<<;;;:::999888777666555444333222111000///...---,,,+++***)))(((\'\'\'&&&%%%$$$###\"\"\"!!!   
\037\037\037\036\036\036\035\035\035\034\034\034\033\033\033\032\032\032\031\031\031\030\030\030\027\027\027\026\026\026\025\025\025\024\024\024\023\023\023\022\022\022\021\021\021\020\020\020\017\017\017\016\016\016\r\r\r\014\014\014\013\013\013\n\n\n\t\t\t\010\010\010\007\007\007\006\006\006\005\005\005\004\004\004\003\003\003\002\002\002\001\001\001\000\000\000!\377\013NETSCAPE2.0\003\001\377\377\000!\371\004\010\031\000\000\000,\000\000\000\000\020\000\020\000\000\010\377\000\377\001\010 `\000\201\002\006~\001\013&l\030\261b\306\016 H\240`\001\203\006\016\216!K\246l\031\263f\316\036@\210 a\002\205\n\026\236A\213&m\032\265j\326.`\310\240a\003\207\016\036\256a\313\246m\033\267n\336>\200\010!b\004\211\022&\276\201\013\'n\034\271r\346N\240H\241b\005\213\026.\316\241K\247n\035\273v\356^\300\210!c\006\215\0326\336\301\213\'o\036\275z\366n\340\310\241c\007\217\036>\356\341\313\247o\037\277~\376\376\000\n$h\020\241B\206~\000\t\"d\010\221\"F\016!J\244h\021\243F\216\216 I\242d\t\223&N\036A\212$i\022\245J\226\236@\211\"e\n\225*V.a\312\244i\023\247N\236\256`\311\242e\013\227.^>\201\n%j\024\251R\246\276\200\t#f\014\2312fN\241J\245j\025\253V\256\316\240I\243f\r\2336n^\301%\212%k\026\255Z\266\336\300\211#g\016\235:vn\341\312\245k\027\257^\276\356\340\311\243g\017\037\200}\374\004\004\000!\371\004\010\031\000\000\000,\000\000\000\000\020\000\020\000\000\010\377\000\177\000\t\"d\010\221\"F\376\001\010 `\000\201\002\006\216 I\242d\t\223&N\016 H\240`\001\203\006\016\236@\211\"e\n\225*V\036@\210 
a\002\205\n\026\256`\311\242e\013\227.^.`\310\240a\003\207\016\036\276\200\t#f\014\2312f>\200\010!b\004\211\022&\316\240I\243f\r\2336nN\240H\241b\005\213\026.\336\300\211#g\016\235:v^\300\210!c\006\215\0326\356\340\311\243g\017\237>~n\340\310\241c\007\217\036>~\001\013&l\030\261b\306\376\000\n$h\020\241B\206\216!K\246l\031\263f\316\016!J\244h\021\243F\216\236A\213&m\032\265j\326\036A\212$i\022\245J\226\256a\313\246m\033\267n\336.a\312\244i\023\247N\236\276\201\013\'n\034\271r\346>\201\n%j\024\251R\246\316\241K\247n\035\273v\356N\241J\245j\025\253V\256\336\301%\213\'o\036\275z\366^\301\212%k\026\255Z\266\356\341\313\247o\037\277~\376n\341\312\245k\027/\200\275|\005\004\000!\371\004\010\031\000\000\000,\000\000\000\000\020\000\020\000\000\010\377\000\377\000\n$h\020\241B\206~\000\t\"d\010\221\"F\016!J\244h\021\243F\216\216 I\242d\t\223&N\036A\212$i\022\245J\226\236@\211\"e\n\225*V.a\312\244i\023\247N\236\256`\311\242e\013\227.^>\201\n%j\024\251R\246\276\200\t#f\014\2312fN\241J\245j\025\253V\256\316\240I\243f\r\2336n^\301\212%k\026\255Z\266\336\300\211#g\016\235:vn\341\312\245k\027\257^\276\356\340\311\243g\017\237>~\376\001\010 `\000\201\002\006~\001\013&l\030\261b\306\016 H\240`\001\203\006\016\216!K\246l\031\263f\316\036@\210 a\002\205\n\026\236A\213&m\032\265j\326.`\310\240a\003\207\016\036\256a\313\246m\033\267n\336>\200\010!b\004\211\022&\276\201\013\'n\034\271r\346N\240H\241b\005\213\026.\316\241K\247n\035\273v\356^\300%\210!c\006\215\0326\336\301\213\'o\036\275z\366n\340\310\241c\007\217\036>\356\341\313\247o\037?\200\375\374\005\004\000;"
  }
}


================================================
FILE: tensorboardX/tests/expect_reader.py
================================================
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import sys


def removeWhiteChar(string):
    """Return ``string`` with every space, tab and newline removed.

    Only those three characters are dropped; any other character
    (carriage returns included) is kept as is.
    """
    stripped = string
    for unwanted in (' ', '\t', '\n'):
        stripped = stripped.replace(unwanted, '')
    return stripped


def compare_proto(str_to_compare, function_ptr):
    """Assert that an object's text form matches its stored ``.expect`` file.

    The expected file is looked up at
    ``<dir of the test module>/expect/<module>.<test name>.expect``.  The
    module is taken from ``function_ptr.__class__.__module__`` and the test
    name is the last dotted component of ``function_ptr.id()``.  Both texts
    are passed through ``removeWhiteChar`` before being compared, so spaces,
    tabs and newlines never cause a mismatch.

    Args:
        str_to_compare: Object whose ``str()`` is checked against the file.
        function_ptr: Object exposing ``id()`` whose class lives in an
            imported module (callers in this repository pass the running
            test case, ``self``).

    Raises:
        AssertionError: If the expected file is missing or the texts differ.
    """
    module_name = function_ptr.__class__.__module__
    test_name = function_ptr.id().rsplit('.', 1)[-1]
    module_path = os.path.realpath(sys.modules[module_name].__file__)
    expect_name = module_name.rsplit('.', 1)[-1] + '.' + test_name + ".expect"
    expected_file = os.path.join(os.path.dirname(module_path), "expect", expect_name)
    print("expected_file: %s" % expected_file)
    assert os.path.exists(expected_file)
    with open(expected_file) as handle:
        expected = handle.read()
    # Normalise each side once and reuse the result for logging and comparing.
    actual_compact = removeWhiteChar(str(str_to_compare))
    expected_compact = removeWhiteChar(expected)
    print("str_to_compare:", actual_compact)
    print("expected:", expected_compact)
    assert actual_compact == expected_compact


def write_proto(str_to_compare, function_ptr):
    """Write ``str(str_to_compare)`` to the ``.expect`` file of a test.

    The target is ``<dir of the test module>/expect/<module>.<test name>.expect``
    (module from ``function_ptr.__class__.__module__``, test name from the
    last dotted component of ``function_ptr.id()``).  An existing file is
    overwritten; the ``expect`` directory itself is not created here.

    Args:
        str_to_compare: Object whose ``str()`` becomes the file content.
        function_ptr: Object exposing ``id()`` whose class lives in an
            imported module.
    """
    owner_module = function_ptr.__class__.__module__
    case_name = function_ptr.id().split('.')[-1]
    module_dir = os.path.dirname(
        os.path.realpath(sys.modules[owner_module].__file__))
    file_name = "%s.%s.expect" % (owner_module.split('.')[-1], case_name)
    expected_file = os.path.join(module_dir, "expect", file_name)
    print(expected_file)
    with open(expected_file, 'w') as out:
        out.write(str(str_to_compare))


================================================
FILE: tensorboardX/tests/record_writer_test.py
================================================
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# Tests for RecordWriter.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import six
import os
from tensorboardX.record_writer import RecordWriter
from tensorboard.compat.tensorflow_stub.pywrap_tensorflow import PyRecordReader_New
import unittest


class RecordWriterTest(unittest.TestCase):
  """Checks what ``RecordWriter`` puts on disk and that it can be read back."""

  def get_temp_dir(self):
    """Return a fresh temporary directory that is removed when the test ends."""
    import shutil
    import tempfile
    temp_dir = tempfile.mkdtemp()
    # mkdtemp() never deletes what it creates; without this cleanup every
    # test run would leave a directory behind.
    self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
    return temp_dir

  def test_expect_bytes_written(self):
    """A single 64-byte record takes 8 + 4 + 64 + 4 bytes in the file."""
    filename = os.path.join(self.get_temp_dir(), "expect_bytes_written")
    byte_len = 64
    w = RecordWriter(filename)
    bytes_to_write = b"x" * byte_len
    w.write(bytes_to_write)
    w.close()
    with open(filename, 'rb') as f:
      self.assertEqual(len(f.read()), (8 + 4 + byte_len + 4))  # uint64+uint32+data+uint32

  def test_empty_record(self):
    """An empty payload is written and read back as an empty record."""
    filename = os.path.join(self.get_temp_dir(), "empty_record")
    w = RecordWriter(filename)
    bytes_to_write = b""
    w.write(bytes_to_write)
    w.close()
    r = PyRecordReader_New(filename)
    r.GetNext()
    self.assertEqual(r.record(), bytes_to_write)

  def test_record_writer_roundtrip(self):
    """Fifty identical records written in a row are all read back intact."""
    filename = os.path.join(self.get_temp_dir(), "record_writer_roundtrip")
    w = RecordWriter(filename)
    bytes_to_write = b"hello world"
    times_to_test = 50
    for _ in range(times_to_test):
      w.write(bytes_to_write)
    w.close()

    r = PyRecordReader_New(filename)
    for _ in range(times_to_test):
      r.GetNext()
      self.assertEqual(r.record(), bytes_to_write)

  # def test_expect_bytes_written_bytes_IO(self):
  #   byte_len = 64
  #   Bytes_io = six.BytesIO()
  #   w = RecordWriter(Bytes_io)
  #   bytes_to_write = b"x" * byte_len
  #   w.write(bytes_to_write)
  #   self.assertEqual(len(Bytes_io.getvalue()), (8 + 4 + byte_len + 4))  # uint64+uint32+data+uint32


# Run the tests above when this file is executed directly as a script.
if __name__ == '__main__':
  unittest.main()


================================================
FILE: tensorboardX/tests/test_beholder.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorboardX import SummaryWriter
import numpy as np
import pytest
import unittest
import tensorboardX.beholder as beholder_lib
import tensorboardX.beholder.file_system_tools as fio
from collections import namedtuple


class BeholderTest(unittest.TestCase):
    """Smoke tests: ``Beholder.update`` has to run without raising."""

    def _make_logdir(self):
        """Return a fresh log directory that is removed when the test ends.

        A per-test temporary directory replaces the former fixed ``/tmp/...``
        paths, which were not portable, were shared between runs (so a stale
        ``config.pkl`` could leak state into the next run) and were never
        cleaned up.
        """
        import shutil
        import tempfile
        logdir = tempfile.mkdtemp(prefix='beholder-')
        self.addCleanup(shutil.rmtree, logdir, ignore_errors=True)
        return logdir

    @staticmethod
    def _random_named_tensors():
        """Return five ``(tensor, name)`` tuples holding random 128x768x3 arrays."""
        tensor_and_name = namedtuple('tensor_and_name', 'tensor, name')
        return [tensor_and_name(np.random.randn(128, 768, 3), 'test' + str(i)) for i in range(5)]

    @staticmethod
    def _set_recording(config_path, is_recording):
        """Rewrite the pickled config at ``config_path`` with a new ``is_recording``."""
        pkl = fio.read_pickle(config_path)
        pkl['is_recording'] = is_recording
        fio.write_pickle(pkl, config_path)

    def test_beholder(self):
        """A single update with trainables, arrays and a frame succeeds."""
        fake_param = self._random_named_tensors()
        arrays = self._random_named_tensors()
        beholder = beholder_lib.Beholder(logdir=self._make_logdir())
        beholder.update(
            trainable=fake_param,
            arrays=arrays,
            frame=np.random.randn(128, 128),
        )

    def test_beholder_video(self):
        """Updates succeed while recording is switched on and then off again."""
        logdir = self._make_logdir()
        config_path = logdir + '/plugins/beholder/config.pkl'
        fake_param = self._random_named_tensors()
        arrays = self._random_named_tensors()
        beholder = beholder_lib.Beholder(logdir=logdir)
        # As before, this relies on the Beholder constructor having written
        # config.pkl: the file is read back immediately.
        self._set_recording(config_path, True)
        for i in range(3):
            if i == 2:
                # Stop recording right before the last update.
                self._set_recording(config_path, False)
            beholder.update(
                trainable=fake_param,
                arrays=arrays,
                frame=np.random.randn(128, 128),
            )


================================================
FILE: tensorboardX/tests/test_caffe2.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from tensorboardX import SummaryWriter
import os
import unittest

# try:
import numpy as np
import caffe2.python.brew as brew
import caffe2.python.cnn as cnn
import caffe2.python.core as core
import caffe2.python.model_helper as model_helper
from caffe2.proto import caffe2_pb2
from caffe2.python import workspace
import tensorboardX.caffe2_graph as tb
from tensorboardX import x2num
from .expect_reader import compare_proto, write_proto


class Caffe2Test(unittest.TestCase):
    """Tests for the Caffe2 graph helpers in ``tensorboardX.caffe2_graph``."""

    def test_caffe2_np(self):
        """``x2num.make_np`` turns a fed workspace blob name into an ndarray."""
        workspace.FeedBlob("testBlob", np.random.randn(1, 3, 64, 64).astype(np.float32))
        assert isinstance(x2num.make_np('testBlob'), np.ndarray)
        # assert isinstance(x2num.make_np('testBlob', 'IMG'), np.ndarray)

    def test_that_operators_gets_non_colliding_names(self):
        """An unnamed op gets a name that does not collide with its input blob."""
        op = caffe2_pb2.OperatorDef()
        op.type = 'foo'
        op.input.extend(['foo'])
        tb._fill_missing_operator_names([op])
        self.assertEqual(op.input[0], 'foo')
        self.assertEqual(op.name, 'foo_1')

    def test_that_replacing_colons_gives_non_colliding_names(self):
        """Replacing ':' with '$' keeps blob names unique and updates shapes."""
        # .. and update shapes
        op = caffe2_pb2.OperatorDef()
        op.name = 'foo:0'
        op.input.extend(['foo:0', 'foo$0'])
        shapes = {'foo:0': [1]}
        blob_name_tracker = tb._get_blob_names([op])
        tb._replace_colons(shapes, blob_name_tracker, [op], '$')
        self.assertEqual(op.input[0], 'foo$0')
        self.assertEqual(op.input[1], 'foo$0_1')
        # Collision but blobs and op names are handled later by
        # _fill_missing_operator_names.
        self.assertEqual(op.name, 'foo$0')
        self.assertEqual(len(shapes), 1)
        self.assertEqual(shapes['foo$0'], [1])
        self.assertEqual(len(blob_name_tracker), 2)
        self.assertEqual(blob_name_tracker['foo$0'], 'foo:0')
        self.assertEqual(blob_name_tracker['foo$0_1'], 'foo$0')

    def test_that_adding_gradient_scope_does_no_fancy_renaming(self):
        """``*_grad`` names are only prefixed with ``GRADIENTS/``."""
        # because it cannot create collisions
        op = caffe2_pb2.OperatorDef()
        op.name = 'foo_grad'
        op.input.extend(['foo_grad', 'foo_grad_1'])
        shapes = {'foo_grad': [1]}
        blob_name_tracker = tb._get_blob_names([op])
        tb._add_gradient_scope(shapes, blob_name_tracker, [op])
        self.assertEqual(op.input[0], 'GRADIENTS/foo_grad')
        self.assertEqual(op.input[1], 'GRADIENTS/foo_grad_1')
        self.assertEqual(op.name, 'GRADIENTS/foo_grad')
        self.assertEqual(len(shapes), 1)
        self.assertEqual(shapes['GRADIENTS/foo_grad'], [1])
        self.assertEqual(len(blob_name_tracker), 2)
        self.assertEqual(
            blob_name_tracker['GRADIENTS/foo_grad'], 'foo_grad')
        self.assertEqual(
            blob_name_tracker['GRADIENTS/foo_grad_1'], 'foo_grad_1')

    def test_that_auto_ssa_gives_non_colliding_names(self):
        """SSA conversion renames re-written blobs without creating collisions."""
        op1 = caffe2_pb2.OperatorDef()
        op1.output.extend(['foo'])
        op2 = caffe2_pb2.OperatorDef()
        op2.input.extend(['foo'])
        op2.output.extend(['foo'])
        op2.output.extend(['foo_1'])
        shapes = {'foo': [1], 'foo_1': [2]}
        blob_name_tracker = tb._get_blob_names([op1, op2])
        tb._convert_to_ssa(shapes, blob_name_tracker, [op1, op2])
        self.assertEqual(op1.output[0], 'foo')
        self.assertEqual(op2.input[0], 'foo')
        self.assertEqual(op2.output[0], 'foo_1')
        # Unfortunate name but we do not parse original `_` for now.
        self.assertEqual(op2.output[1], 'foo_1_1')
        self.assertEqual(len(shapes), 3)
        self.assertEqual(shapes['foo'], [1])
        self.assertEqual(shapes['foo_1'], [1])
        self.assertEqual(shapes['foo_1_1'], [2])
        self.assertEqual(len(blob_name_tracker), 3)
        self.assertEqual(blob_name_tracker['foo'], 'foo')
        self.assertEqual(blob_name_tracker['foo_1'], 'foo')
        self.assertEqual(blob_name_tracker['foo_1_1'], 'foo_1')

    def test_renaming_tensorflow_style(self):
        """Caffe2 suffixes (_w, _bn, _b, ...) become TensorFlow-style scopes."""
        # Construct some dummy operators here
        # NOTE: '_w', '_bn', etc without the postfix '_' are only renamed when
        # they are at the very end of the name.
        # Test that '_w', '_w_' are renamed to '/weight', '/weight_', resp.
        op1 = caffe2_pb2.OperatorDef()
        op1.input.extend(['foo_w'])
        op1.output.extend(['foo_w_2'])
        # Test that '_bn', '_bn_' are renamed to '/batchnorm', '/batchnorm_',
        # respectively.
        op2 = caffe2_pb2.OperatorDef()
        op2.input.extend(['foo_bn'])
        op2.output.extend(['foo_bn_2'])
        # Test that '_b', '_b_', are renamed to '/bias', '/bias_', resp.
        op3 = caffe2_pb2.OperatorDef()
        op3.input.extend(['foo_b'])
        op3.output.extend(['foo_b_2'])
        # Test that '_s', '_s_', are renamed to '/scale', '/scale_', resp.
        op4 = caffe2_pb2.OperatorDef()
        op4.input.extend(['foo_s'])
        op4.output.extend(['foo_s_2'])
        # Test that '_sum', '_sum_', are renamed to '/sum', '/sum_', resp.
        op5 = caffe2_pb2.OperatorDef()
        op5.input.extend(['foo_sum'])
        op5.output.extend(['foo_sum_2'])
        # Test that '_branch', '_branch_', are renamed to '/branch', '/branch_',
        # respectively. Multiple inputs/outputs are also tested in this case.
        op6 = caffe2_pb2.OperatorDef()
        op6.input.extend(['foo_branch'])
        op6.input.extend(['test_branch_2'])
        op6.output.extend(['foo_branch_3'])
        op6.output.extend(['test_branch4'])
        shapes = {
            'foo_w': [1], 'foo_w_2': [2], 'foo_bn': [3], 'foo_bn_2': [4],
            'foo_b': [5], 'foo_b_2': [6], 'foo_s': [7], 'foo_s_2': [8],
            'foo_sum': [9], 'foo_sum_2': [10], 'foo_branch': [11],
            'test_branch_2': [12], 'foo_branch_3': [13], 'test_branch4': [14],
        }
        ops = [op1, op2, op3, op4, op5, op6]
        blob_name_tracker = tb._get_blob_names(ops)
        tb._rename_tensorflow_style(shapes, blob_name_tracker, ops)
        # Testing that keys in blob name tracker were renamed correctly
        self.assertEqual(blob_name_tracker['foo/weight'], 'foo_w')
        self.assertEqual(blob_name_tracker['foo/weight_2'], 'foo_w_2')
        self.assertEqual(blob_name_tracker['foo/batchnorm'], 'foo_bn')
        self.assertEqual(blob_name_tracker['foo/batchnorm_2'], 'foo_bn_2')
        self.assertEqual(blob_name_tracker['foo/bias'], 'foo_b')
        self.assertEqual(blob_name_tracker['foo/bias_2'], 'foo_b_2')
        self.assertEqual(blob_name_tracker['foo/scale'], 'foo_s')
        self.assertEqual(blob_name_tracker['foo/scale_2'], 'foo_s_2')
        self.assertEqual(blob_name_tracker['foo/sum'], 'foo_sum')
        self.assertEqual(blob_name_tracker['foo/sum_2'], 'foo_sum_2')
        self.assertEqual(blob_name_tracker['foo/branch'], 'foo_branch')
        self.assertEqual(blob_name_tracker['test/branch_2'], 'test_branch_2')
        self.assertEqual(blob_name_tracker['foo/branch_3'], 'foo_branch_3')
        self.assertEqual(blob_name_tracker['test/branch4'], 'test_branch4')
        # Testing that keys in shapes were renamed correctly
        self.assertEqual(shapes['foo/weight'], [1])
        self.assertEqual(shapes['foo/batchnorm_2'], [4])
        self.assertEqual(shapes['foo/sum'], [9])
        self.assertEqual(shapes['test/branch_2'], [12])
        # Testing that the ops were renamed correctly
        self.assertEqual(op1.input[0], 'foo/weight')
        self.assertEqual(op1.output[0], 'foo/weight_2')
        self.assertEqual(op2.input[0], 'foo/batchnorm')
        self.assertEqual(op2.output[0], 'foo/batchnorm_2')
        self.assertEqual(op3.input[0], 'foo/bias')
        self.assertEqual(op3.output[0], 'foo/bias_2')
        self.assertEqual(op4.input[0], 'foo/scale')
        self.assertEqual(op4.output[0], 'foo/scale_2')
        self.assertEqual(op5.input[0], 'foo/sum')
        self.assertEqual(op5.output[0], 'foo/sum_2')
        self.assertEqual(op6.input[0], 'foo/branch')
        self.assertEqual(op6.input[1], 'test/branch_2')
        self.assertEqual(op6.output[0], 'foo/branch_3')
        self.assertEqual(op6.output[1], 'test/branch4')

    def test_filter_ops(self):
        """``_filter_ops`` drops filtered blobs, and ops left without outputs."""
        op1 = caffe2_pb2.OperatorDef()
        op1.input.extend(['remove_this'])
        op1.output.extend(['random_output'])
        op2 = caffe2_pb2.OperatorDef()
        op2.input.extend(['leave_this'])
        op2.output.extend(['leave_this_also'])
        op3 = caffe2_pb2.OperatorDef()
        op3.input.extend(['random_input'])
        op3.output.extend(['remove_this_also'])

        def filter_fn(blob):
            # Filter all blobs with names containing 'remove'
            return 'remove' not in str(blob)

        op_set1 = [op1, op2, op3]
        op_set2 = [op1, op2, op3]

        # Test case for when perform_filter = True.
        result_ops1 = tb._filter_ops(op_set1, filter_fn, True)
        new_op1, new_op2 = result_ops1[0], result_ops1[1]
        # input named 'remove_this' should have been filtered
        self.assertEqual(len(new_op1.input), 0)
        self.assertEqual(new_op1.output, ['random_output'])
        self.assertEqual(new_op2.input, ['leave_this'])
        self.assertEqual(new_op2.output, ['leave_this_also'])
        # output named 'remove_this_also' should have been filtered as well.
        # This should have also removed op3 as the filter function excludes ops
        # with no outputs.
        self.assertEqual(len(result_ops1), 2)

        # Test case for when perform_filter = False. op_set2 should remain
        # unchanged.
        result_ops2 = tb._filter_ops(op_set2, filter_fn, False)
        self.assertEqual(result_ops2, op_set2)

    # Use show_simplified=False. This shows the original style of graph
    # visualization from caffe2.contrib.tensorboard.
    # TODO: Add test for show_simplified=True.
    def test_simple_cnnmodel(self):
        """The graph of a small CNNModelHelper model matches its ``.expect`` file."""
        model = cnn.CNNModelHelper("NCHW", name="overfeat")
        workspace.FeedBlob("data", np.random.randn(1, 3, 64, 64).astype(np.float32))
        # `np.int` was only an alias of the builtin `int` and no longer exists
        # in NumPy >= 1.24, so the builtin is used directly.
        workspace.FeedBlob("label", np.random.randn(1, 1000).astype(int))
        with core.NameScope("conv1"):
            conv1 = model.Conv("data", "conv1", 3, 96, 11, stride=4)
            relu1 = model.Relu(conv1, conv1)
            pool1 = model.MaxPool(relu1, "pool1", kernel=2, stride=2)
        with core.NameScope("classifier"):
            fc = model.FC(pool1, "fc", 4096, 1000)
            pred = model.Softmax(fc, "pred")
            xent = model.LabelCrossEntropy([pred, "label"], "xent")
            loss = model.AveragedLoss(xent, "loss")

        blob_name_tracker = {}
        graph = tb.model_to_graph_def(
            model,
            blob_name_tracker=blob_name_tracker,
            shapes={},
            show_simplified=False,
        )

        compare_proto(graph, self)

    # cnn.CNNModelHelper is deprecated, so we also test with
    # model_helper.ModelHelper. The model used in this test is taken from the
    # Caffe2 MNIST tutorial. Also use show_simplified=False here.
    def test_simple_model(self):
        """The graph of a ModelHelper model with gradients matches its ``.expect`` file."""
        model = model_helper.ModelHelper(name="mnist")
        # how come those inputs don't break the forward pass =.=a
        workspace.FeedBlob("data", np.random.randn(1, 3, 64, 64).astype(np.float32))
        # `np.int` was only an alias of the builtin `int` and no longer exists
        # in NumPy >= 1.24, so the builtin is used directly.
        workspace.FeedBlob("label", np.random.randn(1, 1000).astype(int))

        with core.NameScope("conv1"):
            conv1 = brew.conv(model, "data", 'conv1', dim_in=1, dim_out=20, kernel=5)
            # Image size: 24 x 24 -> 12 x 12
            pool1 = brew.max_pool(model, conv1, 'pool1', kernel=2, stride=2)
            # Image size: 12 x 12 -> 8 x 8
            conv2 = brew.conv(model, pool1, 'conv2', dim_in=20, dim_out=100, kernel=5)
            # Image size: 8 x 8 -> 4 x 4
            pool2 = brew.max_pool(model, conv2, 'pool2', kernel=2, stride=2)
        with core.NameScope("classifier"):
            # 100 * 4 * 4 stands for dim_out of the previous layer (100)
            # multiplied by the image size (4 x 4)
            fc3 = brew.fc(model, pool2, 'fc3', dim_in=100 * 4 * 4, dim_out=500)
            relu = brew.relu(model, fc3, fc3)
            pred = brew.fc(model, relu, 'pred', 500, 10)
            softmax = brew.softmax(model, pred, 'softmax')
            xent = model.LabelCrossEntropy([softmax, "label"], 'xent')
            # compute the expected loss
            loss = model.AveragedLoss(xent, "loss")
        model.net.RunAllOnMKL()
        model.param_init_net.RunAllOnMKL()
        model.AddGradientOperators([loss], skip=1)
        blob_name_tracker = {}
        graph = tb.model_to_graph_def(
            model,
            blob_name_tracker=blob_name_tracker,
            shapes={},
            show_simplified=False,
        )

        compare_proto(graph, self)


# Run the tests above when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: tensorboardX/tests/test_chainer_np.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from tensorboardX import x2num, SummaryWriter
try:
    import chainer
    chainer_installed = True
except ImportError:
    print('Chainer is not installed, skipping test')
    chainer_installed = False
import numpy as np
import unittest


if chainer_installed:
    # Sample variables of rank 3, 1 and 5 shared by the conversion test.
    tensors = [chainer.Variable(np.random.rand(3, 10, 10)),
               chainer.Variable(np.random.rand(1)),
               chainer.Variable(np.random.rand(1, 2, 3, 4, 5))]

    class ChainerTest(unittest.TestCase):
        """Checks that chainer variables are accepted by x2num and SummaryWriter.

        The class is only defined when chainer could be imported.
        """

        def test_chainer_np(self):
            """make_np returns an ndarray for chainer variables and Python scalars."""
            for tensor in tensors:
                # regular variable
                self.assertIsInstance(x2num.make_np(tensor), np.ndarray)

            # python primitive type
            self.assertIsInstance(x2num.make_np(0), np.ndarray)
            self.assertIsInstance(x2num.make_np(0.1), np.ndarray)

        def test_chainer_img(self):
            """Build image-shaped variables; the conversion check itself is disabled."""
            shapes = [(77, 3, 13, 7), (77, 1, 13, 7), (3, 13, 7), (1, 13, 7), (13, 7)]
            for s in shapes:
                x = chainer.Variable(np.random.random_sample(s))
                # assert x2num.make_np(x, 'IMG').shape[2] == 3

        def test_chainer_write(self):
            """Smoke test: a chainer variable can be logged as a scalar."""
            with SummaryWriter() as w:
                w.add_scalar('scalar', chainer.Variable(np.random.rand(1)), 0)


================================================
FILE: tensorboardX/tests/test_crc32c.py
================================================
import unittest
from tensorboardX.crc32c import _crc32c, _crc32c_native, crc32c


class CRC32CTest(unittest.TestCase):
    """Checks each imported crc32c entry point against one known checksum."""

    # Input and the checksum every implementation is expected to produce for it.
    DATA = b'abcd'
    EXPECTED = 0x92c80a31

    def test_crc32c(self):
        """The public ``crc32c`` function yields the expected checksum."""
        self.assertEqual(crc32c(self.DATA), self.EXPECTED)

    def test_crc32c_python(self):
        """``_crc32c`` yields the expected checksum."""
        self.assertEqual(_crc32c(self.DATA), self.EXPECTED)

    def test_crc32c_native(self):
        """``_crc32c_native`` yields the expected checksum when it is available."""
        if _crc32c_native is None:
            # Report a skip instead of a silent pass so missing coverage is visible.
            self.skipTest('native crc32c implementation is not available')
        self.assertEqual(_crc32c_native(self.DATA), self.EXPECTED)


================================================
FILE: tensorboardX/tests/test_embedding.py
================================================
import unittest
import torch
from tensorboardX import SummaryWriter


class EmbeddingTest(unittest.TestCase):
    """Smoke tests for ``SummaryWriter.add_embedding``.

    Every writer is used as a context manager so it is closed even when
    ``add_embedding`` raises.
    """

    def _add_labelled_embeddings(self, all_images):
        """Log three feature rows twice: with plain labels, then with two-column metadata.

        Args:
            all_images: label images passed as ``label_img`` in both calls.
        """
        all_features = torch.Tensor([[1, 2, 3], [5, 4, 1], [3, 7, 7]])
        all_labels = torch.Tensor([33, 44, 55])

        with SummaryWriter() as w:
            w.add_embedding(all_features,
                            metadata=all_labels,
                            label_img=all_images,
                            global_step=2)

            # Four names for three labels: zip() stops at the shorter input,
            # so only the first three names are paired.
            dataset_label = ['test'] * 2 + ['train'] * 2
            all_labels = list(zip(all_labels, dataset_label))
            w.add_embedding(all_features,
                            metadata=all_labels,
                            label_img=all_images,
                            metadata_header=['digit', 'dataset'],
                            global_step=2)
        # assert...

    def test_embedding(self):
        """Embeddings with default-dtype label images can be written."""
        self._add_labelled_embeddings(torch.zeros(3, 3, 5, 5))

    def test_embedding_64(self):
        """Embeddings with float64 label images can be written."""
        self._add_labelled_embeddings(torch.zeros((3, 3, 5, 5), dtype=torch.float64))

    def test_embedding_square(self):
        """Square (32 x 32) label images are accepted."""
        all_features = torch.rand(228, 256)
        all_images = torch.rand(228, 3, 32, 32)
        # Scale each image by a factor that grows with its index.
        for i in range(all_images.shape[0]):
            all_images[i] *= (float(i) + 60) / (all_images.shape[0] + 60)
        with SummaryWriter(comment='sq') as w:
            w.add_embedding(all_features,
                            label_img=all_images,
                            global_step=2)

    def test_embedding_fail(self):
        """Non-square (16 x 32) label images make add_embedding raise AssertionError."""
        all_features = torch.rand(228, 256)
        all_images = torch.rand(228, 3, 16, 32)
        for i in range(all_images.shape[0]):
            all_images[i] *= (float(i) + 60) / (all_images.shape[0] + 60)
        with SummaryWriter(comment='shouldfail') as w:
            # Only the call under test is allowed to raise.
            with self.assertRaises(AssertionError):
                w.add_embedding(all_features,
                                label_img=all_images,
                                global_step=2)


================================================
FILE: tensorboardX/tests/test_figure.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import matplotlib.pyplot as plt
import unittest

from tensorboardX import SummaryWriter


class FigureTest(unittest.TestCase):
    """Tests ``SummaryWriter.add_figure`` and its ``close`` flag."""

    def test_figure(self):
        """A single figure stays open with close=False and is closed by default."""
        # The context manager closes the writer even if an assertion fails.
        with SummaryWriter() as writer:
            figure, axes = plt.figure(), plt.gca()
            circle1 = plt.Circle((0.2, 0.5), 0.2, color='r')
            circle2 = plt.Circle((0.8, 0.5), 0.2, color='g')
            axes.add_patch(circle1)
            axes.add_patch(circle2)
            plt.axis('scaled')
            plt.tight_layout()

            writer.add_figure("add_figure/figure", figure, 0, close=False)
            self.assertTrue(plt.fignum_exists(figure.number))

            writer.add_figure("add_figure/figure", figure, 1)
            self.assertFalse(plt.fignum_exists(figure.number))

    def test_figure_list(self):
        """A list of figures stays open with close=False and is closed by default."""
        with SummaryWriter() as writer:
            figures = []
            for i in range(5):
                figure = plt.figure()
                plt.plot([i * 1, i * 2, i * 3], label="Plot " + str(i))
                plt.xlabel("X")
                # The original set the x label twice; "Y" belongs on the y axis.
                plt.ylabel("Y")
                plt.legend()
                plt.tight_layout()
                figures.append(figure)

            writer.add_figure("add_figure/figure_list", figures, 0, close=False)
            self.assertTrue(all(plt.fignum_exists(figure.number) for figure in figures))

            writer.add_figure("add_figure/figure_list", figures, 1)
            self.assertFalse(any(plt.fignum_exists(figure.number) for figure in figures))


================================================
FILE: tensorboardX/tests/test_numpy.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import unittest

from tensorboardX import x2num


class NumpyTest(unittest.TestCase):
    """Tests for ``x2num.make_np`` on plain Python and numpy inputs."""

    def test_scalar(self):
        """Scalars of several types are converted to ndarrays of shape (1,)."""
        scalars = [
            1.1,
            # uint64_max; the original `1 << 64 - 1` parsed as `1 << 63`.
            (1 << 64) - 1,
            np.float16(1.00000087),
            # np.float128 does not exist on every platform; np.longdouble is
            # the same type wherever np.float128 is defined.
            np.longdouble(1.00008 + 9),
            np.int64(100000000000),
        ]
        for value in scalars:
            res = x2num.make_np(value)
            self.assertIsInstance(res, np.ndarray)
            self.assertEqual(res.shape, (1,))

    def test_make_grid(self):
        """Placeholder; no checks are implemented."""
        pass

    def test_numpy_vid(self):
        """Build video-shaped arrays; the conversion check itself is disabled."""
        shapes = [(16, 3, 30, 28, 28), (19, 3, 30, 28, 28), (19, 3, 29, 23, 19)]
        for s in shapes:
            x = np.random.random_sample(s)
            # assert x2num.make_np(x, 'VID').shape[3] == 3

    def test_numpy_vid_uint8(self):
        """Build a uint8 video-shaped array; the conversion check itself is disabled."""
        x = np.random.randint(0, 256, (16, 3, 30, 28, 28)).astype(np.uint8)
        # x2num.make_np(x, 'VID').shape[3] == 3


================================================
FILE: tensorboardX/tests/test_onnx_graph.py
================================================
import unittest
import torch
from tensorboardX import SummaryWriter


class ONNXGraphTest(unittest.TestCase):
    """Smoke test for ``SummaryWriter.add_onnx_graph``."""

    def test_onnx_graph(self):
        """Download and unpack an example ONNX model, then log its graph.

        Requires the ``wget`` and ``tar`` executables and network access.
        """
        import subprocess
        zoo_address = 'https://onnxzoo.blob.core.windows.net/models/opset_8/mnist/mnist.tar.gz'

        res = subprocess.call(['wget', '-nc', zoo_address])
        self.assertEqual(res, 0, 'cannot download example onnx model from the zoo')
        res = subprocess.call(['tar', 'xf', 'mnist.tar.gz', '-C', 'examples/', 'mnist/model.onnx'])
        # The exit status used to be ignored, hiding extraction failures.
        self.assertEqual(res, 0, 'cannot extract example onnx model from the archive')

        with SummaryWriter() as w:
            w.add_onnx_graph('examples/mnist/model.onnx')


================================================
FILE: tensorboardX/tests/test_pr_curve.py
================================================
import unittest
import torch
import numpy as np
from tensorboardX import SummaryWriter
from tensorboardX import summary
from .expect_reader import compare_proto

# Seed numpy's global random generator once, when this module is imported.
np.random.seed(0)
# Raw precision-recall fixture: six parallel lists with five entries each,
# passed in this order to the *_pr_curve_raw calls below.
true_positive_counts = [75, 64, 21, 5, 0]
false_positive_counts = [150, 105, 18, 0, 0]
true_negative_counts = [0, 45, 132, 150, 150]
false_negative_counts = [0, 11, 54, 70, 75]
# Entry i equals TP / (TP + FP) of the counts above; the final 0 / 0 entry is 0.0.
precision = [0.3333333, 0.3786982, 0.5384616, 1.0, 0.0]
# Entry i equals TP / (TP + FN) of the counts above.
recall = [1.0, 0.8533334, 0.28, 0.0666667, 0.0]


class PRCurveTest(unittest.TestCase):
    """Tests for the precision-recall curve writer methods and summary builders."""

    def test_smoke(self):
        """Write one computed and one raw PR curve through a SummaryWriter; nothing is asserted."""
        with SummaryWriter() as writer:
            # 100 random 0/1 labels and 100 random predictions, logged at step 1.
            writer.add_pr_curve('xoxo', np.random.randint(2, size=100), np.random.rand(
                100), 1)
            writer.add_pr_curve_raw('prcurve with raw data',
                                    true_positive_counts,
                                    false_positive_counts,
                                    true_negative_counts,
                                    false_negative_counts,
                                    precision,
                                    recall,
                                    1)

    # NOTE(review): "purve" looks like a typo for "curve"; compare_proto may
    # look up its expectation by test name, so confirm before renaming.
    def test_pr_purve(self):
        """Pass summary.pr_curve for fixed labels and float16 predictions to compare_proto."""
        # 100 hard-coded 0/1 labels.
        random_labels = np.array([0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1,
            1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0,
            0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1,
            1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0,
            1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0])
        # 100 hard-coded predictions, stored as float16.
        random_probs = np.array([0.33327776, 0.30032885, 0.79012837, 0.04306813, 0.65221544,
            0.58481968, 0.28305522, 0.53795795, 0.00729739, 0.52266951,
            0.22464247, 0.11262435, 0.41573075, 0.92493992, 0.73066758,
            0.43867735, 0.27955449, 0.56975382, 0.53933028, 0.34392824,
            0.30312509, 0.81732807, 0.55408544, 0.3969487 , 0.31768033,
            0.24353266, 0.47198005, 0.19999122, 0.05788022, 0.24046305,
            0.04651082, 0.30061738, 0.78321545, 0.82670207, 0.49200517,
            0.80904619, 0.96711993, 0.3160946 , 0.01049424, 0.60108337,
            0.56508792, 0.83729429, 0.9717386 , 0.46306053, 0.80232138,
            0.24166823, 0.7393237 , 0.50820418, 0.04944932, 0.53854157,
            0.10765172, 0.84723855, 0.20518299, 0.3143431 , 0.51299074,
            0.47065695, 0.54267833, 0.1812676 , 0.06265177, 0.34110327,
            0.30915171, 0.91870169, 0.91309447, 0.31395817, 0.36780571,
            0.98297986, 0.00594547, 0.52839042, 0.70229202, 0.37779588,
            0.15207045, 0.59759632, 0.72397032, 0.71502195, 0.90135725,
            0.43970107, 0.17123532, 0.08785938, 0.04986818, 0.62702444,
            0.69171023, 0.30537792, 0.30285433, 0.27124347, 0.27693729,
            0.7136039 , 0.48022489, 0.20916285, 0.2018599 , 0.92401008,
            0.30189681, 0.46862626, 0.96353024, 0.30468533, 0.68281294,
            0.30623562, 0.40795975, 0.76824531, 0.89824215, 0.69845035], dtype=np.float16)
        compare_proto(summary.pr_curve('tag', random_labels, random_probs, 1), self)

    def test_pr_purve_raw(self):
        """Pass summary.pr_curve_raw for the module-level fixture to compare_proto."""
        compare_proto(summary.pr_curve_raw('prcurve with raw data',
                                           true_positive_counts,
                                           false_positive_counts,
                                           true_negative_counts,
                                           false_negative_counts,
                                           precision,
                                           recall,
                                           1),
                      self)


================================================
FILE: tensorboardX/tests/test_pytorch_graph.py
================================================
from __future__ import absolute_import, division, print_function, unicode_literals
import unittest
import torch
from tensorboardX import SummaryWriter


class PytorchGraphTest(unittest.TestCase):
    """Exercises ``SummaryWriter.add_graph`` with torch modules."""

    def test_pytorch_graph(self):
        """A module wrapping one Linear(3, 5) layer can be logged with a (1, 3) input."""
        example_inputs = (torch.zeros(1, 3),)

        class myLinear(torch.nn.Module):
            """Thin wrapper that forwards its input through a single linear layer."""

            def __init__(self):
                super(myLinear, self).__init__()
                self.linear = torch.nn.Linear(3, 5)

            def forward(self, x):
                projected = self.linear(x)
                return projected

        with SummaryWriter(comment='LinearModel') as writer:
            # Third positional argument is passed as True, as before.
            writer.add_graph(myLinear(), example_inputs, True)

    def test_wrong_input_size(self):
        """Logging Linear(3, 5) with a (1, 9) input raises RuntimeError."""
        print('expect error here:')
        with self.assertRaises(RuntimeError):
            mismatched_input = torch.rand(1, 9)
            linear = torch.nn.Linear(3, 5)
            with SummaryWriter(comment='expect_error') as writer:
                writer.add_graph(linear, mismatched_input)  # error


================================================
FILE: tensorboardX/tests/test_pytorch_np.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from tensorboardX import x2num, SummaryWriter
import torch
import numpy as np
import unittest


class PyTorchNumpyTest(unittest.TestCase):
    """Checks torch inputs against ``x2num.make_np`` and the SummaryWriter."""

    @staticmethod
    def _add_histogram_raw(writer, tag, values, bins, num):
        """Bucket ``values`` with np.histogram and log the result via add_histogram_raw."""
        counts, limits = np.histogram(values, bins)
        writer.add_histogram_raw(tag,
                                 min=values.min().item(),
                                 max=values.max().item(),
                                 num=num,
                                 sum=values.sum().item(),
                                 sum_squares=values.dot(values).item(),
                                 # The first edge is dropped before logging.
                                 bucket_limits=limits[1:].tolist(),
                                 bucket_counts=counts.tolist())

    def test_pytorch_np(self):
        """make_np returns an ndarray for tensors, Variables and Python scalars."""
        def converts(value):
            return isinstance(x2num.make_np(value), np.ndarray)

        for sample in (torch.rand(3, 10, 10), torch.rand(1), torch.rand(1, 2, 3, 4, 5)):
            # plain tensor
            assert converts(sample)

            # tensor moved to CUDA, when a device is present
            if torch.cuda.device_count() > 0:
                assert converts(sample.cuda())

            # tensor wrapped in a Variable
            assert converts(torch.autograd.Variable(sample))

            # Variable moved to CUDA, when a device is present
            if torch.cuda.device_count() > 0:
                assert converts(torch.autograd.Variable(sample).cuda())

        # builtin int and float
        assert converts(0)
        assert converts(0.1)

    def test_pytorch_write(self):
        """Smoke test: a one-element Variable can be logged as a scalar."""
        with SummaryWriter() as writer:
            value = torch.autograd.Variable(torch.rand(1))
            writer.add_scalar('scalar', value, 0)

    def test_pytorch_histogram(self):
        """Smoke test: float and integer tensors can be logged as histograms."""
        with SummaryWriter() as writer:
            writer.add_histogram('float histogram', torch.rand((50,)))
            writer.add_histogram('int histogram', torch.randint(0, 100, (50,)))

    def test_pytorch_histogram_raw(self):
        """Smoke test: precomputed float and integer histograms can be logged."""
        with SummaryWriter() as writer:
            num = 50

            floats = x2num.make_np(torch.rand((num,)))
            self._add_histogram_raw(writer, 'float histogram raw', floats,
                                    [0.0, 0.25, 0.5, 0.75, 1.0], num)

            ints = x2num.make_np(torch.randint(0, 100, (num,)))
            self._add_histogram_raw(writer, 'int histogram raw', ints,
                                    [0, 25, 50, 75, 100], num)


================================================
FILE: tensorboardX/tests/test_record_writer.py
================================================
from tensorboardX import SummaryWriter
import unittest
from tensorboardX.record_writer import S3RecordWriter, make_valid_tf_name
import os
import boto3
from moto import mock_s3

# Provide placeholder AWS credentials at import time; setdefault leaves any
# value that is already present in the environment untouched.
# NOTE(review): presumably needed so boto3 can build a client under moto -- confirm.
os.environ.setdefault("AWS_ACCESS_KEY_ID", "foobar_key")
os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "foobar_secret")


class RecordWriterTest(unittest.TestCase):
    """Tests for ``S3RecordWriter`` and ``make_valid_tf_name``."""

    @mock_s3
    def test_record_writer_s3(self):
        """An s3:// URL is split into bucket and key, and a write plus flush succeeds."""
        s3 = boto3.client('s3', region_name='us-east-1')
        s3.create_bucket(Bucket='this')

        record_writer = S3RecordWriter('s3://this/is/apen')
        bucket, path = record_writer.bucket_and_path()
        assert 'this' == bucket
        assert 'is/apen' == path

        payload = bytes(42)
        record_writer.write(payload)
        record_writer.flush()

    def test_make_valid_tf_name(self):
        """'$ave/&sound' is rewritten to '._ave/_sound'."""
        sanitized = make_valid_tf_name('$ave/&sound')
        assert '._ave/_sound' == sanitized


================================================
FILE: tensorboardX/tests/test_summary.py
================================================
from __future__ import absolute_import, division, print_function, unicode_literals
from tensorboardX import summary
from .expect_reader import compare_proto, write_proto
import numpy as np
import pytest
import unittest
# compare_proto = write_proto  # massive update expect

def tensor_N(shape, dtype=float):
    """Return an array of ``shape`` holding 0, 1, 2, ... in row-major order.

    Args:
        shape: target shape; the number of generated values is its product.
        dtype: element type handed to ``np.arange`` (defaults to ``float``).
    """
    return np.arange(np.prod(shape), dtype=dtype).reshape(shape)

class SummaryTest(unittest.TestCase):
    """Tests for the builders in ``tensorboardX.summary``.

    Most tests hand the built summary and the test case to ``compare_proto``;
    the remaining ones are smoke tests or expect an exception.
    """

    def test_uint8_image(self):
        '''
        Tests that uint8 image (pixel values in [0, 255]) is not changed
        '''
        test_image = tensor_N(shape=(3, 32, 32), dtype=np.uint8)
        compare_proto(summary.image('dummy', test_image), self)

    def test_float32_image(self):
        '''
        Tests the image summary built from a float image.

        The input comes from tensor_N with its default dtype (``float``), so
        the pixel values count up from 0 rather than staying within [0, 1].
        '''
        test_image = tensor_N(shape=(3, 32, 32))
        compare_proto(summary.image('dummy', test_image), self)

    def test_float_1_converts_to_uint8_255(self):
        """A float32 image with value 1 yields the same summary as uint8 with 255."""
        green_uint8 = np.array([[[0, 255, 0]]], dtype='uint8')
        green_float32 = np.array([[[0, 1, 0]]], dtype='float32')

        a = summary.image(tensor=green_uint8, tag='')
        b = summary.image(tensor=green_float32, tag='')
        self.assertEqual(a, b)

    def test_list_input(self):
        """Passing a plain list to summary.histogram raises."""
        with pytest.raises(Exception):
            summary.histogram('dummy', [1, 3, 4, 5, 6], 'tensorflow')

    def test_empty_input(self):
        """Passing an empty array to summary.histogram raises."""
        print('expect error here:')
        with pytest.raises(Exception):
            summary.histogram('dummy', np.ndarray(0), 'tensorflow')

    def test_image_with_boxes(self):
        compare_proto(summary.image_boxes('dummy',
                            tensor_N(shape=(3, 32, 32)),
                            np.array([[10, 10, 40, 40]])), self)

    def test_image_with_one_channel(self):
        compare_proto(summary.image('dummy', tensor_N(shape=(1, 8, 8)), dataformats='CHW'), self)

    def test_image_with_four_channel(self):
        compare_proto(summary.image('dummy', tensor_N(shape=(4, 8, 8)), dataformats='CHW'), self)

    def test_image_with_one_channel_batched(self):
        compare_proto(summary.image('dummy', tensor_N(shape=(2, 1, 8, 8)), dataformats='NCHW'), self)

    def test_image_with_3_channel_batched(self):
        compare_proto(summary.image('dummy', tensor_N(shape=(2, 3, 8, 8)), dataformats='NCHW'), self)

    def test_image_with_four_channel_batched(self):
        compare_proto(summary.image('dummy', tensor_N(shape=(2, 4, 8, 8)), dataformats='NCHW'), self)

    def test_image_without_channel(self):
        compare_proto(summary.image('dummy', tensor_N(shape=(8, 8)), dataformats='HW'), self)

    def test_video(self):
        """Build video summaries for three shapes; skipped when moviepy is missing."""
        try:
            import moviepy  # noqa: F401 -- imported only to probe availability
        except ImportError:
            # Report a skip instead of a silent pass so missing coverage is visible.
            self.skipTest('moviepy is not installed')
        compare_proto(summary.video('dummy', tensor_N(shape=(4, 3, 1, 8, 8))), self)
        summary.video('dummy', tensor_N(shape=(16, 48, 1, 28, 28)))
        summary.video('dummy', tensor_N(shape=(20, 7, 1, 8, 8)))

    def test_audio(self):
        compare_proto(summary.audio('dummy', tensor_N(shape=(42,))), self)

    def test_text(self):
        compare_proto(summary.text('dummy', 'text 123'), self)

    def test_histogram_auto(self):
        compare_proto(summary.histogram('dummy', tensor_N(shape=(1024,)), bins='auto', max_bins=5), self)

    def test_histogram_fd(self):
        compare_proto(summary.histogram('dummy', tensor_N(shape=(1024,)), bins='fd', max_bins=5), self)

    def test_histogram_doane(self):
        compare_proto(summary.histogram('dummy', tensor_N(shape=(1024,)), bins='doane', max_bins=5), self)

    def test_custom_scalars(self):
        layout = {'Taiwan': {'twse': ['Multiline', ['twse/0050', 'twse/2330']]},
                    'USA': {'dow': ['Margin', ['dow/aaa', 'dow/bbb', 'dow/ccc']],
                            'nasdaq': ['Margin', ['nasdaq/aaa', 'nasdaq/bbb', 'nasdaq/ccc']]}}
        summary.custom_scalars(layout)  # smoke test only.

    def test_mesh(self):
        """Build a mesh summary from four vertices, four colors and four faces."""
        vertices_tensor = np.array([[
            [1, 1, 1],
            [-1, -1, 1],
            [1, -1, -1],
            [-1, 1, -1],
        ]], dtype=float)
        colors_tensor = np.array([[
            [255, 0, 0],
            [0, 255, 0],
            [0, 0, 255],
            [255, 0, 255],
        ]], dtype=int)
        faces_tensor = np.array([[
            [0, 2, 3],
            [0, 3, 1],
            [0, 1, 2],
            [1, 3, 2],
        ]], dtype=int)
        compare_proto(summary.mesh('my_mesh', vertices=vertices_tensor, colors=colors_tensor, faces=faces_tensor), self)

    # It's hard to get dictionary sorted with same result in various envs. So only use one.
    def test_hparams(self):
        hp = {'lr': 0.1}
        mt = {'accuracy': 0.1}
        compare_proto(summary.hparams(hp, mt), self)

    def test_hparams_smoke(self):
        """summary.hparams accepts numeric, string and boolean hyperparameters."""
        hp = {'lr': 0.1, 'bsize': 4}
        mt = {'accuracy': 0.1, 'loss': 10}
        summary.hparams(hp, mt)

        hp = {'string': "1b", 'use magic': True}
        summary.hparams(hp, mt)


================================================
FILE: tensorboardX/tests/test_summary_writer.py
================================================
from tensorboardX import SummaryWriter
import unittest


class SummaryWriterTest(unittest.TestCase):
    """Tests for creating, closing and pointing a ``SummaryWriter`` at a log directory."""

    def test_summary_writer_ctx(self):
        """After use as a context manager the writer's file_writer is None."""
        # after using a SummaryWriter as a ctx it should be closed
        with SummaryWriter(filename_suffix='.test') as writer:
            writer.add_scalar('test', 1)
        self.assertIsNone(writer.file_writer)

    def test_summary_writer_backcomapt(self):
        """The ``log_dir`` keyword is accepted."""
        with SummaryWriter(log_dir='/tmp/tbxtest') as writer:
            writer.add_scalar('test', 1)

    def test_summary_writer_close(self):
        """Opening and then closing a writer must not raise OSError."""
        # Opening and closing SummaryWriter a lot should not run into
        # OSError: [Errno 24] Too many open files
        try:
            writer = SummaryWriter()
            writer.close()
        except OSError as err:
            self.fail('opening/closing SummaryWriter raised OSError: {}'.format(err))

    def test_windowsPath(self):
        """A Windows-style path string is accepted as the log directory."""
        import os
        import shutil
        dummyPath = "C:\\Downloads\\fjoweifj02utj43tj430"
        try:
            with SummaryWriter(dummyPath) as writer:
                writer.add_scalar('test', 1)
            # The unconditional rmtree used to fail when the directory was missing.
            self.assertTrue(os.path.isdir(dummyPath))
        finally:
            # Remove the directory even when the body above fails.
            shutil.rmtree(dummyPath, ignore_errors=True)

    def test_pathlib(self):
        """A pathlib.Path is accepted as the log directory."""
        import os
        import shutil
        import sys
        if sys.version_info.major == 2:
            import pathlib2 as pathlib
        else:
            import pathlib
        p = pathlib.Path('./pathlibtest')
        try:
            with SummaryWriter(p) as writer:
                writer.add_scalar('test', 1)
            self.assertTrue(os.path.isdir(str(p)))
        finally:
            shutil.rmtree(str(p), ignore_errors=True)


================================================
FILE: tensorboardX/tests/test_test.py
================================================
def test_linting():
    """Placeholder for a flake8 run over the package; the call is commented out."""
    # The import is retained for the disabled check below.
    import subprocess
    # subprocess.check_output(['flake8', 'tensorboardX'])


================================================
FILE: tensorboardX/tests/test_utils.py
================================================
from tensorboardX import summary
from tensorboardX.utils import make_grid, _prepare_video, convert_to_HWC
import numpy as np
import pytest
import unittest


class UtilsTest(unittest.TestCase):
    """Tests for ``convert_to_HWC`` and ``_prepare_video``."""

    def test_to_HWC(self):
        """convert_to_HWC yields the expected output shape for chw, nchw and hw input."""
        np.random.seed(1)
        test_image = np.random.randint(0, 256, size=(3, 32, 32), dtype=np.uint8)
        converted = convert_to_HWC(test_image, 'chw')
        self.assertEqual(converted.shape, (32, 32, 3))
        # Sixteen 32 x 32 images come back as a single 64 x 256 image.
        test_image = np.random.randint(0, 256, size=(16, 3, 32, 32), dtype=np.uint8)
        converted = convert_to_HWC(test_image, 'nchw')
        self.assertEqual(converted.shape, (64, 256, 3))
        test_image = np.random.randint(0, 256, size=(32, 32), dtype=np.uint8)
        converted = convert_to_HWC(test_image, 'hw')
        self.assertEqual(converted.shape, (32, 32, 3))

    def test_prepare_video(self):
        """_prepare_video keeps the per-timestep sum of the video unchanged."""
        # at each timestep the sum over all other dimensions of the video should stay the same
        np.random.seed(1)
        V_before = np.random.random((4, 10, 3, 20, 20))
        V_after = _prepare_video(np.copy(V_before))
        # Bring the length-10 axis to the front so both arrays flatten to 10 rows.
        V_before = np.swapaxes(V_before, 0, 1)
        # The shape is passed positionally: the `newshape` keyword of
        # np.reshape is deprecated as of NumPy 2.1.
        V_before = np.reshape(V_before, (10, -1))
        V_after = np.reshape(V_after, (10, -1))
        np.testing.assert_array_almost_equal(np.sum(V_before, axis=1), np.sum(V_after, axis=1))


================================================
FILE: tensorboardX/tests/test_visdom.py
================================================
from tensorboardX import TorchVis

import numpy as np
import pytest
import unittest

# Raw precision-recall fixture: six parallel lists with five entries each,
# passed in this order to add_pr_curve_raw below.
true_positive_counts = [75, 64, 21, 5, 0]
false_positive_counts = [150, 105, 18, 0, 0]
true_negative_counts = [0, 45, 132, 150, 150]
false_negative_counts = [0, 11, 54, 70, 75]
# Entry i equals TP / (TP + FP) of the counts above; the final 0 / 0 entry is 0.0.
precision = [0.3333333, 0.3786982, 0.5384616, 1.0, 0.0]
# Entry i equals TP / (TP + FN) of the counts above.
recall = [1.0, 0.8533334, 0.28, 0.0666667, 0.0]


class VisdomTest(unittest.TestCase):
    """Smoke test for ``TorchVis`` created with the 'visdom' argument."""

    def test_TorchVis(self):
        """Call each add_* method once (add_scalar twice), then drop the object."""
        vis = TorchVis('visdom')

        # Values 1 and 2 are logged at steps 0 and 1.
        for step, value in enumerate((1, 2)):
            vis.add_scalar('scalar_visdom', value, step)

        histogram_values = np.array([1, 2, 3, 4, 5])
        vis.add_histogram('histogram_visdom', histogram_values, 1)

        image = np.ndarray((3, 20, 20))
        vis.add_image('image_visdom', image, 2)
        # vis.add_video('video_visdom', np.ndarray((1, 3, 10, 20, 20)), 3)

        vis.add_audio('audio_visdom', [1, 2, 3, 4, 5])
        vis.add_text('text_visdom', 'mystring')

        labels = np.random.randint(2, size=100)
        predictions = np.random.rand(100)
        vis.add_pr_curve('pr_curve_visdom', labels, predictions, 10)

        vis.add_pr_curve_raw(
            'prcurve with raw data',
            true_positive_counts,
            false_positive_counts,
            true_negative_counts,
            false_negative_counts,
            precision,
            recall,
            20,
        )
        del vis


================================================
FILE: tensorboardX/tests/test_writer.py
================================================
from tensorboardX import SummaryWriter
from tensorboard.compat.tensorflow_stub.pywrap_tensorflow import PyRecordReader_New
from tensorboardX.proto import event_pb2

import numpy as np
import pytest
import unittest
import time
# NOTE(review): presumably tone frequencies in Hz for an audio test further
# down the file; no use is visible in this part of the module -- confirm.
freqs = [262, 294, 330, 349, 392, 440, 440, 440, 440, 440, 440]

# Raw precision-recall fixture: six parallel lists with five entries each.
true_positive_counts = [75, 64, 21, 5, 0]
false_positive_counts = [150, 105, 18, 0, 0]
true_negative_counts = [0, 45, 132, 150, 150]
false_negative_counts = [0, 11, 54, 70, 75]
# Entry i equals TP / (TP + FP) of the counts above; the final 0 / 0 entry is 0.0.
precision = [0.3333333, 0.3786982, 0.5384616, 1.0, 0.0]
# Entry i equals TP / (TP + FN) of the counts above.
recall = [1.0, 0.8533334, 0.28, 0.0666667, 0.0]


class WriterTest(unittest.TestCase):
    def test_flush_timer_is_short_so_data_is_there(self):
        """Check that events reach the file on their own when ``flush_secs`` is short.

        This method used to be called ``test_flush``. A later method with the
        same name in this class rebinds that attribute, so this version was
        silently discarded and never executed. The new name mirrors
        ``test_flush_timer_is_long_so_data_is_not_there``.

        Five scalars are written two seconds apart with ``flush_secs=1``. The
        event file is then read back while the writer is still open; the test
        fails if any ``GetNext()`` call raises.
        """
        N_TEST = 5
        # The context manager releases the writer even if a read below fails.
        with SummaryWriter(flush_secs=1) as w:
            f = w.file_writer.event_writer._ev_writer._file_name
            for i in range(N_TEST):
                w.add_scalar('a', i)
                time.sleep(2)
            # Read before leaving the `with` block: an explicit close also makes
            # the data readable, which would hide a broken flush timer.
            r = PyRecordReader_New(f)
            r.GetNext()  # meta data, so skip
            for _ in range(N_TEST):  # all of the data should be flushed
                r.GetNext()

    def test_flush_timer_is_long_so_data_is_not_there(self):
        """Check that unflushed events cannot be read back from the event file.

        Five scalars are written two seconds apart with ``flush_secs=20``, so
        the whole write phase (about ten seconds) ends before the flush
        interval elapses. Reading the metadata record plus five data records
        is then expected to raise.

        Only the read phase sits inside ``assertRaises``. With the whole body
        wrapped, an unrelated failure while creating or feeding the writer
        would have made the test pass for the wrong reason.
        """
        N_TEST = 5
        # The context manager releases the writer once the check is done.
        with SummaryWriter(flush_secs=20) as w:
            f = w.file_writer.event_writer._ev_writer._file_name
            for i in range(N_TEST):
                w.add_scalar('a', i)
                time.sleep(2)
            # The concrete reader error type is not imported in this module,
            # so match on Exception rather than BaseException to avoid also
            # accepting KeyboardInterrupt/SystemExit.
            with self.assertRaises(Exception):
                r = PyRecordReader_New(f)
                r.GetNext()  # meta data, so skip
                for _ in range(N_TEST):  # missing data
                    r.GetNext()

    def test_flush_after_close(self):
        """Check that ``close()`` makes every pending event readable.

        The writer uses ``flush_secs=20`` and is fed five scalars two seconds
        apart, then closed explicitly. The event file must then yield one
        metadata record followed by five data records without raising.
        """
        event_count = 5
        writer = SummaryWriter(flush_secs=20)
        event_path = writer.file_writer.event_writer._ev_writer._file_name
        step = 0
        while step < event_count:
            writer.add_scalar('a', step)
            time.sleep(2)
            step += 1
        writer.close()
        reader = PyRecordReader_New(event_path)
        # One leading metadata record, then one record per scalar written.
        for _ in range(1 + event_count):
            reader.GetNext()

    def test_flush(self):
        """Check that an explicit ``flush()`` makes every pending event readable.

        The writer uses ``flush_secs=20`` and is fed five scalars two seconds
        apart, so the data is only expected in the file because of the
        ``flush()`` call. The event file must then yield one metadata record
        followed by five data records without raising.
        """
        N_TEST = 5
        # The writer was previously never closed; the context manager releases
        # it even when one of the reads below fails.
        with SummaryWriter(flush_secs=20) as w:
            f = w.file_writer.event_writer._ev_writer._file_name
            for i in range(N_TEST):
                w.add_scalar('a', i)
                time.sleep(2)
            w.flush()
            # Read while the writer is still open so that only flush(), not
            # close(), can be responsible for the data being present.
            r = PyRecordReader_New(f)
            r.GetNext()  # meta data, so skip
            for _ in range(N_TEST):  # all of the data should be flushed
                r.GetNext()

    def test_auto_close(self):
        pass

    def test_writer(self):
        """Smoke-test the ``SummaryWriter`` logging methods at step 0.

        Each ``add_*`` method is called once with synthetic data inside a
        ``with SummaryWriter()`` block. Nothing is read back or asserted, so
        the test passes if no call raises.
        """
        with SummaryWriter() as writer:
            sample_rate = 44100

            # Every call below logs at step 0, so the n_iter * sin/cos terms
            # and arctan(n_iter) in the scalar group all evaluate to 0.
            n_iter = 0
            writer.add_scalar('data/scalar_systemtime', 0.1, n_iter)
            writer.add_scalar('data/scalar_customtime', 0.2, n_iter, walltime=n_iter)
            writer.add_scalars('data/scalar_group', {"xsinx": n_iter * np.sin(n_iter),
                                                     "xcosx": n_iter * np.cos(n_iter),
                                                     "arctanx": np.arctan(n_iter)}, n_iter)
            x = np.zeros((32, 3, 64, 64))  # all-zero stand-in for a network output
            writer.add_images('Image', x, n_iter)  # Tensor
            writer.add_image_with_boxes('imagebox',
                                        np.zeros((3, 64, 64)),
                                        np.array([[10, 10, 40, 40], [40, 40, 60, 60]]),
                                        n_iter)
            # x is reused: now a 1-D all-zero buffer of sample_rate * 2 samples.
            x = np.zeros(sample_rate * 2)

            writer.add_audio('myAudio', x, n_iter)
            # NOTE(review): the axis meaning of the 5-D video array is not
            # visible here -- confirm against add_video's documentation.
            writer.add_video('myVideo', np.random.rand(16, 48, 1, 28, 28).astype(np.float32), n_iter)
            writer.add_text('Text', 'text logged at step:' + str(n_iter), n_iter)
            writer.add_text('markdown Text', '''a|b\n-|-\nc|d''', n_iter)
            writer.add_histogram('hist', np.random.rand(100, 100), n_iter)
            writer.add_pr_curve('xoxo', np.random.randint(2, size=100), np.random.rand(
                100), n_iter)  # needs tensorboard 0.4RC or later
            # Raw variant: the counts and the precision/recall lists come from
            # the module-level fixture lists.
            writer.add_pr_curve_raw('prcurve with raw data', true_positive_counts,
                                    false_positive_counts,
                                    true_negative_counts,
                                    false_negative_counts,
                                    precision,
                                    recall, n_iter)
            # export scalar data to JSON for external processing
            # (the relative path means the file lands in the current working directory)
            writer.export_scalars_to_json("./all_scalars.json")
            # A plain Python list of five all-ones (3, 100, 110) arrays, logged
            # through a second, separate writer with dataformats='CHW'.
            imgs = []
            for i in range(5):
                imgs.append(np.ones((3, 100, 110)))
            with SummaryWriter() as w:
                w.add_images('img_list', imgs, dataformats='CHW')