Repository: ZYM-PKU/UDiffText
Branch: main
Commit: 9c9d0ab5b468
Files: 142
Total size: 749.6 KB
Directory structure:
gitextract_cem8xir5/
├── .gitignore
├── LICENSE
├── README.md
├── configs/
│ ├── dataset/
│ │ ├── icd13.yaml
│ │ ├── locr.yaml
│ │ ├── st.yaml
│ │ └── tsg.yaml
│ ├── demo.yaml
│ ├── pretrain.yaml
│ ├── test/
│ │ └── textdesign_sd_2.yaml
│ ├── test.yaml
│ ├── train/
│ │ └── textdesign_sd_2.yaml
│ └── train.yaml
├── dataset/
│ ├── __init__.py
│ ├── dataloader.py
│ └── utils/
│ └── words.txt
├── demo.py
├── metrics.py
├── pretrain.py
├── requirements.txt
├── scripts/
│ └── preprocess/
│ └── laion_ocr_pre.ipynb
├── sgm/
│ ├── __init__.py
│ ├── lr_scheduler.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── autoencoder.py
│ │ └── diffusion.py
│ ├── modules/
│ │ ├── __init__.py
│ │ ├── attention.py
│ │ ├── autoencoding/
│ │ │ ├── __init__.py
│ │ │ ├── losses/
│ │ │ │ └── __init__.py
│ │ │ └── regularizers/
│ │ │ └── __init__.py
│ │ ├── diffusionmodules/
│ │ │ ├── __init__.py
│ │ │ ├── denoiser.py
│ │ │ ├── denoiser_scaling.py
│ │ │ ├── denoiser_weighting.py
│ │ │ ├── discretizer.py
│ │ │ ├── guiders.py
│ │ │ ├── loss.py
│ │ │ ├── model.py
│ │ │ ├── openaimodel.py
│ │ │ ├── sampling.py
│ │ │ ├── sampling_utils.py
│ │ │ ├── sigma_sampling.py
│ │ │ ├── util.py
│ │ │ └── wrappers.py
│ │ ├── distributions/
│ │ │ ├── __init__.py
│ │ │ └── distributions.py
│ │ ├── ema.py
│ │ ├── encoders/
│ │ │ ├── __init__.py
│ │ │ └── modules.py
│ │ └── predictors/
│ │ └── model.py
│ └── util.py
├── src/
│ └── parseq/
│ ├── .gitignore
│ ├── Datasets.md
│ ├── LICENSE
│ ├── NOTICE
│ ├── README.md
│ ├── bench.py
│ ├── configs/
│ │ ├── bench.yaml
│ │ ├── charset/
│ │ │ ├── 36_lowercase.yaml
│ │ │ ├── 62_mixed-case.yaml
│ │ │ └── 94_full.yaml
│ │ ├── dataset/
│ │ │ ├── real.yaml
│ │ │ └── synth.yaml
│ │ ├── experiment/
│ │ │ ├── abinet-sv.yaml
│ │ │ ├── abinet.yaml
│ │ │ ├── crnn.yaml
│ │ │ ├── parseq-patch16-224.yaml
│ │ │ ├── parseq-tiny.yaml
│ │ │ ├── parseq.yaml
│ │ │ ├── trba.yaml
│ │ │ ├── trbc.yaml
│ │ │ ├── tune_abinet-lm.yaml
│ │ │ └── vitstr.yaml
│ │ ├── main.yaml
│ │ ├── model/
│ │ │ ├── abinet.yaml
│ │ │ ├── crnn.yaml
│ │ │ ├── parseq.yaml
│ │ │ ├── trba.yaml
│ │ │ └── vitstr.yaml
│ │ └── tune.yaml
│ ├── hubconf.py
│ ├── read.py
│ ├── requirements.txt
│ ├── setup.cfg
│ ├── setup.py
│ ├── strhub/
│ │ ├── __init__.py
│ │ ├── data/
│ │ │ ├── __init__.py
│ │ │ ├── aa_overrides.py
│ │ │ ├── augment.py
│ │ │ ├── dataset.py
│ │ │ ├── module.py
│ │ │ └── utils.py
│ │ └── models/
│ │ ├── __init__.py
│ │ ├── abinet/
│ │ │ ├── LICENSE
│ │ │ ├── __init__.py
│ │ │ ├── attention.py
│ │ │ ├── backbone.py
│ │ │ ├── model.py
│ │ │ ├── model_abinet_iter.py
│ │ │ ├── model_alignment.py
│ │ │ ├── model_language.py
│ │ │ ├── model_vision.py
│ │ │ ├── resnet.py
│ │ │ ├── system.py
│ │ │ └── transformer.py
│ │ ├── base.py
│ │ ├── crnn/
│ │ │ ├── LICENSE
│ │ │ ├── __init__.py
│ │ │ ├── model.py
│ │ │ └── system.py
│ │ ├── modules.py
│ │ ├── parseq/
│ │ │ ├── __init__.py
│ │ │ ├── modules.py
│ │ │ └── system.py
│ │ ├── trba/
│ │ │ ├── __init__.py
│ │ │ ├── feature_extraction.py
│ │ │ ├── model.py
│ │ │ ├── prediction.py
│ │ │ ├── system.py
│ │ │ └── transformation.py
│ │ ├── utils.py
│ │ └── vitstr/
│ │ ├── __init__.py
│ │ ├── model.py
│ │ └── system.py
│ ├── test.py
│ ├── tools/
│ │ ├── art_converter.py
│ │ ├── case_sensitive_str_datasets_converter.py
│ │ ├── coco_2_converter.py
│ │ ├── coco_text_converter.py
│ │ ├── create_lmdb_dataset.py
│ │ ├── filter_lmdb.py
│ │ ├── lsvt_converter.py
│ │ ├── mlt19_converter.py
│ │ ├── openvino_converter.py
│ │ ├── test_abinet_lm_acc.py
│ │ └── textocr_converter.py
│ ├── train.py
│ └── tune.py
├── test.py
├── train.py
└── util.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
**/__pycache__
.vscode
checkpoints
logs
outputs
temp
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2024 Yiming Zhao
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
## UDiffText: A Unified Framework for High-quality Text Synthesis in Arbitrary Images via Character-aware Diffusion Models
#### Our proposed UDiffText is capable of synthesizing accurate and harmonious text in both synthetic and real-world images, and can therefore be applied to tasks such as scene text editing (a), arbitrary text generation (b) and accurate T2I generation (c).

### 📬 News
- **2024.7.16** Our paper is accepted by ECCV2024!🥳
- **2023.12.11** Version 2.0 update (getting rid of trash codes🚮)
- **2023.12.3** Build Hugging Face demo
- **2023.12.1** Build Github project page
- **2023.11.30** Version 1.0 upload
### 🔨 Installation
1. Clone this repo:
```
git clone https://github.com/ZYM-PKU/UDiffText.git
cd UDiffText
```
2. Install the required Python packages:
```
conda create -n udiff python=3.11
conda activate udiff
pip install torch==2.1.1 torchvision==0.16.1 --index-url https://download.pytorch.org/whl/cu121
pip install -r requirements.txt
```
3. Make the checkpoint directory and build the tree structure
```
mkdir ./checkpoints

checkpoints
├── AEs                // AutoEncoder
├── encoders
│   ├── LabelEncoder   // Character-level encoder
│   └── ViTSTR         // STR encoder
├── predictors         // STR model
├── pretrained         // Pretrained SD
└── ***.ckpt           // UDiffText checkpoint
```
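Equivalently, the whole tree above can be created in one shot (the `***.ckpt` file is the UDiffText checkpoint you download or train later, so only the directories are created here):

```shell
mkdir -p checkpoints/AEs \
         checkpoints/encoders/LabelEncoder \
         checkpoints/encoders/ViTSTR \
         checkpoints/predictors \
         checkpoints/pretrained
```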
### 💻 Training
1. Prepare your data
#### LAION-OCR
- Create a data directory **{your data root}/LAION-OCR** on your disk and put your data in it. Then set the **data_root** field in **./configs/dataset/locr.yaml**.
- For downloading and preprocessing the LAION-OCR dataset, please refer to [TextDiffuser](https://github.com/microsoft/unilm/tree/master/textdiffuser) and our **./scripts/preprocess/laion_ocr_pre.ipynb**.
#### ICDAR13
- Create a data directory **{your data root}/ICDAR13** on your disk and put your data in it. Then set the **data_root** field in **./configs/dataset/icd13.yaml**.
- Build the tree structure as below:
```
ICDAR13
├── train               // training set
│   ├── annos           // annotations
│   │   ├── gt_x.txt
│   │   └── ...
│   └── images          // images
│       ├── img_x.jpg
│       └── ...
└── val                 // validation set
    ├── annos           // annotations
    │   ├── gt_img_x.txt
    │   └── ...
    └── images          // images
        ├── img_x.jpg
        └── ...
```
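Each `gt_x.txt` line holds four comma-separated box coordinates followed by the transcription in double quotes; this is how **dataset/dataloader.py** parses it. A minimal standalone sketch of that parsing (the sample line is illustrative, not taken from the dataset):

```python
def parse_icdar13_line(line: str):
    # transcription is the text between the double quotes
    text = line.split("\"")[1]
    # first four comma-separated fields are left, top, right, bottom
    left, top, right, bottom = [int(s) for s in line.split(", ")[:4]]
    # the dataloader stores the box as (top, bottom, left, right)
    return (top, bottom, left, right), text

bbox, text = parse_icdar13_line('158, 128, 411, 181, "Footpath"')
print(bbox, text)  # → (128, 181, 158, 411) Footpath
```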
#### TextSeg
- Create a data directory **{your data root}/TextSeg** on your disk and put your data in it. Then set the **data_root** field in **./configs/dataset/tsg.yaml**.
- Build the tree structure as below:
```
TextSeg
├── train               // training set
│   ├── annotation      // annotations
│   │   ├── x_anno.json // annotation json file
│   │   ├── x_mask.png  // character-level mask
│   │   └── ...
│   └── image           // images
│       ├── x.jpg
│       └── ...
└── val                 // validation set
    ├── annotation      // annotations
    │   ├── x_anno.json // annotation json file
    │   ├── x_mask.png  // character-level mask
    │   └── ...
    └── image           // images
        ├── x.jpg
        └── ...
```
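Each `x_anno.json` maps word ids to records carrying the word text, a flattened 4-point bounding box, and per-character entries whose `mask_value` indexes the character in `x_mask.png`; **dataset/dataloader.py** reads exactly these fields and checks that the per-character texts concatenate to the word. A sketch with made-up values shaped to match what the loader expects:

```python
import json

# Minimal TextSeg-style annotation record (values are illustrative only).
sample = {
    "0001": {
        "text": "STOP",
        "bbox": [10, 10, 90, 10, 90, 40, 10, 40],  # 4 corner points, flattened
        "char": {
            "0": {"text": "S", "mask_value": 1},
            "1": {"text": "T", "mask_value": 2},
            "2": {"text": "O", "mask_value": 3},
            "3": {"text": "P", "mask_value": 4},
        },
    }
}

anno = json.loads(json.dumps(sample))["0001"]
chars = [anno["char"][key]["text"] for key in anno["char"]]
seg_values = [c["mask_value"] for c in anno["char"].values()]
assert "".join(chars) == anno["text"]  # same consistency check as the loader
print(chars, seg_values)
```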
#### SynthText
- Create a data directory **{your data root}/SynthText** on your disk and put your data in it. Then set the **data_root** field in **./configs/dataset/st.yaml**.
- Build the tree structure as below:
```
SynthText
├── 1                    // part 1
│   ├── ant+hill_1_0.jpg // image
│   ├── ant+hill_1_1.jpg
│   └── ...
├── 2                    // part 2
├── ...
└── gt.mat               // annotation file
```
2. Train the character-level encoder
Set the parameters in **./configs/pretrain.yaml** and run:
```
python pretrain.py
```
3. Train the UDiffText model
Download the [pretrained model](https://huggingface.co/stabilityai/stable-diffusion-2-inpainting/blob/main/512-inpainting-ema.ckpt) and put it in **./checkpoints/pretrained/**. You can ignore the "Missing Key" or "Unexpected Key" warnings when loading the checkpoint.
Set the parameters in **./configs/train.yaml**, especially the paths:
```
load_ckpt_path: ./checkpoints/pretrained/512-inpainting-ema.ckpt // Checkpoint of the pretrained SD
model_cfg_path: ./configs/train/textdesign_sd_2.yaml // UDiffText model config
dataset_cfg_path: ./configs/dataset/locr.yaml // Use the Laion-OCR dataset
```
and run:
```
python train.py
```
### 📏 Evaluation
1. Download our available [checkpoints](https://drive.google.com/drive/folders/1s8IWqqydaJBjukxViGKFj2N33lfoVkGf?usp=sharing) and put them in the corresponding directories in **./checkpoints**.
2. Set the parameters in **./configs/test.yaml**, especially the paths:
```
load_ckpt_path: "./checkpoints/***.ckpt" // UDiffText checkpoint
model_cfg_path: "./configs/test/textdesign_sd_2.yaml" // UDiffText model config
dataset_cfg_path: "./configs/dataset/locr.yaml" // LAION-OCR dataset config
```
and run:
```
python test.py
```
### 🖼️ Demo
To run an interactive demo on your own machine, execute:
```
python demo.py
```
or try our online demo at [Hugging Face](https://huggingface.co/spaces/ZYMPKU/UDiffText):

### 🎉 Acknowledgement
- **Dataset**: We sincerely thank [TextDiffuser](https://github.com/microsoft/unilm/tree/master/textdiffuser) for providing the open-source large-scale image-text dataset LAION-OCR with character-level segmentations.
- **Code & Model**: We build our project based on the code repo of [Stable Diffusion XL](https://github.com/Stability-AI/generative-models) and leverage the pretrained checkpoint of [Stable Diffusion 2.0](https://github.com/Stability-AI/stablediffusion).
### 🪬 Citation
```
@misc{zhao2023udifftext,
      title={UDiffText: A Unified Framework for High-quality Text Synthesis in Arbitrary Images via Character-aware Diffusion Models},
      author={Yiming Zhao and Zhouhui Lian},
      year={2023},
      eprint={2312.04884},
      archivePrefix={arXiv},
      primaryClass={cs.CV}
}
```
================================================
FILE: configs/dataset/icd13.yaml
================================================
target: ICDAR13Dataset
params:
  data_root: '{your data root}'
  H: 512
  W: 512
  word_len: [1, 8]
  seq_len: 12
  mask_min_ratio: 0.01
  aug_text_enabled: True
  aug_text_ratio: 1.0
================================================
FILE: configs/dataset/locr.yaml
================================================
target: LAIONOCRDataset
params:
  data_root: '{your data root}'
  H: 512
  W: 512
  word_len: [1, 12]
  seq_len: 12
  mask_min_ratio: 0.01
  seg_min_ratio: 0.001
  aug_text_enabled: True
  aug_text_ratio: 1.0
  use_cached: False
  length: 100000
================================================
FILE: configs/dataset/st.yaml
================================================
target: SynthTextDataset
params:
  data_root: '{your data root}'
  H: 512
  W: 512
  word_len: [1, 12]
  mask_min_ratio: 0.01
  seg_min_ratio: 0.001
  length: 100000
  use_cached: False
================================================
FILE: configs/dataset/tsg.yaml
================================================
target: TextSegDataset
params:
  data_root: '{your data root}'
  H: 512
  W: 512
  word_len: [1, 12]
  seq_len: 12
  mask_min_ratio: 0.01
  seg_min_ratio: 0.005
  aug_text_enabled: True
  aug_text_ratio: 1.0
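All of these dataset configs follow the same `target`/`params` convention: `target` names the class to build and `params` holds its constructor arguments. The repo resolves the bare dataset class names inside `dataset/dataloader.py`; the standalone sketch below assumes fully qualified dotted targets (as in `configs/pretrain.yaml`) and demonstrates the idea with a stdlib class rather than a repo class:

```python
import importlib

def instantiate_from_config(config: dict):
    """Build the object named by config['target'] with config['params'].

    A minimal sketch of the target/params convention; the repo's own
    loading code may differ in details (e.g. bare class names are
    resolved against the dataloader module's namespace).
    """
    module_name, cls_name = config["target"].rsplit(".", 1)
    cls = getattr(importlib.import_module(module_name), cls_name)
    return cls(**config.get("params", {}))

# Demonstrate with a stdlib class instead of a repo dataset class:
d = instantiate_from_config({
    "target": "datetime.date",
    "params": {"year": 2024, "month": 7, "day": 16},
})
print(d.isoformat())  # → 2024-07-16
```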
================================================
FILE: configs/demo.yaml
================================================
type: "demo"
# path
load_ckpt_path: "./checkpoints/{your checkpoint path}.ckpt"
model_cfg_path: "./configs/test/textdesign_sd_2.yaml"
# param
H: 512
W: 512
seq_len: 12
batch_size: 1
channel: 4 # AE latent channel
factor: 8 # AE downsample factor
scale: [4.0, 0.0] # cfg scale, None
noise_iters: 10
force_uc_zero_embeddings: ["ref", "label"]
aae_enabled: False
detailed: False
# runtime
steps: 50
init_step: 0
num_workers: 0
gpu: 0
================================================
FILE: configs/pretrain.yaml
================================================
# path
ckpt_dir: './checkpoints/encoders/LabelEncoder'

dataset:
  target: dataset.dataloader.LabelDataset
  params:
    size: 224
    length: 100000
    font_path: './dataset/utils/arial.ttf'
    min_len: 1
    max_len: 12

model:
  target: sgm.modules.encoders.modules.LabelEncoder
  params:
    trainable: True
    max_len: 12
    emb_dim: 2048
    n_heads: 8
    n_trans_layers: 12
    lr: 1e-5
    lambda_cls: 0.1
    lambda_pos: 0.1
    visual_config:
      target: sgm.modules.encoders.modules.ViTSTREncoder
      params:
        freeze: True
        ckpt_path: "./checkpoints/encoders/ViTSTR/vitstr_base_patch16_224.pth"
        size: 224
        patch_size: 16
        embed_dim: 768
        depth: 12
        num_heads: 12
        mlp_ratio: 4
        qkv_bias: True
        in_chans: 1

num_workers: 0
batch_size: 256
check_freq: 5

lightning:
  max_epochs: 1000
  accelerator: "cuda"
  devices:
    - 0
  default_root_dir: "./logs/pre_logs"
================================================
FILE: configs/test/textdesign_sd_2.yaml
================================================
model:
  target: sgm.models.diffusion.DiffusionEngine
  params:
    opt_keys:
      - t_attn
    input_key: image
    scale_factor: 0.18215
    disable_first_stage_autocast: True

    denoiser_config:
      target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
      params:
        num_idx: 1000
        weighting_config:
          target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
        scaling_config:
          target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
        discretization_config:
          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization

    network_config:
      target: sgm.modules.diffusionmodules.openaimodel.UnifiedUNetModel
      params:
        in_channels: 9
        out_channels: 4
        ctrl_channels: 0
        model_channels: 320
        attention_resolutions: [4, 2, 1]
        save_attn_type: [t_attn]
        save_attn_layers: [output_blocks.6.1]
        num_res_blocks: 2
        channel_mult: [1, 2, 4, 4]
        num_head_channels: 64
        use_linear_in_transformer: True
        transformer_depth: 1
        t_context_dim: 2048

    conditioner_config:
      target: sgm.modules.GeneralConditioner
      params:
        emb_models:
          # textual crossattn cond
          - is_trainable: False
            emb_key: t_crossattn
            ucg_rate: 0.1
            input_key: label
            target: sgm.modules.encoders.modules.LabelEncoder
            params:
              max_len: 12
              emb_dim: 2048
              n_heads: 8
              n_trans_layers: 12
              ckpt_path: ./checkpoints/encoders/LabelEncoder/epoch=19-step=7820.ckpt
          # concat cond
          - is_trainable: False
            input_key: mask
            target: sgm.modules.encoders.modules.SpatialRescaler
            params:
              in_channels: 1
              multiplier: 0.125
          - is_trainable: False
            input_key: masked
            target: sgm.modules.encoders.modules.LatentEncoder
            params:
              scale_factor: 0.18215
              config:
                target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
                params:
                  ckpt_path: ./checkpoints/AEs/AE_inpainting_2.safetensors
                  embed_dim: 4
                  monitor: val/rec_loss
                  ddconfig:
                    attn_type: vanilla-xformers
                    double_z: true
                    z_channels: 4
                    resolution: 256
                    in_channels: 3
                    out_ch: 3
                    ch: 128
                    ch_mult: [1, 2, 4, 4]
                    num_res_blocks: 2
                    attn_resolutions: []
                    dropout: 0.0
                  lossconfig:
                    target: torch.nn.Identity

    first_stage_config:
      target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
      params:
        ckpt_path: ./checkpoints/AEs/AE_inpainting_2.safetensors
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          attn_type: vanilla-xformers
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    loss_fn_config:
      target: sgm.modules.diffusionmodules.loss.FullLoss # StandardDiffusionLoss
      params:
        seq_len: 12
        kernel_size: 3
        gaussian_sigma: 1.0
        min_attn_size: 16
        lambda_local_loss: 0.01
        lambda_ocr_loss: 0.001
        ocr_enabled: False
        predictor_config:
          target: sgm.modules.predictors.model.ParseqPredictor
          params:
            ckpt_path: "./checkpoints/predictors/parseq-bb5792a6.pt"
        sigma_sampler_config:
          target: sgm.modules.diffusionmodules.sigma_sampling.DiscreteSampling
          params:
            num_idx: 1000
            discretization_config:
              target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
================================================
FILE: configs/test.yaml
================================================
type: "test"
# path
load_ckpt_path: "./checkpoints/{your checkpoint path}.ckpt"
model_cfg_path: "./configs/test/textdesign_sd_2.yaml"
dataset_cfg_path: "./configs/dataset/icd13.yaml"
output_dir: "./outputs"
temp_dir: "./temp"
# param
channel: 4 # AE latent channel
factor: 8 # AE downsample factor
scale: [5.0, 0.0] # cfg scale, None
noise_iters: 10 # iterations for initial noise searching
force_uc_zero_embeddings: ["label"] # condition label
aae_enabled: False # attend and excite
detailed: False # save visualization results
# runtime
steps: 50 # sampling steps
init_step: 0
batch_size: 1
num_workers: 0
gpu: 0 # index of your gpu device
max_iter: 100
shuffle: True
quan_test: False # quantitative test
# ocr
ocr_enabled: True
predictor_config:
  target: sgm.modules.predictors.model.ParseqPredictor
  params:
    ckpt_path: "./checkpoints/predictors/parseq-bb5792a6.pt"
================================================
FILE: configs/train/textdesign_sd_2.yaml
================================================
model:
  target: sgm.models.diffusion.DiffusionEngine
  params:
    opt_keys:
      - t_attn
      - t_norm
    input_key: image
    scale_factor: 0.18215
    disable_first_stage_autocast: True

    denoiser_config:
      target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
      params:
        num_idx: 1000
        weighting_config:
          target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
        scaling_config:
          target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
        discretization_config:
          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization

    network_config:
      target: sgm.modules.diffusionmodules.openaimodel.UnifiedUNetModel
      params:
        in_channels: 9
        out_channels: 4
        ctrl_channels: 0
        model_channels: 320
        attention_resolutions: [4, 2, 1]
        save_attn_type: [t_attn]
        save_attn_layers: [output_blocks.6.1]
        num_res_blocks: 2
        channel_mult: [1, 2, 4, 4]
        num_head_channels: 64
        use_linear_in_transformer: True
        transformer_depth: 1
        t_context_dim: 2048

    conditioner_config:
      target: sgm.modules.GeneralConditioner
      params:
        emb_models:
          # textual crossattn cond
          - is_trainable: False
            emb_key: t_crossattn
            ucg_rate: 0.1
            input_key: label
            target: sgm.modules.encoders.modules.LabelEncoder
            params:
              max_len: 12
              emb_dim: 2048
              n_heads: 8
              n_trans_layers: 12
              ckpt_path: ./checkpoints/encoders/LabelEncoder/epoch=19-step=7820.ckpt
          # concat cond
          - is_trainable: False
            input_key: mask
            target: sgm.modules.encoders.modules.SpatialRescaler
            params:
              in_channels: 1
              multiplier: 0.125
          - is_trainable: False
            input_key: masked
            target: sgm.modules.encoders.modules.LatentEncoder
            params:
              scale_factor: 0.18215
              config:
                target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
                params:
                  ckpt_path: ./checkpoints/AEs/AE_inpainting_2.safetensors
                  embed_dim: 4
                  monitor: val/rec_loss
                  ddconfig:
                    attn_type: vanilla-xformers
                    double_z: true
                    z_channels: 4
                    resolution: 256
                    in_channels: 3
                    out_ch: 3
                    ch: 128
                    ch_mult: [1, 2, 4, 4]
                    num_res_blocks: 2
                    attn_resolutions: []
                    dropout: 0.0
                  lossconfig:
                    target: torch.nn.Identity

    first_stage_config:
      target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
      params:
        ckpt_path: ./checkpoints/AEs/AE_inpainting_2.safetensors
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          attn_type: vanilla-xformers
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    loss_fn_config:
      target: sgm.modules.diffusionmodules.loss.FullLoss # StandardDiffusionLoss
      params:
        seq_len: 12
        kernel_size: 3
        gaussian_sigma: 1.0
        min_attn_size: 16
        lambda_local_loss: 0.01
        lambda_ocr_loss: 0.001
        ocr_enabled: False
        predictor_config:
          target: sgm.modules.predictors.model.ParseqPredictor
          params:
            ckpt_path: "./checkpoints/predictors/parseq-bb5792a6.pt"
        sigma_sampler_config:
          target: sgm.modules.diffusionmodules.sigma_sampling.DiscreteSampling
          params:
            num_idx: 1000
            discretization_config:
              target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization

    sampler_config:
      target: sgm.modules.diffusionmodules.sampling.EulerEDMSampler
      params:
        num_steps: 50
        discretization_config:
          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
        guider_config:
          target: sgm.modules.diffusionmodules.guiders.VanillaCFG
          params:
            scale: 5.0
================================================
FILE: configs/train.yaml
================================================
type: "train"
# path
save_ckpt_dir: ./checkpoints
load_ckpt_path: ./checkpoints/pretrained/512-inpainting-ema.ckpt
model_cfg_path: ./configs/train/textdesign_sd_2.yaml
dataset_cfg_path: ./configs/dataset/locr.yaml
# param
save_ckpt_freq: 1
num_workers: 0
batch_size: 16
base_learning_rate: 5.0e-5
shuffle: False
# runtime
lightning:
  max_epochs: 100
  accelerator: gpu
  strategy: ddp_find_unused_parameters_true
  accumulate_grad_batches: 4
  devices: [1,2,3,4,5,6,7,8]
  default_root_dir: ./logs/base_logs
  profiler: simple
================================================
FILE: dataset/__init__.py
================================================
================================================
FILE: dataset/dataloader.py
================================================
import os,glob
import torch
import cv2
import scipy
import string
import json
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.utils.data as data
import numpy as np
from tqdm import tqdm
from omegaconf import OmegaConf
from PIL import Image, ImageDraw, ImageFont
from os.path import join as ospj
from torchvision.utils import save_image
from random import choice, randint, sample, uniform, shuffle
from util import *
def region_draw_text(H, W, r_bbox, text, font_path = "./dataset/utils/arial.ttf"):

    m_top, m_bottom, m_left, m_right = r_bbox
    m_h, m_w = m_bottom-m_top, m_right-m_left

    font = ImageFont.truetype(font_path, 128)
    std_l, std_t, std_r, std_b = font.getbbox(text)
    std_h, std_w = std_b - std_t, std_r - std_l

    image = Image.new('RGB', (std_w, std_h), color = (255, 255, 255))
    draw = ImageDraw.Draw(image)
    draw.text((0, 0), text, fill = (0, 0, 0), font=font, anchor="lt")

    transform = transforms.Compose([
        transforms.Resize((m_h, m_w), transforms.InterpolationMode.BICUBIC, antialias=True),
        transforms.ToTensor()
    ])
    image = transform(image)

    result = torch.ones((3, H, W))
    result[:, m_top:m_bottom, m_left:m_right] = image

    return result
def initialize_word_dict():

    with open('./dataset/utils/words.txt', 'r') as f:
        word_list = f.readlines()

    words = []
    for word_line in word_list:
        words += word_line[:-1].split(" ")
    words.sort(key = lambda w: len(w))

    word_dict = {l: [] for l in range(len(words[0]), len(words[-1])+1)}
    for word in words:
        word_dict[len(word)].append(word)

    return word_dict
class LabelDataset(data.Dataset):

    def __init__(self, size, length, font_path, min_len, max_len) -> None:
        super().__init__()

        # constraint
        self.length = length
        self.size = size

        # path
        self.font_path = font_path

        # word dict
        self.character = string.printable[:-6]
        self.min_len = min_len
        self.max_len = max_len

        self.grayscale = transforms.Grayscale()
        self.resize = transforms.Resize((self.size, self.size), transforms.InterpolationMode.BICUBIC, antialias=True)

    def __len__(self):
        return self.length

    def __getitem__(self, index):

        while True:

            text_len = randint(self.min_len, self.max_len)
            text = "".join([choice(self.character) for i in range(text_len)])
            font_path = self.font_path

            try:
                font = ImageFont.truetype(font_path, 128)
                std_l, std_t, std_r, std_b = font.getbbox(text)
                std_h, std_w = std_b - std_t, std_r - std_l
                if std_h == 0 or std_w == 0:
                    continue
            except:
                continue

            try:
                image = Image.new('RGB', (std_w, std_h), color = (0,0,0))
                draw = ImageDraw.Draw(image)
                draw.text((0, 0), text, fill = (255,255,255), font=font, anchor="lt")
            except:
                continue

            image = transforms.ToTensor()(image)
            image = self.grayscale(image)
            image = self.resize(image)

            batch = {
                "image": image,
                "text": text
            }

            return batch
class ICDAR13Dataset(data.Dataset):

    def __init__(self, cfgs, datype) -> None:
        super().__init__()

        # basic
        self.type = datype
        self.character = string.printable[:-6]

        # path
        self.data_root = ospj(cfgs.data_root, "ICDAR13", self.type)
        self.image_root = ospj(self.data_root, "images")
        self.anno_root = ospj(self.data_root, "annos")
        self.anno_paths = sorted(glob.glob(ospj(self.anno_root, "*.txt")))

        # constraint
        self.H = cfgs.H
        self.W = cfgs.W
        self.word_len = cfgs.word_len
        self.seq_len = cfgs.seq_len
        self.mask_min_ratio = cfgs.mask_min_ratio
        self.aug_text_enabled = cfgs.aug_text_enabled
        self.aug_text_ratio = cfgs.aug_text_ratio

        self.items = []
        total_count = 0
        for anno_path in self.anno_paths:
            name = anno_path.split(os.sep)[-1].split(".")[0].replace("gt_", "")
            with open(anno_path, "r") as fp:
                annos = fp.readlines()
            for anno in annos:
                total_count += 1
                text = anno.split("\"")[1]
                left, top, right, bottom = [int(s) for s in anno.split(", ")[:4]]
                area = (bottom-top) * (right-left)
                bbox = np.array((top, bottom, left, right))
                if len(text) < self.word_len[0] or len(text) > self.word_len[1]: continue
                if not all([c in self.character for c in text]): continue
                if area / (self.H * self.W) < self.mask_min_ratio: continue
                self.items.append({
                    "image_path": ospj(self.image_root, f"{name}.jpg"),
                    "text": text,
                    "bbox": bbox
                })

        self.length = len(self.items)
        print(f"Total: {total_count}, filtered: {self.length}")
        self.count = -1
        self.word_dict = initialize_word_dict()

    def __len__(self):
        return self.length

    def augment(self, image, bbox):

        h, w, _ = image.shape
        m_top, m_bottom, m_left, m_right = bbox
        mask = np.ones((h, w), dtype=np.uint8)
        mask[m_top:m_bottom, m_left:m_right] = 0

        if h >= w:
            delta = (h-w)//2
            m_left += delta; m_right += delta
            image = cv2.copyMakeBorder(image, 0,0,delta,delta, cv2.BORDER_REPLICATE)
            mask = cv2.copyMakeBorder(mask, 0,0,delta,delta, cv2.BORDER_CONSTANT, value = (1,1,1))
        else:
            delta = (w-h)//2
            m_top += delta; m_bottom += delta
            image = cv2.copyMakeBorder(image, delta,delta,0,0, cv2.BORDER_REPLICATE)
            mask = cv2.copyMakeBorder(mask, delta,delta,0,0, cv2.BORDER_CONSTANT, value = (1,1,1))

        m_h, m_w = int(m_bottom-m_top), int(m_right-m_left)
        c_h, c_w = m_top + m_h//2, m_left + m_w//2
        h, w, _ = image.shape
        area = (m_bottom-m_top) * (m_right-m_left)
        aug_min_ratio = self.mask_min_ratio * 4
        if area/(h*w) < aug_min_ratio:
            d = int((area/aug_min_ratio)**0.5)
            d = max(d, max(m_h, m_w))
            if c_h <= h - c_h:
                delta_top = min(c_h, d//2)
                delta_bottom = d - delta_top
            else:
                delta_bottom = min(h - c_h, d//2)
                delta_top = d - delta_bottom
            if c_w <= w - c_w:
                delta_left = min(c_w, d//2)
                delta_right = d - delta_left
            else:
                delta_right = min(w - c_w, d//2)
                delta_left = d - delta_right
            n_top, n_bottom = c_h - delta_top, c_h + delta_bottom
            n_left, n_right = c_w - delta_left, c_w + delta_right
            image = image[n_top:n_bottom, n_left:n_right, :]
            mask = mask[n_top:n_bottom, n_left:n_right]
            m_top -= n_top; m_bottom -= n_top
            m_left -= n_left; m_right -= n_left

        h, w, _ = image.shape
        m_top, m_bottom = int(m_top * (self.H/h)), int(m_bottom * (self.H/h))
        m_left, m_right = int(m_left * (self.W/w)), int(m_right * (self.W/w))
        image = cv2.resize(image, (self.W, self.H))
        mask = cv2.resize(mask, (self.W, self.H))
        r_bbox = torch.tensor((m_top, m_bottom, m_left, m_right))

        return image, mask, r_bbox

    def __getitem__(self, index):

        self.count += 1
        item = self.items[index]
        image_path = item["image_path"]
        text = item["text"]
        bbox = item["bbox"]
        aug_text = choice(self.word_dict[len(text)]) if uniform(0, 1) <= self.aug_text_ratio else text

        image = Image.open(image_path).convert("RGB")
        w, h = image.size
        image = np.asarray(image)
        image, mask, r_bbox = self.augment(image, bbox)

        image = torch.from_numpy(image.transpose(2,0,1)).to(dtype=torch.float32) / 127.5 - 1.0
        mask = torch.from_numpy(mask[None]).to(dtype=torch.float32)
        masked = image * mask
        mask = 1 - mask
        seg_mask = torch.cat((torch.ones(len(text)), torch.zeros(self.seq_len-len(text))))
        rendered = region_draw_text(self.H, self.W, r_bbox, aug_text if self.aug_text_enabled else text)

        # additional cond
        txt = f"\"{aug_text if self.aug_text_enabled else text}\""
        original_size_as_tuple = torch.tensor((h, w))
        crop_coords_top_left = torch.tensor((0, 0))
        target_size_as_tuple = torch.tensor((self.H, self.W))

        batch = {
            "image": image,
            "mask": mask,
            "masked": masked,
            "seg_mask": seg_mask,
            "r_bbox": r_bbox,
            "rendered": rendered,
            "label": aug_text if self.aug_text_enabled else text,
            "txt": txt,
            "original_size_as_tuple": original_size_as_tuple,
            "crop_coords_top_left": crop_coords_top_left,
            "target_size_as_tuple": target_size_as_tuple,
            "name": str(self.count)
        }

        return batch
class TextSegDataset(data.Dataset):
def __init__(self, cfgs, datype) -> None:
super().__init__()
# basic
self.type = datype
self.character = string.printable[:-6]
# path
self.data_root = ospj(cfgs.data_root, "TextSeg", self.type)
self.image_root = ospj(self.data_root, "image")
self.anno_root = ospj(self.data_root, "annotation")
# constraint
self.H = cfgs.H
self.W = cfgs.W
self.word_len = cfgs.word_len
self.seq_len = cfgs.seq_len
self.mask_min_ratio = cfgs.mask_min_ratio
self.seg_min_ratio = cfgs.seg_min_ratio
self.aug_text_enabled = cfgs.aug_text_enabled
self.aug_text_ratio = cfgs.aug_text_ratio
image_paths = sorted(glob.glob(ospj(self.image_root, "*.jpg")))
anno_paths = sorted(glob.glob(ospj(self.anno_root, "*.json")))
seg_paths = sorted([p for p in glob.glob(ospj(self.anno_root, "*.png")) if "eff" not in p])
self.items = []
total_count = 0
for image_path, anno_path, seg_path in zip(image_paths, anno_paths, seg_paths):
with open(anno_path, "rb") as fp:
annos = json.load(fp)
for anno in annos.values():
total_count += 1
text = anno["text"]
chars = [anno["char"][key]["text"] for key in anno["char"]]
bbox = np.array(anno["bbox"]).reshape((4,2))
seg_values = [c["mask_value"] for c in anno["char"].values()]
area = cv2.contourArea(bbox)
if "".join(chars) != text: continue
if "#" in text: continue
if len(text) < self.word_len[0] or len(text) > self.word_len[1]: continue
if not all([c in self.character for c in text]): continue
if area / (self.H * self.W) < self.mask_min_ratio: continue
self.items.append({
"image_path": image_path,
"seg_path": seg_path,
"text": text,
"bbox": bbox,
"seg_values": seg_values
})
self.length = len(self.items)
print(f"Total: {total_count}, filtered: {self.length}")
self.count = -1
self.word_dict = initialize_word_dict()
def __len__(self):
return self.length
def augment(self, image, seg, text, bbox, seg_values):
h, w, _ = image.shape
m_top, m_bottom = int(np.min(bbox[:,1])), int(np.max(bbox[:,1]))
m_left, m_right = int(np.min(bbox[:,0])), int(np.max(bbox[:,0]))
mask = np.ones((h, w), dtype=np.uint8)
mask = cv2.fillConvexPoly(mask, bbox, 0)
if h >= w:
delta = (h-w)//2
m_left += delta; m_right += delta
image = cv2.copyMakeBorder(image, 0,0,delta,delta, cv2.BORDER_REPLICATE)
mask = cv2.copyMakeBorder(mask, 0,0,delta,delta, cv2.BORDER_CONSTANT, value = (1,1,1))
seg = cv2.copyMakeBorder(seg, 0,0,delta,delta, cv2.BORDER_CONSTANT, value = (0,0,0))
else:
delta = (w-h)//2
m_top += delta; m_bottom += delta
image = cv2.copyMakeBorder(image, delta,delta,0,0, cv2.BORDER_REPLICATE)
mask = cv2.copyMakeBorder(mask, delta,delta,0,0, cv2.BORDER_CONSTANT, value = (1,1,1))
seg = cv2.copyMakeBorder(seg, delta,delta,0,0, cv2.BORDER_CONSTANT, value = (0,0,0))
m_h, m_w = int(m_bottom-m_top), int(m_right-m_left)
c_h, c_w = m_top + m_h//2, m_left + m_w//2
h, w, _ = image.shape
area = cv2.contourArea(bbox)
aug_min_ratio = self.mask_min_ratio * 4
if area/(h*w) < aug_min_ratio:
d = int((area/aug_min_ratio)**0.5)
d = max(d, max(m_h, m_w))
if c_h <= h - c_h:
delta_top = min(c_h, d//2)
delta_bottom = d - delta_top
else:
delta_bottom = min(h - c_h, d//2)
delta_top = d - delta_bottom
if c_w <= w - c_w:
delta_left = min(c_w, d//2)
delta_right = d - delta_left
else:
delta_right = min(w - c_w, d//2)
delta_left = d - delta_right
n_top, n_bottom = c_h - delta_top, c_h + delta_bottom
n_left, n_right = c_w - delta_left, c_w + delta_right
image = image[n_top:n_bottom, n_left:n_right, :]
mask = mask[n_top:n_bottom, n_left:n_right]
seg = seg[n_top:n_bottom, n_left:n_right, :]
m_top -= n_top; m_bottom -= n_top
m_left -= n_left; m_right -= n_left
segs = []
text_indices = [[i for i, c in enumerate(text) if c == ch] for ch in text] # for each position, the indices of every occurrence of that character
for i in range(len(text)):
indices = text_indices[i]
seg_i = np.sum([(seg == seg_values[ind]).astype(np.uint8).mean(axis=-1) for ind in indices], axis=0) # position-unaware: merge the masks of all occurrences of this character
seg_i = np.clip(seg_i, 0, 1)
seg_i = cv2.morphologyEx(seg_i, cv2.MORPH_OPEN, np.ones((1,2),np.uint8), iterations=2) # horizontal opening to denoise
seg_i = cv2.morphologyEx(seg_i, cv2.MORPH_OPEN, np.ones((2,1),np.uint8), iterations=2) # vertical opening to denoise
seg_i = cv2.morphologyEx(seg_i, cv2.MORPH_DILATE, np.ones((3,3),np.uint8), iterations=7) # dilate to cover the full glyph
segs.append(seg_i[None])
segs = segs + [np.zeros_like(segs[0]) for i in range(self.seq_len-len(segs))]
seg = np.concatenate(segs, axis=0)
h, w, _ = image.shape
m_top, m_bottom = int(m_top * (self.H/h)), int(m_bottom * (self.H/h))
m_left, m_right = int(m_left * (self.W/w)), int(m_right * (self.W/w))
image = cv2.resize(image, (self.W, self.H))
seg = cv2.resize(seg.transpose((1,2,0)), (self.W, self.H)).transpose((2,0,1))
mask = cv2.resize(mask, (self.W, self.H))
r_bbox = torch.tensor((m_top, m_bottom, m_left, m_right))
return image, seg, mask, r_bbox
def __getitem__(self, index):
self.count += 1
while True:
item = self.items[index]
image_path = item["image_path"]
seg_path = item["seg_path"]
text = item["text"]
bbox = item["bbox"]
seg_values = item["seg_values"]
aug_text = choice(self.word_dict[len(text)]) if uniform(0, 1) <= self.aug_text_ratio else text
image = Image.open(image_path).convert("RGB")
seg = Image.open(seg_path).convert("RGB")
w, h = image.size
image = np.asarray(image)
seg = np.asarray(seg)
image, seg, mask, r_bbox = self.augment(image, seg, text, bbox, seg_values)
image = torch.from_numpy(image.transpose(2,0,1)).to(dtype=torch.float32) / 127.5 - 1.0
mask = torch.from_numpy(mask[None]).to(dtype=torch.float32)
masked = image * mask
mask = 1 - mask
seg = torch.from_numpy(seg).to(dtype=torch.float32) # cast for consistency with the other datasets
seg_mask = torch.cat((torch.ones(len(text)), torch.zeros(self.seq_len-len(text))))
rendered = region_draw_text(self.H, self.W, r_bbox, aug_text if self.aug_text_enabled else text)
# additional cond
txt = f"\"{aug_text if self.aug_text_enabled else text}\""
original_size_as_tuple = torch.tensor((h, w))
crop_coords_top_left = torch.tensor((0, 0))
target_size_as_tuple = torch.tensor((self.H, self.W))
batch = {
"image": image,
"seg": seg,
"seg_mask": seg_mask,
"mask": mask,
"masked": masked,
"r_bbox": r_bbox,
"rendered": rendered,
"label": aug_text if self.aug_text_enabled else text,
"txt": txt,
"original_size_as_tuple": original_size_as_tuple,
"crop_coords_top_left": crop_coords_top_left,
"target_size_as_tuple": target_size_as_tuple,
"name": str(self.count)
}
return batch
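The tensor conventions in `__getitem__` above recur in all three dataset classes. A minimal standalone sketch of that normalization step (the helper name `to_model_inputs` is ours, not part of this module):

```python
import numpy as np
import torch

def to_model_inputs(image_np, keep_mask_np):
    # HWC uint8 image -> CHW float32 in [-1, 1], matching image / 127.5 - 1.0 above
    image = torch.from_numpy(image_np.transpose(2, 0, 1)).to(torch.float32) / 127.5 - 1.0
    # keep_mask is 1 outside the text box, 0 inside (the cv2.fillConvexPoly result)
    mask = torch.from_numpy(keep_mask_np[None]).to(torch.float32)
    masked = image * mask   # text region zeroed out
    mask = 1 - mask         # flipped: 1 marks the region to inpaint
    return image, masked, mask
```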
class SynthTextDataset(data.Dataset):
def __init__(self, cfgs, datype) -> None:
super().__init__()
# basic
self.type = datype
self.length = cfgs.length
self.character = string.printable[:-6]
# path
self.data_root = ospj(cfgs.data_root, "SynthText")
self.anno_path = ospj(self.data_root, "gt.mat")
# constraint
self.H = cfgs.H
self.W = cfgs.W
self.word_len = cfgs.word_len
self.mask_min_ratio = cfgs.mask_min_ratio
self.seg_min_ratio = cfgs.seg_min_ratio
anno = scipy.io.loadmat(self.anno_path)
image_names = anno["imnames"][0]
word_bboxes = anno["wordBB"][0]
char_bboxes = anno["charBB"][0]
txts = anno["txt"][0]
if cfgs.use_cached:
with open(ospj(self.data_root, "items.json"), "r") as fp:
self.items = json.load(fp)
else:
self.items = []
for image_name, word_bbox, char_bbox, txt in zip(image_names, word_bboxes, char_bboxes, txts):
image_name = image_name[0]
image_path = ospj(self.data_root, image_name)
txt_list = []
for frag in txt:
frag = frag.replace("\n", " ")
frags = [s for s in frag.split(" ") if s != ""]
txt_list += frags
if word_bbox.ndim < 3: word_bbox = word_bbox[...,None]
word_bbox = word_bbox.transpose((2,1,0)).astype(np.int32)
char_bbox = char_bbox.transpose((2,1,0)).astype(np.int32)
pointer = 0
for bbox, text in zip(word_bbox, txt_list):
seg_bboxs = char_bbox[pointer: pointer+len(text)]
pointer += len(text)
area = cv2.contourArea(bbox)
if len(text) < self.word_len[0] or len(text) > self.word_len[1]: continue
if area / (self.H * self.W) < self.mask_min_ratio: continue
self.items.append({
"image_path": image_path,
"text": text,
"bbox": bbox.tolist(),
"seg_bboxs" : seg_bboxs.tolist()
})
with open(ospj(self.data_root, "items.json"), "w") as fp:
json.dump(self.items, fp)
self.count = -1
def __len__(self):
return self.length
def augment(self, image, bbox, seg_bboxs):
h, w, _ = image.shape
m_top, m_bottom = max(0, int(np.min(bbox[:,1]))), min(h, int(np.max(bbox[:,1])))
m_left, m_right = max(0, int(np.min(bbox[:,0]))), min(w, int(np.max(bbox[:,0])))
mask = np.ones((h, w), dtype=np.uint8)
mask = cv2.fillConvexPoly(mask, bbox, 0)
segs = []
seg_sum = 0
for seg_bbox in seg_bboxs:
seg_i = np.zeros_like(mask)
seg_i = cv2.fillConvexPoly(seg_i, seg_bbox, 1)
segs.append(seg_i[None])
seg_sum += seg_i.sum()
seg_ratio = float(seg_sum / len(segs)) / (h*w) # mean per-character mask area, normalized by image area
segs = segs + [np.zeros_like(segs[0]) for i in range(self.word_len[1]-len(segs))]
seg = np.concatenate(segs, axis=0)
if h >= w:
delta = (h-w)//2
m_left += delta; m_right += delta
image = cv2.copyMakeBorder(image, 0,0,delta,delta, cv2.BORDER_REPLICATE)
mask = cv2.copyMakeBorder(mask, 0,0,delta,delta, cv2.BORDER_CONSTANT, value = (1,1,1))
seg = cv2.copyMakeBorder(seg.transpose((1,2,0)), 0,0,delta,delta, cv2.BORDER_CONSTANT, value = (0,0,0)).transpose((2,0,1))
else:
delta = (w-h)//2
m_top += delta; m_bottom += delta
image = cv2.copyMakeBorder(image, delta,delta,0,0, cv2.BORDER_REPLICATE)
mask = cv2.copyMakeBorder(mask, delta,delta,0,0, cv2.BORDER_CONSTANT, value = (1,1,1))
seg = cv2.copyMakeBorder(seg.transpose((1,2,0)), delta,delta,0,0, cv2.BORDER_CONSTANT, value = (0,0,0)).transpose((2,0,1))
m_h, m_w = int(m_bottom-m_top), int(m_right-m_left)
c_h, c_w = m_top + m_h//2, m_left + m_w//2
h, w, _ = image.shape
area = cv2.contourArea(bbox)
aug_min_ratio = self.mask_min_ratio * 4
if area/(h*w) < aug_min_ratio:
d = int((area/aug_min_ratio)**0.5)
d = max(d, max(m_h, m_w))
if c_h <= h - c_h:
delta_top = min(c_h, d//2)
delta_bottom = d - delta_top
else:
delta_bottom = min(h - c_h, d//2)
delta_top = d - delta_bottom
if c_w <= w - c_w:
delta_left = min(c_w, d//2)
delta_right = d - delta_left
else:
delta_right = min(w - c_w, d//2)
delta_left = d - delta_right
n_top, n_bottom = c_h - delta_top, c_h + delta_bottom
n_left, n_right = c_w - delta_left, c_w + delta_right
image = image[n_top:n_bottom, n_left:n_right, :]
mask = mask[n_top:n_bottom, n_left:n_right]
seg = seg[:, n_top:n_bottom, n_left:n_right]
m_top -= n_top; m_bottom -= n_top
m_left -= n_left; m_right -= n_left
h, w, _ = image.shape
m_top, m_bottom = int(m_top * (self.H/h)), int(m_bottom * (self.H/h))
m_left, m_right = int(m_left * (self.W/w)), int(m_right * (self.W/w))
image = cv2.resize(image, (self.W, self.H))
seg = cv2.resize(seg.transpose((1,2,0)), (self.W, self.H)).transpose((2,0,1))
mask = cv2.resize(mask, (self.W, self.H))
r_bbox = torch.tensor((m_top, m_bottom, m_left, m_right))
return image, seg, mask, seg_ratio, r_bbox
def __getitem__(self, index):
self.count += 1
while True:
item = choice(self.items)
image_path = item["image_path"]
text = item["text"]
bbox = np.array(item["bbox"])
seg_bboxs = np.array(item["seg_bboxs"])
image = Image.open(image_path).convert("RGB")
w, h = image.size
image = np.asarray(image)
image, seg, mask, seg_ratio, r_bbox = self.augment(image, bbox, seg_bboxs)
if seg_ratio < self.seg_min_ratio: continue
image = torch.from_numpy(image.transpose(2,0,1)).to(dtype=torch.float32) / 127.5 - 1.0
mask = torch.from_numpy(mask[None]).to(dtype=torch.float32)
masked = image * mask
mask = 1 - mask
seg = torch.from_numpy(seg).to(dtype=torch.float32)
seg_mask = torch.cat((torch.ones(len(text)), torch.zeros(self.word_len[1]-len(text))))
# additional cond
txt = f"\"{text}\""
original_size_as_tuple = torch.tensor((h, w))
crop_coords_top_left = torch.tensor((0, 0))
target_size_as_tuple = torch.tensor((self.H, self.W))
batch = {
"image": image,
"seg": seg,
"seg_mask": seg_mask,
"mask": mask,
"masked": masked,
"r_bbox": r_bbox,
"label": text,
"txt": txt,
"original_size_as_tuple": original_size_as_tuple,
"crop_coords_top_left": crop_coords_top_left,
"target_size_as_tuple": target_size_as_tuple,
"name": str(self.count)
}
return batch
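Both the padding of per-character masks in `augment` and the matching `seg_mask` built in `__getitem__` follow one convention: pad to a fixed sequence length with zero maps, and mark real character slots with ones. A minimal sketch (helper name assumed):

```python
import numpy as np
import torch

def pad_char_segs(segs, seq_len):
    # Pad the list of per-character masks with all-zero maps up to seq_len,
    # mirroring: segs + [np.zeros_like(segs[0]) for i in range(seq_len - len(segs))]
    n = len(segs)
    padded = segs + [np.zeros_like(segs[0]) for _ in range(seq_len - n)]
    seg = np.stack(padded, axis=0)
    # seg_mask marks which slots hold a real character (1) vs padding (0)
    seg_mask = torch.cat((torch.ones(n), torch.zeros(seq_len - n)))
    return seg, seg_mask
```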
class LAIONOCRDataset(data.Dataset):
def __init__(self, cfgs, datype) -> None:
super().__init__()
# basic
self.type = datype
self.character = string.printable[:-6]
# path
self.data_root = ospj(cfgs.data_root, "LAION-OCR", self.type)
# constraint
self.H = cfgs.H
self.W = cfgs.W
self.W_std = 512
self.H_std = 512
self.word_len = cfgs.word_len
self.seq_len = cfgs.seq_len
self.mask_min_ratio = cfgs.mask_min_ratio
self.seg_min_ratio = cfgs.seg_min_ratio
self.aug_text_enabled = cfgs.aug_text_enabled if self.type != "train" else False
self.aug_text_ratio = cfgs.aug_text_ratio
if cfgs.use_cached:
with open(ospj(cfgs.data_root, "LAION-OCR", f"{self.type}_items.json"), "r") as fp:
self.items = json.load(fp)
else:
self.items = []
data_dirs = sorted(glob.glob(ospj(self.data_root, "*")))
len_count = area_count = text_count = 0
for data_dir in data_dirs:
image_path = ospj(data_dir, "image.jpg")
ocr_path = ospj(data_dir, "ocr.txt")
seg_path = ospj(data_dir, "charseg.npy")
with open(ocr_path, "r") as fp:
ocrs = fp.readlines()
for ocr in ocrs:
text, bbox_str, _ = ocr.strip("\n").split(" ")
bbox = np.array([int(v) for v in bbox_str.split(",")]).reshape((4,2))
area = cv2.contourArea(bbox)
if len(text) < self.word_len[0] or len(text) > self.word_len[1]:
len_count += 1
continue
if not all([c in self.character for c in text]):
text_count += 1
continue
if area / (self.W_std*self.H_std) < self.mask_min_ratio:
area_count += 1
continue
self.items.append({
"image_path": image_path,
"seg_path": seg_path,
"text": text,
"bbox_str": bbox_str,
})
with open(ospj(cfgs.data_root, "LAION-OCR", f"{self.type}_items.json"), "w") as fp:
json.dump(self.items, fp)
print(f"Total kept: {len(self.items)}; filtered out {len_count} by word length, {area_count} by mask area, {text_count} by charset")
self.length = cfgs.length
self.count = -1
self.word_dict = initialize_word_dict()
def __len__(self):
return self.length
def augment(self, image, seg, text, bbox):
image = cv2.resize(image, (self.W_std, self.H_std))
seg = cv2.resize(seg.astype(np.uint8), (self.W_std, self.H_std), interpolation=cv2.INTER_NEAREST) # nearest-neighbor: seg holds integer character labels
mask = np.ones((self.H_std, self.W_std), dtype=np.uint8)
mask = cv2.fillConvexPoly(mask, bbox, 0)
h, w, _ = image.shape
m_top, m_bottom = max(0, int(np.min(bbox[:,1]))), min(self.H_std, int(np.max(bbox[:,1])))
m_left, m_right = max(0, int(np.min(bbox[:,0]))), min(self.W_std, int(np.max(bbox[:,0])))
m_h, m_w = int(m_bottom-m_top), int(m_right-m_left)
c_h, c_w = m_top + m_h//2, m_left + m_w//2
area = cv2.contourArea(bbox)
aug_min_ratio = self.mask_min_ratio * 4
if area/(h*w) < aug_min_ratio:
d = int((area/aug_min_ratio)**0.5)
d = max(d, max(m_h, m_w))
if c_h <= h - c_h:
delta_top = min(c_h, d//2)
delta_bottom = d - delta_top
else:
delta_bottom = min(h - c_h, d//2)
delta_top = d - delta_bottom
if c_w <= w - c_w:
delta_left = min(c_w, d//2)
delta_right = d - delta_left
else:
delta_right = min(w - c_w, d//2)
delta_left = d - delta_right
n_top, n_bottom = c_h - delta_top, c_h + delta_bottom
n_left, n_right = c_w - delta_left, c_w + delta_right
image = image[n_top:n_bottom, n_left:n_right, :]
mask = mask[n_top:n_bottom, n_left:n_right]
seg = seg[n_top:n_bottom, n_left:n_right]
m_top -= n_top; m_bottom -= n_top
m_left -= n_left; m_right -= n_left
seg = seg * (1 - mask) # keep character labels only inside the word region
segs = [None for i in range(len(text))]
ch_dict = {}
for i in range(len(text)):
if text[i] in ch_dict: ch_dict[text[i]].append(i)
else: ch_dict[text[i]] = [i]
for ch in ch_dict:
ind = self.character.find(ch) + 1
ind_l = self.character.find(ch.lower()) + 1
seg_i = ((seg == ind).astype(np.uint8) + (seg == ind_l).astype(np.uint8))
seg_i = cv2.morphologyEx(seg_i, cv2.MORPH_OPEN, np.ones((1,2),np.uint8), iterations=1) # horizontal opening to denoise
seg_i = cv2.morphologyEx(seg_i, cv2.MORPH_OPEN, np.ones((2,1),np.uint8), iterations=1) # vertical opening to denoise
seg_i = cv2.morphologyEx(seg_i, cv2.MORPH_DILATE, np.ones((3,3),np.uint8), iterations=5) # dilate to cover the full glyph
retval, labels, stats, centroids = cv2.connectedComponentsWithStats(seg_i, connectivity=4)
if retval < len(ch_dict[ch]) + 1: # fewer components than occurrences of this character: unusable sample
return None, None, None, None
stats = stats[1:].tolist() # drop the background component
if retval > len(ch_dict[ch]) + 1: # extra components: keep only the largest
stats.sort(key = lambda st: st[-1], reverse=True)
stats = stats[:len(ch_dict[ch])]
stats.sort(key = lambda st: st[0]) # sort by x so components follow reading order
for idx, stat in enumerate(stats):
x, y, w, h, s = stat
s_mask = np.zeros_like(seg_i)
s_mask[y:y+h, x:x+w] = 1
seg_i_mask = seg_i * s_mask
segs[ch_dict[ch][idx]] = seg_i_mask[None]
segs = segs + [np.zeros_like(segs[0]) for i in range(self.seq_len-len(segs))]
seg = np.concatenate(segs, axis=0)
h, w, _ = image.shape
m_top, m_bottom = int(m_top * (self.H/h)), int(m_bottom * (self.H/h))
m_left, m_right = int(m_left * (self.W/w)), int(m_right * (self.W/w))
image = cv2.resize(image, (self.W, self.H))
seg = cv2.resize(seg.transpose((1,2,0)), (self.W, self.H)).transpose((2,0,1))
mask = cv2.resize(mask, (self.W, self.H))
r_bbox = torch.tensor((m_top, m_bottom, m_left, m_right))
return image, seg, mask, r_bbox
def __getitem__(self, index):
self.count += 1
while True:
item = choice(self.items)
image_path = item["image_path"]
seg_path = item["seg_path"]
text = item["text"]
bbox_str = item["bbox_str"]
bbox = np.array([int(v) for v in bbox_str.split(",")]).reshape((4,2))
aug_text = choice(self.word_dict[len(text)]) if uniform(0, 1) <= self.aug_text_ratio else text
image = Image.open(image_path).convert("RGB")
seg = np.load(seg_path)
w, h = image.size
image = np.asarray(image)
image, seg, mask, r_bbox = self.augment(image, seg, text, bbox)
if image is None: continue
image = torch.from_numpy(image.transpose(2,0,1)).to(dtype=torch.float32) / 127.5 - 1.0
mask = torch.from_numpy(mask[None]).to(dtype=torch.float32)
masked = image * mask
mask = 1 - mask
seg = torch.from_numpy(seg).to(dtype=torch.float32)
seg_mask = torch.cat((torch.ones(len(text)), torch.zeros(self.seq_len-len(text))))
m_top, m_bottom, m_left, m_right = r_bbox
ref = image[:, m_top:m_bottom, m_left:m_right]
ref = F.interpolate(ref[None], (128, 128))[0]
# rendered = region_draw_text(self.H, self.W, r_bbox, aug_text if self.aug_text_enabled else text)
# additional cond
txt = f"\"{aug_text if self.aug_text_enabled else text}\""
original_size_as_tuple = torch.tensor((h, w))
crop_coords_top_left = torch.tensor((0, 0))
target_size_as_tuple = torch.tensor((self.H, self.W))
batch = {
"image": image,
"seg": seg,
"seg_mask": seg_mask,
"mask": mask,
"masked": masked,
"r_bbox": r_bbox,
"ref": ref,
# "rendered": rendered,
"label": aug_text if self.aug_text_enabled else text,
"txt": txt,
"original_size_as_tuple": original_size_as_tuple,
"crop_coords_top_left": crop_coords_top_left,
"target_size_as_tuple": target_size_as_tuple,
"name": str(self.count)
}
return batch
def get_dataloader(cfgs, datype="train"):
dataset_cfgs = OmegaConf.load(cfgs.dataset_cfg_path)
print(f"Extracting data from {dataset_cfgs.target}")
Dataset = eval(dataset_cfgs.target) # resolve the dataset class named in the config
dataset = Dataset(dataset_cfgs.params, datype = datype)
return data.DataLoader(dataset=dataset, batch_size=cfgs.batch_size, shuffle=cfgs.shuffle, num_workers=cfgs.num_workers, drop_last=True)
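All three `augment` methods share the same crop-window expansion: when the text box is small relative to the image, a roughly square window of side `d` is cut around the box center so the mask-to-image ratio reaches `aug_min_ratio`, clamping each side at the image border. A standalone sketch of that computation (function name ours, not part of this module):

```python
def crop_window(h, w, c_h, c_w, m_h, m_w, area, min_ratio):
    # Side length d chosen so that area / d**2 is roughly min_ratio,
    # but never smaller than the text box itself.
    d = int((area / min_ratio) ** 0.5)
    d = max(d, m_h, m_w)
    # Clamp the shorter side at the image border and give the remainder
    # of d to the opposite side, exactly as in the augment() methods above.
    if c_h <= h - c_h:
        top = min(c_h, d // 2); bottom = d - top
    else:
        bottom = min(h - c_h, d // 2); top = d - bottom
    if c_w <= w - c_w:
        left = min(c_w, d // 2); right = d - left
    else:
        right = min(w - c_w, d // 2); left = d - right
    return c_h - top, c_h + bottom, c_w - left, c_w + right
```

The return value corresponds to the `(n_top, n_bottom, n_left, n_right)` slicing bounds used to crop `image`, `mask`, and `seg`.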
================================================
FILE: dataset/utils/words.txt
================================================
the of and to in a is was that for as with by on are from be or his were it an at not which have he had this has also their but one can its on the other been more they used first all two citation than into would only time who most may such some many when after between over these her about there use no them new him will out during made both then often so any being such as where number could main p. through system people known each while if called convert same later three because well work before the same under part very different became year did large example several city early until much government found own since she even form power do those around state including set high life against second century within world still end using small name what now usually American without however began like as well area make common the most water United States another way due must long less four death said film order due to back public does left based few become known as s given country major British place group considered among game point used to period support war music down million important systems control should took day family language last original result political line members case as well as see single just process along similar take following we although countries right either times areas published the other local include population never data home every various the time modern further development per how led possible military popular term though history generally you off rather men law developed German held human production body general the world light sometimes states late field based on having came above available book others York next created U.S. show himself out of wrote days died word play again great service age seen children level released works continued pp. 
the two five higher species energy required change means team January information theory New York produced making built design role addition included almost side position groups able de land total range July national space written social version Europe season force air allowed largest good type itself received women low throughout taken standard little least that is free cases size thus school especially old upon particular terms effect provide lower certain together present always short parts words third April described too up to established might played forces natural June once months rate European numbers six man rather than hand typically value England London October could be final the country September average France instead current December international program character increased a few surface across thought company followed best provided economic games significant named function building went return uses fact study below full source lost America person a number changes longer research individual languages strong structure party larger run open cause aircraft away far region need food forms increase outside started material cannot November half head market near record traditional special style all the sent story February player designed top at least themselves model returned band because of help types come points added events network limited services nature army former father close view allow won specific elements practice lines gave pressure introduced produce moved whether religious put official movement my Germany students trade method attack in order problems art eventually evidence referred results caused remained influence success meaning brought believed enough features give young white culture problem characters lead action according referred to conditions performance according to effects amount black business working better difficult temperature education real money smaller create located directly sound leading ground therefore get private formed 
particularly Chinese television subject south cities album class industry computer beginning community already complete go players associated related physical our town move complex interest rule speed commonly in order to rights living for example greater writing find growth killed court levels house against the office done shows Spanish needed saw central entire fire son books mostly access soon today earlier variety additional percent foreign ever recorded future widely title all of shown successful radio project construction changed king remains mass personal policy code includes involved Africa idea names separate base length rules key units likely makes release cost church films majority primary performed methods stated appear whole so that member allows decided Japanese reported sold capital science society rest stage event whose recent color legal highly career song frequently placed simple defined me mother appeared India schools turn simply relationship multiple nearly sense past relatively forced software attempt north William quickly companies programs products direct site sources previous technology battle front provides necessary served originally along with approach image seven experience Greek parts of text towards completed memory ability commercial UK health replaced worked yet start economy highest property behind knowledge test response largely sea reached president wide night operations supported act training reason Britain list issues occur loss the government reduced active functions remain lack to do clear gas island cells taking contains takes approximately exist basis distance independent intended elected product adopted actually wife ended born Christian keep potential course matter love issue objects things heavy oil center regular George the following divided eight date James summer direction laws numerous account income individuals concept plan announced Russian failed applied values northern proposed ships resulting estimated appears 
continue Canada red married object studies media engine passed Henry Australia machine quality founded hold say parties letter Japan expected treatment normal signal blood financial status video older carried compared know wanted accepted via read each other types of animals turned unit feature southern increasing met prevent songs applications require opened marriage contain equipment playing section remaining properties asked the case ten Charles activity whom ways buildings basic probably running materials stories edition location noted cell analysis Russia Rome removed becomes paper activities billion fall child cut operating degree ship occurs less than novel completely except inside Robert troops Jewish report call flow metal appointed offered told frequency initial campaign discovered definition historical models allowing operation police poor weight not have ancient here primarily agreed hit west heat security David claimed positive easily environment scientific going regions ISBN ball tradition reach requires St. extended charge female face serve equal Italy letters refused immediately believe supply effective internal versions cultural leave th mainly negative soldiers question spread suggested Paul
difference unique fully musical relations civil cover something board becoming famous hard plants situation workers Indian shot medical instead of risk device decision techniques weeks previously western phase responsible purpose race article gives reference giving authority critical sets Italian in the world ideas California growing element instance kept moving rock share nuclear causes reduce vote efforts table output car determined win era notes finally ordered application develop distribution represented leaving quite the population woman devices contrast offer tried presence the top mean scale us river collection content attempt to says powerful comes the best humans travel resulted proved African combined road felt page receive tax address teams connected pay annual despite disease birth closed kind agreement actual price claim goal management flight rise fuel brother maximum month presented write east male gold reaction powers resources latter plant middle motion observed rates your currently station appearance volume avoid consists deal families argued room scene user depending advantage weapons target prior records temperatures speech spent focus mission dead constant existence lived daughter signed relative blue occurred helped damage meant actions professional friends contact places solution Asia beyond meeting structures standards care exchange peace sequence carry county nor organization chemical showed factors federal transport capacity Washington e in that meet be used to claims upper pass perform attacks extremely effort the public opening tend initially literature alone attention paid sides useful centuries fixed follow winter Thomas crew origin serious alternative differences featured responsible for centre pieces strength measure examples track plans officers shape star Richard joined refer command existing cards leaders figure plays Louis piece houses classes conflict fields stars th century composed positions vary round gain tend to seeAlso 
impact slightly behavior depending on look as to nothing raised hands compared to reasons toward choice heart identified creating maintain refers condition constructed overall reading marked fourth division failure saying tour recently formal unable have to needs mind stations digital religion Spain think grew represent typical parents display competition acid and so fell week determine protection wave users build processes projects return to technique to keep fish cast drive starting unable to expanded affected entirely combination forward resistance entered factor equivalent attempts episode attempted away from providing sector external increases university ice ones achieved decades expressed Americans double demand regarded refer to architecture audience issued fight citizens match in all aid limit sexual goods seems costs distinct granted perhaps improved green file need to on to importance electric significantly DNA cycle normally components describes creation TV layer tree bring fighting stop and also core format increasingly thousands animal leader understanding break unknown classical extensive sign Israel the people host causing destroyed protect describe Peter arrived square voice industrial purposes magnetic containing finished follows nation rare cars continues that time planned radiation recording advanced engines principle student islands philosophy sought officials rapidly recognized secondary particles pattern staff covered runs traffic awarded lives deep the whole United Kingdom expansion identity context controlled images organizations derived Ireland administration bodies opposed friend chosen greatly and not designs nations surrounding urban zero otherwise error native setting gained why territory acts artists communication launched J. 
regional card freedom coast visible begin contract arms showing the past message generation involved in Mary nine leaves stone ran global popularity broadcast minor sites instruments improve wall contemporary soil Edward carbon conducted symbol as one at the end operate somewhat be seen close to news communities developing influenced domestic port machines figures views got declared note search contained employed input step lead to bridge respect worldwide caused by the idea captured want maintained route achieve publication consider magazine orders winning formation Australian ring reports onto subsequent details safety storage plane climate World War II accept connection the field dark managed practical shared be found measures visit capable movie spring calls independence obtained aspects dance learning opposition begins towns split brain interest in Jews at all technical directed introduction belief border owned prior to Chicago doing involves suffered transfer offers enemy correct possibly oxygen greatest exists measured closely policies dedicated press producing scholars steel heard accounts usage computers daily walls Canadian fiction rejected stable score engineering ensure indicate mentioned eastern forms of path skin proper trial oldest statement mixed most important officially portion reality the book dry heavily height authors fast hundred specifically author kind of block channel string Mexico Jesus institutions instrument residents combat selected vehicles coming manner rarely Irish goes hot pair royal neither organized effectively expensive really suggests processing opposite severe operated depends characteristics regarding yards enter interests glass college a high practices testing bands the story trees ends facilities place in faith grow slow armed churches mode background job writers defeated cable leadership transmission patterns thing Dutch density attached big bottom atmosphere networks degrees dropped electronic i might be strongly items 
roles translation pages apply assigned signals extent viewed Michael debate minimum periods programming visited ultimately frame hardware prices whereas as a result establish permanent holds strategy escape authorities acting join listed sex edge screen decade critics cold continuous generated solid violence liquid iron investment likely to sufficient composition broke experienced et exactly map ratio documents anything questions twice North America converted elections slaves threat time in studio contributed household wind train truth changing for each rural capable of sales secret bus football movements theories labor modified Christ cross expression stock atoms chain earth leads understand box impossible principles requirements weather considerable director earliest vehicle added to moral Johnson district literary respectively am set up prepared subsequently preferred trying sports mechanism concerned courts leading to task treated voltage translated hydrogen inspired victory advance appropriate faster nearby theme to go drawn standing arts parallel adding component relationships waves focused kill locations notable printed separated resolution tools officer subject to the amount essential medium opportunity solar etc. 
teaching argument don't Columbia GDP cultures painting tests bit amounts molecules roads launch crime fleet holding learned stay stored pure balance and all successfully picture politics circuit request Joseph environmental distributed produces affect prime revealed eye universe fans patients wood governments Berlin occasionally selection taught acquired fundamental linear Alexander choose episodes n floor angle incorporated try benefit description die studied Poland dates list of artist flat protein concluded hundreds speak extra subjects approved electrons chance easy implemented husband ideal planning concepts grown understood extreme intelligence quantum receiving Scotland online knew unless Jones out to rapid shall Texas academic reputation the future experiments explained park eyes sections audio in fact persons store budget corresponding sister charged broken capture script rich work on
moves opera Williams progress logic massive root assumed writer post be able domain foot link bank library document someone criticism everything huge linked performances represents seats thousand obtain bad mechanical Martin come to crisis and others clearly colors sample instructions the air circumstances electrical inner made of scenes copies e.g funds prominent straight personnel rank adult stand losses speaking ago m exact support for category fit housing necessarily attended limits mark couple roughly seasons widespread remove a certain street opinion review concern entry wear decisions exception identify categories compounds council efficient algorithm markets performing suitable the majority electron in turn males reaching serving slowly possibility stages horse skills concerns collected mental moment younger let manufacturing suggest recognition variable village add substantial styles detailed visual learn Angeles tells weak guitar defeat delivered ending feel in particular passing abandoned discovery poetry lowest mathematical apparent designated gun varies seem the church ethnic gradually newly p plot supporting and/or reducing Los Angeles confirmed hair supplies zone W. 
looking drug involving pain sell brief constitution benefits declined efficiency spiritual titles channels explain fear merely representation filled instruction perfect orbit replace guns tracks goals University Press articles lands matters conventional apart encouraged newspaper probability settlement silver succeeded symptoms a man in general in two joint populations transition vast legislation theorem votes immediate mountains adapted fifth fine genetic signs assistance consumption existed indicated symbols traditions settled prison spaces bass formula scientists the front transferred shift chose entitled kingdom save shortly candidates mountain one's desire mobile send lot sugar taxes trading interpretation else proof reform serves charges bits committed BBC secure steps the line the point Arab fewer infrastructure seek outer relation sort absolute drop files interface net implementation returning New York City bound decline morning variations vector absence attributed cancer membership responsibility spirit chief classified discussed Zealand discussion landing principal historians camera opposed to branch need for ordinary regularly e.g. 
survive agents proteins seconds universal interview accompanied arrested distinction reign specified repeated New Zealand accurate agricultural jobs unusual carrying mounted Oxford executed vessels work in yellow easier evolved peak permitted promote seat suit arranged tube unlike evolution reduction union writings dangerous wild dependent diameter pilot survived usual define equation noise particle rail together with circle installed interior none sounds streets variation wealth consistent finding in use novels passes weapon executive experiment exposure forest hunting integrated vertical hour protected restricted Egypt exposed partly boat fashion situations Virginia conference draw planet links strike tell outside of printing sentence depth Jackson at that criminal identical other than returns roots telephone agent argues Dr. communications engaged females convention frequent print i.e. uniform be an texts velocity transportation years old Hitler falls seemed Elizabeth administrative closer anyone debt references representing agriculture laid interested duties Wilson bear broad didn't physics finite server throne attached to award copy in addition meat fluid invasion affairs experiences median ceremony paintings vision IBM credit disk in front killing km losing demonstrated faced apparently composed of measurement regard wine writes stopped the present Sunday quarter tanks spoken Asian bar finds funding given to incident routes comparison procedure begun large number museum Hollywood birds experimental mathematics raise ruled biological consisting solutions theatre twelve drawing employees reference to spectrum banks damaged reactions stands blocks characteristic firm homes or not visitors clock agree favor ready safe prove Francisco accused flying an average desired painted varying the law Christians answer consisted evil traditionally brown interaction notably appeal extension recommended stability be in illegal bill forming industries it's lasted trained 
assembly candidate herself option association beliefs committee essentially percentage sons sum happened hope indicates minister von Germans layers Soviet Union purchased gender perceived promoted the season actors aspect deaths driven effect of stores Super Bowl clubs concentration worth arrangement as a result of keeping approaches surrounded Sweden chapter mouth plate revenue league phrase classic mixture researchers tool years ago calling errors lay reflect so-called statements hospital operational passage concrete plus thin arrival concert conduct heads offices conversion drugs for which messages treaty brothers deal with featuring the military attacked difficulty jazz strings try to la in front of pitch rotation elsewhere medicine miles comic fishing receiver column covers duty demands earned axis sleep want to web hence purchase Davis turns as of bought criticized generate Christmas hypothesis the data fired lose turning divisions in place at the same time businesses emphasis farmers label living in themes offering caught poem switch alcohol murder reflected youth senior boundaries handle historic indeed lists scheme superior arm connections dominated sun the greatest campus soul detail continuing master selling infantry missions the individual I'm random rose slave wrong destruction distinguished piano proposal sport partially producer rising suffering waters foundation artillery doctrine magnitude atomic boundary speakers accuracy department nd consisting of inhabitants meetings platform railway struggle thinking camp dominant getting in addition to ranges supports varied calendar educational papers enjoyed isolated to come boats sale door passengers tape Mr. 
diplomatic employment optical branches controls formerly milk x argue colour downtown influential solo Christianity editor empty owners rivers South Africa credited challenge classification controversy count worn characterized phenomenon responded retired fresh horses notion organic musicians snow involve vacuum attracted narrow technologies Philip boy interpreted invited made up depicted occupied surviving website destroy hearing organisms Muslim labour missile targets cited empire execution tail grounds lies index more than one marks partner restrictions creates controversial manager stress pairs remote restored festival carrier albums load specialized diverse kinds operator soft invented simultaneously configuration temporary differ focus on precise reforms bringing in part customers genre airport aware decrease denied live in the great codes complexity electricity exports stood frequencies purpose of quantity dating folk participate territories virtually buried peoples satellite equal to trip express properly quantities speeds perspective societies voted annually extend governor magic USA hole lifetime regime requiring wing Boston Netherlands account of equipped evening honor session steam surfaces DVD Lincoln admitted maintenance naturally make it separation thereby variables Lewis adults poverty retained armies wire ed. 
exclusively headquarters movies warfare roll United Nations aim explanation legs starts thermal Islam curve enemies lake objective volumes opponents rear civilian the market approval contribution fruit ownership prisoners gone ocean protocol regardless row tons facility reverse threatened wooden artistic fellow medieval meters stream Norway receives structural victims Mars Microsoft clothing emerged falling participants supposed attributed to fair fairly for two measurements tribes chamber consequence inflation operates owner stronger theoretical equally therapy Korea emperor offensive valid voting celebrated investigation origins spot supporters twenty Swedish recognize shorter tasks the ship tissue wearing revolution slavery San Francisco binary distinguish fought gods eggs injury a lot centers procedures
realized exercise hired buy disc long-term observation estate recovery Jerusalem mechanisms province Austria brand fly manufacturers trend bonds concerning dated functional observations substance independently logo replacement partial struck driving journey justice except for expand i.e intense one another publicly scheduled seeking warm aimed editions factory Vietnam difficulties plastic Arthur ranging Egyptian defend processor radar tank the East Frederick binding crystal motor rain spacecraft thick arguments attend hosted judge of age advice naval regardless of with this Indians everyone operators decay dimensions on which ties expedition racing sees formally sequences strict villages bomb contributions license underlying varieties Portuguese bacteria emergency gene spending breaking draft entering equations rely patent stayed colonies occasions tested tower covering divine reviews districts looked producers convinced infinite intellectual advertising maintaining maps teacher edited estimate indigenous races ranked coverage displayed dramatic explains cash concentrated courses guide referring samples sizes Brazil dog possession consist dispute distances improvements driver gravity hosts ions mining relevant winner Harry Stephen beam distinctive for instance passenger proportion regulations survey commission compound universities finish patient philosophical shooting strategic drum interactions radical relief ahead biggest contribute graphics logical scored poems this way consequences salt considerably bond complicated recordings definitions go to crowd explicitly wounded at which comedy delivery equilibrium alive applies competitive feed ill marry publishing actor agencies artificial compete Pennsylvania involvement manufactured metals neck NFL algorithms array keys readers requirement underground copper tourism conclusion consumer vice waste burned drew feeling insurance missing serve as tied as if by one eat jurisdiction nominated occasion retain talk voters 
addressed beautiful behaviour leg personality completion defensive gets violent supplied this point disorder intention rise to take place virtual wider capabilities custom describing diseases girls neutral illness imperial phone prey prayer provinces Israeli delay accident circular Philadelphia automatically negotiations reliable abroad adjacent calculated cavalry displays genes lyrics window database flag helps reduces register sharp stones dynamic farm missiles Pakistan attempting client influences referring to demanded entertainment flights hall potentially amongst opponent trains check facing familiar followers judges once again pointed rice chart cooperation ask newspapers obvious transmitted Greece alongside beings tends the throne women's st allies high school matrix sensitive stating beat casualties childhood composer much as terminal marketing coastal commissioned dollars entrance establishment penalty prevented shell compression faces Ohio quoted releases seeing Adams directions retirement a little kinds of Scott capita enabled spoke that one vowel mechanics molecular permission removal the common coalition conservative counties devoted titled wish baseball mainstream node registered engage notation singing atom funeral integral marine programme psychological rooms strip transformation shaped significance skill tournament Simon colonial constitutional detect engineers girl horizontal millions decreased hoped loop participation raw ruling god modes set in sky valuable coach columns drives fusion suicide remainder representative worship Francis compatible historian laser CPU c compact destination generations kings mail parliament recovered imposed in time minimal mix occurring ranks wings alliance coins consist of danger invention ongoing shares sphere acceptance assault believes environments relative to at that time handling liberal sure Apollo Cambridge collections in the past opportunities crimes deck deemed implies termed traveled Roosevelt bishops 
developments fitted minority altitude b corner decide exhibit foods keyboard momentum on one renamed collapse tall Kennedy bases depend divorce promised rocket wealthy Clark abstract corporate drink filter holes implement injured per capita walk rd Judaism excess hidden mention representatives spend synthesis Victoria abilities connect originated preserved rings confusion magnetic field resigned strictly unemployment Robinson orientation plasma portrayed similarly Islamic aged boys bright options partners planets Carolina Taylor addresses alleged bed institution rainfall well-known molecule pushed wars assist at one bears locally maintains winds export forth injuries vessel Florida historically awards bulk currency fallen laboratory manuscript capability colony interference minute physically Romans nucleus repeatedly vocal architectural eliminated initiated preserve trials all over certainly favour hear requested with it estimates wins alternate dependent on distant preparation sending the Union trust Albert ages banned creative psychology assigned to castle establishing firing heating by two drinking Vienna called for in response membrane no more parks singles drama extensively for it ministers next to retreat sixth to explain Kong conflicts phenomena powered sort of act as as it is Caribbean circuits headed identification longest permit punishment statistics variants as long as crucial dialogue saved scores teeth arrest commander fill improvement science fiction belt cutting des crown sole be called claiming immigrants literally restaurants restore taste accomplished participate in to sell Nazi agency bone campaigns chemistry happen personally pilots presidential recalled respond criteria enormous lie matches rescue variant wore corruption defines facts flowers gathered parameters quick solve tension audiences sight anniversary carries documentary encourage narrative reason for reportedly reserves teachers vital wheel arrive behalf diet seriously the road vowels 
yield infection perception poorly respective solely treat Saturday considering informed insisted interval teach van whatever Norwegian assets duration enable grant metric ports shock Jefferson Mexican assume attitude computing giant give the jury parent truly accessible burning crops gap profit shapes statistical consecutive determining drivers rational the distance adaptation briefly coal defence diagram doubt sfn acted appearances at home cooking dogs landscape prepare residence with respect to acids cabinet limitations oral sand come from consent counter discipline emotional encountered manage pictures severely suspended aware of smooth spin cargo correspondence pope publications valley warning bars entities altered challenges confidence eating jet lasting raising signature the many cattle deeply imported integration settings submitted tendency tubes deployed guilty nitrogen advantages intermediate open to framework only a few pop succession happens interesting predicted pursue detected in favor so much worst Illinois as such at the time of commercially disaster rely on survival Michigan dealing dimension filmed fled jump paint placing thereafter to death Catherine Howard manufacture temple Jim Napoleon as a whole cooling fan instances seed BCE Switzerland absorbed cool exclusive precipitation prize rival shifted watch willing Danish competing poet scope Hong asks departure depression dress presents reaches tries Olympic beer dust expanding firms guest lift regulation surgery wedding at first constantly legally reception walking analog confused discuss edges indicating muscle outcome schedule sessions settlers successor Bruce Roger exceptions hits masses meets delayed electromagnetic excellent geometry traveling Korean Muslims attacking libraries migration select tribute ultimate children's consciousness humanity priority feelings ignored intensity availability bones breaks carriers crop democracy fighters push apartment finance intervention make up shipping 
construct franchise looks situated Samuel catch craft dialects dramatically dual neighborhood opposing plates camps guard precisely verse North American borders dioxide gift nomination reserve reserved windows communicate consumed correctly engineer no one wet comprehensive unsuccessful verb Indiana adds diversity grand moderate prohibited v. Cuba appearing fail segment the City touch Moscow in the middle tables findings mutual nouns removing
tended travels Gregory Scottish dynasty hotel specification unlikely Nobel democratic flows protest sufficiently bishop dishes escaped founding judicial packet scales arbitrary possess appointment ceased dismissed in favor of settlements theater Broadway contents cotton holiday several times account for as much as atmospheric crossing detection introduce orbital organs purely revised actively afterwards aired c. embedded happy waiting angles bread concerts for that grave praised precision pump visiting acceptable at night charts condita connecting coup deliver layout readily rebels suddenly intervals manual pronounced proven transported acknowledged beauty bet cult demand for economics legislature priests resource surprise urbe Ab urbe condita compromise crash elite expense forcing prefer automatic clergy plain replacing tropical vulnerable approached cloud commitment everyday frames integer putting rounds substances unclear Sydney documented farming mine mirror moon of interest politicians repair spelling strategies bombing harm tip wavelength Afghanistan lighter victim Joe exile linked to measuring updated Jupiter carefully coffee fraction freely provisions replied stem agreements asking der doors enforcement forests panel religions reveals roof sentences worse Allen Hebrew cm continuously cuts garden lacked studying the club IP doesn't exhibition muscles utility MHz al. conservation departments item only if overcome racial tactics the community Lawrence clinical corresponds customs employ graduate instrumental phases sword the Indian transform adoption canal demonstrate dense educated fighter graph priest rocks stops Margaret bow collect electoral enjoy made up of overseas participated profile secured transformed Persian affects ban clay fragments joining lens processors ritual City of fiber pool proportional reveal sacred Andrew Marshall avoided combine focuses inspiration on top qualities sit Colorado arose chip disappeared experts nodes pg. 
publish recover separately the few theology Welsh arise collective contest gases heritage inherited navigation payment residential weekly Douglas bridges concentrations consideration diagnosis latest navy organ Hamilton Jane amateur civilians closing consistently friendly grade helping sophisticated withdrew landed legacy resolved romantic rubber shut clean coin critic developers grain grammar internet radius Alfred anywhere attributes depend on directors disputes entity knows sharing shots Finland Kansas funded nobility orange thrown travelled Denmark circles expressions fee friendship governed Missouri bid in large internationally legend mild Iraq Muhammad Turkish Walter assumption eaten storm surrender telling the stage dying filed heated noun sitting smoke subset Otto bird breeding examination fate follow the jurisdictions occupation beach formats ion sciences the earth Massachusetts Steve commands fires reader Berkeley belong contracts deals debut elaborate mature on the other hand portions relatives revolt temporarily the truth trouble evolutionary partnership presentation sectors shells shoot shops this day three times unity Georgia authorized consumers newer substantially Daniel Hughes chips households journal magazines occasional pollution scoring so as suffer working on assistant daughters ink not known proposals sail the dead ammunition cameras so far attract large-scale loved profits railroad swimming thrust uncertain wages year in World War I disorders glucose mines pulled terrain tourist wants Jordan advances battles blade charter console dream exposed to fat feedback adopt autumn explosion lighting attractive chains cognitive package renewed touchdown fictional primitive revolutionary singer topic Gilbert Wright antenna contrary improving myth reflects shore Harvard buses essay fantasy gay in the field mainland opinions powder drawings eliminate most likely numbered pick recognised suitable for Tony calculations comparable portrait simpler steady 
tunnel captain clients clothes customer descent disputed flood heavier minerals scattered the normal topics absorption algebra birthday f imports teachings at this time default mere rough segments silent the true Hong Kong decreases grid in public most often observe seeds smallest striking the same way desert feared legitimate paths picked stack Anderson crystals flexible fund inserted slower statue the poor Austrian circulation extending facilitate genus hospitals lesser settle threats zones Caesar compensation found on in practice politically siege commentary consensus contacts dies doctor hostile monitor parliamentary suggesting sustained the usual affair aggressive conventions correspond cycles guidance handled licensed promotion rebellion sea level whilst Norman awareness citizenship condemned dancing differs encoding exist in feeding lakes palace professor seventh submarine talks the information visits Donald balls defining ease faculty preceding Mississippi Swiss accommodate comments festivals servers tea width absent accordance civilization commented compositions emissions golden relating restoration totally uniforms Russell abuse arithmetic bandwidth eleven in love planes pleasure prototype sheet skilled squares targeted wait collaboration continent differently doctors emission imprisoned inches organisation permanently serial static Taiwan enhanced enters equality filming habitat y Great Britain championship flew lunar mentions merchants nose productions promise symbolic to date virus Star Trek USS clouds enables neighboring secular Houston South America al apply to contribute to exploration hull implementations indirect nervous ruler speaker aimed at applying gains lights privately sweet technological translations blow cancelled cuisine determines loyal photographs suspected accurately aims dissolved rhythm traits Jr. 
boards brings hierarchy sacrifice stadium too much Carl arrangements chemicals combinations currents dealt doubled orchestra NASA d elevation encounter healthy resolve thoughts See also advised banking cinema cylinder derivative effectiveness on the right radioactive satellites set to switching withdrawal Arizona Moore cluster Morris colleges descendants isotopes rotating verbs Finnish Franklin dubbed essence gaining impressed manuscripts pole posts queen wasn't Athens Friday bronze expelled informal legislative loyalty regions of remarkable switched the letter Cooper Warner continental discussions memorial promoting ride systematic the soil throw uncommon Alan citizen discover divide observer pressures render risks Kelly can't controlling conversation founder immigration interviews merged names of Harold In July baby evaluation schemes Portugal RAF adequate considers descriptions reflection reversed slight syndrome talent the record tie treatments In April Juan beneath burn compared with favored in common monthly preventing reconstruction thirty competitions composers conjunction passive seal search for suited virtue Disney blind enhance immune impression leather linguistic mammals mineral pp prompted tobacco witness Kent Shakespeare acquire bearing celebration counted on the ground polar ready to realm wishes Brian after which do with evident gauge intelligent interpretations nerve out with philosophers progressive pull the lead Cleveland assassination excessive human rights ingredients monks nobles point of view sculpture spite Buddhist Karl challenged coordinate extends fails favorite tales Iran Puerto amino assembled battery closest erected explicit gravitational odd payments platforms provision reactor requests the planet Catholic Church Europeans Middle East Thompson en intent it may be alphabet delegates discrete farms judgment rendered shoulder sin successive wished Czech Dallas allow for associations gate harsh investors naming peaceful reasoning European 
Union Indonesia RNA acoustic coordinates full of graduated hill yields Orleans Sullivan assessment belong to dozen drag governing incorporate revived self sudden the press trace Ruth arc civil war combining practiced rulers Anthony Sierra blues coat colours deliberately discrimination reasonable unstable administered colored digits in length pregnancy qualified retail theatrical egg knowing meal meanings processed protests soldier vegetation withdraw biography engage in engagement loaded martial regulated strikes stroke tourists great deal noble paying propaganda raids resumed trends uranium Julian casting demonstration
expect fifty hero registers remember transit unified Ottoman insufficient isolation municipal refugees sentenced timing welfare st century autonomy boom decorated del fame on behalf of routine synthetic the border topology Christopher Second World Stalin accounting bombs hills spite of streams the help tongue toxic tribe united Benjamin Minnesota RFC all but ancestors combustion deposits electronics exceed explosive gathering illustrated in charge notice shop slot solved succeed tiny Carter Diego New England biology conceived if not neutron perfectly preparing relied valve vectors The American architect collapsed full-time mortality odds accounted harmonic reads sing cables constitute dollar handed loans moments pursued territorial to the right Greeks artifacts assisted barrier ceremonies coined deficit finger grows guests in exchange manufacturer of course phrases revenues Melbourne Syria bias credits emphasized fifteen kills symmetry Intel colleagues drove forbidden monetary strips unions corporations gross mayor questioned volunteers Alice aboard fees friction interact labeled lifestyle no other resist servants shallow work for Gordon and so on bytes loose populated touring FBI Palestinian Stanley consonant container emerging expert march on top of pregnant provincial tale Second World War armed forces di entries lengths openly threw wheels with that Louisiana differing ethical fabric independent of responses steadily viewing Marx New Jersey ahead of arrives closely related extinction hands of merchant modifications nevertheless prime minister productive rifle set out versus a great deal animation choosing defended explore hopes integers loan melting meter offspring outcomes reproduction rigid sisters switches Nicholas The National choices confined du genome persuaded possessed pounds seasonal sodium ticket Aristotle Murray New Orleans aside compiled footage honour observers papal spirits vs. 
acceleration apart from centres chest convert to publisher shopping borrowed breed careful dances murdered nights plague preference revival wound animated creatures crossed flavor flower harder innovation introducing investigate operas specify vegetables Belgium Parsons arriving attendance contributing elevated enzymes insulin peasants protocols angular athletes convicted eligible mixing monopoly pressed sends to the point wireless Anglo-Saxon Harris amendment at times constraints dish failing objectives organism rating secretary Hungary Reagan broadcasting cap celebrate designer drums exit factories harmony inherent present-day remembered so as to submarines technically the open Mr Oregon calculation carbon dioxide chapters completing deeper done in elementary neutrons prevents pulse shoes Pierre counting fed optimal projected rolling seized terminology wavelengths Campbell Constantine Manchester depicting geometric justified scholar wage Hall of Fame NBC afternoon cellular derive ear examined gates lacking small number voyage whenever Wilhelm broader desirable differential organised printer resignation revision Middle Ages controller excluded in the future initiative kingdoms most recent slide so many turbine uncertainty assumptions at about autobiography balanced belonging focusing inheritance pace presidency relating to relies reporting sovereignty throwing empirical give a photo soils timber Harrison airports armour at the top conducting devoted to editing functionality guaranteed in accordance oxidation oxide performers predict similarities transactions wheat wish to Marcus Ontario archaeological as is clusters councils emitted erosion holy occupy proceeded riding shipped span tours triangle vapor blacks emotions favorable hip identified as looking for nuclei offset periodic plural proceed sheets Turner advocated believing coupled genuine go on hybrid limiting long time not allowed outbreak protective spatial Batman Moses designers foreign policy fossil hop 
seeks singular tensions undergraduate work as Manhattan Thailand The New York Times consequently domains heir more or less optional organize passages printers professionals punk ranking reliability Churchill advent cloth correspond to decreasing exhibits gear in relation to lessons margin modules monitoring orbits stick Constantinople addressing boiling cheaper commanded compiler convenient copyright factions federal government generating modest necessity not used on board permits rays reject two weeks asserted flexibility furniture highway installation nickname of the best reinforced talking Darwin Nintendo allied basketball carry out destroying dose horror import liver nm skull Leo Victorian Wisconsin anxiety broadcasts consonants contexts earn grants labels missed monarch physician travelling up for Collins Nixon calcium continuity enterprise fingers onwards outstanding relates simplified summit syntax assuming blocked collecting debated implications lectures liked mid mood pipe pride resident responsibilities rod terrestrial the worst Amsterdam Latin America Prussia descended enacted freed in accordance with insects junior motivated overhead rituals twentieth valued Maryland Morocco achievement alter backed da decides draws fiscal livestock modification on the left patrol reflecting rows scholarship southeast survivors theological tissues towers Argentina abolished attitudes determination discharge explored referendum Atari Clinton D.C. 
I've Miami back of comment dreams dried northwest one or two proximity routing attending calculate cultivation exhaust ministry multiplication signing standardized strain University of California Zhou at once begin to brass communist emerge geographical hide hunt k oath philosopher satisfy Florence Milan Vietnamese chair curves emergence floating foundations inputs organisations touchdowns viewers Alaska Baltimore Roman Catholic Russians bell chess countryside declaration fatal filters grass keeps machinery outdoor panels predators traced trapped vocabulary wires Mrs. Parker arguing barrel competitors conquest hotels not use noticed restaurant sake southwest spots trigger undertaken CIA Eric Northern Ireland altogether ancestry conception derives dialect essays generic gifts myself omitted pay for salary syllable acquisition cooked costly decimal dominance donated identifying museums run by tactical the elements worlds Sri amplitude boost cheap enabling fever flesh graphic implied infant intact integrity nationwide photography prisoner protecting quarters seemingly suggestion tomb two-thirds vitamin Wagner bitter gardens innovative module praise prefix privileges sailing tonnes transmit Eisenhower Wallace chronic definite dominate drops extensions horn managing recreational saving scientist scripts shield the vote to this day valves Monday carved divorced execute fruits genres ratings representations respected undergo Luke Zeus choose to commanders declare gray in the middle of infrared literacy realistic safely thanks wisdom Alexandria afford editors inferior neighbouring participating pen pointing proceedings proceeds strange the gods the screen uncle voices Mitchell heaven horizon matching natural gas polarization practitioners tight Bernard Catholics Germanic armor axioms burial guarantee men's one day posted specimens stolen strongest treaties tuning achieving guards inhabited inverse neighborhoods profound projection urged withdrawn Antonio Stewart Torah 
allegedly badly conquered cream gather half-life imply more and more polynomial profession pursuit speeches trick victories Hungarian Mao brick by means of consistent with extraordinary kg lawyer managers militia not considered not exist prevalent providers quiet subtle testimony trail I will approximation conductor extract flash infected mistake moisture purple raid renowned stamp supposedly unconscious Ali Toronto adventure astronomical battlefield bore cat derivatives flux geographic intake invaded lifted minimize numerical opens oriented releasing simplest stamps subjected tenure venues abandon ballot champion compressed cultivated forty incorrect leaf offense sailed stressed toll Nazis Tennessee acute attraction case the defendant demanding diamond helicopter in charge of investigated memories monarchy nominal not know reluctant silicon Buddhism bay charity induced modify one time pass through peaks privacy stance tolerance venture adjusted backing be added belonged decisive dressed expectations harmful lateral mystery performs rivalry sunlight Baltic abundant freight inclusion look at owing predominantly premiere repeat struggled swing texture video game worker Anna Lloyd analogous barely collision compare conscious entropy eventual expenses finishing fission guided housed imprisonment prince
sensors traded tribal whereby whites counterparts deceased develops dinner exhibited lying many times planted programmes revenge sick sung surrendered that's wives Abraham any time bills closure enterprises fibers flowing illustration playoffs suburbs tens the necessary torture viable walked commerce frozen guidelines in love with irregular metropolitan poles puts respond to rifles rises the works upgrade utilized Morgan associate centered coil corps economic growth exceeded inland investigations morality photograph preservation prone pronunciation similarity thesis truck adaptations byte faithful functioning kinetic marketed possibilities resemble with the exception of Isaac Thai algebraic as for attribute bombers burden companion counts couples defending discoveries elderly ensuing flour incomplete presumably refined reward sensitivity shifts threshold weakened Fred Oklahoma Sony a million affecting answers belongs cannon comics et al identifies musician nutrients on the surface publicity put on surplus torpedo transparent Eugene Prussian albeit allocated averages behaviors concludes damages emphasize ethics fun g kick locked paradox portable reply spreading starred telecommunications witnessed Hudson Saudi cement curriculum grammatical namely not take regulatory shadow socialist structured Malaysia absorb accessed answered boxes coding criticised devised encouraging failures fault grandfather imaging investments marking modulation mythology regulate scholarly studios the specific transaction voluntary warned Byzantine Ukraine Venice aerial disks efficiently fortune ideals incidents made for masters packets rendering speculation tiles tones up on youngest abundance arises certified clause conclusions lecture negotiate occurrence productivity react rebuilt remarked sharply underwent Holland Jonathan achievements ally as far as assumes cartridge copied cut off everywhere explaining farther fastest inability liberty obsolete point to prayers resistant weaker Hawaii 
arrows beta dam eternal fertility filling hardly helium indirectly receivers registration sealed sheep soundtrack staged sustain the crowd triple Einstein Linux Lucas Peru Pius actress chairman citing continually crowned enforce feels freezing globe journalist obligations overlap trademark venue warriors Afghan Native Americans Rhine Vatican barriers clocks cup exercises free of mouse out for preceded rated rejection spectral videos watching widow Boeing Columbus Denver Dick Matthew Saturn deployment designation earning elder email explanations loading lock marriages mile painter syllables Jan apparatus arrow aviation bullet contracted elect imagination in case kernel lengthy negotiated put in put into sat Bulgaria Pittsburgh about to accepting appear to be artwork breakdown buying considerations crews dropping flooding incoming interpret mate specifications stationary thickness topological unrelated Montgomery Native American Venus absolutely aesthetic as soon bigger burst conspiracy discontinued firearms in conjunction interfaces patents specially substitute Nigeria Warsaw aggression approaching astronomy avoiding enzyme fraud functions of invested log marched ordering predecessor relate salvation stems suppressed the latest versa Bulgarian South Carolina baptism beaches cache enforced hitting lamps people's propulsion prose secretly shaft sponsored stretch tune veterans Abu Hispanic Iowa batteries colonists correction couldn't customary debts formations formulas grades likewise profitable refusal rhetoric shut down supreme vice versa IEEE Nelson Romania Talmud Watson adapted to compensate consume cruise curved economies eighth grey handful high-speed highways pack rope time was universally urine Armstrong accidentally amplifier appealed arch convince declining in conjunction with individually naked problematic ratios twin Jacob adverse aided alike as soon as depiction ears indication kept in loses metallic scenario upgraded Austin Chris Hindu Lois Philippines 
bonus complained cousin damaging extant illustrations lacks predictions slope taxation ers Patrick admission ambiguous be at cleared composite deny desktop firmly governmental mothers packages resign sequel thumb adventures approximate at sea capitalism cave cricket crude encoded forever generates hub insight loads missionaries norm owing to payload poll seldom senses spread out whales Anglican Arabia Herbert advocate autonomous bin encounters exceptional family size gallery gradual hoping humidity kilometers mobility neurons northeast passion realize regards sailors subjective to the left Claudius accidents cavity championships commodity connection with contested in return peers picked up prints publishers r refuse rivals servant short-lived Roman Empire bankruptcy bottle cathode combines entitled to extinct helicopters honey republic validity winners witnesses Albania Kentucky Methodist Ross alien aligned analyzed ancestor announcement busy cartridges delays disciplines ecological ensuring envelope facto galaxy gaming grains innovations limbs magical merger norms relativity temples tense trips vendors Singapore activated after the war applicable biographer calculus climb cubic displaced economically famine in question interfere journals jumping ratified storms successes till vulnerable to Turing ambitious anger coming from computation computational contained in disabled employs grouped impose in power man's not possible paved refuses relay shortened simplicity sketch tuned underwater Americas Atlanta Cuban Detroit Friedrich Marie San Diego a bit cathedral diesel dynamics eldest happiness lawyers leagues licensing pension ray resonance satisfied sovereign thoroughly tidal watched Hz confirm conflict with denote diminished expertise extracted humor infections knights lung other side receptors Munich Plato alpha be determined challenging conclude cosmic dancers discs enrolled fears hiding journalists judged myths observing parade photons pink refuge retaining tenth 
the arts the string weekend Yale alternating angry breast by far compatibility displacement harvest impressive mandatory neighbors parameter statute terminals trucks verses Curtis Protestants Stuart USSR agrees births blamed denominations drought early years estates expressing justification microwave prominence stomach strengthened suspect the ring tournaments transitions weights beneficial blades committees comparing costume diving heroes in this way inexpensive invisible number one prescribed receptor simulation surprised violin wake Euclidean Nancy activists automated axiom bound to box office button canon compulsory crushed dice distributions drainage flame fur impacts in the air nationalist per cent summary transcription AFL Evans Venezuela all that appeal to climbing congregation connects dependence documentation employee examine homosexual inadequate listening lit lungs no. realism sampling signaling socially stake suits transmitter Liverpool North Africa electric field faction healing instability lamp lowered obtaining peaked prone to recommendations rockets rounded spinning the score unnecessary wartime Richmond Seattle assignment consistency in effect monastery onset robust scandal sensor triggered Edwards Panama Tokyo coefficient collectively de facto diagnosed exterior formulation grace he's inch interests of loops lost to municipalities practically sufficient to suspension warming weakness Barry Dave Rogers basically dealer debris generals ideology monument parking perpendicular rectangular the negative to retain violation Tim asserts buffer chord commit cure debates dozens earthquake exhausted fourteen in prison nervous system of language petroleum proclaimed readings rebel searching strengthen African-American Augustine built-in cheese elimination exported goddess governance internally liberation loud medal motivation placement recruited resort riders sauce sensory silence specialist traces tracking bonding bounded dissolution dividing exploit genera 
inscription likelihood obliged phones promising rotor sulfur Voyager accent bomber breathing complement frontier imagery meaningful not allow obligation persecution probe quantum mechanics solving the Academy theaters Giovanni Socrates USB aging auxiliary bind corporation decorative exercised mapping playoff railroads relocated rider seals specimen the eye the other side toured Dominican Serbia XML adjust call for chicken comfortable comprises flee hemisphere isotope pm postal preliminary protagonist somewhere suppress the religious treating Gary Syrian backwards bilateral but for coefficients convenience if and only if nuclear weapons princes progressed prosperity recall restriction slowed spare warrant Utah celebrations chaos critically diffusion drinks excluding
fulfill galaxies in place of lawsuit mount sorts terrorist transfers verbal winters Alabama Joan at most bag brilliant converts creature decree deities depicts distortion ensemble feathers finest hockey induction latitude like to mutations peripheral plains pushing reactors thread volcanic wolf wool Hugh armoured digit honored jumps makers melody nationalism obscure predecessors programmers propagation provided for quarterback replication selective supernatural telescope traders tragedy Catholicism Colombia Indies Quebec admit celestial come in congregations correlation deposited desires doses favourite financing ft identities impulse injection instant knife not so numbering pocket resisted simultaneous spontaneous submit the Americas the particular triumph unrest unusually valleys Clement Julius Lebanon Sanskrit accelerated compilation defenders detector distance between enclosed extraction jail lasts loosely manipulation prosecution robot sexuality sixteen speaks stationed statues stopping subordinate superiority the numbers wounds Dewey Fourier HTML IMF Jerry Oakland Princeton Tracy accompanying basin downward generator guilt hanging incorporates linking matched photos physicians possessions pot prestige pumps tickets uprising upset voiced Edgar Larry Ltd. 
Nevada Roy UV Vikings appoint assert comprising executives exotic flown give up heading interactive parish pin reviewed short-term sympathetic tunnels Charlotte Palestine Rio chambers chapel comparatively contemporaries generators governors in full irrigation lattice lenses marching nephew plots presenting prolonged reprinted ski soap supplement surname yearly Belgian Calvin Celtic Gandhi Rico a thousand advancing biblical broadly concentrate dome formulated in support of inscriptions intentions monasteries poetic proportions rally resembles variance barrels brands careers chromosomes conferences creator designing destinations evenly instructed lanes nonetheless poets proprietary starring thirteen zinc Ferdinand Google New Testament Red Army Stanford Yugoslavia collaborated converting defects diabetes exchanged fort intersection key to manually natives one-third ought persistent potassium preparations quotes reconnaissance router salts smoking socialism sporting stretched two or three upward vol. 
volunteer way in welcomed with the help of Baptist Freud Ralph adapt agenda any one attorney battalions contributes cooler custody definitive locate migrated nationally premium pupils reproduce shelter the picture update upright wholly Adelaide Edmund Hopkins Romanian Salvador afraid be allowed beaten bets competed complexes marble minds resulting from sketches the administration Benedict Brooklyn Reynolds affiliated authentic browser certificate chances chocolate controllers coordination degradation dictionary gamma guitars higher education implementing in high justify marginal mutually mysterious posed protons quote surveillance temporal unchanged I/O Jedi Lithuania Luftwaffe Oliver Raymond Sox Stockholm architects automobile backward branches of collectors convey delivering diary ensured grip isn't lover maritime mutation persuade petition pose pronouns realised regain singers the good Holmes Iranian Motorola blocking buried in commenced disadvantage excited favoured hereditary oppose outward planetary proposition restrict rushing short story suffrage symmetric tennis to the end vocals Bryan Lanka Leone acclaim anticipated cartoon circa delegation demonstrations dive drafted energies geography intentionally juice ninth ozone plantations promises thanks to to let unaware Canterbury Charlie Dublin Geneva Portland aluminium cabin constitutes foreigners mountainous packed partition pixel probable rods rolled tricks very much wildlife Jimmy Malcolm Naples Roberts Ronald a base allegations allocation blend canceled constituted deciding ecclesiastical emotion lovers mask noting pulling purchasing shortage tallest Academy Award Africans alternatives at any time at the bottom bacterial ceremonial defenses directing discussing engaging expeditions force in genius imaginary interrupt introduces lab luxury photon portraits prestigious prevailing punished purity qualify repertoire retains shifting short of specifies stocks susceptible unexpected warmer wrapped South Korea 
aftermath bowl chooses cleaning discovers dispersed encryption header ineffective manages mankind medals once more pipeline retreated run in sensation tremendous whale Baldwin Beijing Cameron Columbia University Graham Norse Slavic accordingly ambassador as seen exploitation fossils interrupted keen noticeable poison risen souls tin ultraviolet unprecedented Barbara Final Fantasy Ian Jesus Christ North Carolina Southeast Asia a vast acclaimed constructing disbanded do it employers invitation nineteenth substitution accusations calm cord daytime deer feasible fix general public govern halls in a way integrate masculine orchestral right hand the wild three-dimensional tide Royal Navy Yankees accepts clan common law corners demolished dot immense instituted learns not enough penalties positively semiconductor struggles suite surgical swept take up the complete tram Wayne accession advocates arguably axes believe in canonical cease containers costumes darkness departed emperors financially golf habits locals massacre of service pathway possesses reactive respects scarce scattering tariff terrible threatening Croatia House of Representatives Ryan Vancouver adjectives amplifiers bicycle brightness contrary to corn declaring down on fertile forgotten hash holidays in line in. 
incidence kinetic energy obviously ordained peninsula remarks rigorous semantics sins sleeping spiral steep stripped the article trilogy Wikipedia World Bank X-Men capturing configurations dams floors horns ideological indoor innings maturity midnight pipes screening severity substrate trails walks USAF Vincent Wednesday abbreviated accidental ambiguity breach civic discarded ensures fierce hollow in advance in contact inevitable meditation mistakes provider sensitive to solids stays take on the dark underneath websites Bennett Jamaica Steelers accompany agree to airborne allegiance ammonia aristocracy comprised continents convergence crust diamonds licenses listen literal precedent proton raises take over tightly und unpopular wax Ann Burton a hard admired arena beef benefit from complaints concessions constellation depended devastating discusses enthusiasm gasoline harbour in the way intensive lost in overwhelming pound premiered proving reproductive routinely smell soup spherical staple tough vice president Prague Security Council South Wales Taliban Western Europe civil rights corrected crashed criterion disagreement dye inquiry keyboards patron reunited revealing telegraph the strong unification uniquely wines ASCII Conrad Eastern Europe Sami almost entirely asset caves continue to be distribute employer employing five-year for certain mandate paradigm positioned post-war regiment rent slopes speculated supervision tract Arabs Brazilian Game Boy Sri Lanka Susan airline anchor appeals burnt destroyers differed dwarf enlarged inequality licence of choice penetration propellant regiments small amount stuck take advantage of the opposition warships Blake Hoover Johnny additions altar cartoons checked complications courage depths discourse folded fragment hat infinity joke large amount octave ore resting retire run on savings summoned taking place the possible trap weighed Helen Macedonia Madrid Personal life Puerto Rico Redskins Sicily Thursday a couple of 
averaging behavioral censorship doubles eliminating feminist flags gambling halt helmet humanitarian in a series magnet not change pagan prevention repairs reporter sexually silk unhappy welcome I'll Iraqi Johann Republic of China Vegas activation amended assess be heard be known as beams beds can do coastline constituent epic for long fuels geological heights liability pottery suffix Amiga Carlos Kansas City Nile attain best-known brake bubble deity differ from exploring ferry free will honorary hypotheses innocent leap legendary nearest networking objected originating patch progression surveys terrorism undertook Costa ambient aperture beside darker drain earnings evaluate expresses famously generalized humorous impedance importantly in spite intrinsic kitchen manga obstacles peasant prohibition put it refusing seated short time sperm tactic theatres think of toxicity transformations waiting for Solomon abortion calculating captivity corrupt editorial equals every day happening limestone marker mid-s notions posthumously prediction pulses reconstructed sang set by warrior Bismarck Edinburgh Nero Sherman South African Unix altitudes analyze associates astronomers boot cats climates consoles dawn dietary divides embarked fatty fights gaps ghost hammer
hurt in spite of infants inference instantly lifelong lightning mating neural performer physiological premises recurring rolls ruins runway rushed successors treason Athenian Isabella MTV North Korea assured blast butter ceiling come into comparative conflicting depict devil diagnostic differentiate disciples inherently mercury merge modeled oceans organizing outlined solvent suburban sums swim the special unofficial vertically viruses Chiang Ethiopia Gibson Luther Sudan Tamil USD at the start ballet conviction criminals destructive disappear experiencing for a time hate help to joints kidney military service motions quit real-time recipient referenced robots rotated senators submerged the details the general public trusted vibration violations wolves young man th-century Hepburn I'd aggregate alloys aluminum attracting blank contamination databases evolve hunters inform local government outlets owns periodically precious programmer seek to so long steal stepped strains summers t Gibraltar Latino Sarah backup be true benefited characterised colonization computed coupled with disliked feminine fortified fuselage hunted illusion illustrate mathematicians nest penetrate saints storyline survives transistors utilize well in yeast Bavaria Tucker accumulation after that anonymous beating by law conceptual denoted deposition exchanges folklore glands glory habit holder in the face of in the presence of indicator launching lineage pale programming language put to real estate recognizes sculptures separating trade with yard Buddha Kim Mann Theodore accumulated analogy anime annexation auction chromosome complementary coronation day in demonstrates drift familiar with floppy in service initiatives it will be lifting maternal metabolic numerals organizational presumed revelation spectacular suspicion Bros. 
José Qing Ted Terry Tolkien anatomy at the expense of circulated commodities counterpart crosses deaf distinguish between embraced explosives interim knight listing mud municipality polymer preferences screens touched vertices young people English language Las Vegas Sega athletic behave commentators disability download evaluated floods gradient hung infinitely junction palm presidents private sector semantic spectators subsidies susceptible to sustainable Carnegie Helena Kosovo Montreal Norfolk after this amounted be difficult bull by hand certification chloride chorus cocaine confrontation criticisms crowds defeating deviation energetic first place fitness guerrilla home run homosexuality in search of incompatible jointly miners opium passing through pathways phosphate platinum proofs railways rape the present day trait wanting Esperanto Irving Libya airlines amino acids be regarded compliance continuation dissolve exploited graphical inconsistent knot mirrors mistaken negatively oscillator pirates pretty treatise withstand Amazon Lutheran Manuel Old English Westminster attachment be on bison blame carriage contradiction cruisers dairy distinguishing drunk economists enlisted feast granting house in interfere with interpreter jerseys locks loves mice of use pixels quest reasonably reinforcements rejects seas slang the moon threads trunk unlimited wise Aaron Albany Milton Vladimir be expected be possible besides bold comprise depressed dining exiled ignore informally meals nicknamed panic ranged recession saint snake terror torque undergoing Arnold Caroline Connecticut Jacques Saxon Thomson battalion boarding brought about continuum distinctions drummer exceeding exhibitions halted humour le manned memoirs progressively rainy rescued styles of submission the limit Antony Hugo Jon NBA PRC Scots accounting for communal cylinders deliberate earthquakes feudal head of state histories leisure look for null popularized portrayal prepared for propose referee retrieve 
satire siblings staying strained surprising the crown updates wells workforce Chaplin Gaza House of Commons Wittgenstein acquiring afterward assassinated averaged canals capitalist civilizations compensate for consolidated dots embassy exceeds graduates hazardous helpful initiate lots motors offshore on land procession shoots specialty teaches the executive thinks tired Bengal Cromwell Emma Geoffrey Kevin New South Toledo Welles acres ancestral assessed cardinals cockpit comfort ever since habitats horizontally iconic inclined jewelry know the legends liquids methodology missionary outline pursuing repeating sanctions suppression tariffs theorists transistor treasure triangular vague vote for wherein Argentine Franz Leonard Macedonian Malta and the rest anyway bent communicate with cooperative coordinated deputy disposal endorsed in between markings mounting obliged to offence overview perceive pleased pressing proposes rotate run for screenplay shuttle spanning upwards usable veto Achilles Saudi Arabia Ukrainian all the way balloon coating compete with confronted deposit domination done with doubts fake for the purpose of hell heroic incorporating jaw limitation nominations painters paired precursor quantitative reconciliation removes resemblance revisions satisfaction squadrons step in supporter unwilling KMT Kant Kenya Normandy Powell beans captive except that gameplay healthcare homage knocked migrants millennium minorities ought to patronage pins prototypes proud randomly reservoir reside shortages tag tapes the living thicker torn transmitting yes alert cardinal carry the checks convex cooled deeds denounced endemic endings facilitated finale financed focal for life freshwater invade knee look like lord patented prizes respiratory result from shoulders throughout the world translate trio unreliable visibility Carthage Cicero MGM Odysseus Steven approve arrays ballistic be present be understood boss cone critique deficiency devastated gang highlighted in 
combat intervene metaphor needle not occur p.m. piston propositions rats riots second edition sediment soluble springs straightforward vicinity violated work at Godzilla Harvey Hausdorff LP Nicaragua Star Wars The First Titus Truman accommodation adjustment attained cancel eighteen equator exponential function as hierarchical in print induce manipulate neglected outright parity principally recreation securities verified Central Asia Jew Neil airplane cassette centralized charging coasts coral distress etymology evacuated exclusion from the first glasses hostility hydraulic hypothetical in color institutional issuing oak oneself originate pants plaque psychologists set theory spheres suggestions underway vote in Chen Democratic Party Egyptians Gabriel Kabul Mormon Routledge Thames bloody catalogue collector compose displaying fall into fermentation holdings icon in parallel in writing kit pad patches phonetic rites salmon sits splitting substituted surpassed tears terminated the interests wherever Almost all Congo Galileo Lockheed abbreviation aerodynamic attacker certainty chords colleague conform convincing denotes derivation desperate eager exclude field goal four-year generous globally have done immunity in contact with in detail injected invest joy malaria murders nationality occupies owed relying resurrection shed sink solitary sorted sparked stresses the courts trade in turbines useless working class Alex Hinduism IQ Turks a poor acknowledge all-time archaic bare batch bricks but then comply contraction diffraction ed emphasizes endangered escapes expectancy galleries improves joins lady laying migrate modeling monk moons nerves objections overthrow perform a pigment pioneer pitcher residual sunk synthesized trajectory trivial wouldn't Birmingham Craig Falun Fraser Monroe New Mexico New South Wales Petersburg aquatic arising boiled bottles breeds conditional constructions diagrams equivalence expenditure flies fluids going on guitarist hire hosting 
indefinitely miniature other's outputs pitched pray prophet ready for restoring rush scan selecting smart spy stripes subsidiary superseded tails the summit Babylonian Dakota Kenneth Ron affinity be regarded as beats clearing decoration demographic depleted drying enthusiastic experimentation finals heart attack hunger inception incorrectly inscribed interacting left hand lions mercy merit morale offerings olive painful psychiatric snakes sometime spell spotted viewpoint virtues Hume Milwaukee Whigs all in assertion asteroid blockade casino chiefly chiefs commissions debuted ecosystem fireworks fundamentally gentle immigrant in operation in the absence of limb oils physicists plantation republics routers shortest streak subspecies sympathy throat woods A major Ethernet Macintosh Maine Mickey Sacramento a deal apple ash assign cane coconut coupling elastic embryo escort facial for sale hatred honors inversion its way make use of on earth passed by programmed prolific rebuilding resolutions sentiment settling sinking specialists suppose synonymous talents worried Barnes Delhi First World Grammy
Herodotus Ludwig Shaw accumulate alignment anthropology attested backbone classify coherent collisions composing compute demonstrating devotion fall in fragile fulfilled graduation guides inefficient influx live on luck more than half not believe official language onward persisted predominant prominently recommendation sails shocked skeleton slots stellar surroundings the duration tips v vertex African American Barcelona Tibetan Tuesday Viking adjective alliances armored at one time attracted to attractions be changed besieged campuses cancellation coloured companions compelled cow deteriorated directory disastrous ejected evaporation favourable finances float gel handles in society in the event of jokes lap like that no matter nobody pigments practicing scanning stimulus subject matter the authorities trillion voltages Gaulle Grand Prix Julia Oxford University Press Vermont alcoholic authenticity ceramic inaugurated inducted insertion institute matrices natural resources prevalence signatures torpedoes undergone Carson Jason NCAA antiquity arcade become one coming to comparisons conductors contingent depictions dispatched downstream enduring go into granite intercepted jersey legitimacy lesbian mill monuments occupying parallels pointer recursive the contrary weigh yielded ATP Boris Greenland Hitchcock Latin American Phillips Rachel Shanghai attended by cites conservatives contrasts corpse decomposition fluctuations fog forum imagine listeners modernization obey optimization pigs popularly relaxed repaired resembling scenarios tile topped troubled tunes unwanted Bosnia Burke DJ Hawaiian Mongolia RCA Sierra Leone States, the Tacitus all over the world antennas apartments breakthrough deadly dignity donations dragon ecology embrace ethanol executing expired follow-up glaciers gospel grams income tax larvae maximize metabolism preaching prefixes premier promptly propeller rails reporters reunion ridges squad sticks subdivided supplier suspicious talked to some extent 
upcoming volatile workshop AFC Algeria Babylon Beatles Cohen Gerald Hubbard Santiago The President VHS acidic comic book conscience cylindrical disagreed establishments fungi impractical inspection lasers microscopic mold monastic monitors ourselves potato potatoes recorder resume schools of seize telephones theorems welding Buffy Cornwall HD Hampshire Kirk Norton Pluto The International Vulcan and blue archives baptized burns cf fairy farmer figure in in production of old planting poured protects provoked rim ruined securing somehow spells sued targeting tertiary with regard to you're Allan Andy Dover Ernst Franco Gaelic Israelis Obama TARDIS West Indies Zimbabwe advertisements amendments bullets causal communion crystalline demise disasters encompasses fold for free garrison heirs hormone in reference to intellectuals intimate invalid jets linguistics morphology nutrient orthodox pioneered promotional radically relate to simulate tangent there's verify viewer Castro Eusebius abolition antibiotics breath brigades buttons checking commonly known constrained deprived duo enslaved epidemic eruption ethnicity expenditures expulsion homeland in reality installations lever lucrative pick up pledged pork prospect racism reinforce remnants slogan snowfall spaced stretching very well visitor washed won't BIOS Dennis Dorothy English-speaking HIV Hamburg Ltd O. 
Wilde anode aristocratic chase confirmation confusing cooperate defect diplomacy disappointed discretion drastically electrically entirety experimented explorers fathers fins flank frustrated humid in case of lined mathematician microprocessor misleading motive plagued prefer to presided radio station rap recognizable republican systematically take part in truce Bermuda Breton Clarke Indonesian Plutarch Taft answer to authoritative casual coincide commonplace delta descriptive dug emigrated fearing holders in excess it must be localized parachute pedal pit preserving privilege quartz radios realizes rid stimulated subscription transforms ACLU Albanian Brunswick Delaware Franks Haiti John Paul Taiwanese additive administrator adopting ambitions arrange cake coils congestion constituents contrasted daylight efficacy emit expectation first round font fortifications good and h in the background incomes inventory linguists lion mills narrator non observable packaging pendulum poisoning queue rebuild rotary see that skating stretches the Commonwealth theoretically therapeutic throughput transferring voter weighing without being First World War Leopold Mason Nasser UC West Africa abstraction academy accomplish alarm at this point celebrity configured conquer consulting contaminated cortex day-to-day disguise donation fence fleeing hearts highest point honest hook imminent in the hands of insect inventor lease mathematically melt not work oversee photographic politician polls portray rewarded routines sacked setup shade skiing spelled sponsor sworn under construction unfinished unsuccessfully veteran Adolf Athena Broncos Erie Estonia Gustav Horace Nietzsche PCI Paige Syracuse abnormal bark bodily commanding decks endurance escaping exceptionally fibre flooded grounded implying in private in the process of labelled landowners middle class monster multiplied nowadays overnight professors ratification theft triangles Alfonso PlayStation Quran Yemen alloy atop be due beads 
binds canvas catalyst cipher counsel declares fauna flavors fused hazard implicit infancy interviewed medications obstacle open the participant political party proponents recruit sacrifices subscribers sweep textiles visually void Basel Ernest Falun Gong Keith Mbit/s Mozart Palestinians Persia The King Tibet admits affection amino acid ancient times blessing derive from diagonal discouraged enrollment faded fried highlight install locomotives lowering memorable minus monarchs monitored nickel notorious papacy pleasant polymers promotes rabbis recovering reed romance safer sank so it is spacing spans thorough touching travelers uniformly Athenians Chapman Diana Ecuador Hanover Heracles Indianapolis Morse Scotia Wall Street adhere adherents aforementioned analyses angel antibodies archive bags bat classroom commemorate contacted contempt cook corrosion doubling elevations foul graphite greenhouse inventions kids landscapes modern-day overlapping pollen postponed puzzle responds rhythmic ridge shoe spinal stimulate strands struggling swords tags unto variously Assyrian Bobby Davies Hannibal Joel Khrushchev accelerate believers brakes bugs categorized come up constants convoy definitely differentiation disadvantages duel evolving grandmother harbor hostilities incapable journalism lexical lips live with metaphysical mystical negligible not found on the part of paragraph passed on password protested pseudonym qualification recognise recognizing rooted seating seem to be sexes taxi the Father transitional twins vegetable verdict z Cincinnati Hamas Hancock Luis Savoy Whig Xavier Yankee biased bleeding bride concealed cope cycling discovering elders exaggerated for a long time genetically in principle motives noon not included pads parody patrons plug prevailed radial reproduced rocky southeastern strengthening stuff textbook the weather unavailable utilizing veins voluntarily worthy years. 
Berber Bohemia British Army Great Lakes Gregorian Humphrey People's Republic The Roman administrators angels appreciation athlete backgrounds be thought of belts can not characterization choir conjecture deposed distinguishes first edition fitting fortress guy in with investigating lethal lightweight nomenclature of the dead personalities rented rumors strand sunshine the campus the unique tragic upstream very good Ambrose CPUs Copenhagen adulthood advancement affiliation algae anarchist appointments ashes back and forth boots care for celebrities confident confiscated connector dB denomination donor enthusiasts equity establishes first person harvested human being inside of liturgical mediation monsoon plea probabilities pulp recruits reel reformed replica rotational supervised supplemented textbooks trauma tumor turning point undertake upgrades Clara Heinrich House of Lords Huxley Indo-European Jerome Keynes Lenin Macmillan Omaha Somerset Versailles Weber a.m. air force angular momentum augmented be so bypass calibration discharged electrode fight against from the beginning get to grief heterosexual imagined impurities lifespan martial arts miss negotiation nurse provisional public domain quietly quotation secrets segregation to be seen truths two-year unpublished vastly velocities Cambodia Latvia Saxony Stevenson The European Whitney adaptive analytical anthem coated compass convened cows cyclic dedication delicate dictatorship dipole disturbed drop in encourages founders free from hybrids incentive infectious knots latitudes lineup methane mg narrower originates pearls populace postwar proverbs public opinion realization recipes reflective slip tender Bristol Church of England Goldman Hercules
================================================
FILE: demo.py
================================================
import cv2
import torch
import os, glob
import numpy as np
import gradio as gr
from PIL import Image
from omegaconf import OmegaConf
from contextlib import nullcontext
from pytorch_lightning import seed_everything
from os.path import join as ospj
from util import *
def predict(cfgs, model, sampler, batch):

    context = nullcontext if cfgs.aae_enabled else torch.no_grad

    with context():

        batch, batch_uc_1 = prepare_batch(cfgs, batch)

        c, uc_1 = model.conditioner.get_unconditional_conditioning(
            batch,
            batch_uc=batch_uc_1,
            force_uc_zero_embeddings=cfgs.force_uc_zero_embeddings,
        )

        x = sampler.get_init_noise(cfgs, model, cond=c, batch=batch, uc=uc_1)
        samples_z = sampler(model, x, cond=c, batch=batch, uc=uc_1, init_step=0,
                            aae_enabled=cfgs.aae_enabled, detailed=cfgs.detailed)
        samples_x = model.decode_first_stage(samples_z)
        samples = torch.clamp((samples_x + 1.0) / 2.0, min=0.0, max=1.0)

        return samples, samples_z
def demo_predict(input_blk, text, num_samples, steps, scale, seed, show_detail):

    global cfgs, global_index

    global_index += 1
    if num_samples > 1: cfgs.noise_iters = 0

    cfgs.batch_size = num_samples
    cfgs.steps = steps
    cfgs.scale[0] = scale
    cfgs.detailed = show_detail
    seed_everything(seed)

    sampler = init_sampling(cfgs)

    image = input_blk["image"]
    mask = input_blk["mask"]

    image = cv2.resize(image, (cfgs.W, cfgs.H))
    mask = cv2.resize(mask, (cfgs.W, cfgs.H))
    mask = (mask == 0).astype(np.int32)

    image = torch.from_numpy(image.transpose(2, 0, 1)).to(dtype=torch.float32) / 127.5 - 1.0
    mask = torch.from_numpy(mask.transpose(2, 0, 1)).to(dtype=torch.float32).mean(dim=0, keepdim=True)
    masked = image * mask
    mask = 1 - mask

    seg_mask = torch.cat((torch.ones(len(text)), torch.zeros(cfgs.seq_len - len(text))))

    # additional cond
    txt = f"\"{text}\""
    original_size_as_tuple = torch.tensor((cfgs.H, cfgs.W))
    crop_coords_top_left = torch.tensor((0, 0))
    target_size_as_tuple = torch.tensor((cfgs.H, cfgs.W))

    image = torch.tile(image[None], (num_samples, 1, 1, 1))
    mask = torch.tile(mask[None], (num_samples, 1, 1, 1))
    masked = torch.tile(masked[None], (num_samples, 1, 1, 1))
    seg_mask = torch.tile(seg_mask[None], (num_samples, 1))
    original_size_as_tuple = torch.tile(original_size_as_tuple[None], (num_samples, 1))
    crop_coords_top_left = torch.tile(crop_coords_top_left[None], (num_samples, 1))
    target_size_as_tuple = torch.tile(target_size_as_tuple[None], (num_samples, 1))

    text = [text for i in range(num_samples)]
    txt = [txt for i in range(num_samples)]
    name = [str(global_index) for i in range(num_samples)]

    batch = {
        "image": image,
        "mask": mask,
        "masked": masked,
        "seg_mask": seg_mask,
        "label": text,
        "txt": txt,
        "original_size_as_tuple": original_size_as_tuple,
        "crop_coords_top_left": crop_coords_top_left,
        "target_size_as_tuple": target_size_as_tuple,
        "name": name
    }

    samples, samples_z = predict(cfgs, model, sampler, batch)
    samples = samples.cpu().numpy().transpose(0, 2, 3, 1) * 255
    results = [Image.fromarray(sample.astype(np.uint8)) for sample in samples]

    if cfgs.detailed:
        sections = []
        attn_map = Image.open(f"./temp/attn_map/attn_map_{global_index}.png")
        seg_maps = np.load(f"./temp/seg_map/seg_{global_index}.npy")
        for i, seg_map in enumerate(seg_maps):
            seg_map = cv2.resize(seg_map, (cfgs.W, cfgs.H))
            sections.append((seg_map, text[0][i]))
        seg = (results[0], sections)
    else:
        attn_map = None
        seg = None

    return results, attn_map, seg
if __name__ == "__main__":

    os.makedirs("./temp", exist_ok=True)
    os.makedirs("./temp/attn_map", exist_ok=True)
    os.makedirs("./temp/seg_map", exist_ok=True)

    cfgs = OmegaConf.load("./configs/demo.yaml")
    model = init_model(cfgs)
    global_index = 0

    block = gr.Blocks().queue()
    with block:
        with gr.Row():
            gr.HTML(
                """
                UDiffText: A Unified Framework for High-quality Text Synthesis in Arbitrary Images via Character-aware Diffusion Models
                Our proposed UDiffText is capable of synthesizing accurate and harmonious text in either synthetic or real-world images, and can therefore be applied to tasks such as scene text editing (a), arbitrary text generation (b) and accurate T2I generation (c).
                """
            )
        with gr.Row():
            with gr.Column():
                input_blk = gr.Image(source='upload', tool='sketch', type="numpy", label="Input", height=512)
                text = gr.Textbox(label="Text to render:", info="the text you want to render in the masked region")
                run_button = gr.Button(variant="primary")
                with gr.Accordion("Advanced options", open=False):
                    num_samples = gr.Slider(label="Images", info="number of generated images, locked as 1", minimum=1, maximum=1, value=1, step=1)
                    steps = gr.Slider(label="Steps", info="denoising sampling steps", minimum=1, maximum=200, value=50, step=1)
                    scale = gr.Slider(label="Guidance Scale", info="the scale of classifier-free guidance (CFG)", minimum=0.0, maximum=10.0, value=4.0, step=0.1)
                    seed = gr.Slider(label="Seed", info="random seed for noise initialization", minimum=0, maximum=2147483647, step=1, randomize=True)
                    show_detail = gr.Checkbox(label="Show Detail", info="show the additional visualization results", value=False)
            with gr.Column():
                gallery = gr.Gallery(label="Output", height=512, preview=True)
                with gr.Accordion("Visualization results", open=True):
                    with gr.Tab(label="Attention Maps"):
                        gr.Markdown("### Attention maps for each character (extracted from middle blocks at an intermediate sampling step):")
                        attn_map = gr.Image(show_label=False, show_download_button=False)
                    with gr.Tab(label="Segmentation Maps"):
                        gr.Markdown("### Character-level segmentation maps (using upscaled attention maps):")
                        seg_map = gr.AnnotatedImage(height=384, show_label=False)
        # examples
        examples = []
        example_paths = sorted(glob.glob(ospj("./demo/examples", "*")))
        for example_path in example_paths:
            label = example_path.split(os.sep)[-1].split(".")[0].split("_")[0]
            examples.append([example_path, label])
        gr.Markdown("## Examples:")
        gr.Examples(
            examples=examples,
            inputs=[input_blk, text]
        )
        run_button.click(fn=demo_predict, inputs=[input_blk, text, num_samples, steps, scale, seed, show_detail], outputs=[gallery, attn_map, seg_map])

    block.launch()
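The preprocessing in `demo_predict` maps the uint8 canvas into the model's conventions: pixel values go to [-1, 1] via `x / 127.5 - 1`, the sketch mask is collapsed to one channel and inverted so that 1 marks the region to inpaint, and `masked` keeps only the unedited pixels. A minimal sketch of that arithmetic on a synthetic canvas (NumPy/PyTorch only; the 8x8 size is illustrative, the demo uses `cfgs.H = cfgs.W = 512`):

```python
import numpy as np
import torch

H = W = 8  # illustrative; the demo resizes to (cfgs.W, cfgs.H)

# synthetic uint8 image and a sketch mask whose drawn pixels are non-zero
image_np = np.full((H, W, 3), 255, dtype=np.uint8)
mask_np = np.zeros((H, W, 3), dtype=np.uint8)
mask_np[2:6, 2:6] = 255  # the user-drawn region

# same steps as demo_predict: keep-mask is 1 wherever nothing was drawn
keep = (mask_np == 0).astype(np.int32)

image = torch.from_numpy(image_np.transpose(2, 0, 1)).to(torch.float32) / 127.5 - 1.0
keep = torch.from_numpy(keep.transpose(2, 0, 1)).to(torch.float32).mean(dim=0, keepdim=True)

masked = image * keep    # unedited pixels survive, the drawn region is zeroed
inpaint_mask = 1 - keep  # 1 marks the region the model must fill

print(image.min().item(), image.max().item())  # -> 1.0 1.0 for an all-white canvas
print(int(inpaint_mask.sum().item()))          # -> 16 drawn pixels
```

Note that `masked` holds value 0 (mid-gray in [-1, 1] terms) inside the drawn region, which is what the inpainting conditioner sees there.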
================================================
FILE: metrics.py
================================================
import lpips
import os, glob
from os.path import join as ospj

def calc_fid(fake_dir, real_dir, batch_size=1, gpu='0'):
    print(f"evaluating FID score between '{fake_dir}' and '{real_dir}'")
    os.system(f"python -m pytorch_fid {fake_dir} {real_dir} --batch-size {batch_size} --device cuda:{gpu}")

def calc_lpips(fake_dir, real_dir):
    print(f"evaluating LPIPS score between '{fake_dir}' and '{real_dir}'")
    loss_fn = lpips.LPIPS(net='alex').cuda()
    fake_paths = sorted(glob.glob(ospj(fake_dir, "*")))
    real_paths = sorted(glob.glob(ospj(real_dir, "*")))
    dists = []
    for fake_path, real_path in zip(fake_paths, real_paths):
        fake_img = lpips.im2tensor(lpips.load_image(fake_path)).cuda()  # RGB image scaled to [-1, 1]
        real_img = lpips.im2tensor(lpips.load_image(real_path)).cuda()
        dist = loss_fn.forward(fake_img, real_img)
        dists.append(dist.item())  # detach to a plain float so the mean prints cleanly
    print(f"lpips score: {sum(dists) / len(dists)}")
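`calc_lpips` pairs the i-th fake image with the i-th real image by sorted filename and reports the mean distance; `zip` silently truncates to the shorter list, so the two directories must contain matching file sets. A sketch of that pairing-and-averaging pattern with a stand-in distance function, so it runs without the `lpips` package or a GPU (the toy distance and file names here are illustrative only):

```python
import os, glob, tempfile
from os.path import join as ospj

def mean_pairwise(fake_dir, real_dir, dist_fn):
    # pair by sorted filename, as calc_lpips does; note that zip truncates
    # to the shorter list, so mismatched directories fail silently
    fake_paths = sorted(glob.glob(ospj(fake_dir, "*")))
    real_paths = sorted(glob.glob(ospj(real_dir, "*")))
    dists = [dist_fn(f, r) for f, r in zip(fake_paths, real_paths)]
    return sum(dists) / len(dists)

# demo with a toy distance: absolute difference of file sizes
with tempfile.TemporaryDirectory() as fake_dir, tempfile.TemporaryDirectory() as real_dir:
    for d, payloads in ((fake_dir, [b"ab", b"abcd"]), (real_dir, [b"a", b"abc"])):
        for i, payload in enumerate(payloads):
            with open(ospj(d, f"{i:03d}.png"), "wb") as fp:
                fp.write(payload)
    size_dist = lambda f, r: abs(os.path.getsize(f) - os.path.getsize(r))
    score = mean_pairwise(fake_dir, real_dir, size_dist)
    print(score)  # -> 1.0 (each pair differs by one byte)
```

Swapping `size_dist` for a real metric (LPIPS, PSNR, SSIM) recovers the shape of `calc_lpips`.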
================================================
FILE: pretrain.py
================================================
import torch
import torch.utils.data as data
import pytorch_lightning as pl
from omegaconf import OmegaConf
from sgm.util import instantiate_from_config
from pytorch_lightning.callbacks import ModelCheckpoint

def get_dataloader(cfgs):
    dataset = instantiate_from_config(cfgs.dataset)
    dataloader = data.DataLoader(dataset=dataset, batch_size=cfgs.batch_size, shuffle=False, num_workers=cfgs.num_workers)
    return dataloader

def get_model(cfgs):
    model = instantiate_from_config(cfgs.model)
    if "load_ckpt_path" in cfgs:
        model.load_state_dict(torch.load(cfgs.load_ckpt_path, map_location="cpu")["state_dict"], strict=False)
    return model

def train(cfgs):
    dataloader = get_dataloader(cfgs)
    model = get_model(cfgs)
    checkpoint_callback = ModelCheckpoint(dirpath=cfgs.ckpt_dir, every_n_epochs=cfgs.check_freq)
    trainer = pl.Trainer(callbacks=[checkpoint_callback], **cfgs.lightning)
    trainer.fit(model=model, train_dataloaders=dataloader)

if __name__ == "__main__":
    config_path = 'configs/pretrain.yaml'
    cfgs = OmegaConf.load(config_path)
    train(cfgs)
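Both `get_dataloader` and `get_model` build their objects through `sgm.util.instantiate_from_config`, which follows the `target`/`params` convention common to Stable-Diffusion-style configs: resolve the dotted class path in `target`, then call it with the `params` dict. A minimal sketch of that convention, assuming only the `target`/`params` shape (the function name here is hypothetical, and a stdlib class stands in for a model or dataset):

```python
import importlib

def instantiate_from_config_sketch(config):
    # resolve "package.module.ClassName" and call it with the params dict;
    # this mirrors the target/params convention of the yaml configs
    module_name, cls_name = config["target"].rsplit(".", 1)
    cls = getattr(importlib.import_module(module_name), cls_name)
    return cls(**config.get("params", {}))

# usage: the yaml block `target: ... / params: ...` becomes a dict like this
cfg = {"target": "fractions.Fraction", "params": {"numerator": 3, "denominator": 6}}
obj = instantiate_from_config_sketch(cfg)
print(obj)  # -> 1/2
```

The same mechanism lets `configs/pretrain.yaml` name the dataset and model classes without any import changes in `pretrain.py`.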
================================================
FILE: requirements.txt
================================================
colorlover==0.3.0
einops==0.6.1
gradio==3.41.0
imageio==2.31.2
img2dataset==1.42.0
kornia==0.6.9
lpips==0.1.4
matplotlib==3.7.2
nltk==3.8.1
numpy==1.25.1
omegaconf==2.3.0
open-clip-torch==2.20.0
opencv-python==4.6.0.66
Pillow==9.5.0
pytorch-fid==0.3.0
pytorch-lightning==2.0.1
safetensors==0.3.1
scikit-learn==1.3.0
scipy==1.11.1
seaborn==0.12.2
socksio==1.0.0
tensorboard==2.14.0
timm==0.9.2
tokenizers==0.13.3
tqdm==4.65.0
transformers==4.30.2
xformers==0.0.22.post7
================================================
FILE: scripts/preprocess/laion_ocr_pre.ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os,glob\n",
"import json\n",
"from tqdm import tqdm\n",
"from os.path import join as ospj"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"data_root = \"{your data root}/LAION-OCR\"\n",
"image_root = ospj(data_root, \"image\")\n",
"anno_root = ospj(data_root, \"annotation\")\n",
"cache_root = ospj(data_root, \"cache\")\n",
"os.makedirs(cache_root, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"8754781"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"url_txt = \"{your data root}/LAION-OCR/mario_laion_image_url/mario-laion-index-url.txt\"\n",
"with open(url_txt, 'r') as fp:\n",
" res = fp.readlines()\n",
"\n",
"url_lst = []\n",
"for r in res:\n",
" idx, url = r.split(\" \")\n",
" url = url[:-1]\n",
" ex_idx, in_idx = idx.split(\"_\")\n",
" if int(ex_idx) >= 50000: continue\n",
" url_lst.append({\"ex_idx\": ex_idx, \"in_idx\": in_idx, \"url\": url})\n",
"len(url_lst)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'ex_idx': '00000',\n",
" 'in_idx': '000000012',\n",
" 'url': 'https://www.rockfordsystems.com/wp-content/uploads/2015/04/kst194-p.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000000061',\n",
" 'url': 'https://s3.reutersmedia.net/resources/r/?m=02&d=20161208&t=2&i=1164715083&w=644&fh=&fw=&ll=&pl=&sq=&r=LYNXMPECB70YK'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000000108',\n",
" 'url': 'https://images.puma.net/images/907235/02/bv/fnd/EEA/w/288/h/288/'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000000146',\n",
" 'url': 'http://www.slicingupeyeballs.com/wp-content/uploads/2009/05/stoneroses452.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000000199',\n",
" 'url': 'https://amheath.com/wp-content/uploads/2016/12/stranger.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000000214',\n",
" 'url': 'https://www.musicalweb.nl/wp-content/uploads/2020/05/singalong_disney.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000000247',\n",
" 'url': 'http://rlv.zcache.ca/the_coolest_people_are_from_maine_cards-r3dc62ebdf5334ecb909eb464787226c8_xvuat_8byvr_324.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000000255',\n",
" 'url': 'https://i.pinimg.com/736x/eb/2e/f4/eb2ef48889d7ccd2c51b914a8e4cb7d5--alphabet-design-alphabet-letters.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000000278',\n",
" 'url': 'http://rlv.zcache.com.au/keep_calm_and_listen_to_the_great_egrets_poster-r65983dd96acd4ca8a7c3fcfe9f761802_wvu_8byvr_324.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000000352',\n",
" 'url': 'https://i1.wp.com/techget.net/wp-content/uploads/2013/04/Best-Apps-for-Movie-lovers.png?fit=560%2C315&'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000000410',\n",
" 'url': 'http://img.wfrcdn.com/lf/49/hash/18584/7768691/1/1/1.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000000599',\n",
" 'url': 'https://dynamic.brandcrowd.com/asset/logo/ca14c463-843d-4df5-9b7b-40dddadef740/logo?v=4'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000000632',\n",
" 'url': 'https://d3v4qu4rwgk1m7.cloudfront.net/wp-content/uploads/2019/06/24100253/Car-Wash-Kits.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000000731',\n",
" 'url': 'http://img1.imagesbn.com/p/2940011445422_p0_v2_s260x420.JPG'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000000762',\n",
" 'url': 'https://i.ytimg.com/vi/1sfyKP83vBY/0.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000000791',\n",
" 'url': 'https://img5274.weyesimg.com/uploads/y91cvhn0.allweyes.com/images/928d71a9b0a1bfc2f633671c010d28a1.jpg?imageView2/2/w/1920/q/75'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000000868',\n",
" 'url': 'http://rlv.zcache.ca/appreciate_iphone_case_iphone_4_case-r055f4e6687ea4ed2b4e3563faacab0d4_a4643_8byvr_324.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000000985',\n",
" 'url': 'https://3.bp.blogspot.com/-6rPl3MUmfDw/ViD57JQ9UHI/AAAAAAAAAEc/8sQaECCwOvc/s1600/Atlas%2BAnatomy%2B3d%2Bedit.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000000994',\n",
" 'url': 'https://us.123rf.com/450wm/teamplayfor/teamplayfor1701/teamplayfor170100022/69327415-stock-vector-vector-decorate-cakes-with-cream-from-pastry-bag-isolated-illustration-on-white-backgroung-kitchenwa.jpg?ver=6'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001019',\n",
" 'url': 'http://cdn.rekkerd.org/img/201503/famousaudio_atmosphericpianothemes3.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001043',\n",
" 'url': 'https://etsytelemall.com/uploads/169f08c23dc9da2c8df7cd920ee286e0.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001091',\n",
" 'url': 'http://rlv.zcache.ca/trick_or_treat_spider_print-r3328ccd5fa2147c0a2537824815caecc_6azo_8byvr_324.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001202',\n",
" 'url': 'https://timtirelli.files.wordpress.com/2015/01/rock-hall-night-bluejpg.jpg?w=490&h=310'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001210',\n",
" 'url': 'http://2.bp.blogspot.com/-N9nHT7V1J_A/UDqYxlV_q2I/AAAAAAAABXQ/HBNjQOS17L4/s1600/a%2Bblog%2Bby%2Bmischelle%2Bheader.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001231',\n",
" 'url': 'https://capemarkets.co.za/wp-content/uploads/2019/06/Winter-Wonderland-Logo-300x300.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001252',\n",
" 'url': 'http://images.tvfanatic.com/iu/s--HtSeWKj0--/t_teaser_wide/f_autofl_lossyq_75/v1414435450/attachment/the-walking-dead-rt-depreciated.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001277',\n",
" 'url': 'http://clould.ohcosplay.com/images/product/acc7895.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001316',\n",
" 'url': 'https://www.c4dt.org/wp-content/uploads/2019/10/News_CPI_slogan-1.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001396',\n",
" 'url': 'http://i00.i.aliimg.com/wsphoto/v1/1345135562_1/AA-13-multiple-Asdrubal-Cabrera-jersey-Indians-new-white-gray-navy-ivory-authentic-jersey.jpg_350x350.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001432',\n",
" 'url': 'https://www.anikama.co.il/content/images/thumbs/0005499_im-not-getting-any-younger-magnet_600.jpeg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001589',\n",
" 'url': 'https://image.spreadshirtmedia.net/image-server/v1/compositions/T635A2PA1289PT17X26Y58D135375903S72/views/1width=300height=300appearanceId=2backgroundColor=E8E8E8/not-without-my-fatbike-maenner-bio-t-shirt.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001613',\n",
" 'url': 'https://i2.cdn.hhv.de/catalog/475x475/00436/436572.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001675',\n",
" 'url': 'https://elgurudelbasket.com/wp-content/uploads/2015/07/as17_new_orleans-e1511020319987.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001682',\n",
" 'url': 'http://www.rapbasement.com/wp-content/uploads/2015/01/spenzo-dripping-in-gold-feat-lep-bogus-boys-300x300.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001703',\n",
" 'url': 'https://ctl.s6img.com/society6/img/hFwzz_bCCX1PNjeEBKa1071X3vs/h_264w_264/prints/~artwork/s6-original-art-uploads/society6/uploads/misc/b6309341286b42b7afe25334ecd88968/~~/you-did-not-wake-up-today-to-be-mediocre1301106-prints.jpg?wait=0&attempt=0'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001774',\n",
" 'url': 'http://files1.comics.org//img/gcd/covers_by_id/203/w400/203539.jpg?-4555063407778287876'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001816',\n",
" 'url': 'https://www.davenportmachine.com/wp-content/uploads/multi-spindle-automatic-lathe.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001850',\n",
" 'url': 'https://www.monsterbacklinks.com/pics/000/153/279/2bfeec8853935db7688c479e7d9cfd21.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001860',\n",
" 'url': 'http://republicjewelry.com/images/upperdeck_logo2.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001886',\n",
" 'url': 'https://guide.alibaba.com/image/i1/excellent-shellfish-love-baby-wipes-baby-wipes-ass-dedicated-small-packet-of-skin-cleaning-wipes-8-bags-free-shipping-wholesale-25-pcs/TB19CkiLVXXXXaiaXXXXXXXXXXX_!!0-item_pic.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001889',\n",
" 'url': 'https://www.acupunctureworld.com/out/pictures/generated/product/thumb/375_375_90/points-series@2x.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001908',\n",
" 'url': 'https://healthyhomecleaning.websitehabitat.com/wp-content/uploads/sites/49/2015/05/which-norwex-dryer-balls-are-best.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001920',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0505/0407/3414/products/100-pioneering-women-book-art-architecture-books-house-home-product-type-select-pricing-0-3000-ikkadukka-store-ikka-dukka-the-eclectic-online_947_740x.jpg?v=1602935507'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001923',\n",
" 'url': 'https://www.slantmagazine.com/assets/house/5941/books_seenowthen__article-prose-260x.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001931',\n",
" 'url': 'https://bcassetcdn.com/asset/logo/f016a010-62c1-4105-b0af-f59735620074/logo?v=4&text=Logo+Text+Here'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000001994',\n",
" 'url': 'http://www.minimeandluxury.co.uk/wp-content/uploads/2019/04/Birdland-400x400.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002015',\n",
" 'url': 'https://ssl.c.photoshelter.com/img-get2/I0000gnsMogy4NUM/fit=1000x750/20110623-0192.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002055',\n",
" 'url': 'https://i2.wp.com/thepinterestedparent.com/wp-content/uploads/2017/07/ftgj-550x1024.jpg?resize=257%2C478'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002061',\n",
" 'url': 'http://wwwcache.wral.com/asset/lifestyles/goaskmom/2012/11/21/11802796/craftymomturkeybaster-347x300.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002196',\n",
" 'url': 'https://static1.bigstockphoto.com/thumbs/2/3/1/large2/132530996.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002307',\n",
" 'url': 'https://1.bp.blogspot.com/-IzADoO3QH50/X9FHiyLmd5I/AAAAAAAAAG4/8uYW8olwIYcW78LkjqBEgi7JuaaGIY_agCLcBGAsYHQ/w680/o%2Bpapel%2Bdo%2Bmarketing%2Bdigital%2Bpara%2Badvogados.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002316',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0076/9258/2997/files/ABP_-_About_Us_51fbde1d-d702-4604-b637-a22a539ec493_grande.jpg?v=1559323856'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002386',\n",
" 'url': 'https://resources.tidal.com/images/65e8862c/e460/41f0/981e/b25fb974d537/640x640.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002410',\n",
" 'url': 'https://direct.rhapsody.com/imageserver/images/alb.204079853/500x500.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002426',\n",
" 'url': 'https://www.selon-l.fr/wp-content/uploads/2019/04/banniere-wp-bx-events.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002433',\n",
" 'url': 'http://upperdeckblog.com/wp-content/uploads/2013/04/Doc-Jacobs-Event-Upper-Deck-Operation-Gratitude-Tommy-Lasorda-Talking-to-Doc.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002466',\n",
" 'url': 'https://i0.wp.com/www.silkejager.com/wp-content/uploads/2018/06/BronzeAmbassadorJourney.jpg?fit=300%2C300&ssl=1'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002530',\n",
" 'url': 'http://az721511.vo.msecnd.net/images/544/2724104.JPG?636166319456700257'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002593',\n",
" 'url': 'https://3.bp.blogspot.com/-h9WhTMJYFZ8/UMgIU8ZJoOI/AAAAAAAAACA/kOX1Ol6n1rs/s320/AIRTEL_F_V_RGB_3D.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002624',\n",
" 'url': 'https://us.123rf.com/450wm/lmv/lmv1302/lmv130200006/17989339-restaurant-menu-design.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002667',\n",
" 'url': 'https://www.iapa.org/wp-content/uploads/2019/02/Webp.net-resizeimage-2-e1552245032578-370x370.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002740',\n",
" 'url': 'https://thumb7.shutterstock.com/image-photo/stock-vector-beauty-hair-salon-logo-450w-401606989.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002919',\n",
" 'url': 'https://images.edealer.ca/18/71297032.jpeg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002935',\n",
" 'url': 'https://i1.wp.com/blognife.com/wp-content/uploads/2017/05/fan-1.png?fit=1024%2C512'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002946',\n",
" 'url': 'https://syamsulrijal.com/wp-content/uploads/2020/04/header-logo1.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002963',\n",
" 'url': 'http://rlv.zcache.ca/holiday_recipe_binder_2_size-r62214748d53c48e7a08f16f245cd4baa_xz8lg_8byvr_324.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002981',\n",
" 'url': 'https://thetalononline.com/wp-content/uploads/2019/10/theatrecollective-900x600.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002983',\n",
" 'url': 'https://lh4.ggpht.com/vSQLNnu5TCx-YLLW-udrS2Q2bYGa1MlEq3PprqWQS3x4hYK1cX7yPls-i619l6aF_DxZ=h900'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000002989',\n",
" 'url': 'https://itswritenow.com/wp-content/uploads/2018/02/template_358_Alexandra_John.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000003025',\n",
" 'url': 'http://d202m5krfqbpi5.cloudfront.net/books/1348717970l/1342556.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000003058',\n",
" 'url': 'http://thephotobrigade.com/wp-content/uploads/2013/02/Photographers_Outlook_on_2013_2.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000003087',\n",
" 'url': 'https://top10cinema.com/dataimages/29548-a.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000003119',\n",
" 'url': 'https://cdn.waterstones.com/bookjackets/large/9780/5712/9780571284184.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000003172',\n",
" 'url': 'http://a.mktgcdn.com/p/o0iwgIPT4C22tIJHrYXOhtZCqklYALxoZSX50aDxiro/280x280.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000003264',\n",
" 'url': 'https://i.ytimg.com/vi/U-9zhay1_hM/hqdefault.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000003314',\n",
" 'url': 'https://media.gettyimages.com/vectors/music-elements-vector-id469890499?s=612x612'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000003333',\n",
" 'url': 'https://rlv.zcache.com/first_coffee_then_tambura_music_lover_button-r092976bd1ff04826b2a6cfd00908cd11_k94rk_500.jpg?rlvnet=1'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000003344',\n",
" 'url': 'https://64.media.tumblr.com/tumblr_m6krabL2Hv1qz5q5oo1_500.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000003508',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/2835/0496/products/Screen_Shot_2019-09-26_at_3.52.05_PM_1024x1024@2x.png?v=1571827904'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000003557',\n",
" 'url': 'https://shafferoffshoresolutions.com/wp-content/uploads/logo-blue-working-copy.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000003604',\n",
" 'url': 'https://www.makingmusicfun.net/images/thumbs/scott-joplin-word-search-worksheet.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000003621',\n",
" 'url': 'https://i2.wp.com/invisioncommunity.co.uk/wp-content/uploads/2021/05/SEGA-Announce-Lost-Judgment.jpg?resize=640%2C450&ssl=1'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000003686',\n",
" 'url': 'https://s3.amazonaws.com/lollipuff/media/blog/383/authentic-prada-hardware-logo-authentication.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000003740',\n",
" 'url': 'https://ibizafilmoffice.com/wp-content/uploads/2020/01/the-story-of-plastic-1024x576.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000003771',\n",
" 'url': 'https://printablehappybirthdaycards.com/wp-content/uploads/2019/03/For-Wife-Printable-Happy-Birthday-Cards-825x510.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000003778',\n",
" 'url': 'https://media.moddb.com/cache/images/downloads/1/62/61287/thumb_620x2000/tttt.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000003813',\n",
" 'url': 'https://i0.wp.com/finderskeeperscrafting.com/wp-content/uploads/2016/12/pr_ts_ma.jpg?fit=300%2C300'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000003843',\n",
" 'url': 'http://barsuk.com.mx/wp-content/uploads/2016/01/samba-maya-Featured.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000003890',\n",
" 'url': 'https://i1.wp.com/flaviogarcia.es/vlog/wp-content/uploads/2016/01/tendencias-seo-para-2016.jpg?resize=800%2C445&ssl=1'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000003967',\n",
" 'url': 'https://static.wixstatic.com/media/264a63_9b686aa038fd8e13286a3845a02a5783.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000004069',\n",
" 'url': 'https://homesteadsurvivalsite.com/wp-content/uploads/15-ways-to-improve-your-garden-soil-pin-1.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000004070',\n",
" 'url': 'https://i.ytimg.com/vi/YMAzj5T0tUs/0.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000004112',\n",
" 'url': 'https://www.dft-valves.com/img/Adobe-Spark-2-500x281.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000004139',\n",
" 'url': 'http://cdn.pastemagazine.com/www/articles/2014/03/04/PasteSXSW_InteractiveLead.jpg?635300843171398639'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000004158',\n",
" 'url': 'https://i.ibb.co/QDKjzjy/Property-Management-1-65.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000004343',\n",
" 'url': 'https://i.ytimg.com/vi/18ffCe-0fIo/hqdefault.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000004372',\n",
" 'url': 'https://eclecticmomsense.com/wp-content/uploads/2015/10/frankenstein-cupcakes-678x1024.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000004375',\n",
" 'url': 'https://images.sftcdn.net/images/t_optimizedf_auto/p/97bc217b-e1f1-4f32-a877-6229c529c7ea/291508945/rise-of-the-tomb-raider-20-year-celebration-ps-vr-ps4-logo.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000004399',\n",
" 'url': 'https://i.ebayimg.com/images/g/Te8AAOSwa1ZcHpMP/s-l500.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000004468',\n",
" 'url': 'http://images.comiccollectorlive.com/covers/c49/c4997944-611b-4c53-9c47-0db0a2897874.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000004476',\n",
" 'url': 'http://ecx.images-amazon.com/images/I/51635CFGPSL.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000004488',\n",
" 'url': 'https://www.hanaflorists.com/images/zoom_julias-designer-choices-17072792216.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000004519',\n",
" 'url': 'https://i1.wp.com/thetrademarkninja.com/wp-content/uploads/2017/07/T-Mobile-IOT-Trademark-Applications.png?resize=380%2C380'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000004636',\n",
" 'url': 'https://miriamrune.co.uk/content/images/2015/05/novel-ideas-blog-logo.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000004644',\n",
" 'url': 'http://www.game-fort.com/_nw/32/92027781.jpeg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000004729',\n",
" 'url': 'https://images.squarespace-cdn.com/content/56089dfce4b0fb7874bd4c50/1516294695428-PLHOZGEPA8T6DZLD5K31/FINLA_Revision_IQ_LOGO_DESIGN-2.jpg?content-type=image%2Fjpeg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000004792',\n",
" 'url': 'https://4.bp.blogspot.com/-QMboo-_XrI0/V28PKYgX4uI/AAAAAAAAUhw/Nkr57HEEDxwfEsIlNI9UQ9xCWYdFhs9CQCLcB/s400/Slide1.PNG'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000004803',\n",
" 'url': 'https://images.lookhuman.com/render/standard/SoRDxb8l07DddiNxBNerv8VtK4rK45KA/6710-heathered_black-z1-t-yoga-with-the-omies.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000004868',\n",
" 'url': 'https://www.peche-leurre-evolution.com/gfx_folders/1301839227267.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000004903',\n",
" 'url': 'http://im.rediff.com/money/2013/apr/18biggest-companies4.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000005089',\n",
" 'url': 'https://i0.wp.com/wayofdharma.com/wp-content/uploads/2018/08/caste-and-hinduism.jpg?resize=665%2C365&'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000005105',\n",
" 'url': 'https://www.mauvais-genres.com/17147-large_default/day-after-french-movie-poster-47x63-81-day-after.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000005127',\n",
" 'url': 'http://1.bp.blogspot.com/-RdNlgePPMsk/UQO56ttd27I/AAAAAAAAGyU/5VeHmid3SvA/s320/pokemon-platine.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000005148',\n",
" 'url': 'http://drh2.img.digitalriver.com/DRHM/Storefront/Company/ubi/images/hero/Rabbids_BlackTshirt_Hero_FR.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000005206',\n",
" 'url': 'https://bokklubben.no/servlet/VisBildeServlet?produktId=6996035&width=95'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000005381',\n",
" 'url': 'http://indiecurrent.com/wp-content/uploads/2012/12/Favourite-Canadian-Music-Videos-of-2012-Ride-The-Tempo.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000005394',\n",
" 'url': 'http://cascadiasfault.com/wp-content/uploads/2020/06/free-alphabet-coloring-pictures-disney-pages-printable-for.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000005398',\n",
" 'url': 'https://static.onleihe.de/images/bookwire_inter/20210127/9783863911546/im9783863911546s.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000005426',\n",
" 'url': 'http://3o7tpx32lt6v2lcovs4a53lb.wpengine.netdna-cdn.com/wp-content/uploads/2013/08/Geronimo-Shot-Bar-smaller--500x303.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000005489',\n",
" 'url': 'http://a1.phobos.apple.com/us/r1000/020/Purple/53/f6/b4/mzl.jxdsthto.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000005490',\n",
" 'url': 'https://i.pinimg.com/736x/f2/6a/57/f26a57a7472692e5d3b47f47f6cb852e--the-breakfast-club-breakfast-club-quotes.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000005521',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0204/7208/t/26/assets/logo_accessible360.png?62012'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000005536',\n",
" 'url': 'http://cdn.pastemagazine.com/www/articles/2011/03/21/The-Hobbit-book-cover-square.jpg?635336299397505126'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000005676',\n",
" 'url': 'https://www.miletasigns.co.uk/media/catalog/product/cache/1/thumbnail/300x400/9df78eab33525d08d6e5fb8d27136e95/c/p/cp049_1.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000005705',\n",
" 'url': 'http://1.bp.blogspot.com/-Ivmfcqtq1Kc/UbCpQ_7uU7I/AAAAAAAACIg/SCtecu39WDQ/s1600/96089427449264466617.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000005816',\n",
" 'url': 'https://s3.amazonaws.com/gigsalad_media/t/tumbao_chicago/5a7b3321ea1b3_300_sq'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000005845',\n",
" 'url': 'https://www.inspireflyer.com/img/build-with-skyscanner-badge-footer.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000005883',\n",
" 'url': 'https://www.com-sub.biz/uploads/images/full/025930bc60c28c918fc9c5fe5645a3b5.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000005941',\n",
" 'url': 'http://www.quickmeme.com/img/75/751c3ffd668537a299b5f92789aa13ac2f127516425e2426b72c53d72648c698.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000006194',\n",
" 'url': 'https://static.uk.groupon-content.net/app/00/00/default0000.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000006312',\n",
" 'url': 'https://lh3.googleusercontent.com/-YNPcanrd01s/WYCX2nI091I/AAAAAAAAFIc/in9jbS4NS7A6uRFRIqn5PvSbz30-wW1ZgCHMYCw/sinnertitle_thumb%255B2%255D?imgmax=800'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000006317',\n",
" 'url': 'https://cdn.prestosports.com/action/cdn/logos/id/1oz6458b0o827r98.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000006322',\n",
" 'url': 'https://images.justlanded.com/classified_images/Ecuador_Pichincha_Quito/Servicios_Ordenadores-Internet/Agencias-De-Inbound-Marketing-Ecuador/photo/big_scaled_1736569_2650086.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000006381',\n",
" 'url': 'http://rlv.zcache.com.au/worlds_greatest_boss_mousepads-rde3e974765ec45b284e201d0fb7ebb33_x74vi_8byvr_324.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000006383',\n",
" 'url': 'http://tse3.mm.bing.net/th?id=OIP.V8QL0RFK5cnF68JAuwnocQHaHa'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000006396',\n",
" 'url': 'https://d3fa68hw0m2vcc.cloudfront.net/9f9/178414816.jpeg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000006399',\n",
" 'url': 'https://i.ytimg.com/vi/dKLJaTMFjY4/0.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000006444',\n",
" 'url': 'http://a.espncdn.com/combiner/i?img=/espn360/images/showassets/ERJJ.jpg&w=640&h=360&20160201093651'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000006559',\n",
" 'url': 'https://p19cdn4static.sharpschool.com/UserFiles/Servers/Server_2434311/Image/26%20Jan%20TMSA%20Board%20Meeting%20Small.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000006564',\n",
" 'url': 'https://images.saymedia-content.com/.image/t_share/MTc0NjM5OTU4NDY3MDk0NTE4/repetition-and-the-living-dead-an-analysis-of-james-joyces-the-dead.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000006671',\n",
" 'url': 'https://seoheronews.com/adwords-clicks-620x0.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000006697',\n",
" 'url': 'https://image.spreadshirtmedia.net/image-server/v1/compositions/T631A1PA1280PT17X8Y0D130215843S32/views/1width=300height=300appearanceId=1backgroundColor=E8E8E8/leaf-me-women-s-t-shirt.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000006754',\n",
" 'url': 'https://www.netoingenieria.com/wp-content/uploads/2018/01/congreso-multisectiorial-carreteas.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000006838',\n",
" 'url': 'https://i.pinimg.com/736x/4a/10/1d/4a101d10f6c776ac0a90f07ab4d3e7bd.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000006852',\n",
" 'url': 'http://ecx.images-amazon.com/images/I/51G+-ANR8LL._SL300_.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000006899',\n",
" 'url': 'https://www.schlossstrasse-koblenz.de/thumbnail.php?thumb=img/shops/logos/105.jpg&width=300&height=300'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000006904',\n",
" 'url': 'https://andreasnotebook.com/wp-content/uploads/2011/10/Viking-helmet-tutorial.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000006917',\n",
" 'url': 'http://az721511.vo.msecnd.net/images/130/651640.JPG?636237457983790927'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000006941',\n",
" 'url': 'http://canalauthenticgames.com.br/content/uploads/Prime-Authentic-Games-2017-Facebook-166-265x265.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000006981',\n",
" 'url': 'https://static.wixstatic.com/media/45af5e_bdb9587dab814438a8d04dadcf2f17a7~mv2.png/v1/fit/w_500h_500q_90/45af5e_bdb9587dab814438a8d04dadcf2f17a7~mv2.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000007039',\n",
" 'url': 'https://www.platformhg.com/media/images/versions/img94joktmu71652.jpg?bev=1362'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000007050',\n",
" 'url': 'https://images.8tracks.com/cover/i/000/772/489/tumblr_mu65k05sDR1rjp4l1o1_500-8945.jpg?rect=00480480&q=98&fm=jpg&fit=max&w=320&h=320'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000007076',\n",
" 'url': 'https://s3.amazonaws.com/media.locally.net/logo-270x270/14590507_1361209293919802_4617975925624903492_n_2017-05-01-10-22-01.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000007137',\n",
" 'url': 'https://www.divyajanani.org/wp-content/uploads/2019/01/happy-mothers-day-coloring-pages-beautiful-happy-mothers-day-coloring-pages-heart-coloring-pages-of-happy-mothers-day-coloring-pages.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000007171',\n",
" 'url': 'http://4.bp.blogspot.com/-015aW2JD4SI/U5tAQXMp6fI/AAAAAAAAPgU/1GsiWnzAJ2k/s1600/capa.jpg%22%22%22%22%22%22%22%22%22%22%22%2222%22%22%22%22%22%22%22%22%22'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000007255',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/1567/1365/products/thumb_fc7cca25-33ce-4d4a-8b68-bf6cd2f13e6e_480x480.jpg?v=1535551974'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000007258',\n",
" 'url': 'https://www.eden.co.uk/images/300/9781631463228.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000007260',\n",
" 'url': 'https://bestlakecountylawyer.com/wp-content/uploads/2019/05/Criminal-Defense-2019-620x380.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000007264',\n",
" 'url': 'https://www.techmelife.com/wp-content/uploads/2021/03/10-Ways-Social-Media-Helps-Your-SEO-Strategies-e1616572147271.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000007321',\n",
" 'url': 'https://cdn4.singleinterface.com/files/offer-images/80/home_page-3929_1521540985_SIBanner434x434compressor.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000007404',\n",
" 'url': 'http://cdn01.ru/files/users/images/ac/c8/acc89416fe58bf7e2d9eb8b1ba58cc42.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000007471',\n",
" 'url': 'http://i00.i.aliimg.com/wsphoto/v0/1316216120_1/Fast-Shipping-Multifunction-Robot-Vacuum-Cleaner-Big-Mop-Low-noise-Home-Aplicances.jpg_350x350.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000007637',\n",
" 'url': 'https://m.media-amazon.com/images/I/61PpqZ8ETqL._SL500_.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000007656',\n",
" 'url': 'http://static.kodajo.com/images/user/fileUp/6c5a0da6437719cfa5d028c7fd21535d.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000007713',\n",
" 'url': 'https://kcmix.com/wp-content/uploads/2019/11/Youth-Crime-What-Can-I-Do-380x285.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000007854',\n",
" 'url': 'http://d202m5krfqbpi5.cloudfront.net/books/1332982516l/13564669.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000007859',\n",
" 'url': 'https://d12swbtw719y4s.cloudfront.net/images/k5r8coRk/7RMlT0x3kJc80NGb9ERG/5BxK2cS4zj.jpeg?w=600'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000007913',\n",
" 'url': 'https://thisdelicioushouse.com/wp-content/uploads/2019/07/Untitled-185-683x1024.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000007918',\n",
" 'url': 'http://direct.rhapsody.com/imageserver/images/Alb.17950649/500x500.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000007920',\n",
" 'url': 'https://consumerqueen.com/wp-content/uploads/2020/03/FREE-ONLINE-FITNESS-STUDIOS.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000007967',\n",
" 'url': 'https://i1.wp.com/sunuptosundown.net/wp-content/uploads/2018/10/ChocolateIce-CreamDay.png?resize=800%2C374&ssl=1'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008141',\n",
" 'url': 'https://www.jasoncouponking.com/wp-content/uploads/2013/12/target.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008155',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0656/6139/products/OS_0013_Men_Olive_1024x1024.jpg?v=1488698479'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008182',\n",
" 'url': 'http://images.slideplayer.com/5/1557963/slides/slide_1.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008196',\n",
" 'url': 'https://static.wixstatic.com/media/nsplsh_6f775a4137377252416963~mv2_d_4909_3264_s_4_2.jpg/v1/fill/w_454h_333fp_0.50_0.50q_90/nsplsh_6f775a4137377252416963~mv2_d_4909_3264_s_4_2.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008232',\n",
" 'url': 'https://i1.wp.com/conference.virtualreality.to/wp-content/uploads/2019/05/Cream_1x1_logo-500.png?fit=500%2C500&ssl=1'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008284',\n",
" 'url': 'https://truthernews.files.wordpress.com/2014/02/nba-ncaa-bio-terror-warning.jpg?w=385&'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008295',\n",
" 'url': 'http://speed90calgary.com/content/images/thumbs/0001286_samsung_300.jpeg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008322',\n",
" 'url': 'https://fernyhillretreat.blog/wp-content/uploads/2019/02/FHR-blog-logo-no-blog.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008345',\n",
" 'url': 'https://s3.us-west-2.amazonaws.com/ycbm.production.upload.files/ycbm/NWk5WMYLoHitQCCzphdr/images/logotestforbookingcalendar.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008378',\n",
" 'url': 'https://i.ytimg.com/vi/q9sfOGXWcAA/hqdefault.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008398',\n",
" 'url': 'https://fonolive.com/US/ca/vannuys/17857513/05041519f02b3dd8b6fa1037c854b4c9.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008412',\n",
" 'url': 'https://bookstore.firststepspublishing.com/wp-content/uploads/2017/07/SurvivingHitler_th.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008455',\n",
" 'url': 'https://i2.wp.com/tkcoleman.wpengine.com/wp-content/uploads/2014/05/follow-me-and-i-ll-follow-back-2.png?resize=257%2C300'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008511',\n",
" 'url': 'http://rlv.zcache.com/peace_love_juggle_round_stickers-r68cd2e9a0bb247f99b9a988d6d8e0d21_v9waf_8byvr_512.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008560',\n",
" 'url': 'https://photos.bandsintown.com/thumb/6262168.jpeg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008565',\n",
" 'url': 'https://www.fashionsvp.com/wp-content/uploads/2018/10/svp-logo-social-media.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008594',\n",
" 'url': 'http://i2.ytimg.com/vi/UX-TX0_P9f4/0.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008717',\n",
" 'url': 'https://i2.wp.com/asempanews.com/wp-content/uploads/2019/02/Emmanuel-Boateng-has-completed-his-move-to-Dalian-Yifang.jpg?resize=682%2C402&ssl=1'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008740',\n",
" 'url': 'https://dshgames.ru/wp-content/uploads/2019/11/45178826.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008763',\n",
" 'url': 'http://rlv.zcache.co.uk/he_loves_me_more_than_videogames_round_sticker-r695b34ab74b040f2b989cb2987bdf6b0_v9waf_8byvr_324.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008870',\n",
" 'url': 'https://www.idplr.com/components/com_remository_files/file_image_12046/img_12046_01.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008883',\n",
" 'url': 'https://matsuri.sc/sys/wp-content/uploads/2019/06/ma2k-600x491.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000008985',\n",
" 'url': 'http://a4.mzstatic.com/us/r1000/071/Purple/d0/5f/6e/mzm.caxvphqx.png'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000009070',\n",
" 'url': 'https://knickoftime.net/wp-content/uploads/2016/08/autumn-stencil-give-thanks.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000009122',\n",
" 'url': 'https://images.financialexpress.com/2019/04/upsc-2.jpg?w=660&h=440&imflag=true'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000009162',\n",
" 'url': 'https://images.amain.com/images/large/asc/asc89278.jpg?width=200'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000009228',\n",
" 'url': 'https://static1.squarespace.com/static/559abcdae4b078c942e5c735/t/5d5d4f0c7d731300013b9232/1568920539487/?format=1500w'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000009229',\n",
" 'url': 'http://direct.rhapsody.com/imageserver/images/Alb.169582/500x500.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000009306',\n",
" 'url': 'http://3.bp.blogspot.com/-baNJ0IO7XYM/T-T98u8iTVI/AAAAAAAAGJ0/c3elxsuWRhQ/s400/559b.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000009379',\n",
" 'url': 'http://ecx.images-amazon.com/images/I/51sSN7pB9bL.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000009507',\n",
" 'url': 'https://us.123rf.com/450wm/pinnacleanimates/pinnacleanimates1211/pinnacleanimates121100044/16131281-vector-happy-diwali-greeting-illustration.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000009627',\n",
" 'url': 'https://images.lookhuman.com/render/standard/0842006469058676/iphone7sn-whi-z1-t-i-drink-haterade-all-day.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000009708',\n",
" 'url': 'http://clubjerseys.net/pic/Nike-Dolphins--2325-Xavien-Howard-Orange-Women-27s-Stitched-NFL-Limited-Rush-Jersey-4399-17297.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000009727',\n",
" 'url': 'https://secureservercdn.net/198.71.233.109/zz0.685.myftpupload.com/wp-content/uploads/2019/11/CVS-CAT-DIGITAL.jpg?time=1594677716'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000009750',\n",
" 'url': 'http://ecx.images-amazon.com/images/I/61Glg3X8kXL.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000009813',\n",
" 'url': 'https://static3.bigstockphoto.com/thumbs/1/4/1/large2/141030668.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000009860',\n",
" 'url': 'https://www.challengecoins.ca/wp-content/uploads/2019/05/Manitoba-Correctional-Services-Established-1871-316x316.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000009915',\n",
" 'url': 'https://s3.eu-central-1.amazonaws.com/kidsread-books/bossy-and-the-blue-elephant/thumbs/1.jpg'},\n",
" {'ex_idx': '00000',\n",
" 'in_idx': '000009990',\n",
" 'url': 'http://pic.rutube.ru/video/c8/02/c8026a8d3720e3c779ad2c2f5cdd5b7a.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010044',\n",
" 'url': 'https://www.tamilkavithaihal.com/uploads/kadhal-Kavithaigal/kadhal-kavithaigal-photos-idhayam-thudippathu-unakaga-mattum-meera-tamil-kavithai-photos-download.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010091',\n",
" 'url': 'https://d7olld39l2hok.cloudfront.net/logo/4214275.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010187',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0243/7761/products/ScreenShot2020-06-03at4.50.27PM_380x@2x.png?v=1591226983'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010232',\n",
" 'url': 'https://images.squarespace-cdn.com/content/563cb327e4b03fd23ffc5941/1511024795146-TVJKAJM1ME4G58MYGOXE/Pleasure%2BSnowboard%2BMagazin.jpeg?content-type=image%2Fjpeg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010234',\n",
" 'url': 'https://storage.googleapis.com/hipstamp/p/ea887acd95042517885d324fcac9254c-300.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010238',\n",
" 'url': 'http://brokenequipment.com/viewer.php?p6Y2ma6u2spDa6P2V8ze8Vz88egg8x88bJYD'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010277',\n",
" 'url': 'https://i0.wp.com/raksbooks.com/wp-content/uploads/2020/04/false-value-audiobook-by-ben-aaronovitch.jpg?fit=300%2C300&ssl=1'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010296',\n",
" 'url': 'http://images.slideplayer.com/2/761664/slides/slide_7.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010301',\n",
" 'url': 'http://ecx.images-amazon.com/images/I/61lt37IzjiL._SL300_.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010303',\n",
" 'url': 'https://1.bp.blogspot.com/-F6lQA6MJvlg/WaR_xzhpzuI/AAAAAAAAHpE/nMkQJQTNHCY49S2FQ3q0J2gSc_FXf35fgCLcBGAs/s320/blog%2Bbutton.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010330',\n",
" 'url': 'http://edtech.wwcsd.net/wp-content/uploads/2017/06/technology-equipment.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010387',\n",
" 'url': 'https://1.bp.blogspot.com/-Vei8-b26r_o/XtjQdMndLyI/AAAAAAAAs-E/X0js5p954gYZ4g1glJXfOMKwS4zrbSevQCLcBGAsYHQ/s1600/Verse-of-the-Day.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010420',\n",
" 'url': 'https://www.investbypro.com/wp-content/uploads/2020/03/National-Savings-Certificates-min.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010438',\n",
" 'url': 'https://s1.dmcdn.net/v/H0QPw1Nl0vN9hGfMZ/x720'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010464',\n",
" 'url': 'http://rlv.zcache.ca/i_was_born_to_play_the_drums_mousepads-r0ea790343aa6467085c35e645eedd7f4_x74vi_8byvr_324.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010482',\n",
" 'url': 'https://i.pinimg.com/originals/1b/7b/e5/1b7be564de6e47f0e39731cdec34b166.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010514',\n",
" 'url': 'https://dynamic.brandcrowd.com/asset/logo/69918387-908c-4f04-be1f-61f5d2bab958/logo?v=4&text=Logo+Text+Here'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010550',\n",
" 'url': 'https://ss.shayanashop.com/oi/slider/Kaleidoskope/Kaleidoskope_580x319pix_EN.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010634',\n",
" 'url': 'https://images.bwbcovers.com/006/The-Art-of-Loving-Fromm-Erich-9780060958282.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010677',\n",
" 'url': 'https://images-na.ssl-images-amazon.com/images/S/cmx-images-prod/Item/735067/735067._SX312_QL80_TTD_.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010718',\n",
" 'url': 'http://gumps.scene7.com/is/image/Gumps/168992_is?$PIP_Main$'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010748',\n",
" 'url': 'https://pbcdn1.podbean.com/imglogo/image-logo/2577245/Bad_Reception_Large.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010799',\n",
" 'url': 'https://ducttapeanddenim.com/wp-content/uploads/2018/04/Before-and-after-spring-green-jewelry-cabinet-DuctTapeAndDenim.com_-1024x1024-600x600.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010857',\n",
" 'url': 'http://www.playingrockguitar.com/wp-content/uploads/2009/12/Dis_Pedal-300x266.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010929',\n",
" 'url': 'http://4.bp.blogspot.com/-x51sJBXY_Ys/U7LkGTKuRrI/AAAAAAAAFPw/qgGCgMLx4e4/s300/summerfest.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010931',\n",
" 'url': 'https://img.youtube.com/vi/Y4DoPk0JLvc/0.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010957',\n",
" 'url': 'https://purvesinsurance.com/wp-content/uploads/2015/08/Fotolia_87325583_Subscription_Monthly_M-1024x682.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010962',\n",
" 'url': 'https://i.ytimg.com/vi/Sh2A48Unh4s/hqdefault.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000010976',\n",
" 'url': 'https://i0.wp.com/www.theartofcoachingvolleyball.com/wp-content/uploads/2016/06/Attacking-Comprehensive-Course.png?fit=264%2C264&ssl=1'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000011036',\n",
" 'url': 'http://obscure-abhorrence.de/cover/21508trifixionthefirstandthelastcommandment120151208.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000011053',\n",
" 'url': 'http://img1.imagesbn.com/p/9780970696298_p0_v1_s260x420.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000011155',\n",
" 'url': 'https://i.ytimg.com/vi/JvFxJmGJaBY/hqdefault.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000011171',\n",
" 'url': 'https://artwork-cdn.7static.com/static/img/sleeveart/00/029/720/0002972006_350.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000011209',\n",
" 'url': 'http://www.acousticcentre.co.uk/user/products/thumbnails/NS%20WAV5%20Double%20Bass%20-%20Amber.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000011324',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/1786/8373/products/small-lilac-hurts-so-good-womens-workout-tank-top-19157926728_195x195@2x.jpg?v=1549990604'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000011348',\n",
" 'url': 'https://s3.amazonaws.com/static.noisetrade.com/w/64249e7a-038a-4f68-bf9a-0896c7f703f7/good_investigations.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000011411',\n",
" 'url': 'https://i24.servimg.com/u/f24/15/34/85/80/anh6121.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000011422',\n",
" 'url': 'https://cdn11.bigcommerce.com/s-df4cz/images/stencil/500x659/products/6970/5095/SHAKA654.2-2__24741.1398390775.jpg?c=2'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000011511',\n",
" 'url': 'http://pick.cyberpe.org/forum/films/tv_show/The.Colony.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000011544',\n",
" 'url': 'http://ih0.redbubble.net/image.80441319.3160/pp370x410-pad420x460f8f8f8.u3.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000011607',\n",
" 'url': 'https://cdnfa.com/difwear/aee9/files/normal/3262067.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000011650',\n",
" 'url': 'https://i2.wp.com/www.musicarenagh.com/wp-content/uploads/2018/03/OMI-JASON-Victory-Over-SinProdby-Mr-Benchie-mp3-image.jpg?fit=1200%2C1200&'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000011662',\n",
" 'url': 'https://i.ytimg.com/vi/mshHomZlMOY/0.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000011826',\n",
" 'url': 'https://deliverlogic-cravedel.s3.amazonaws.com/logos/front/8483.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000011870',\n",
" 'url': 'http://4.bp.blogspot.com/-Ap4fzk4UVQo/Vd7nIIR_W1I/AAAAAAAADss/ZB1-L-sjWy0/s400/Download%2BFree%2BGame%2BTales%2Bfrom%2Bthe%2BBorderlands%2B%2528All%2BVersions%2529%2BUnlock%2BMulti-pack%2B%255BEpisodes%2B2-5%255D%2B100%2525%2BWorking%2Band%2BTested%2Bfor%2BIOS%2Band%2BAndroid.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000011998',\n",
" 'url': 'https://www.hashtagbylily.com/wp-content/uploads/2018/04/What-to-do-in-Hong-Kong-part-2-330x330.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000012023',\n",
" 'url': 'http://novavideoz.com/wp-content/uploads/2018/04/Harmonize-ft-Diamond-Platnumz-%E2%80%93-Kwangwaru-500x400.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000012082',\n",
" 'url': 'https://mk0gamesnetentcxytko.kinstacdn.com/wp-content/uploads/2019/02/flowers-christmas.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000012091',\n",
" 'url': 'http://i.vimeocdn.com/portrait/2124563_300x300.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000012141',\n",
" 'url': 'https://s3.amazonaws.com/cms.ipressroom.com/256/files/20200/56967ac05e8eef6fa444ee12_Engineering+logo/Engineering+logo_s.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000012237',\n",
" 'url': 'https://images.squarespace-cdn.com/content/54a43fb9e4b0d1cd06e51447/1422156942904-CJ5T32H1HZIEO1XJZ96A/habitation+conference+logo_ivey+media+group.jpg?format=1000w&content-type=image%2Fjpeg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000012301',\n",
" 'url': 'https://img0.etsystatic.com/203/0/11077597/il_340x270.1417321564_bles.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000012348',\n",
" 'url': 'http://messhelper.com/wp-content/uploads/12333/tmp-b2e676b2-a196-45b9-bdaf-6c09ff1222c6.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000012399',\n",
" 'url': 'https://static.es.groupon-content.net/app/00/00/default0000.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000012461',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0044/3328/1135/products/image_3c54afbe-0199-436f-af5b-e792df2a836a_250x250@2x.jpg?v=1596859943'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000012533',\n",
" 'url': 'https://clipground.com/images/reptilium-clipart-9.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000012724',\n",
" 'url': 'http://kitelife.com/wp-content/uploads/2016/12/KiteKites_comad.png?pas=15877746031702240410'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000012745',\n",
" 'url': 'http://rlv.zcache.com/happy_birthday_teddy_bear_2nd_birthday_sticker-r64214bedd6ea45a7ad472d423858a4a3_v9waf_8byvr_324.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000012758',\n",
" 'url': 'https://images.socialwelfare.library.vcu.edu/files/thumbnails/1f5f551dd68e86f9cfaa53c397cffe3a.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000012790',\n",
" 'url': 'http://conniechapman.com/wp-content/uploads/2016/07/slow-down-tune-in.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000012839',\n",
" 'url': 'https://irbarcelona.it/wp-content/uploads/2016/10/purchase-citypassbcn.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000012925',\n",
" 'url': 'https://di2ponv0v5otw.cloudfront.net/posts/2019/06/06/5cf958dcfe19c7f83506ee97/s_5cf9592cc953d822221f62c2.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000012952',\n",
" 'url': 'https://st.depositphotos.com/1013213/2954/v/380/depositphotos_29549137-stock-illustration-summer-travel-design.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000012974',\n",
" 'url': 'https://milnersblog.files.wordpress.com/2018/05/solo-a-star-wars-story-dolby-amc-exclusive-film-poster-banner.jpg?w=672&h=372&crop=1'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013048',\n",
" 'url': 'https://m.media-amazon.com/images/I/51tIQJNKc0L._SL500_.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013086',\n",
" 'url': 'http://anchorstone.com/wp-content/uploads/2014/08/discovered.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013163',\n",
" 'url': 'https://i.ebayimg.com/00/s/NTUxWDU2Mg==/z/4iUAAMXQs6FRQvBE/$T2eC16Z!yEE9s5jGJZWBRQvBEH+Tg~~6060_35.JPG?set_id=8800005007'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013168',\n",
" 'url': 'https://i.ytimg.com/vi/Pxuju4laJHw/hqdefault.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013173',\n",
" 'url': 'https://images-eu.ssl-images-amazon.com/images/I/511aRAabyPL.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013219',\n",
" 'url': 'https://static1.bigstockphoto.com/thumbs/0/5/1/large2/150481730.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013226',\n",
" 'url': 'https://media.karousell.com/media/photos/products/2017/08/14/100042_121789023_thumbnailW'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013237',\n",
" 'url': 'http://betterbuilthomes.com.au/wp-content/uploads/2016/11/xmas-lightbox-669x272.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013248',\n",
" 'url': 'https://media.karousell.com/media/photos/products/2016/07/14/102716_60199909_thumbnaily'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013397',\n",
" 'url': 'https://i1.wp.com/bmariephoto.com/wp-content/uploads/2014/03/tuesday-top-3-1.jpg?resize=640%2C640'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013405',\n",
" 'url': 'https://image.spreadshirtmedia.com/image-server/v1/compositions/111004097/views/1width=300height=300version=1473664654/enjay-t-shirt-green-men-s-premium-t-shirt.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013502',\n",
" 'url': 'http://b.vimeocdn.com/ps/622/075/6220755_300.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013510',\n",
" 'url': 'https://i.ytimg.com/vi/qgL-rvBRrpw/0.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013523',\n",
" 'url': 'https://m.media-amazon.com/images/I/51sRrOp3TuL._SL320_.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013619',\n",
" 'url': 'https://lyricstamizha.com/wp-content/uploads/2018/01/Yaar-Ivan-Song-Lyrics.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013671',\n",
" 'url': 'https://direct.rhapsody.com/imageserver/images/Alb.259691142/500x500.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013761',\n",
" 'url': 'https://thefullnester.com/wp-content/uploads/2018/09/CopingwithHomesicknessin-College-1-683x1024.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013775',\n",
" 'url': 'http://freedesignresources.net/wp-content/uploads/2016/07/10-Free-Realistic-Landscape-Background-prev02.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013876',\n",
" 'url': 'https://cdn.doyou.com/wp/2015/08/6-Alignment-Tips-for-Revolved-Triangle-Pose.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013887',\n",
" 'url': 'https://static1.bigstockphoto.com/2/5/1/large2/152304320.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013889',\n",
" 'url': 'http://savedbylovecreations.com/wp-content/uploads/2012/04/TinCans.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013939',\n",
" 'url': 'https://www.sbdcnj.com/wp-content/uploads/SS_Logo_Clean_Blue.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013952',\n",
" 'url': 'https://cfl-createforless.netdna-ssl.com/p-images/3/2015/0925/289687-3-1.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000013994',\n",
" 'url': 'http://tse1.mm.bing.net/th?id=OIP.kaH_IuK8_d4Ej_-_iO8HmQHaGW'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014045',\n",
" 'url': 'https://images.squarespace-cdn.com/content/547e23cee4b078699e15e789/1441912932977-4084X18EP745WT6U1U5R/logo.jpg?format=1500w&content-type=image%2Fjpeg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014092',\n",
" 'url': 'http://rlv.zcache.co.uk/i_brake_for_horse_trainers_bumper_sticker-r3ac6fd3ceed54253bb08b492fb7aeaa9_v9wht_8byvr_324.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014106',\n",
" 'url': 'https://s-lite.qwant.com/thumbr/480x320/6/2/69a98eee41e36abcdf9d33c7c1253e58f8b2525699e694c13041cefedeeef1/s-l300.jpg?u=https%3A%2F%2Fi.ebayimg.com%2Fimages%2Fg%2FTg0AAOSw3Ppe5C9N%2Fs-l300.jpg&q=0&b=1&p=0&a=0'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014148',\n",
" 'url': 'https://i1.ytimg.com/vi/Y9AHZwoOCY8/hqdefault.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014165',\n",
" 'url': 'http://lh3.googleusercontent.com/PrDlHY9uJxbV6avABA968De_On7OrxzNde_NIpz81eIAPwWZaF6UsCQ-vq0zquIiYA=w300'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014197',\n",
" 'url': 'https://central-prod.s3-us-west-2.amazonaws.com/files/events/logos/2214/banner/Logo_Corrida.jpg?1526998128'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014231',\n",
" 'url': 'https://www.planetdance.com/Graphics/Product_Thumbnails/Medi_WB09.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014234',\n",
" 'url': 'https://gamexguide.com/wp-content/uploads/2019/11/uC430zc6irJxqplknVELig-760x567.jpeg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014299',\n",
" 'url': 'http://3.bp.blogspot.com/-uYlwGl-n790/UDrZtCgyaAI/AAAAAAAAABc/xEgCWXltdck/s1600/header.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014305',\n",
" 'url': 'https://cdn2.hubspot.net/hub/147789/file-405510524-jpg/SSF_Color_Logo.jpg?t=1405012110500'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014423',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/1717/6009/products/beta-tools-mastercargotm-5-drawer-workbench-c57sd-workbench-beta-tools-5_large.jpg?v=1538022284'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014451',\n",
" 'url': 'http://ep.yimg.com/ay/collegefanfare/tennessee-27-adidas-replica-football-jersey-orange-9.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014509',\n",
" 'url': 'https://images.g2crowd.com/uploads/product/image/large_detail/large_detail_1515183774/schoolauction-net.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014543',\n",
" 'url': 'https://i.ytimg.com/vi/B5lzt42Tb20/hqdefault.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014634',\n",
" 'url': 'https://us.123rf.com/450wm/onirb/onirb1307/onirb130701947/21142535-3d-graphic-with-vintage-wifi-label-on-vintage-background.jpg?ver=6'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014682',\n",
" 'url': 'https://www.pjfiala.com/wp-content/uploads/2018/01/Boxed-set-front-cover-505x800.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014733',\n",
" 'url': 'https://apollo-singapore.akamaized.net:443/v1/files/gra8oewsopyl1-IN/image;s=272x0'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014744',\n",
" 'url': 'https://thumbs.dreamstime.com/b/text-sign-showing-my-goals-conceptual-photo-goal-aim-strategy-determination-career-plan-objective-target-vision-written-notebo-114007591.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014764',\n",
" 'url': 'https://animalgiftclub-static.myshopblocks.com/images/2019/03/contain/512x512/5f3e387ea533cd46681d4f74d5ed9834.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014826',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/2138/0103/products/RWYAGreyTee_450x450.jpg?v=1599673652'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014864',\n",
" 'url': 'https://www.wooconn.sk/wp-content/uploads/2020/07/ikros-connector-362x362.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014869',\n",
" 'url': 'https://audiobyray.com/wp-content/uploads/2018/10/Animas-Music-Record-Label-AudiobyRay-Listing.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014932',\n",
" 'url': 'http://freevector.co/wp-content/uploads/2012/12/winthrop-eagles-9.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000014984',\n",
" 'url': 'https://i.servimg.com/u/f19/20/13/02/96/frt10.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000015004',\n",
" 'url': 'https://images.squarespace-cdn.com/content/59c90fb68dd041d078f5c847/1542315256617-GBLTQBBE4CI1487V51I4/raspberry-fact3.png?content-type=image%2Fpng'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000015011',\n",
" 'url': 'https://image.isu.pub/131218144804-b0191da45469b90009ae59b010761cff/jpg/page_1_thumb_large.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000015110',\n",
" 'url': 'https://dm0qx8t0i9gc9.cloudfront.net/thumbnails/image/rDtN98Qoishumwih/2013-schedule-calendar-shows-future-business-targets_zk6agVP__thumb.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000015140',\n",
" 'url': 'https://cdn3.volusion.com/mqpef.fzrft/v/vspfiles/photos/LS-KOO-1.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000015206',\n",
" 'url': 'https://www.dhresource.com/260x260s/f2-albu-g7-M01-D1-60-rBVaSlvrzDWARayaAAGqmGjmXy8633.jpg/ipl-shr-hair-removal-machine-most-popular.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000015286',\n",
" 'url': 'https://mir-s3-cdn-cf.behance.net/projects/404/e8e0df27146041.55810d5d31e40.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000015436',\n",
" 'url': 'https://i.ytimg.com/vi/BgjUPo8Iw5s/hqdefault.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000015457',\n",
" 'url': 'https://www.onlinecasinoreports.co.nz/images/crazywinners770x436.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000015506',\n",
" 'url': 'http://ecimages.kobobooks.com/Image.ashx?imageID=I7IGb83X-0mAS-0LDfVX5Q&Type=Full'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000015533',\n",
" 'url': 'https://i3.cpcache.com/product/75375638/Camp_Hair_Dont_Care_Mug_300x300.jpg?height=300&width=300&qv=90&side=back&Filters=[%7B\"name\":\"background\"\"value\":\"ddddde\"\"sequence\":2%7D]'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000015555',\n",
" 'url': 'https://www.cmirad.net/wp-content/uploads/awards-BOES-2020-upd.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000015632',\n",
" 'url': 'http://tse1.mm.bing.net/th?id=OIP.oyCsBr2oouFNT1mJcIIF-AHaHa'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000015651',\n",
" 'url': 'https://oi-punk.com/out/pictures/generated/product/1/380_340_70/plan.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000015666',\n",
" 'url': 'https://cloud.firebrandtech.com/api/v2/img/111/9781781314630/L'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000015723',\n",
" 'url': 'https://i.ytimg.com/vi/DHmFB5lSMR0/hqdefault.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000015928',\n",
" 'url': 'https://cache.mansion.com/shared/lobby/web/games/251x147/bjp.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000015941',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0049/1182/4965/products/9781938328923_250x250@2x.jpg?v=1550157343'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016024',\n",
" 'url': 'http://direct.rhapsody.com/imageserver/images/Alb.53223107/500x500.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016050',\n",
" 'url': 'https://londonjuniorknights.com/public/images/teams/1550/sponsors/THE_TRAP_DOC_Inc.JPG'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016169',\n",
" 'url': 'http://cdn.pastemagazine.com/www/articles/2014/03/04/PasteSXSW_InteractiveLead.jpg?635301253355093818'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016181',\n",
" 'url': 'http://www.wytheraceway.com/wp-content/uploads/2018/04/Feature-Winner.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016267',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0045/6608/9774/products/6E6A1189_GPSS_1500X_9e63f73f-8c3d-4bd0-a8e7-d3514beadd28_530x@2x.jpg?v=1606243775'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016307',\n",
" 'url': 'http://img2.imagesbn.com/p/9781433601521_p0_v2_s260x420.JPG'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016330',\n",
" 'url': 'http://exomotive.com/wp-content/uploads/2012/02/Rocket_Article-640x416.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016363',\n",
" 'url': 'http://img.gawkerassets.com/img/17m8au650w2isjpg/original.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016393',\n",
" 'url': 'https://st2.depositphotos.com/3740491/8097/v/380/depositphotos_80973478-stock-illustration-bakery-sticker-collection-with-hand.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016436',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0018/9340/0623/products/BlackForestCherryOnTop_512x.jpg?v=1608188301'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016444',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0006/2124/8569/articles/unnamed_242849a4-ebed-4d15-b777-d9036068bf85_2000x.jpg?v=1611589943'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016459',\n",
" 'url': 'https://static3.bigstockphoto.com/8/4/1/large2/148921085.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016466',\n",
" 'url': 'https://images.all-free-download.com/images/graphicthumb/summer_sale_banner_red_flame_decoration_6833658.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016472',\n",
" 'url': 'https://phantanews.de/wp/wp-content/uploads/2015/06/e3-trailer-zu-mass-effect-androm.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016489',\n",
" 'url': 'https://i0.wp.com/www.justcoloring.info/wp-content/uploads/2018/05/letter-c-is-for-cat-coloring-page-free-printable-letter-c-coloring-pages.png?ssl=1'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016494',\n",
" 'url': 'http://i.ytimg.com/vi/p6ZxI5_A69M/0.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016507',\n",
" 'url': 'http://www.quickmeme.com/img/57/57bdf230d100cb528d9d6bd3a76350a9ac1a0602791dd3471761d911b6262d10.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016516',\n",
" 'url': 'https://d3wo5wojvuv7l.cloudfront.net/t_square_limited_320/images.spreaker.com/original/855219ba9e6089f767ddddebcbaea2f1.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016556',\n",
" 'url': 'https://i.dlpng.com/static/png/256405_thumb.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016564',\n",
" 'url': 'https://04b8419750745bc449e5-6ed6708ab01c785479edc27d79224746.ssl.cf1.rackcdn.com/thumbnails/WDCTG4GB9GJ209372/21d61f81e0123c8566d6980912ee1d9e.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016597',\n",
" 'url': 'http://balloony.de/media/image/thumbnail/1005502_470x470.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016613',\n",
" 'url': 'https://mk0muwucepum99ape3ia.kinstacdn.com/wp-content/uploads/2018/09/UniversityApplication-300x300.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016627',\n",
" 'url': 'https://i.ytimg.com/vi/cXDULZBOb5U/0.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016708',\n",
" 'url': 'https://i0.wp.com/apkonehack.com/wp-content/uploads/2019/03/Stardew-Valley-FULL-APK-ANDROID-DOWNLOAD.png?resize=720%2C349&ssl=1'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016723',\n",
" 'url': 'https://s3.amazonaws.com/rapgenius/1360663053_cole-truly-yours.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016752',\n",
" 'url': 'https://static2.bigstockphoto.com/thumbs/3/3/1/large2/133496717.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016814',\n",
" 'url': 'https://media.karousell.com/media/photos/products/2016/09/11/083521_67299830_thumbnailH'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016820',\n",
" 'url': 'https://mjobriendesign.com/wp-content/uploads/2015/01/logo-YEP-logo.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016868',\n",
" 'url': 'https://www.hollywoodlanews.com/wp-content/uploads/2016/01/if-then-idina-menzel-pantages-theater-la-394x330.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016951',\n",
" 'url': 'http://i1.wp.com/www.findingthewardrobe.com/wp-content/uploads/2016/06/Book-Review-The-Nest.png?resize=735%2C420'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000016991',\n",
" 'url': 'http://rlv.zcache.com/funny_horse_junkie_gifts_iphone_case-rc5b5b6e612444b25832466f8eec3af5a_vx34d_8byvr_324.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017032',\n",
" 'url': 'https://c5.staticflickr.com/1/289/31847994852_3cc8b1ca0e.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017041',\n",
" 'url': 'https://i0.wp.com/www.casinoplayersreport.com/wp-content/uploads/2016/08/gan_chicksaw.jpg?resize=300%2C275&ssl=1'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017084',\n",
" 'url': 'https://i.harperapps.com/hcuk/covers/9780007347056/x350.JPG'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017214',\n",
" 'url': 'http://weekendnotessydney.hubgarden.com/images/make_a_wish_sydney_comedy_night_2.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017261',\n",
" 'url': 'https://img-1.fyyd.de/pd/layout/57701959833d11c42e0ef896fffe806a21053.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017271',\n",
" 'url': 'https://kpbs.media.clients.ellingtoncms.com/img/features/2016/01/25/CaliforniaCounts_t640.png?a6ea3ebd4438a44b86d2e9c39ecf7613005fe067'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017303',\n",
" 'url': 'http://s019.radikal.ru/i627/1603/a2/d227236fba8c.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017308',\n",
" 'url': 'https://us.123rf.com/450wm/alexwhite/alexwhite1406/alexwhite140600377/29103811-new-year-2015-red-computer-icon-on-white-background.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017342',\n",
" 'url': 'https://mcdn.elefant.ro/images/84/494784/a-short-history-of-the-jews-paperback_1_fullsize.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017392',\n",
" 'url': 'https://is1-ssl.mzstatic.com/image/pf/us/r30/Purple3/v4/7c/2c/02/7c2c028e-d01a-cc3d-1dcf-31b70175050b/mzl.imgpgofn.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017414',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0268/9249/t/2/assets/slideshow_2.jpg?0'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017512',\n",
" 'url': 'http://newyearsevenight.com/wp-content/uploads/2018/12/2019_3D_logo-2.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017548',\n",
" 'url': 'http://img2.imagesbn.com/p/2940033207091_p0_v2_s260x420.JPG'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017591',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/1436/4514/t/4/assets/logo.png?1058400102851290921'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017597',\n",
" 'url': 'https://image.spreadshirtmedia.net/image-server/v1/compositions/114665055/views/1width=300height=300appearanceId=2backgroundColor=E8E8E8version=1450277104/believe-in-yourself-caps-hats-winter-hat.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017697',\n",
" 'url': 'https://www.steamyconcepts.com/wp-content/uploads/Steamy-Concepts-Carpet-Cleaning-Tab.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017704',\n",
" 'url': 'http://snapwishes.com/uploads/Online-Love-Photo-Card-Maker-with-Name-300x420.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017717',\n",
" 'url': 'https://bestwishes4birthday.com/wp-content/uploads/D/happy%20birthday%20cupcake%20sign%20;%2080067-Happy-Birthday-Cupcakes.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017806',\n",
" 'url': 'https://machinehub.com/images/machine-hub-logo.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017824',\n",
" 'url': 'http://media3.fcbarcelona.com/media/asset_publics/resources/000/089/901/size_640x360/2013-10-26_BARCELONA-MADRID_18.v1395220946.JPG'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017834',\n",
" 'url': 'https://cdn11.bigcommerce.com/s-4pg4qzd524/images/stencil/original/products/23265/19204/stcl1585_with_my_whole_heart_for_my_whole_life_pi_red_barn_2_1__28733.1486194762.jpg?c=2'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017835',\n",
" 'url': 'http://rlv.zcache.co.uk/region_west_leverkusen_buttons-r2d66cc58690646b4900855f4b4458015_x7j18_8byvr_324.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017911',\n",
" 'url': 'https://i.ytimg.com/vi/Kx_g_QCtOZI/0.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017953',\n",
" 'url': 'https://i0.wp.com/www.stuffwelike.com/wp-content/uploads/2009/07/troncomp_ref.jpg?fit=1200%2C642&ssl=1'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000017997',\n",
" 'url': 'https://avatars.sched.co/1/ce/8875235/avatar.jpg?635'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000018040',\n",
" 'url': 'https://tse2.mm.bing.net/th?id=OIP.y2jIGh1-kSQNBom5Zo3IHgAAAA&w=131&h=131'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000018058',\n",
" 'url': 'http://rlv.zcache.co.uk/genuine_biographer_tee_shirt-r6780d11d87b54678a4a8958af4dc6f31_vjfef_324.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000018108',\n",
" 'url': 'https://www.heartki.com/wp-content/uploads/2014/05/theuniverse_1.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000018161',\n",
" 'url': 'http://rlv.zcache.co.uk/dont_feed_the_animals_round_stickers-r896cf30504234a2096d453821d98b3cb_v9wth_8byvr_324.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000018192',\n",
" 'url': 'https://therewillbe.games/media/reviews/photos/thumbnail/640x640s/cf/94/b7/skulk-hollow-board-game-review-91-1574900473.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000018228',\n",
" 'url': 'https://davidlhudsonjr.com/wp-content/uploads/2018/08/Andrew-Goodman-FDN.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000018369',\n",
" 'url': 'https://pefnj.org/wp-content/uploads/2017/08/Screen-Shot-2017-02-06-at-11.07.29-AM-768x495-510x382.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000018410',\n",
" 'url': 'https://images-na.ssl-images-amazon.com/images/I/81kmu3Go09L._SX425_.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000018616',\n",
" 'url': 'https://d1gij4u04nulni.cloudfront.net/pics/property/339205702/1/IDX_1/v2//crop/540358'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000018638',\n",
" 'url': 'http://rlv.zcache.co.uk/i_love_two_handed_tennis_pc_speakers-r5d4e74a526ff4c18b92700aa635341ff_vs8xj_8byvr_324.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000018699',\n",
" 'url': 'https://api.historyit.com/iiif/2/5ae8f0730fe610.99848740/5bec37695453b2.58493308.jpg/full/!400400/0/default.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000018717',\n",
" 'url': 'http://womanofmanyroles.com/wp-content/uploads/2013/03/easterdrinksdon-1024x1024.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000018757',\n",
" 'url': 'https://images.squarespace-cdn.com/content/585314452994cae130a1afbb/1482340264347-UHW5ZV298EGAZD3HM6AV/BCC+Logo+-+white.png?format=1500w&content-type=image%2Fpng'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000018762',\n",
" 'url': 'http://www.5thround.com/wp-content/uploads/2015/05/UFCPosterFightNight.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000018767',\n",
" 'url': 'https://www.digitalindiagov.in/wp-content/uploads/2019/01/unnamed-517x300.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000018806',\n",
" 'url': 'https://memegenerator.net/img/instances/43590803/morena.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000018875',\n",
" 'url': 'https://apshop.eu/880965-home_default/strongflex-rear-upper-link-inner-bush-sport-221552a.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000018912',\n",
" 'url': 'https://rlv.zcache.co.uk/speak_fluent_sarcasm_t_shirt-r6f7e547c4b474f4eb8b5cb2174d5b58e_65ye0_540.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000018942',\n",
" 'url': 'http://cybergrass.com/Images/BCGDivide.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000018945',\n",
" 'url': 'http://i.qkme.me/3q3s9o.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019003',\n",
" 'url': 'https://ktperformance.net/images/T140584741.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019016',\n",
" 'url': 'https://realtor.remarketer.ca/Members/Images/brokerage/ash_ashestates.cal.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019054',\n",
" 'url': 'https://joegilpin.hipcast.com/albumart/1000_1610946106.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019082',\n",
" 'url': 'https://online-casino-codes.com/wp-content/uploads/2018/04/285-match-bonus-at-Adler-Online-Casino.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019090',\n",
" 'url': 'https://image.tmdb.org/t/p/w342/B0WkSmfxuyK4jbPkfmTsKX5I4b.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019094',\n",
" 'url': 'http://krui.fm/wordpress/wp-content/uploads/2015/01/Iowa-Basketball.jpeg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019161',\n",
" 'url': 'https://upload.wikimedia.org/wikipedia/commons/thumb/a/a8/Open_Access_Week_stencil_and_card_made_from_stencil_%28square%29.jpg/300px-Open_Access_Week_stencil_and_card_made_from_stencil_%28square%29.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019188',\n",
" 'url': 'https://images.happycow.net/venues/1024/98/97/hcmp98976_375991.jpeg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019243',\n",
" 'url': 'https://www.ligue-bretagne-surf.bzh/wp-content/uploads/2019/03/Logo-ESB-Surf-Club-e1553374195720.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019308',\n",
" 'url': 'https://gadgetsmatrix.com/wp-content/uploads/thumbs_dir/ccdf-la5hrtu22zlid7p4fl3f2mbe9rfvy6dhxny67amf9k.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019317',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0955/6214/products/cocktails_and_dreams_neon_large.jpg?v=1522417184'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019504',\n",
" 'url': 'http://img.bleacherreport.net/img/images/photos/002/196/975/liberty_vs_gardner_webb_crop_north.jpg?w=630&h=420&q=75'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019549',\n",
" 'url': 'http://koothoomi-records.com/images/5074.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019560',\n",
" 'url': 'https://media1.5amily.com/prev_cache/4e8ebcd42fe808208a1a1fa8ddf5939d055b465e.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019587',\n",
" 'url': 'https://i.ytimg.com/vi/JBVpQzvrJ4w/maxresdefault.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019634',\n",
" 'url': 'https://i2.wp.com/simpleathome.com/wp-content/uploads/2018/09/build-the-best-garden-soil.jpg?resize=700%2C1049'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019741',\n",
" 'url': 'https://i.pinimg.com/736x/4b/a3/35/4ba33590e09a435343eec10614d4b785.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019743',\n",
" 'url': 'https://hackster.imgix.net/uploads/attachments/304767/thumbnail_2pNvkPYiOk.jpg?auto=compress%2Cformat&w=400&h=300&fit=min'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019762',\n",
" 'url': 'https://img.youtube.com/vi/GMj0TheMEb8/hqdefault.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019802',\n",
" 'url': 'https://www.leaksmith.com/wp-content/uploads/2019/04/gaf-certified.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019806',\n",
" 'url': 'https://jp.slotsup.com/wp-content/uploads/logo-star-trek-red-alert-wms.png'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019824',\n",
" 'url': 'https://secrethampsted.files.wordpress.com/2016/04/the_clash_visite_rock_londres.jpg?w=768'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019851',\n",
" 'url': 'http://cdn.pastemagazine.com/www/articles/tomb%20raider%20square.jpg?635334990394962546'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019913',\n",
" 'url': 'https://media.istockphoto.com/vectors/word-world-smile-day-vector-in-flat-style-vector-id1166140477?b=1&k=6&m=1166140477&s=170667a&h=MfQ4Z1SkDBjSCA6OPa5AhOFPoMaqm_YxjqaO8IqJQkQ='},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019940',\n",
" 'url': 'https://us.123rf.com/450wm/reginast777/reginast7771802/reginast777180200059/96433124-verzameling-van-cute-cartoon-vlinders-ge%C3%83%C2%AFsoleerd-op-een-witte-achtergrond-.jpg?ver=6'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019955',\n",
" 'url': 'https://solrio.org/wp-content/uploads/2017/09/Poseidon-Huntington-Beach-desalination-plant-770x300.jpg'},\n",
" {'ex_idx': '00001',\n",
" 'in_idx': '000019984',\n",
" 'url': 'http://www.jvgs.net/dmw2/wp-content/uploads/2016/10/lm.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020005',\n",
" 'url': 'https://i.pinimg.com/736x/91/a4/f6/91a4f6ef5f1149364f1299ff46d78a8a--seattle-washington-washington-state.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020047',\n",
" 'url': 'http://d202m5krfqbpi5.cloudfront.net/books/1360616440l/15705572.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020053',\n",
" 'url': 'http://rlv.zcache.co.uk/made_in_1987_button-r16a5ff1ec8124ad8a764d37d2321c8b9_x7j3i_8byvr_324.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020174',\n",
" 'url': 'https://direct.rhapsody.com/imageserver/images/alb.56897913/500x500.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020181',\n",
" 'url': 'https://tse1.mm.bing.net/th?id=OIP.dRWjySvbhkcObga2aQfbDgHaEK'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020209',\n",
" 'url': 'http://lr-assets.storage.googleapis.com/_nielsen/400/9781780553955.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020290',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0225/2973/products/image_4ec88890-e6dc-44de-ac79-70242d014490_large.png?v=1579123646'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020357',\n",
" 'url': 'http://www.thedesignsheppard.com/wp-content/uploads/2017/03/National-Tile-Week-2017-635x318.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020419',\n",
" 'url': 'https://i.pinimg.com/736x/e1/d8/c8/e1d8c8adb56236cdf6c4c3b06b8f3e8a.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020423',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0820/7847/files/psn.jpg?v=1491706456'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020425',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0920/0236/t/1/assets/logo.png?10530566925433882303'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020463',\n",
" 'url': 'https://www.thepalmbeaches.com/sites/default/master/files/styles/listing_thumb/public/mmg_lfef_images/wicked-delray-ghost-tours-42162-8bd91ec2352d0e062c9192d5a48baf2e.jpg?itok=4nbfrX8_'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020573',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0278/7289/files/Free-retro-vintage-fonts_Big-John-Slim-Joe_1024x1024.jpg?v=1495367650'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020581',\n",
" 'url': 'https://content.sportslogos.net/news/2020/05/Screen-Shot-2020-05-23-at-1.43.51-AM.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020605',\n",
" 'url': 'https://images1.americanlisted.com/nlarge/2015-kia-optima-ex-americanlisted_113934401.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020610',\n",
" 'url': 'https://dynamic.activeactivities.com.au/resources.php?image=listings/0/8/7/163087/images/22062.jpg&width=70&height=70'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020653',\n",
" 'url': 'http://rlv.zcache.co.uk/i_love_passion_fruit_round_sticker-raf59190d45674a4fbd980db2cc84a27f_v9waf_8byvr_324.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020654',\n",
" 'url': 'http://southreggae.com/imagens/livros/bookmarleyjapanbarrylaz.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020673',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0168/3980/products/s-l1600_1308345b-a6e6-42fb-a3ce-6ffa00ae149e_large.jpg?v=1469192722'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020687',\n",
" 'url': 'https://d2si46jc38oa3k.cloudfront.net/system/images/links/l900/81/81bcdf1811172ae7.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020791',\n",
" 'url': 'http://cdn-s3-1.wanelo.com/product/image/1701690/full_size.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020872',\n",
" 'url': 'https://images.megaphone.fm/PDGfrFNqU_oLaMG0y-2zeGPctSZdOl66TRibwKDb4lo/plain/s3://megaphone-prod/podcasts/32534192-614b-11e9-b18c-03a0ee6b52a1/image/HOF-Episode-27-JackJohnson-pt02-TheFight.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020884',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/2062/5273/products/9781612363042_b3c49114-6e7b-44e9-a564-360d8204de39_1024x1024.jpg?v=1550585009'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020898',\n",
" 'url': 'https://i0.wp.com/emmareed.net/wp-content/uploads/2017/03/International-Womens-Day-1-e1488974203349.jpg?fit=737%2C363&ssl=1'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020930',\n",
" 'url': 'https://perfectaquatics.co.uk/media/catalog/product/cache/1/small_image/295x295/9df78eab33525d08d6e5fb8d27136e95/m/i/mini_underwater_filter_200_lph.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000020994',\n",
" 'url': 'http://i.bosscdn.com/product/1b/8e/b4/0b2bb6bab36f48cb5e7694a442.jpg@4e_360w_360h.src'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021004',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/2131/6377/products/GARM-GR29009-TS-MAN-WHT_400x.jpg?v=1578612760'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021032',\n",
" 'url': \"http://images.publicjerseyz.ru/images/Billy/2017/2017-Super-Bowl/Youth's/Nike-Patriots-87-Rob-Gronkowski-Red-2017-Super-Bowl-LI-Champions-Youth-Game-Jersey.jpg\"},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021055',\n",
" 'url': 'http://img2.imagesbn.com/p/9788420548463_p0_v1_s260x420.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021088',\n",
" 'url': 'http://rlv.zcache.co.uk/create_your_own_football_jersey_black_silver_table_card-red37722737c841ddb7848730b77ffd0c_i40g8_8byvr_324.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021160',\n",
" 'url': 'http://4.bp.blogspot.com/-LHYK_0HLnl4/TjoGrYZKAKI/AAAAAAAAABA/duKabJLUA68/s1600/Elec-City-banner-sized.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021163',\n",
" 'url': 'https://img.youtube.com/vi/luRhYG9wKWE/0.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021173',\n",
" 'url': 'http://i1.cpcache.com/product/186037057.jpg?height=150&width=150'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021338',\n",
" 'url': 'https://i.scdn.co/image/ab67616d00001e02a1a8927c6ecc651e2682ed7e'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021340',\n",
" 'url': 'http://cdn.resize.sparkplatform.com/key/1024x768/true/20170725202802606519000000-o.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021384',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0040/6316/6531/products/FREQUENCYBREAK-RECHARGE-QUADIBLEINTEGRITY-KARMICATTUNEMENT_300x300.png?v=1590794582'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021388',\n",
" 'url': 'https://dudemom.com/wp-content/uploads/2013/11/holiday-gift-guide-for-boys.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021463',\n",
" 'url': 'https://www.psdmarket.net/wp-content/uploads/2018/02/latin_party_flyer_psd_psdmarket_1-300x300.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021511',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/2164/1263/products/The_Gentle_Whisper_of_Living_Things_195x195@2x.jpg?v=1500925576'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021545',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0270/5759/products/newcastle-ale_large.jpg?v=1422039992'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021709',\n",
" 'url': 'https://img.freepik.com/free-vector/welcome-summer-bright-poster-design-pink-flamingo_74855-484.jpg?size=626&ext=jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021711',\n",
" 'url': 'https://image.spreadshirtmedia.com/image-server/v1/compositions/105907793/views/3width=300height=300appearanceId=1backgroundColor=E8E8E8version=1367139512/not-just-any-eejit-and-irish-one-st-patricks-day-bottles-mugs-travel-mug.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021720',\n",
" 'url': 'http://images.football.co.uk/630x472/126b982a1b0b7dd39c9a28011cb75f3a.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021788',\n",
" 'url': 'https://www.jasplastik.com/img/dodavatelia/_squares/Quality-manual.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021830',\n",
" 'url': 'http://read.images.worldlibrary.org/img/Members.3/Audio_eBooks/Chapters/magnificent_ambersons_r/magnificent_ambersons_r.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021836',\n",
" 'url': 'https://1.bp.blogspot.com/-lcDAdGH-50c/WQ5hoW9vzNI/AAAAAAAAiqs/KOnGKBRtf3sra6wDvzZ-Z-FBEJYtjyplQCK4B/s1600/The%2BOld%2BFair%2BLOGO%2B512X512.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021843',\n",
" 'url': 'https://lcimages.s3.amazonaws.com/data/feat_img/4518/10329/1589227464.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000021872',\n",
" 'url': 'https://images.squarespace-cdn.com/content/59b7b1e0f9a61e3c5131a0c8/1523428157057-EKRR5P4QYPBWLACT2RY2/3c606d640e77a530e517a367318a5512_original.jpg?format=1000w&content-type=image%2Fjpeg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000022005',\n",
" 'url': 'http://lh4.googleusercontent.com/-uYEPltupeBw/AAAAAAAAAAI/AAAAAAAABFM/SsFVlx2TCVc/photo.jpg?sz=257'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000022032',\n",
" 'url': 'https://cheapdigitaldownload.com/wp-content/uploads/buy-nickelodeon-kart-racer-cd-key-compare-prices-2.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000022053',\n",
" 'url': 'http://rlv.zcache.co.uk/i_love_advance_missouri_mousepad-r28259db956304dd08021299c2bbc91d9_x74vi_8byvr_324.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000022072',\n",
" 'url': 'https://media.apnarm.net.au/media/images/2020/07/11/v3imagesbin94a8164c32cbef14101124c65891aa9c-4go8qjgh9w6svplonu2_t1880.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000022172',\n",
" 'url': 'https://blog.polleverywhere.com/wp-content/uploads/2019/08/womenstrivia.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000022202',\n",
" 'url': 'https://4.bp.blogspot.com/-UH66hGpfNmo/Ubh3jJ9OJ2I/AAAAAAAAYvs/L4BNJtNm-DU/s1600/Rapture%27s+Edge.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000022256',\n",
" 'url': 'http://aksdm.aldianews.com/sites/default/files/styles/article_image/public/articles/warreniowa.jpg?itok=44dJAzNI'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000022306',\n",
" 'url': 'https://i0.wp.com/stayfitmom.com/wp-content/uploads/2015/05/fullbodyworkout.jpg?resize=600%2C646'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000022315',\n",
" 'url': 'https://i.ytimg.com/vi/1d_q05HlGO0/maxresdefault.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000022477',\n",
" 'url': 'http://img.comc.com/i/Baseball/2013/Topps-Tribute---Certified-Autograph-Issue---Orange-Autographed/TA-MO2/Mike-Olt.jpg?id=9e0b3a15-bd0a-44b7-a188-f779187c468b&size=original'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000022552',\n",
" 'url': 'https://yearendparty.vn/wp-content/uploads/2015/07/year-end-party.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000022561',\n",
" 'url': 'https://s3.amazonaws.com/cdn.innovativelanguage.com/sns/em/2016/june/Learn+in+car.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000022633',\n",
" 'url': 'https://images.squarespace-cdn.com/content/53bb42f4e4b0eb51cfda5752/1423714592510-AIB73FCO65DEHTJXNRUN/MOBLogo?content-type=image%2Fjpeg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000022635',\n",
" 'url': 'https://image.spreadshirtmedia.com/image-server/v1/compositions/1013554421/views/1width=300height=300version=1478482171/i-don-t-give-a-damn-funny-graphic-t-shirt-t-shirts-men-s-premium-t-shirt.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000022651',\n",
" 'url': 'https://christiancamppro.com/wp-content/uploads/2018/02/Best-Camps-Retreats-in-Alabama.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000022666',\n",
" 'url': 'https://static.wixstatic.com/media/4e9949_a76b7d6aedd440668c4da5527c040acd~mv2.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000022859',\n",
" 'url': 'https://trendytechie.files.wordpress.com/2016/05/pike_place_market_seattle_trendy_techie_2.jpg?w=720'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000022923',\n",
" 'url': 'http://d2dzjyo4yc2sta.cloudfront.net/?url=images.pitchero.com%2Fui%2F126397%2Fimage_59259361814fc.jpg&w=800&h=800&t=square'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000022943',\n",
" 'url': 'https://image.shutterstock.com/image-photo/stock-vector-baby-car-seat-vector-icons-toddle-car-seat-safe-child-traveling-icons-vector-icons-set-baby-450w-722346157.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000022948',\n",
" 'url': 'http://jouonsplus.com/img/p/568-703-large.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023014',\n",
" 'url': 'https://static3.bigstockphoto.com/5/8/1/large2/185465425.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023027',\n",
" 'url': 'http://st.depositphotos.com/1001439/2873/v/450/depositphotos_28737429-Colorful-easter-floral-background.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023040',\n",
" 'url': 'https://i.pinimg.com/originals/19/3c/f6/193cf6467b7b38eaf7f5e8bd9e179c12.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023093',\n",
" 'url': 'https://lh3.googleusercontent.com/E7n1TWkgqvj7hrSgPo2RFvea9X0t7_m5_vIAjbQABmdXe1KObY9WbQhXKmLjH87Xicj_=h355'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023102',\n",
" 'url': 'https://images.vexels.com/media/users/3/71949/list/69412a008cbf8fcc670d5288e92a685a-20-businessman-icons.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023136',\n",
" 'url': 'http://img.picturequotes.com/2/3/2986/live-like-someone-left-the-gate-open-quote-2.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023213',\n",
" 'url': 'https://image.spreadshirtmedia.net/image-server/v1/compositions/124117021/views/1width=300height=300appearanceId=2version=1439812226.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023302',\n",
" 'url': 'https://birthright.com.au/wp-content/uploads/2018/03/quotes-Mentoring-for-Doulas-300x300.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023326',\n",
" 'url': 'https://ppntestblog.files.wordpress.com/2012/08/mainimages_conference.jpg?w=780'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023386',\n",
" 'url': 'https://www.partnachlodge.de/wp-content/uploads/2020/08/cropped-logo-partnachlodge-header.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023480',\n",
" 'url': 'https://thumb7.shutterstock.com/image-photo/stock-vector-creative-vector-abstract-for-mother-s-day-with-nice-and-creative-illustration-in-background-450w-406105375.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023491',\n",
" 'url': 'http://www.netnewsledger.com/wp-content/uploads/2014/09/Frost-on-the-Windshield-Sept-18-2014.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023553',\n",
" 'url': 'http://covers.booktopia.com.au/big/9780977535644/heroic-forceful-and-fearless.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023562',\n",
" 'url': 'https://us.123rf.com/450wm/mrswilkins/mrswilkins1804/mrswilkins180400769/99227082-north-pole-post-office-rubber-stamp.jpg?ver=6'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023602',\n",
" 'url': 'https://image.spreadshirtmedia.com/image-server/v1/compositions/1010284665/views/1width=300height=300appearanceId=359version=1456137105/you-make-me-whole-tote-bag.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023657',\n",
" 'url': 'https://expressautologistics.com/wp-content/uploads/authorize.net_.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023667',\n",
" 'url': 'https://mmedia.ozone.ru/multimedia/1010816996.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023679',\n",
" 'url': 'https://cdn.schoolloop.com/uimgcdn/aHR0cHM6Ly9wbXMtc2N1c2QtY2Euc2Nob29sbG9vcC5jb20vdWltZy9maWxlLzE1MjAwNjU0NDc5ODQvMTQwNzk4OTcwMzEzMS82Mzc0MTQ3NTI5MDk0MDc1MDEzLmpwZw=='},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023737',\n",
" 'url': 'https://i1.sndcdn.com/avatars-000210261349-jci4gw-t500x500.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023741',\n",
" 'url': 'https://m.media-amazon.com/images/I/41XWFjjf7NL.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023779',\n",
" 'url': 'http://img2.imagesbn.com/p/97361227245_p0_v1_s260x420.JPG'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023784',\n",
" 'url': 'https://i.icanvas.com/LFS79?d=3&sh=v&p=1&s=m&bg=g&t=1569551609'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023862',\n",
" 'url': 'https://static1.bigstockphoto.com/thumbs/1/3/1/large2/131294690.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023879',\n",
" 'url': 'http://oldies.scdn5.secure.raxcdn.com/i/boxart/w340/a-z/e/esjz4201369.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023889',\n",
" 'url': 'https://i2.wp.com/www.aw2y.es/wp-content/uploads/2017/07/logo12.png?fit=772%2C478&'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023955',\n",
" 'url': 'http://welovegraphics.com/Tagarooz/Saturday/sat2_9865fkkfjja.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000023980',\n",
" 'url': 'https://i.imgur.com/8qV0LZs.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024031',\n",
" 'url': 'https://i.ytimg.com/vi/C8sn0VY6zEo/0.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024038',\n",
" 'url': 'http://blog-imgs-43.fc2.com/d/o/n/donkichirou/WinUtilities12.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024053',\n",
" 'url': 'https://www.jeffhendricksondesign.com/wp-content/uploads/2016/04/hanley-retro-font-12.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024095',\n",
" 'url': 'https://media.karousell.com/media/photos/products/2018/02/26/162652_156521227_thumbnail.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024119',\n",
" 'url': 'http://o.aolcdn.com/dims-shared/dims3/GLOB/crop/1499x999+0+0/resize/590x393!/format/jpg/quality/85/http://o.aolcdn.com/hss/storage/midas/30a7c153cbd10e5de0bc56700199833a/204146627/MASP3313.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024120',\n",
" 'url': 'https://base-ec2if.akamaized.net/w=300a=0q=90u=1/images/item/origin/96e00bcee336876e16c12010c5252fa1.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024142',\n",
" 'url': 'https://olxlbimages-a.akamaihd.net/43f8b3582dda3424f27f9b3d80f866a3/olxlb_2256154_1_644x461.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024181',\n",
" 'url': 'https://www.thedigitalhash.com/wp-content/uploads/2019/07/xmukul2.jpg.pagespeed.ic.Nh_D9ZiO57.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024203',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/1506/5418/products/ELECTRICAL_POWER_SHUNT_TRIP_large.JPG?v=1524148794'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024209',\n",
" 'url': 'https://d1mi3s36zg393u.cloudfront.net/event/183046/5f6700fc83fc400882b1342359bf0209.image!jpeg.377782.jpg.thelovefestivalmx650X650.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024211',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0920/5942/products/HM_2014_Archive_large.jpg?v=1438716132'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024426',\n",
" 'url': 'https://lh3.googleusercontent.com/proxy/WYKepEFhldG2ZA5w46bkui825WJYxaIUDQlkjO55ZXu5PYZNKPfl1PFcDC15Cg6X4g2gbX23dkSSJSEfSLBwd864w4Cg69HGZEwRMq2ryPgxhddfnE9WstLMllJq2qXFIuihIbfB7pBORAwIZYoo7zYDDgcCCKszD4ms0A=w530-h298-p'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024441',\n",
" 'url': 'https://www.egrabber.com/blog/wp-content/uploads/2015/04/b2b_blog.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024455',\n",
" 'url': 'https://www.gamesforcats.com/wp-content/uploads/2019/10/Word-Villas-Answers-and-cheats.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024500',\n",
" 'url': 'https://wgna.com/files/2012/10/night-at-north-pole-logo-1.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024571',\n",
" 'url': 'http://blog.contentools.com/wp-content/uploads/2018/09/3-types-600x300.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024608',\n",
" 'url': 'https://farm3.staticflickr.com/2192/2407869633_fb2b491ddc.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024639',\n",
" 'url': 'https://mlpnk72yciwc.i.optimole.com/cqhiHLc.WqA8~2eefa/w:350/h:350/q:75/rt:fill/g:ce/https://bleedingcool.com/wp-content/uploads/2017/05/Perception6.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024678',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/1631/5609/articles/Namm2020andSpeakerCompressionFeaturedImage_345x.png?v=1580256974'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024822',\n",
" 'url': 'https://s3.amazonaws.com/saumcbrmedia/wp/wp-content/uploads/2018/03/10111147/easter-egg-hunt-18.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024946',\n",
" 'url': 'https://content.cdntwrk.com/mediaproxy?url=https%3A%2F%2Fi.ytimg.com%2Fvi%2FyoTvjWxXlow%2Fhqdefault.jpg&size=1&version=1541786369&sig=b0ea12890c1f4eb644f3d4205c361510&default=hubs%2Ftilebg-videos.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024948',\n",
" 'url': 'https://petgroomershenandoahvalley.com/wp-content/themes/apm-2018/images/logo.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024994',\n",
" 'url': 'https://i.scdn.co/image/112994ca261bb7dfb627b9f76cc7ddbafc016a36'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000024999',\n",
" 'url': 'https://dx72k0ec4onep.cloudfront.net/product/3512/1085889221/GNA5EA-1500730304-390x390-double_trouble_12.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025018',\n",
" 'url': 'http://4.bp.blogspot.com/-gWZ-YdGG_zk/VwMNaYl515I/AAAAAAAACEI/gR0NPMDWy9szjhswOMeVTj519gnpfGZug/s270/Earth%2BDay%2BActivities%2B-%2BThumbnail.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025040',\n",
" 'url': 'https://www.angelsachse.de/images/product_images/info_images/4603_0.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025059',\n",
" 'url': 'http://www.geeksofdoom.com/GoD/img/2013/03/2013-03-05-game_of_thrones_dragon.jpeg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025164',\n",
" 'url': 'https://pictures.abebooks.com/isbn/9781557420589-es-300.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025265',\n",
" 'url': 'http://cdn.shopify.com/s/files/1/0207/0894/products/bball-labels1_grande.png?v=1425494100'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025309',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0091/7579/3744/t/2/assets/logo.png?2667'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025485',\n",
" 'url': 'https://d3t3ozftmdmh3i.cloudfront.net/production/podcast_uploaded/1478497/1478497-1550942884084-6314ed1f9c1e2.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025507',\n",
" 'url': 'http://covestreetcapital.com/wp-content/uploads/2017/02/Give_ChildrensHospitalLA.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025598',\n",
" 'url': 'https://www.hrstransportinc.com/content/uploads/2016/04/cropped-logo.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025611',\n",
" 'url': 'http://image.tmdb.org/t/p/w300/ef4gmMkP4At2TZbHJ2lViwFG53S.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025613',\n",
" 'url': 'https://www.lifewithbabykicks.com/wp-content/uploads/2015/01/clean-eating-peanut-butter-flapjacks-1.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025649',\n",
" 'url': 'http://rooms101.com/images/og/image.php?c=3137'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025669',\n",
" 'url': \"https://cdn.shopify.com/s/files/1/1211/5954/products/watermelon_760x.jpg?v=1546411434'\"},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025675',\n",
" 'url': 'https://www.literaryroadhouse.com/wp-content/uploads/2015/11/Literary-Roadhouse-Header-PC-300x300.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025704',\n",
" 'url': 'https://warrenisweird.files.wordpress.com/2015/11/top05winter.jpg?w=547&h=391'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025710',\n",
" 'url': 'http://i1.ytimg.com/vi/51fk0Mj5oLE/0.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025780',\n",
" 'url': 'https://i0.wp.com/frame-poythress.org/wp-content/uploads/2015/02/poythresssociology_shrink.png?fit=298%2C475&ssl=1'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025819',\n",
" 'url': 'https://fleetimages.bobitstudios.com/upload/government-fleet/content/news/logos/gfx-logo-hr-01-__-600x300-a.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025829',\n",
" 'url': 'http://static1.squarespace.com/static/54980ccbe4b08da3f829c707/t/54a269b2e4b0bcb26c2e5655/1508233935044/?format=1500w'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025892',\n",
" 'url': 'https://img.reelgood.com/content/movie/cbd7b705-0d13-4d92-8253-916259cbe09f/poster-342.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025907',\n",
" 'url': 'https://www.onplatinum.com.au/wp-content/uploads/2019/05/Phishing-Scam-Emails.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025917',\n",
" 'url': 'https://d1e4pidl3fu268.cloudfront.net/880d3898-2a29-4d76-835e-6bcd0019fc3a/schoolICTpolicydevelopment.crop_651x488_740.preview.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025931',\n",
" 'url': 'https://us.123rf.com/450wm/uniyok/uniyok1509/uniyok150900018/45649395-stock-vector-template-design-of-logo-stamp-silhouette-hello-autumn-watercolor-orange-texture-vector.jpg?ver=6'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000025948',\n",
" 'url': 'https://memegenerator.net/img/instances/10576464/live-mediocre-life-die-at-age-72.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026049',\n",
" 'url': 'http://tse2.mm.bing.net/th?id=OIP.q8GWyJqNZxQWan3TLOfcQgHaI2'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026070',\n",
" 'url': 'https://fromabcstoacts.com/wp-content/uploads/2018/06/A-Camping-Spree-with-Mr-Magee.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026126',\n",
" 'url': 'https://brandslogo.net/wp-content/uploads/2015/07/auto-meter-logo-vector-download.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026181',\n",
" 'url': 'http://soundings.com/wp-content/uploads/2010/10/cd_350px_healing-waters.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026184',\n",
" 'url': 'https://images.squarespace-cdn.com/content/v1/542451b5e4b0b6739e3ba5ec/1505874996302-GCDBAMRJECCA60AX9O1Q/ke17ZwdGBToddI8pDm48kP06O0_IHyRXSOOiqwgWaApZw-zPPgdn4jUwVcJE1ZvWEtT5uBSRWt4vQZAgTJucoTqqXjS3CfNDSuuf31e0tVEHLRkg2cosQUGLeQ33UzXdgIxPDaVwE3LlEpL74qP4JVW4jCyXLPvvdR287iymYt8/wild-pitch-podcast.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026211',\n",
" 'url': 'https://images.internetstores.de/products//675540/02/30534a/Jack_Wolfskin_Milton_Gloves_grey_heather[280x280].jpg?forceSize=true&forceAspectRatio=true&useTrim=true'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026235',\n",
" 'url': 'http://riverside.n-yorks.sch.uk/images/images/website-graphics/logos/sainsburys-school-games-gold-16-17-flat.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026239',\n",
" 'url': 'https://www.graphicsfactory.com/clip-art/image_files/image/8/1556448-Professor-Or-Scientist-Cartoon-Character-Holding-A-Pointer-With-Speech-Bubble-Vector-Illustration-Flat-Design-With-Background.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026371',\n",
" 'url': 'https://nscurl.com/wp-content/uploads/2019/09/coaches_week_en.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026384',\n",
" 'url': 'https://www.happydecal.ca/image/cache/data//Q116%20%E6%95%88%E6%9E%9C%E5%9B%BE1-350x280_0.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026408',\n",
" 'url': 'https://a57.foxnews.com/images.foxnews.com/content/fox-business/markets/2017/05/09/allergan-reports-1q-loss/_jcr_content/par/featured-media/media-0.img.jpg/932/470/1494330210073.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026435',\n",
" 'url': 'https://image.spreadshirtmedia.com/image-server/v1/compositions/P1010395418T1188A70PC1017383866PA2539PT10X0Y19S24/views/1width=300height=300appearanceId=70backgroundColor=E8E8E8version=1456746674/penguin-with-a-hokey-stick-buttons-iphone-7-rubber-case.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026451',\n",
" 'url': 'https://www.fairtradeproduktetest.com/wp-content/uploads/2019/03/Fairtrade-Produkte-Test-Logo_Stand-20190320.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026574',\n",
" 'url': 'https://seguinfoundation.org/wp-content/uploads/2019/06/LogoSquished.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026649',\n",
" 'url': 'https://images.sftcdn.net/images/t_optimizedf_auto/p/a10e795c-9b6a-11e6-9c6d-00163ec9f5fa/397203148/gta-liberty-city-stories-logo.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026686',\n",
" 'url': 'http://mcdn.zulily.com/images/cache/product/350x1000/57024/collins_cs6344_creamandred_1374712683.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026729',\n",
" 'url': 'https://img.sanctuary.fr/fiche/300/5215.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026776',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/1326/8593/products/made_in_1987Sweatshirts_Layer_1_grande.jpg?v=1468948927'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026789',\n",
" 'url': 'https://www.reversepaisa.com/wp-content/uploads/2020/05/images-1.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026791',\n",
" 'url': 'http://www.calspasfremont.com/img/logos/made-in-us-lg-bg.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026840',\n",
" 'url': 'https://rjs-industrie-resine-france.com/wp-content/uploads/2020/02/cropped-lofo-rjs-1.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000026931',\n",
" 'url': 'https://images.squarespace-cdn.com/content/5a77ba07aeb6254c5dc87624/1619738538983-4C5Z2A2V7SL4LZV0GW1K/unusual_light_source.jpg?content-type=image%2Fjpeg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000027057',\n",
" 'url': 'https://static1.bigstockphoto.com/2/1/8/large2/81299213.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000027070',\n",
" 'url': 'https://img0.etsystatic.com/101/1/8703712/il_340x270.894142720_pzk0.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000027145',\n",
" 'url': 'https://img1.liveinternet.ru/images/attach/c/10/109/9/109009223_Mini_Motif_crochet_pattern_000.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000027150',\n",
" 'url': 'https://cdn.abclocal.go.com/images/otrc/2010/photos/free-agents_nbc.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000027242',\n",
" 'url': 'http://rlv.zcache.com/save_the_date_wedding_personalized_invite-r07b55a4e7bc74ff3b2d985f93a3741a2_8dnmv_8byvr_325.jpg?bg=0xffffff'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000027270',\n",
" 'url': 'https://i.ytimg.com/vi/fHkLZC2A9Wo/hqdefault.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000027285',\n",
" 'url': 'https://guitargeargiveaway.co.uk/wp-content/uploads/2020/11/500-Christmas-Cash-600x600.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000027359',\n",
" 'url': 'https://www.techmen.net/wp-content/uploads/2018/03/1-21.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000027556',\n",
" 'url': 'https://dasg7xwmldix6.cloudfront.net/hostpics/d861f0b7-092b-4296-a3fd-3500ce729eab_podcast_art__talk_star_wars_rob_wade.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000027594',\n",
" 'url': 'https://eventticketboss.com/sites/default/files/styles/focal_cropped_thumbs/public/Cher_510x475-0c24c1d49b.png?itok=9vBJLU-d'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000027623',\n",
" 'url': 'https://images-na.ssl-images-amazon.com/images/I/61HDiD8wJqL._SL300_.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000027637',\n",
" 'url': 'http://dvdyatii.com//media/prod_20110617025507.pjpeg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000027647',\n",
" 'url': 'https://d.gr-assets.com/books/1403198824l/20757532.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000027788',\n",
" 'url': 'https://media2.wnyc.org/i/800/0/c/80/photologue/photos/no-more-corruption.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000027797',\n",
" 'url': 'http://i.ytimg.com/vi/yBbmxkoi3QY/0.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000027935',\n",
" 'url': 'http://cdn.images.express.co.uk/img/dynamic/1/590x/daily-milk-chocolate-429500.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000028026',\n",
" 'url': 'https://baobook.pl/2009-home_default/tickly-christmas-wibbly-pig.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000028065',\n",
" 'url': 'https://keep-calm.net/images/keep-calm-and-drink-tea-600-800-white-green.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000028127',\n",
" 'url': 'https://us.123rf.com/450wm/popcar/popcar1812/popcar181200003/115903656-farmers-market-metal-sign-with-retro-pickup-.jpg?ver=6'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000028395',\n",
" 'url': 'https://jellytoastblog.com/wp-content/uploads/2014/12/OPA_HolidayScramble_graphic_3.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000028406',\n",
" 'url': 'http://thepost.s3.amazonaws.com/wp-content/uploads/2013/03/national-weather-service-logo.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000028493',\n",
" 'url': 'https://base-ec2if.akamaized.net/w=500a=0q=90u=1/images/item/origin/0e8f2282a2bb6a84a8334dc455fd3010.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000028563',\n",
" 'url': 'http://tweitesfamilyfarm.com/wp-content/uploads/2018/12/Celebrating-31-Years.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000028683',\n",
" 'url': 'https://thecommoncentsclub.com/wp-content/uploads/2018/11/Making-Sense-of-Affiliate-Marketing.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000028708',\n",
" 'url': 'https://image.spreadshirtmedia.com/image-server/v1/compositions/T1186A1PA2537PT17X12Y2D1010534728S25/views/1width=300height=300appearanceId=1/curly-hair-don-t-care-adjustable-apron.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000028894',\n",
" 'url': 'https://d3rbxgeqn1ye9j.cloudfront.net/fileadmin/_processed_/f/c/csm_uvex-apache-folder-eyeprotection-for-special-forces_28211ac607.jpg?1519809586'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000028955',\n",
" 'url': 'https://i.etsystatic.com/13863278/d/il/db4673/1671561977/il_340x270.1671561977_dk79.jpg?version=0'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029093',\n",
" 'url': 'https://www.puromanga.net/wp-content/uploads/2018/11/1-MARVEL--300x300.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029151',\n",
" 'url': 'https://media.gettyimages.com/photos/indian-cricketers-pose-for-photographers-after-their-victory-in-the-picture-id843406370?s=612x612'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029221',\n",
" 'url': 'http://spatialplanningtudelft.org/wp-content/uploads/2016/03/Pages-from-Fresh-Eyes-on-the-Refugee-Crisis_titel-290x290.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029259',\n",
" 'url': 'https://i.ytimg.com/vi/PveEqxH2sfM/maxresdefault.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029300',\n",
" 'url': 'https://kbimages1-a.akamaihd.net/a93bc055-2ee0-4e26-b006-2d673177a18a/353/569/90/False/the-meaning-of-relativity-illustrated.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029319',\n",
" 'url': 'http://i3.cpcache.com/product/367233782/the_man_behind_the_belly_greeting_card.jpg?width=550&height=550&Filters=%5b%7b%22name%22%3a%22background%22%2c%22value%22%3a%22F2F2F2%22%2c%22sequence%22%3a2%7d%5d'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029325',\n",
" 'url': 'http://4.bp.blogspot.com/-_7GA4rftsP0/Wf_IuXZZ5hI/AAAAAAAAFTg/nF8V7Ra5-kUgVUJsEBdq7fpXX06_58inACK4BGAYYCw/s1600/Remakes%2BBlogfest.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029432',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0264/1553/0030/t/7/assets/logo.png?v=10844027794414651129'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029446',\n",
" 'url': 'https://img1.fold3.com/img/thumbnail/162005711/300/400/0_0_1179_1755.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029503',\n",
" 'url': 'https://www.cdfinancial.co.uk/wp-content/uploads/2021/04/helptobuy-img.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029510',\n",
" 'url': 'http://ecx.images-amazon.com/images/I/51fML9Jn00L._SL300_.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029528',\n",
" 'url': 'https://cdn.mytheatreland.com/images/show/27360_show_portrait_large.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029576',\n",
" 'url': 'http://www.tabletalkmedia.co.uk/wp-content/uploads/2015/06/penguin-random-house-600x403.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029620',\n",
" 'url': 'https://i2.wp.com/www.speechtherapyfun.com/wp-content/uploads/2016/07/Slide2.png?resize=576%2C394'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029693',\n",
" 'url': 'https://chirosportsandwellness.com/wp-content/uploads/2018/01/chiro-logo.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029721',\n",
" 'url': 'http://b.vimeocdn.com/ps/665/723/6657231_300.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029728',\n",
" 'url': 'https://static5.groundgame.com/eng_il_Collection-Athletic-2-0-Kids-393.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029740',\n",
" 'url': 'https://www.shenzhenjewelleryfair.com/Portals/35/Jewellery_and_Gem_Shenzhen_logo_RGB_L.png?ver=2020-02-06-184806-083'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029772',\n",
" 'url': 'https://rosenthal-c00.kxcdn.com/thumbnails/images/showcase/f5/6f/fc/de/25container_498x750_food_presenter_20-w341-center.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029797',\n",
" 'url': 'https://www.thecometonline.com/wp-content/uploads/2021/02/A-guide-to-a-healthier-quarantine.png'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029803',\n",
" 'url': 'http://vafloc02.s3.amazonaws.com/isyn/images/f828/img-2139828-m.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029808',\n",
" 'url': 'https://us.123rf.com/450wm/bartusp/bartusp1902/bartusp190200390/116736627-different-letters-flag-of-france-and-question-do-you-speak-french.jpg?ver=6'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029864',\n",
" 'url': 'https://worldtruthvideos.website/upload/photos/2021/04/NNS43eVslWQvyM6E3fua_27_1fb2acbd3637ffe9bef8383b5451e864_image.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029910',\n",
" 'url': 'https://ecdn.teacherspayteachers.com/thumbitem/-1-Deal-Number-Cards-0-100-3308792-1501698550/original-3308792-1.jpg'},\n",
" {'ex_idx': '00002',\n",
" 'in_idx': '000029920',\n",
" 'url': 'https://img1.etsystatic.com/201/0/17118105/il_340x270.1477136441_dal3.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000030183',\n",
" 'url': 'http://rlv.zcache.co.uk/keep_calm_and_drink_wine_grapes_alcohol_social_dri_case-r0f0ac9275ce24ad5b9a308f443d58d51_80cs8_8byvr_324.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000030294',\n",
" 'url': 'https://ia802703.us.archive.org/view_archive.php?archive=/21/items/olcovers199/olcovers199-L.zip&file=1994634-L.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000030371',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/1917/6859/products/DSC_6743_1024x1024.jpg?v=1573500607'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000030472',\n",
" 'url': 'https://thumbs4.ebaystatic.com/d/l300/pict/162348978159_1.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000030546',\n",
" 'url': 'https://img-1.fyyd.de/pd/layout/35224bede40395450f977de3d010f6613d86.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000030588',\n",
" 'url': 'https://www.holdson.com/images/thumbs/0006534_4m-science-motorised-robot-hand_360.jpeg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000030594',\n",
" 'url': 'http://i.ytimg.com/vi/TU_hEUl9cMo/hqdefault.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000030704',\n",
" 'url': 'https://images.squarespace-cdn.com/content/51752444e4b0f7b91d66f159/1513215184927-03YZELSF26HA9V3LSYOI/?format=1000w&content-type=image%2Fjpeg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000030725',\n",
" 'url': 'https://static3.bigstockphoto.com/thumbs/9/1/5/large2/51984985.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000030888',\n",
" 'url': 'https://www.uphe.com/sites/default/files/styles/scale__344w_/public/2015/04/025192208942_DVD_2D-X.png?itok=wQRb_UYH'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000030920',\n",
" 'url': 'https://www0.alibris-static.com/the-art-of-the-lord-of-the-rings/isbn/9780618510986_l.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031036',\n",
" 'url': 'https://iwantnitroicecream.com/wp-content/uploads/fun-mathheets-multiplication-flower-for-kids-printable-pack-the-color-by-montessoriheet-waldorf-nature-stunning-672x1008.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031067',\n",
" 'url': 'https://i1.wp.com/8subjects.com/wp-content/uploads/2017/08/plugins.jpg?fit=1280%2C720'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031085',\n",
" 'url': 'https://freevector.co/wp-content/uploads/2011/09/fx-by-ram-golf.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031100',\n",
" 'url': 'https://s3.amazonaws.com/tkpro-assets/bow_2020/bow_2020_badges(120x120).png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031128',\n",
" 'url': 'http://www.plant-magic.co.uk/userfiles/blog/t_56.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031158',\n",
" 'url': 'https://image.spreadshirtmedia.com/image-server/v1/compositions/1016496174/views/1width=300height=300appearanceId=351backgroundColor=E8E8E8version=1489401380/it-s-never-too-late-mature-couple-wedding-men-s-t-shirt.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031187',\n",
" 'url': 'https://mfcdn.de/product/300x500/louis-vuitton-clochette-mit-schloss-388c6c.jpeg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031194',\n",
" 'url': 'https://images10.newegg.com/ProductImage/A12K_129887030631380063OvAsGfwBJD.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031315',\n",
" 'url': 'https://i.ytimg.com/vi/ZBtdTiHsQqI/0.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031359',\n",
" 'url': 'https://images.justlanded.com/directory_images/Ireland_Dublin/78/TOPCHEFS-Careers-Recruitment/photo/big_scaled_86701_11211_logo.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031422',\n",
" 'url': 'https://is1-ssl.mzstatic.com/image/thumb/Purple114/v4/4e/7e/79/4e7e7907-db65-f9c8-a8de-b0ca66e5684e/source/512x512bb.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031449',\n",
" 'url': 'https://www.bestmessage.org/wp-content/uploads/2018/04/friday-the-13th-good-luck-messages.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031464',\n",
" 'url': 'http://fooddaycelebration.org/wp-content/uploads/2013/04/HLA_Final_Logo-large-webonly.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031492',\n",
" 'url': 'https://prodimage.images-bn.com/pimages/9781935978732_p0_v5_s550x406.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031543',\n",
" 'url': 'http://images.footballfanatics.com/FFImage/thumb.aspx?i=/productImages/_648000/ff_648157_xl.jpg&w=180'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031607',\n",
" 'url': 'https://1.bp.blogspot.com/-olvHmEGC6M4/XdBNsNINZWI/AAAAAAAAKnE/SCJiq-pbUs8BqQ1CDatqBTuhXp9bvrSiwCLcBGAsYHQ/s1600/parveen%2Bshakir%2Bpoetry%2Bin%2Burdu%2B%25282%2529.webp'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031713',\n",
" 'url': 'https://s.s-bol.com/imgbase0/imagebase3/large/FC/5/8/7/2/9200000035872785.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031743',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/1787/2513/products/komedie-4x-png_480x480.png?v=1539689562'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031755',\n",
" 'url': 'https://cdn.knoji.com/images/logo/slimleatherjacketscom.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031794',\n",
" 'url': 'https://lh6.googleusercontent.com/proxy/djg5z7yNFWFkBKCo_18449AAyjho8qr0ofCU91rpB9g_z-Ngu9-V1qGoACmMGWZSSNwslwou00F0dqsvtYyRhspR4a-RSY5V2dlkDfIeAcGqtWjsVc0vxZ5slPPN2oeQdxaMvaGDFSU=s0-d'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031806',\n",
" 'url': 'http://e2.365dm.com/14/01/16-9/20/li-na_3060486.jpg?20140103114207'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031838',\n",
" 'url': 'http://oldies.scdn5.secure.raxcdn.com/i/boxart/w340/17/36/883316173602.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031839',\n",
" 'url': 'https://res-4.cloudinary.com/simpleview/image/fetch/c_fillf_autoh_300q_75w_300/https://res.cloudinary.com/simpleview/image/upload/v1427052866/clients/roanoke15/Roanoke_Awards_58a27038-6644-4064-a4ba-b14f3edc8852.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031847',\n",
" 'url': 'http://www.texascraftykitchen.com/wp-content/uploads/2016/04/A-Z-Kitchen-Organizing-Pin.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031892',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0057/6159/7510/products/41wzje-SuML_bc03befa-698a-4e0c-bce1-0bf219aef904_195x195@2x.jpg?v=1575474619'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031909',\n",
" 'url': 'https://img.youtube.com/vi/U027HlrtwzA/0.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031948',\n",
" 'url': 'https://www.surfertoday.com/images/stories/surfsummerdeals.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031983',\n",
" 'url': 'https://static.edealer.ca/V3_1/assets/images/new_vehicles_images_coming.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000031988',\n",
" 'url': 'https://cdn11.bigcommerce.com/s-eoqdgo/images/stencil/500x659/products/5633/79090/997e4524f8d216fc4ca78391c9bbd6be696e1ce6__46306.1580691796.jpg?c=2'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032068',\n",
" 'url': 'https://noiseandheatreduction.co.za/wp-content/uploads/2018/10/cropped-Logo.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032079',\n",
" 'url': 'http://i4.ytimg.com/vi/pOmtzdSLVIQ/0.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032132',\n",
" 'url': 'https://i.ytimg.com/vi/1JxL7qaDk9w/0.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032157',\n",
" 'url': 'http://ecx.images-amazon.com/images/I/61-19os-saL.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032160',\n",
" 'url': 'https://image.spreadshirtmedia.net/image-server/v1/compositions/P21743842T281A2PC31660930PA447PT17X38Y49S52/views/2width=300height=300appearanceId=2backgroundColor=E8E8E8version=1464324274/los-angeles-california-barneskjorter-poloskjorte-slim-for-menn.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032162',\n",
" 'url': 'https://cdn.quotesgram.com/small/88/50/1056512902-Quotes_to_Start_the_New_School_Year_from_Clever_Classroom.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032176',\n",
" 'url': 'http://cdn2.bigcommerce.com/n-biq04i/6c9tf/products/86/images/284/preserves_blackberry__16548.1383772329.386.513.jpg?c=2'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032190',\n",
" 'url': 'https://d1wli5mq9yq9mw.cloudfront.net/files/cards/full/JPSTATIONERY104.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032217',\n",
" 'url': 'http://tse4.mm.bing.net/th?id=OIP.EGvJhvpFhfHYlXptmfpCIAHaH_'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032285',\n",
" 'url': 'http://www.meteocat.org/wp-content/uploads/2020/09/coloring-pages-ideas-fantasticesus-loves-me-printables-photo-inspirations-page.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032323',\n",
" 'url': 'https://static3.bigstockphoto.com/thumbs/2/5/7/large2/75245572.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032330',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/1730/4297/products/CL-BR-FLM_1ba4d98f-a371-4ad6-a031-d4624534a029_195x195@2x.jpg?v=1535396230'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032331',\n",
" 'url': 'http://blog.continentalclub.co.uk/wp-content/uploads/2016/11/Continental-Club-Air-Fare-Alert-BA-BF-630x490.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032393',\n",
" 'url': 'https://thelovelylifestyle.files.wordpress.com/2013/11/today-is-a-good-day-tea-towel.jpg?w=500'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032396',\n",
" 'url': 'http://ourpeacefulplanet.com/wp/wp-content/uploads/2015/03/Clothespin-Dragon-Fly-Note-Holder-Tutorial.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032450',\n",
" 'url': 'https://i0.wp.com/www.rushinformation.com/wp-content/uploads/2016/04/Farming-Simulator-compressed.jpg?fit=613%2C331'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032642',\n",
" 'url': 'http://freevector.co/wp-content/uploads/2009/04/nnmotors.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032705',\n",
" 'url': 'http://img2.imagesbn.com/p/9780124016781_p0_v1_s260x420.JPG'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032804',\n",
" 'url': 'https://gentlemint-media.s3.amazonaws.com/images/2016/10/15/829d37ff.jpg.757x975_q85.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032805',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0958/5794/products/Green-Bay-Packers-50-60-Singular-Printed-Throw-Blanket_58378d5a-73bd-4cb3-a656-98c5f0dc419a_large.jpg?v=1551304842'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032859',\n",
" 'url': 'https://motionarray-portfolio.imgix.net/preview-86972-f4c9eccd90c94854b5c769851f7a4e11-low.png?w=660&q=60&fit=max&auto=format'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032886',\n",
" 'url': 'https://i.imgur.com/4yBk2ar.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032899',\n",
" 'url': 'http://static9.depositphotos.com/1006708/1121/i/450/depositphotos_11218814-Last-Will-and-Testament-and-glass-of-whiskey.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032904',\n",
" 'url': 'https://content.cdntwrk.com/mediaproxy?url=https%3A%2F%2Fwww.concordiatechnology.org%2Fhubfs%2F_blogs%2Ftechnology-and-your-ministry%2F2018%2F09%2Fcontinued-education-free-ways-to-continue-your-education-seminary-youtube-blog-post.png%3Ft%3D1543598284641%23keepProtocol&size=1&version=1543607592&sig=0e62bbc1a99b77d2f05bff15bbf5528e&default=hubs%2Ftilebg-blogs.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032939',\n",
" 'url': 'https://www.yesteryearbooks.co.uk/assets/images/product/052297.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032949',\n",
" 'url': 'https://d24m66tiq5iban.cloudfront.net/pics/property/397739282/3/IDX_3/v3//crop/656400/'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032975',\n",
" 'url': 'https://d2pafxp37ue0ak.cloudfront.net/system/zen_products/images/4988/shop/2014_0205_ka081.jpg?1553544215'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000032984',\n",
" 'url': 'http://beautygeekuk.com/wp-content/uploads/2015/05/Mavala-Summer-and-Garden-Party-Collection1-960x1024.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033028',\n",
" 'url': 'https://nordicbyname.dk/wp-content/uploads/2020/08/Glitter_sort.boss_11promax-300x300.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033036',\n",
" 'url': 'https://platform.nashvilleparent.com/media/BambiniVillage-LOGO-web.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033077',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/2099/9145/products/tacoma-fuji-records-tacoma-fuji-records-inc-t-shirt-navy-supplies-and-co-2_600x.jpg?v=1599138331'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033221',\n",
" 'url': 'https://s1.yimg.com/bt/api/res/1.2/otDIwb0r06dzauz43WN2Mg--/YXBwaWQ9eW5ld3M7Zmk9ZmlsbDtoPTI2MTtweW9mZj0wO3E9NzU7dz0zNTA-/http://media.zenfs.com/en_US/News/TheWrap/directv_main.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033226',\n",
" 'url': 'http://rlv.zcache.co.uk/kiev_ukraine_english_ukrainian_language_key_ring-rb5cabfe968214cb989fb14f2d1b6018c_x76wx_8byvr_324.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033244',\n",
" 'url': 'https://img1.etsystatic.com/016/0/6652828/il_340x270.454627855_68lv.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033294',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0781/1065/products/2DTHRFLAME10000_300x300.png?v=1577570006'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033361',\n",
" 'url': 'https://www.nissan-cdn.net/content/dam/Nissan/gb/vehicles/Navara-NP300/d23/1_carryover/overview/Navara_Video_HackNo4_BcFsAlqg2jE_Thumbnail.png.ximg.l_full_m.smart.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033364',\n",
" 'url': 'http://i.vimeocdn.com/portrait/3348655_300x300.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033428',\n",
" 'url': 'https://img.evbuc.com/https%3A%2F%2Fcdn.evbuc.com%2Fimages%2F91987371%2F403569244279%2F1%2Foriginal.20200211-180055?w=512&auto=format%2Ccompress&q=75&sharp=10&rect=1%2C312%2C1274%2C637&s=18b2ae1659447d0fcf839237c4d89860'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033433',\n",
" 'url': 'https://gravelroadseries.it/wp-content/uploads/2019/12/specoalized-logo-1024x394.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033492',\n",
" 'url': 'https://bioprox.es/1179-home_default/vitobest-fat-burner-triple-accion-90-caps.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033522',\n",
" 'url': 'https://upload.wikimedia.org/wikipedia/en/8/82/Joanna_-_Kool_%26_The_Gang.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033548',\n",
" 'url': 'http://i.vimeocdn.com/portrait/853297_300x300.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033586',\n",
" 'url': 'https://pcgimg.azureedge.net/Upload/Product/37771-i-just-wanna-dance-Photo1-20200103114654.jpg?maxwidth=700'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033608',\n",
" 'url': 'https://cdn-ec.niceshops.com/upload/image/product/medium/default/sylveco-firming-natural-soap-120-ml-1219283-it.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033629',\n",
" 'url': 'https://i.ytimg.com/vi/jqmTALgobsU/0.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033700',\n",
" 'url': 'https://m.media-amazon.com/images/I/51AWBdzXORL._SL320_.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033710',\n",
" 'url': 'https://webassets.inman.com/wp-content/uploads/2014/12/linkedin-lead-generation-1400x621.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033730',\n",
" 'url': 'https://skoolopedia.com/app/upload/2017/02/c5276aec0a2a481e9e3295e99efac9d0.jpeg.square-sm.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033751',\n",
" 'url': 'https://i.pinimg.com/736x/43/0f/f5/430ff5977045e7a4c8d9a61598681d8c--chalk-fonts-chalk-lettering.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033762',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/1308/1459/products/my-gardening-mug_per302-001_grande.jpg?v=1474976923'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033807',\n",
" 'url': 'https://i.imgflip.com/1ujcg2.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000033985',\n",
" 'url': 'http://ecx.images-amazon.com/images/I/51s6FAS-fXL.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000034033',\n",
" 'url': 'https://zezeewithbooks.files.wordpress.com/2015/09/crown-of-midnight.jpg?w=645'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000034067',\n",
" 'url': 'http://i01.i.aliimg.com/wsphoto/v0/436016105/Free-Shipping-New-Swimmer-ipx8-Sport-Waterproof-MP3-Player-2GB-Swimming-Running-Surfing-Blue-color-.jpg_350x350.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000034250',\n",
" 'url': 'http://s3.amazonaws.com/libapps/accounts/50774/images/idea_lab_logo.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000034264',\n",
" 'url': 'https://mobimg.b-cdn.net/v2/fetch/81/81343e5a428bfd8d14dbb5878cf42081.jpeg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000034325',\n",
" 'url': 'https://s3.amazonaws.com/webassets.ticketmob.com/TS/images/comedians/LaGrangeatributetoZZTop.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000034331',\n",
" 'url': 'https://www.clumsycrafter.com/wp-content/uploads/2013/11/7-Elf-on-the-Shelf-Ideas.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000034440',\n",
" 'url': 'https://ecelonline.disaldigital.com.br/content/images/thumbs/0069508_macmillan-english-grammar-in-context-intermediate-without-key-with-cd_550.jpeg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000034457',\n",
" 'url': 'https://npirtube.com/wp-content/uploads/2017/04/plugin_yeelight_desk_lamp.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000034480',\n",
" 'url': 'https://images.squarespace-cdn.com/content/v1/538d1920e4b0369e0b93e74b/1415043461328-Q4T9H9ATMQYM3JOLR09L/ke17ZwdGBToddI8pDm48kPx25wW2-RVvoRgxIT6HShBZw-zPPgdn4jUwVcJE1ZvWQUxwkmyExglNqGp0IvTJZUJFbgE-7XRK3dMEBRBhUpwGbtSA7WutlFA3XjmDXUDFwmxX_uEhqHOBUlPnU0mYmf1Qvd6diXKmxQIX-f1CXeo/Child-of-the-Chozo-Will-Brueggemann-super-marcato-bros'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000034567',\n",
" 'url': 'https://menslifeadvice.com/wp-content/uploads/2015/03/fi_17-2.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000034609',\n",
" 'url': 'https://www.signs.com/blog/wp-content/uploads/2019/04/Ferrari-Acquisition-02.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000034676',\n",
" 'url': 'https://images.bwbcovers.com/054/9780547006956.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000034714',\n",
" 'url': 'http://kingstonpound.org/wp-content/uploads/K%C2%A3-trail-1-3-1.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000034738',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0354/9655/8651/products/3bb52418-3baf-5537-9ecf-05ec0794ff4a_71e9cef4-0c33-4979-8d66-39d011890d02_1024x1024.jpg?v=1596057924'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000034749',\n",
" 'url': 'http://www.londoncoins.co.uk/img.php?a=157&l=2742&f=r&s=t'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000034789',\n",
" 'url': 'https://flipgive.imgix.net/images/campaigns/photos/000/051/929/width_480/1479752732AHSwimTeam_App.png?ch=Width%2CDPR%2CSave-Data&auto=format%2Ccompress&dpr=2&format=jpg&w=263'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000034817',\n",
" 'url': 'https://www.fabulous-femme.com/wp-content/uploads/2015/05/lifeistilive-quote.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000034876',\n",
" 'url': 'https://base-ec2if.akamaized.net/w=2048a=0q=90u=0/images/user/logo/8311a3198df4251f9eee72b06cf3463d.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000035021',\n",
" 'url': 'https://cdn.fitimg.in/studio_logo_34AE8B5F1E4DC5.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000035050',\n",
" 'url': 'https://cooklikejames.typepad.com/.a/6a010536eec1a6970c0133eca04c3e970b-800wi'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000035114',\n",
" 'url': 'https://i2.wp.com/owsaprint.com/wp-content/uploads/2019/03/Screenshot_46.jpg?resize=300%2C300&ssl=1'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000035143',\n",
" 'url': 'https://www.catchopcd.net/4723-large_default/gfriend-2nd-mini-album-flower-bud-reissue.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000035346',\n",
" 'url': 'http://coloradopols.com/wp-content/uploads/2013/06/I-am-created-equal-logo-300x300.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000035351',\n",
" 'url': 'http://www.talkncoffee.com/images/Chemical%20Solvent%20Free%201.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000035359',\n",
" 'url': 'https://sandro-keil.de/slides/img/docker/sticker-swarm.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000035529',\n",
" 'url': 'http://ecx.images-amazon.com/images/I/51fVGnt3RTL._SL300_.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000035608',\n",
" 'url': 'https://img0-placeit-net.s3-accelerate.amazonaws.com/uploads/stage/uploaded_thumb_image/2831/large_thumb_IMG_8686_thumb.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000035735',\n",
" 'url': 'https://cdn.proline-rus.ru/b1750/7dafb/b1a33/5b9c0/c36cd/cad1e/25773/8360.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000035797',\n",
" 'url': 'http://www.HipHopSince1987.com/wp-content/uploads/2012/05/chill-moody-so-in-love-ft-aaron-camper-prod-by-dilemma-2012-HHS1987.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000035844',\n",
" 'url': 'https://snugglesquilts.com/wp-content/uploads/2017/08/blooming-patchwork-book-300x300.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000035856',\n",
" 'url': 'https://s3-ap-southeast-2.amazonaws.com/bookhunter/media/catalog/product/h/400/9/7/9780759102798.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000035954',\n",
" 'url': 'http://cdn.pastemagazine.com/www/blogs/lists/2010/12/01/best_of_2010.jpg?635298359223700934'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000035980',\n",
" 'url': 'http://rprnrwxhpnpq5p.leadongcdn.com/cloud/jkBooKnnSRmpplirj/55555555585591415252.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000036054',\n",
" 'url': 'http://cdn33.printdirect.ru/cache/product/c1/41/4032859/tov/all/400z400_front_722_0_0_0_11a7159796bd8a8e55b918a7bf7e.jpg?rnd=1338059716'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000036248',\n",
" 'url': 'https://images.squarespace-cdn.com/content/5616c092e4b06489ba877e15/1508812844111-R27JCLC1Q7L6YV9MOQNT/Dirtt-Logo-Brown_bluebigger.png?content-type=image%2Fpng'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000036367',\n",
" 'url': 'http://previewcf.turbosquid.com/Preview/2011/08/12__15_29_09/3.jpg2e4506c9-9103-4a79-bb65-c6e293f192f5Large.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000036377',\n",
" 'url': 'http://ecimages.kobobooks.com/Image.ashx?imageID=FGBNf5N-fUue3W0znxYRjA&Type=Full'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000036413',\n",
" 'url': 'https://static1.squarespace.com/static/55fc7592e4b0f81e4e5760a4/t/55ff11f7e4b08aa6c7f6a7c1/1529071091411/?format=750w'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000036441',\n",
" 'url': 'http://www.unisealshop.com/Uniseals/photos/3-inch-uniseal-insitu_2.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000036529',\n",
" 'url': 'https://is3-ssl.mzstatic.com/image/thumb/Podcasts123/v4/8f/74/39/8f74397c-1715-58dd-330b-cb7346e94a01/mza_4523520513334220402.jpg/600x600bb.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000036586',\n",
" 'url': 'https://www.interactive.org/images/games_developers/nbc_sm.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000036706',\n",
" 'url': 'https://mk0blogpublicgorygjw.kinstacdn.com/wp-content/uploads/2020/02/PublicGoods_ContentImagery_202002_CoconutShorelineWaikiki_Thumbnail_3200x3200-1.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000036746',\n",
" 'url': 'https://rlv.zcache.com/funny_accent_yankee_wicked_smart_smaht_bostonian_trucker_hat-rd12044e06a164d53bde2523641d4bb85_eahwi_8byvr_324.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000036755',\n",
" 'url': 'https://i2.wp.com/adventuresinnewengland.com/wp-content/uploads/2020/12/Mystic-Pin.png?fit=603%2C930&ssl=1'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000036762',\n",
" 'url': 'https://us.123rf.com/450wm/diagon/diagon1311/diagon131100015/23755218-furniture-icons.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000036805',\n",
" 'url': 'http://wzgamerslab.net/img_games/2018_tri1/treasure_adventure_world_principal.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000036864',\n",
" 'url': 'https://www.drfergusonaz.com/wp-content/uploads/2016/02/On-The-Road-To-Good-Dental-Health-Logo.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000036872',\n",
" 'url': 'http://images.randomhouse.com/cover/9780307388629'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000036875',\n",
" 'url': 'https://img1.od-cdn.com/ImageType-400/6611-1/776/599/81/%7B77659981-5BBD-4A42-BF90-38784BDBCF04%7DImg400.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000036886',\n",
" 'url': 'http://rlv.zcache.co.uk/celebrate_diversity_custom_announcement-r552fa0726bf44280aa04831f24b37a8a_imtq3_8byvr_324.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000036938',\n",
" 'url': 'http://static1.1.sqspcdn.com/static/f/389779/6233054/1602516308790/LDM-new-new2.png?token=1WxhCdrajbk%2BS%2FOBB2%2BMO5pCh7s%3D'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000037048',\n",
" 'url': 'http://1.bp.blogspot.com/_-0OPosueKrE/TOLJUFxlt6I/AAAAAAAAA5E/IBOKW9dJWG8/s400/acousticep.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000037095',\n",
" 'url': 'https://prnewswire2-a.akamaihd.net/p/1893751/sp/189375100/thumbnail/entry_id/0_h60be8fb/def_height/400/def_width/400/version/100012/type/1'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000037333',\n",
" 'url': 'http://i00.i.aliimg.com/wsphoto/v6/1490710460_1/New-2014-Fashion-Women-Blouses-Hot-Selling-Loose-font-b-Animal-b-font-Flower-Printed-Chiffon.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000037361',\n",
" 'url': 'https://static.ok.co.uk/media/images/300x400_ct/1150563_as_5758eccf1fa6d109e673a6a166dcf269.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000037438',\n",
" 'url': 'https://2.bp.blogspot.com/-HKJRyGBTp2Y/V5odddpIeII/AAAAAAACYbw/D7Ni6EMh8_YsqTOsRNxepYar0eOqWlMuACLcB/s1600/buttermilk%2Bpound%2Bcake%2Bcollage%2B3.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000037459',\n",
" 'url': 'http://i92.photobucket.com/albums/l37/theBULLDOGfan/oregon_lottery_300px.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000037462',\n",
" 'url': 'http://rlv.zcache.com.au/stop_global_whining_mugs-r644896d0bb574861a045d3705c0be07a_x7jgr_8byvr_324.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000037564',\n",
" 'url': 'http://3.bp.blogspot.com/-_pWLIJNlPpE/UwTS_p6NxvI/AAAAAAAAA_g/ulJrVNSc5ko/s1600/discovery-history-black-salvatier.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000037571',\n",
" 'url': 'https://sr20.driftworks.com/media/catalog/product/cache/1/small_image/280x/9df78eab33525d08d6e5fb8d27136e95/i/m/image_CVR22090P5L4566BT_18101_1_1.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000037579',\n",
" 'url': 'http://onceuponanalpha.bookblog.io/wp-content/uploads/sites/126/2016/05/Conviction-hi-res-683x1024.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000037674',\n",
" 'url': 'https://static1.bigstockphoto.com/thumbs/9/2/1/large2/129018050.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000037692',\n",
" 'url': 'https://3.bp.blogspot.com/-ZOJFCvPg4fc/VW6qJLWELOI/AAAAAAAAnS0/pojE2B10SVw/s1600/Android%2BErrore%2Bspazio%2Bdi%2Barchiviaizone%2Binsufficiente.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000037726',\n",
" 'url': 'https://www.geraldedwardwilliamshepherd.com/medias/mini/g/e/geraldshepherd/artwork/11789543_movements-in-sky-and-sand.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000037878',\n",
" 'url': 'https://www.prlog.org/12334260-disaster-recovery-answering-services.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000037887',\n",
" 'url': 'https://image.spreadshirtmedia.net/image-server/v1/compositions/137735145/views/1width=300height=300version=1456916136/nerd-geek-freak-top-top-da-donna-ecologico.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000037892',\n",
" 'url': 'https://us.123rf.com/450wm/fosin/fosin1508/fosin150800092/44414257-vector-set-of-different-glasses-on-white-background-retro-wayfarer-aviator-geek-hipster-frames-man-a.jpg?ver=6'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000037978',\n",
" 'url': 'https://www.newjerseyclub.ru/upfile/pladd/Lakers--32-Magic-Johnson-Purple-Basketball-Swingman-Statement-Edition-Jersey.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038014',\n",
" 'url': 'https://i.ytimg.com/vi/BsHztnIjVQc/hqdefault.jpg?sqp=-oaymwEjCPYBEIoBSFryq4qpAxUIARUAAAAAGAElAADIQj0AgKJDeAE=&rs=AOn4CLCYJ2LEpjEcpzWkJiEnEDlx-oRyRQ'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038063',\n",
" 'url': 'https://images-na.ssl-images-amazon.com/images/I/51F9qzVPkYL.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038077',\n",
" 'url': 'https://i.imgur.com/M0fAvZ1.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038108',\n",
" 'url': 'https://images-na.ssl-images-amazon.com/images/I/41hazvbAxNL._SX352_BO1204203200_.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038130',\n",
" 'url': 'https://secureservercdn.net/198.71.233.254/25d.494.myftpupload.com/wp-content/uploads/2017/05/cropped-logo-pic-from-phone-2_LI-6.jpg?time=1581808610'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038156',\n",
" 'url': 'http://rlv.zcache.co.nz/gerbils_are_awesome_greeting_cards-r257f1a131fd14fc39e93fae639ab11f1_xvuak_8byvr_324.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038179',\n",
" 'url': 'https://srisriuniversity.edu.in/wp-content/uploads/2018/11/5th-Convocation-Pic-1-409x258.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038250',\n",
" 'url': 'https://ntseniorscard.org.au/wp-content/uploads/Optical-Superstore-550x550.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038256',\n",
" 'url': 'http://assets.cat5.com/images/catalog/products/3/5/1/9/8/0-325-mechanix-wear-the-original-grip-black.jpg?v=15009'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038329',\n",
" 'url': 'https://careerbeacon-canada.s3.ca-central-1.amazonaws.com/company/228926/5aff0fd6190d3.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038330',\n",
" 'url': 'https://1.bp.blogspot.com/-xUzc0v_XHmE/Wz7pQKeJLYI/AAAAAAACcaw/IEiMPbdBnfYb44xMW3dMfjaliYrPYUXcwCLcBGAs/s1600/d.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038339',\n",
" 'url': 'https://s01.sgp1.cdn.digitaloceanspaces.com/book/115200-wcwlakdtfy-1556807365.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038373',\n",
" 'url': 'http://musicjap.com/images/101/various-eurobeat-best-hits.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038382',\n",
" 'url': 'https://images.squarespace-cdn.com/content/55785642e4b0e3cc13013b23/1456514420456-ZNZV3K6RTZTIL6GFNT7O/a-brialliant-smile-ebook-cover.png?content-type=image%2Fpng'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038478',\n",
" 'url': 'https://asweetlife.org/wp-content/uploads/2012/03/JDRF-type-1-diabetes-research-summit-Riva-Greenberg.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038494',\n",
" 'url': 'https://www.nikkilynndesign.com/wp-content/uploads/2017/06/Striped-Weasel-Thirteen-Lined-Ground-Squirrel-Digging-Holes-in-Garden-and-Yard.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038513',\n",
" 'url': 'https://harrowgrace.org/wp-content/uploads/2016/03/YouTube-icon.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038586',\n",
" 'url': 'https://i.servimg.com/u/f85/19/81/18/31/red-ho12.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038642',\n",
" 'url': 'https://www.zdidit.com/wp-content/uploads/2016/02/add-sitelinks-searchbox-with-yoast-seo-320x320.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038696',\n",
" 'url': 'http://www.tennisnow.com/images/2019/September/Kenin-9-20-19.aspx'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038715',\n",
" 'url': 'https://ih0.redbubble.net/image.309167950.5709/raunisex_tshirtx2000101010:01c5ca27c6front-c490436420460-pad420x460f8f8f8.u1.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038719',\n",
" 'url': 'http://freevector.co/wp-content/uploads/2012/04/clock-watching.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038726',\n",
" 'url': 'https://www.storemypic.com/images/2016/11/04/good-evening-friends-cake-74042.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038759',\n",
" 'url': 'https://files.speakerdeck.com/presentations/450cab6d72d6454e9e51a03c028bbc93/slide_30.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038792',\n",
" 'url': 'https://image.spreadshirtmedia.net/image-server/v1/mp/compositions/T814A366PA1675PT17X41Y10D16422977S106CxFFFFFF/views/1width=400height=400appearanceId=366backgroundColor=C20329noPt=trueversion=1494487205/dear-santa-i-can-explain-koszulka-dziecieca-premium.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038830',\n",
" 'url': 'http://blogue.dessinsdrummond.com/wp-content/uploads/2015/04/Drummond-House-Plans-Garage-plan-2989-32-1.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038895',\n",
" 'url': 'https://s3-eu-west-1.amazonaws.com/storage.quickbutik.com/stores/1637U/products/5bfba3aa9d77e.jpeg?w=250&h=270&fit=crop&auto=format'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038910',\n",
" 'url': 'http://i1.cpcache.com/product/183440425.jpg?height=150&width=150'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038946',\n",
" 'url': 'http://smilegreat.com/wp-content/uploads/2016/12/Pic28-cropped.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038949',\n",
" 'url': 'http://img.picturequotes.com/2/48/47743/talk-is-cheap-because-supply-exceeds-demand-quote-1.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000038952',\n",
" 'url': 'https://i2.wp.com/justabouttv.fr/wp-content/uploads/336522-9.jpg?fit=680%2C1000'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000039008',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0445/8423/9272/collections/advanced-nutrients-logo-retina_330x330@2x.png?v=1602159335'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000039107',\n",
" 'url': 'https://images-na.ssl-images-amazon.com/images/S/cmx-images-prod/Series/64553/c21780779d1f946881aab4edae2aa9f7._SX312_QL80_TTD_.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000039150',\n",
" 'url': 'https://cache-graphicslib.viator.com/graphicslib/media/06/at-moulin-rouge-en-el-molino-rojo-photo_5997062-770tall.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000039187',\n",
" 'url': 'http://cdn3.volusion.com/qvzw6.ewf2b/v/vspfiles/photos/CPYOYOAXLBLUE-2T.jpg?1366296987'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000039205',\n",
" 'url': 'http://www.reigatedrivingschools.co.uk/wp-content/uploads/2015/04/pass-you-theory-test-banner-1-495x400.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000039245',\n",
" 'url': 'https://1.bp.blogspot.com/-JPTVPAsnFfQ/VZnyUzOUJcI/AAAAAAAAF6Q/XkPghIHTFW4/s1600/AA%2BF-414%2BEnhanced.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000039325',\n",
" 'url': 'http://cdn.pastemagazine.com/www/articles/2014/03/04/PasteSXSW_InteractiveLead.jpg?635299665269807641'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000039421',\n",
" 'url': 'https://m-cdn.phonearena.com/images/articles/79373-500/Sony-Ericcson-Windows-Phone-Jolie.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000039433',\n",
" 'url': 'https://www.indiexl.nl/wp-content/uploads/2015/06/1424854448.gangoffour-300x300.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000039572',\n",
" 'url': 'https://thechirpingmoms.com/wp-content/uploads/2017/04/9-Loo-Roll-Crafts-for-Kids-Square-2.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000039588',\n",
" 'url': 'https://www.producerspot.com/wp-content/uploads/2019/06/Black-Octopus-Sound-Professional-Deep-House-Essentials-300x300.jpeg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000039661',\n",
" 'url': 'https://enamewishes.com/myimages/Best-2020-New-Year-Wish-Name-Cake-320x320.png'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000039712',\n",
" 'url': 'https://content.cdntwrk.com/mediaproxy?url=http%3A%2F%2Fshare.opsy.st%2F5ed7dcbee5d3f-Embedded-Executive-Labrosse.jpg&size=1&version=1591205125&sig=ac736b9f02b68a424e95e4ed2ce40a7d&default=hubs%2Ftilebg-blogs.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000039844',\n",
" 'url': 'https://images.vexels.com/media/users/3/154998/list/1d6a18669b809ac8120c602876af6cc2-be-creative-school-camiseta-de-diseno.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000039872',\n",
" 'url': 'https://images.saymedia-content.com/.image/ar_8:10%2Cc_fill%2Ccs_srgb%2Cfl_progressive%2Cg_faces:center%2Cq_auto:good%2Cw_620/MTc0NTE4MDMxNTkyOTkwNjY1/the-chesapeake-bay-retriever-a-guide-for-owners.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000039890',\n",
" 'url': 'https://direct.rhapsody.com/imageserver/images/alb.24505732/500x500.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000039940',\n",
" 'url': 'http://images.allocine.fr/r_640_600/b_1_d6d6d6/medias/nmedia/18/78/35/82/20303823.jpg'},\n",
" {'ex_idx': '00003',\n",
" 'in_idx': '000039972',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0720/6291/t/2/assets/logo.png?6551784379037359213'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000040065',\n",
" 'url': 'https://i.pinimg.com/736x/5c/36/68/5c366820d9a009a6902ee5e1c6697063--space-cupcakes-kid-cupcakes.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000040107',\n",
" 'url': 'http://bonanzleimages.s3.amazonaws.com/afu/images/0616/9087/4a0d5f66a7029a3fae087116d9cb110a_1_.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000040180',\n",
" 'url': 'http://images.gr-assets.com/books/1448318289l/25184383.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000040301',\n",
" 'url': 'https://secureservercdn.net/198.71.233.199/rbl.451.myftpupload.com/wp-content/uploads/2015/01/The-Alchemist-Logo.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000040340',\n",
" 'url': 'http://blog-imgs-42.fc2.com/m/i/n/minefield/201208261613256b8.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000040401',\n",
" 'url': 'https://rlv.zcache.com.au/christening_banner_baptism_invitation_blue-r57711267c68845089a6388dfa8184ed1_zk9rh_324.jpg?rlvnet=1'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000040471',\n",
" 'url': 'https://images.lookhuman.com/render/standard/2044052650002600/6040-heathered_gray_nl-md-t-valentines-day-aint-nobody-got-time-for-that.png'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000040566',\n",
" 'url': 'https://images.booksense.com/images/824/712/9781942712824.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000040571',\n",
" 'url': 'http://rlv.zcache.co.uk/monogrammed_wedding_stickers-rebb6bfe0a2e545d7bd43c39e78e8e15c_v9waf_8byvr_324.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000040704',\n",
" 'url': 'http://rlv.zcache.com/keep_calm_and_fight_villains_post_cards-rc8bc0ad4abee4276b26801ef129447f6_vgbaq_8byvr_324.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000040749',\n",
" 'url': 'http://ecx.images-amazon.com/images/I/41N78grAHCL.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000040753',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/2498/0760/products/GManera_WGM_OnlineShop_032_540x720.jpg?v=1547545823'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000040793',\n",
" 'url': 'https://gchmanhattan.pl/wp-content/uploads/2018/03/manhattanTV_17_kwadrat.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000040812',\n",
" 'url': 'http://ecx.images-amazon.com/images/I/31JD6ortxVL.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000040838',\n",
" 'url': 'http://rlv.zcache.co.nz/well_behaved_women_rarely_make_history_card-r6b9f0dc1e9264aa09e456d0bda2dd134_xvuat_8byvr_324.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000040886',\n",
" 'url': 'https://www.gghschool.com/wp-content/uploads/2020/05/ei-600x350.png'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000040931',\n",
" 'url': 'https://daregreatlycoaching.com/wp-content/uploads/2017/12/20171214-Trap-of-your-own-making-400x270.png'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000041040',\n",
" 'url': 'https://www.soundaffects.com/images/products/thumbnails/1507203969-48938200.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000041044',\n",
" 'url': 'https://secureservercdn.net/198.71.233.195/7fe.723.myftpupload.com/wp-content/uploads/2016/02/20160131_150003-1-1024x392.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000041061',\n",
" 'url': 'http://tse3.mm.bing.net/th?id=OIP.-KTF4tl6w_xzzx7tPAuUQQHaEj'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000041074',\n",
" 'url': 'https://d3nuqriibqh3vw.cloudfront.net/styles/aotw_card_ir/s3/a_tale_of_one_city.jpg?itok=XSq1_JMF'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000041111',\n",
" 'url': 'https://images.justlanded.com/directory_images/Switzerland_Lucerne/16120/IMI-University-Centre-Switzerland/photo/big_scaled_86987_11909_logo.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000041240',\n",
" 'url': 'https://www.notovna.cz/images//zbozi/ML2984.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000041264',\n",
" 'url': 'http://i01.i.aliimg.com/wsphoto/v0/571075635/72inch-LCD-font-b-FPV-b-font-video-font-b-goggles-b-font-16-9-wide.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000041383',\n",
" 'url': 'https://aentcdn.azureedge.net/graphics/items/sdimages/a/500/1/8/5/7/1947581.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000041417',\n",
" 'url': 'http://i.qkme.me/3pyx9q.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000041431',\n",
" 'url': 'https://pics.showlettwestbooks.com/subdirectory13/29252.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000041490',\n",
" 'url': 'http://rlv.zcache.com/languid_lavender_and_white_quatrefoil_pattern_planner-rad9f27945d224873aa0197fd2f2148f3_2izru_8byvr_324.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000041539',\n",
" 'url': 'http://rlv.zcache.com.au/earth_high_recycling_team_mouse_pad-ra2cc59afaab04cdba566f436c8838144_x74vi_8byvr_324.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000041565',\n",
" 'url': 'http://yabibliophile.bookblog.io/wp-content/uploads/sites/19/2018/03/34728667.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000041571',\n",
" 'url': 'https://d2fkddr0p2jbv6.cloudfront.net/render/standard/iEq4S1R42NqAUaAL7qZIvpIIWEN0Ih0jWAJddg6eg5GTawEeEeaMXEKaaYG8DpOU/iphonex-blue-z1-t-did-someone-say-merica.png'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000041777',\n",
" 'url': 'https://i0.wp.com/www.talkofthestreets.com/wp-content/uploads/2015/08/Plies_Aint_No_Mixtape_Bih-front-large.jpg?resize=300%2C300'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000041794',\n",
" 'url': 'http://movietvtechgeeks.com/wp-content/uploads/2015/11/draftkings-weekly-update-new-york-ag-2015-nfl-images-600x315.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000041885',\n",
" 'url': 'http://img.picturequotes.com/2/20/19503/i-refuse-to-be-anything-less-than-successful-quote-1.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000041919',\n",
" 'url': 'https://blog.billiongraves.com//wp-content/uploads/2019/07/BillionGraves-Summer-Surfing-Contest.png'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000041931',\n",
" 'url': 'http://d202m5krfqbpi5.cloudfront.net/books/1387744391l/395587.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000041980',\n",
" 'url': 'https://ruttendesign.nl/wp-content/uploads/2021/02/Rutten_Design-Logo_WIT-uai-258x285.png'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042019',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/1623/6609/products/image_2174e7d7-1a32-4b28-b769-845e7829c426_480x.jpg?v=1598213129'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042054',\n",
" 'url': 'https://stuffhappens.us/wp-content/uploads/2015/02/temperament-quiz.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042128',\n",
" 'url': 'https://mir-s3-cdn-cf.behance.net/projects/404/fd703e73333987.Y3JvcCwxMzgwLDEwODAsMjcwLDA.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042173',\n",
" 'url': 'https://lh3.googleusercontent.com/BlC-ZWpBlbQvp7L6g0De0IdRn2aIifd53ijzPzULCtGwNKnGBb_uNxWkBwJww3mwCcw'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042195',\n",
" 'url': 'https://d39l2hkdp2esp1.cloudfront.net/img/eps/E4153/c/E4153_ff.jpg?20190418194111'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042243',\n",
" 'url': 'https://s3.images-iherb.com/bkm/bkm00660/u/1.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042246',\n",
" 'url': 'https://a57.foxnews.com/static.foxbusiness.com/foxbusiness.com/content/uploads/2020/08/0/0/AP20211690502320.jpg?ve=1&tl=1'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042297',\n",
" 'url': 'http://d.gr-assets.com/books/1348244912l/8343235.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042378',\n",
" 'url': 'https://assets.audiomack.com/dj-warface/get-that-money-ft-chris-brown-french-montana-275-275.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042438',\n",
" 'url': 'https://reverbraccoon.com/wp-content/uploads/2019/03/Farrow.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042454',\n",
" 'url': 'https://deow9bq0xqvbj.cloudfront.net/image-logo/1297595/IMG_3223.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042457',\n",
" 'url': 'https://edwardsfss.com/wordpress/wp-content/uploads/Torch-Club-logo.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042459',\n",
" 'url': 'https://static.twentyoverten.com/5bb243e4d619867e5feafb99/B1MzoTEhm/image001.png'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042504',\n",
" 'url': 'https://images.financialexpress.com/2018/07/binani-3-1-620x413.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042544',\n",
" 'url': 'http://cdn.pastemagazine.com/www/articles/flanaganlead.jpeg?635494693631627879'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042559',\n",
" 'url': 'https://3.bp.blogspot.com/-ja1x17EpeLw/WEWPBiEKkVI/AAAAAAAAiHI/pMgQReH4N60iZ_Pu_qntGQu6WfrvbQFqACLcB/s400/Slide55.JPG'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042563',\n",
" 'url': 'https://images-na.ssl-images-amazon.com/images/S/cmx-images-prod/Series/81693/81693._SX312_QL80_TTD_.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042676',\n",
" 'url': 'https://d2g43ubxtnccwi.cloudfront.net/52051_1_medium.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042680',\n",
" 'url': 'https://professionalbuildersmerchant.co.uk/wp-content/uploads/2019/08/CPA-Construction-Products-Association.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042691',\n",
" 'url': 'https://d33wubrfki0l68.cloudfront.net/fd733c3f2992dd850f3bca5ec5065e19b3de595d/78737/wp-content/uploads/2017/10/how-to-save-all-my-iphone-contacts-into-icloud.png'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042697',\n",
" 'url': 'https://images-na.ssl-images-amazon.com/images/I/61XYQjlVq3L.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042736',\n",
" 'url': 'https://i2.wp.com/goodsparkgarage.com/wp-content/uploads/2017/04/dead-ace-co-moto-supply-4.jpg?resize=1024%2C683'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042753',\n",
" 'url': 'https://websitesandcoffee.com/wp-content/uploads/2012/06/wordpress-com-vs-org-300x281.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042768',\n",
" 'url': 'http://images.moviepostershop.com/the-hangover-2-movie-poster-2011-1010698671.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042871',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0001/8032/2355/products/back_3114717b-9c6f-42fa-b785-cb6c8eb70e60_large.png?v=1576418474'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042963',\n",
" 'url': 'https://i.chzbgr.com/full/5143192832/hD696F3A9/'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000042999',\n",
" 'url': 'http://melbourneletteringclub.com/wp-content/uploads/2016/11/rob_clarke_work_9.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043000',\n",
" 'url': 'https://i1.wp.com/motherhoodinmay.com/wp-content/uploads/2017/12/GIFT-GUIDE-2.png?resize=400%2C500&ssl=1'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043009',\n",
" 'url': 'https://pokermaniashop.com/1235-home_default/carte-bicycle-magic-double-face-scatola-blu-o-rossa.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043104',\n",
" 'url': 'https://cdn-origin.bibliocommons.com/images/CO-PUEBLO/logo.png?1397900483289'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043162',\n",
" 'url': 'http://www.tnfarmersbuyersguide.com/images/adv/611/200008611l.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043206',\n",
" 'url': 'http://ecx.images-amazon.com/images/I/51B3gsMN5WL._SL300_.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043221',\n",
" 'url': 'https://i0.wp.com/www.amour-des-saveurs.com/wp-content/uploads/2019/03/candy-melts-12ozbright-white.jpg?fit=300%2C300&ssl=1'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043240',\n",
" 'url': 'https://f4.bcbits.com/img/a2412680191_16.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043350',\n",
" 'url': 'https://i.imgur.com/oirSj.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043368',\n",
" 'url': 'http://rlv.zcache.com/dream_mer_mermaid_posters-r5f32bf37eeb947a3a045fb550deaf861_fq24_8byvr_324.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043378',\n",
" 'url': 'http://g.christianbook.com/dg/product/cbd/f400/20745.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043440',\n",
" 'url': 'http://www.quarterrockpress.com/media/k2/items/cache/b91e0a97ce980c6b93c011a7d228a301_Generic.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043702',\n",
" 'url': 'https://www.littlebibliophile.com/wp-content/uploads/2017/11/Happy-Childrens-Day-2017-750x350.png'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043706',\n",
" 'url': 'http://www.justiceworksltd.org/wp-content/uploads/2010/10/Just-run-round-No-date1-300x300.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043710',\n",
" 'url': 'https://stephcalvertart.com/wp-content/uploads/2016/11/shop-no-cavities-club-dentist-12x12canvas-steph-calvert-art-mockup.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043734',\n",
" 'url': 'http://cached.imagescaler.hbpl.co.uk/resize/scaleWidth/445/offlinehbpl.hbpl.co.uk/news/OWM/838FC6A7-A116-8248-AB33CF00756FFC6C.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043740',\n",
" 'url': 'https://ih1.redbubble.net/image.120238612.6669/raf750x1000075theather_grey.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043791',\n",
" 'url': 'https://theimaginationtree.com/wp-content/uploads/2013/01/Red+and+White+Clay+Hearts.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043800',\n",
" 'url': 'https://altadenabaptist.hipcast.com/albumart/1003_1618837671.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043805',\n",
" 'url': 'https://images-na.ssl-images-amazon.com/images/I/61gMzcNJQPL._SL300_.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043811',\n",
" 'url': 'https://wampuscatstudentnews.com/wp-content/uploads/2019/10/Screen-Shot-2019-10-28-at-1.36.16-PM.png'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043936',\n",
" 'url': 'https://d3wo5wojvuv7l.cloudfront.net/t_square_limited_320/images.spreaker.com/original/c383719c3af216baf7e072d61dbaed30.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000043980',\n",
" 'url': 'https://nerdgeistdotcom.files.wordpress.com/2016/03/10-cloverfield-lane-jj-abrams.jpg?w=639&'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000044052',\n",
" 'url': 'http://t.qkme.me/3uigwj.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000044153',\n",
" 'url': 'http://ecx.images-amazon.com/images/I/51Z2RBHVCYL._SL500_AA300_.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000044332',\n",
" 'url': 'https://cdn.bleacherreport.net/images_root/slides/photos/000/414/385/95781002_original.jpg?1285605227'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000044347',\n",
" 'url': 'http://kelleykeller.com/wp-content/uploads/contracts-must-be-in-writing-800x300.png'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000044370',\n",
" 'url': 'https://cdn11.bigcommerce.com/s-omo2hp/images/stencil/1280x1280/products/2042/8954/bearbook6__94997.1479446314.jpg?c=2'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000044374',\n",
" 'url': 'http://i.ytimg.com/vi/zdOSOQWPtEQ/0.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000044416',\n",
" 'url': 'https://assets1.ignimgs.com/thumbs/2016/06/08/624fce02be8b446b0b58bb5a6d356b30-1465426762/frame_0000.jpg?fit=bounds&dpr=1&quality=75&crop=16%3A9&width=300&format=pjpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000044443',\n",
" 'url': 'https://sr20.driftworks.com/media/catalog/product/cache/1/thumbnail/180x/9df78eab33525d08d6e5fb8d27136e95/i/m/image_JR181895XX2067HB_11287_1_4.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000044448',\n",
" 'url': 'https://i.pinimg.com/736x/f5/80/49/f5804962db4f0edf4828f029c032f570.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000044480',\n",
" 'url': 'https://img.evbuc.com/https%3A%2F%2Fcdn.evbuc.com%2Fimages%2F55815573%2F267423464591%2F1%2Foriginal.20190128-100832?w=512&auto=compress&rect=0%2C134%2C804%2C402&s=bf6d28ff17d65fcee3ddb0355d608cf3'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000044623',\n",
" 'url': 'https://xark.typepad.com/.a/6a00d8341c5d3453ef0167616ce05e970b-500wi'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000044648',\n",
" 'url': 'https://img1.od-cdn.com/ImageType-400/9161-1/D34/1EF/E2/%7BD341EFE2-F037-4CD7-886D-2B11C0C56B64%7DImg400.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000044680',\n",
" 'url': 'http://androidinfo.hu/wp-content/uploads/2017/05/android-apple2.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000044809',\n",
" 'url': 'https://image.spreadshirtmedia.com/image-server/v1/compositions/111242357/views/1width=300height=300appearanceId=1version=1460621206.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000044879',\n",
" 'url': 'http://labrador.se/wp-content/uploads/2013/07/shop_maryonettes_evilcoast.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000044895',\n",
" 'url': 'http://vafloc02.s3.amazonaws.com/isyn/images/f117/img-1708117-m.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000044906',\n",
" 'url': 'https://images.squarespace-cdn.com/content/57ffc1d9414fb54338533658/1482170872136-OSVDHYNPO8KD5ZFII54Y/Oberg+Logo+-+From+Main+St.+Graphics.png?content-type=image%2Fpng'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045014',\n",
" 'url': 'http://www.indigoinkprint.com/wp-content/uploads/2013/10/team-page.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045106',\n",
" 'url': 'http://images.kitbag.com/mufc-130315.jpg?width=170&height=170&quality=95'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045124',\n",
" 'url': 'http://rlv.zcache.co.uk/athlete_photo_insert_graduation_party_a_invitation-r886af006e6d94ea3b7cdf36cc3ff69e8_imtqg_8byvr_324.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045211',\n",
" 'url': 'https://lacuillereenbois.fr/wp-content/uploads/2018/11/20181126_fruits_grolet-2-1140x758.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045215',\n",
" 'url': 'https://images.tol-expo.it/cloud/logos/o/8VNL1F0U56HU_iwbank-private-investments.png?d=0&v=3970'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045323',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/0549/1389/2517/files/Pretty_Girl_Jewels_Logo_Metalic.png?v=1614445832'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045327',\n",
" 'url': 'https://www.product-reviews.net/wp-content/uploads/clash-of-clans-update-today.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045362',\n",
" 'url': 'http://www.emptykingdom.com/wp-content/uploads/2012/01/EKI_Heidi-Taillefer-600x395.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045379',\n",
" 'url': 'http://i.ytimg.com/vi/-kjWJjtMwuQ/0.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045419',\n",
" 'url': 'https://res.9appsinstall.com/group1/M00/8B/11/p4YBAFdA1wKAdU2VAAA1K2_Xpy8985.png'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045432',\n",
" 'url': 'https://smlycdn.akamaized.net/products/270x270-fill/166e2d3c7b/3e535e9a4ddd692c4d56000baba61919e8a13d54.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045435',\n",
" 'url': 'https://i1.wp.com/thebirthdaybest.com/wp-content/uploads/2020/11/25th-bday-quotes.jpg?resize=640%2C402&ssl=1'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045479',\n",
" 'url': 'https://assets.burberry.com/is/image/Burberryltd/266c256165384d3b3596d9cfcdf088e1d6108c14.jpg?$BBY_V2_SL_4X3$&wid=760&hei=570'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045480',\n",
" 'url': 'https://cdn.shopify.com/s/files/1/2770/5310/products/CART67995_87eb210c-ce3c-4509-8008-2afd03fa751e_200x200@2x.jpg?v=1571732568'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045482',\n",
" 'url': 'https://blog.entheosweb.com/wp-content/uploads/2012/03/socialicons4.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045536',\n",
" 'url': 'https://i.imgflip.com/3rtwts.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045741',\n",
" 'url': 'http://a.mktgcdn.com/p/L3LHun9mgq8tnWEek4b2A-p0jExDnvcjt3rMkMI9mtE/500x500.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045774',\n",
" 'url': 'http://i0.wp.com/hypebeast.com/image/2012/08/carhartt-x-vans-fall-2012-old-skool-camo-3.jpg?w=930'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045809',\n",
" 'url': 'https://cdn.iset.io/assets/55084/produtos/175/brc074_thumb_atlhetica_best_whey_900g_original.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045923',\n",
" 'url': 'https://diyprojects.com/wp-content/uploads/2014/01/where-to-get-pallets-600x384.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045953',\n",
" 'url': 'https://cdn62.zvooq.com/pic?type=release&id=1033344&size=300x300&ext=jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045957',\n",
" 'url': 'http://www.modorent.it/actionphp/thumb.output.php?src=Prodotti%2FBEA_18_PARTNER.jpg&wmax=300&hmax=300&quality=80&bgcol=FFFFFF&type=2&sid=cb10c60c8841161d6b3b1f00efc9723b'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045978',\n",
" 'url': 'https://34hhymr52r9delys55ykaema-wpengine.netdna-ssl.com/wp-content/uploads/unnamed-24-300x300.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000045998',\n",
" 'url': 'https://www.marcheleos.com/media/catalog/product/cache/1/small_image/295x295/9df78eab33525d08d6e5fb8d27136e95/3/5/355644.jpeg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000046076',\n",
" 'url': 'https://artwork-cdn.7static.com/static/img/sleeveart/00/058/137/0005813722_350.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000046079',\n",
" 'url': 'https://image.spreadshirtmedia.net/image-server/v1/compositions/141961824/views/1width=300height=300appearanceId=231backgroundColor=E8E8E8version=1472100673/offline-is-the-new-luxury-t-shirts-womens-premium-t-shirt.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000046081',\n",
" 'url': 'https://knowth.com/images-kn/knowth-k78-700.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000046103',\n",
" 'url': 'https://d14wch1fpzoq5q.cloudfront.net/2017/05/16155333/C_VHRc3XkAIe8Le-945x600.jpg'},\n",
" {'ex_idx': '00004',\n",
" 'in_idx': '000046229',\n",
" 'url': 'https://proassets.monopile.cloud/43845/1ed51390512f6426baece897500f518a_m.jpg'},\n",
" ...]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"url_lst.sort(key = lambda x: int(x[\"in_idx\"]))\n",
"url_lst"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"url_txt_path = ospj(data_root, \"urls.txt\")\n",
"# opening with \"w\" below truncates any existing file, so no separate rm is needed\n",
"urls = [item[\"url\"] + \"\\n\" for item in url_lst]\n",
"\n",
"with open(url_txt_path, \"w\") as fp:\n",
"    fp.writelines(urls)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(f\"img2dataset --url_list={url_txt_path} --output_folder={cache_root} --thread_count=64 --resize_mode=no\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pointer = 0\n",
"total = 0\n",
"ex_dirs = sorted(glob.glob(ospj(cache_root, \"?????\")))\n",
"for ex_dir in tqdm(ex_dirs):\n",
" img_paths = sorted(glob.glob(ospj(ex_dir, \"*.jpg\")))\n",
" info_paths = sorted(glob.glob(ospj(ex_dir, \"*.json\")))\n",
" total += len(info_paths)\n",
" for img_path in img_paths:\n",
" name = img_path.split(os.sep)[-1].split(\".\")[0]\n",
" with open(ospj(ex_dir, f\"{name}.json\"), \"rb\") as fp:\n",
" info = json.load(fp)\n",
"\n",
"    # url_lst and the downloaded files share the same ordering, so a single\n",
"    # forward-moving pointer matches each image back to its indices\n",
"    while info[\"url\"] != url_lst[pointer][\"url\"]:\n",
"        pointer += 1\n",
"        if pointer >= len(url_lst):\n",
"            raise IndexError(f\"url not found in url_lst: {info['url']}\")\n",
" ex_idx = url_lst[pointer][\"ex_idx\"]\n",
" in_idx = url_lst[pointer][\"in_idx\"]\n",
" os.makedirs(ospj(image_root, ex_idx), exist_ok=True)\n",
" os.rename(img_path, ospj(image_root, ex_idx, f\"{in_idx}.jpg\"))\n",
"\n",
"print(f\"Total num: {total}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"img_dirs = sorted(glob.glob(ospj(image_root, \"?????\")))\n",
"div = int(len(img_dirs) * 0.95)\n",
"train_total = 0\n",
"val_total = 0\n",
"for i, img_dir in enumerate(tqdm(img_dirs)):\n",
" stype = \"train\" if i < div else \"val\" \n",
" img_paths = sorted(glob.glob(ospj(img_dir, \"*.jpg\")))\n",
" for img_path in img_paths:\n",
" if stype == \"train\":\n",
" train_total += 1\n",
" else:\n",
" val_total += 1\n",
" ex_idx = img_dir.split(os.sep)[-1]\n",
" in_idx = img_path.split(os.sep)[-1].split(\".\")[0]\n",
" anno_dir = ospj(anno_root, ex_idx, in_idx)\n",
" target_dir = ospj(data_root, stype, in_idx)\n",
" target_img_path = ospj(target_dir, 'image.jpg')\n",
" if os.path.exists(target_img_path):\n",
" continue\n",
" os.system(f\"cp -r {anno_dir} {target_dir}\")\n",
" os.system(f\"cp {img_path} {target_img_path}\")\n",
"\n",
"\n",
"print(f\"Split into {train_total} train samples and {val_total} val samples\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "sdxl",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
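The final matching cell of the notebook relies on img2dataset preserving the input url order, so a single forward-moving pointer can map each downloaded image back to its (ex_idx, in_idx). A minimal standalone sketch of that idea (function and variable names here are illustrative, not from the notebook):

```python
def match_downloads(downloaded_urls, url_lst):
    """Map each downloaded url back to its (ex_idx, in_idx) annotation indices.

    Assumes both sequences follow the original url order, as the notebook does.
    """
    pointer = 0
    matched = []
    for url in downloaded_urls:
        # advance the shared pointer until the entries line up
        while pointer < len(url_lst) and url_lst[pointer]["url"] != url:
            pointer += 1
        if pointer >= len(url_lst):
            raise ValueError(f"url not found in url list: {url}")
        matched.append((url_lst[pointer]["ex_idx"], url_lst[pointer]["in_idx"]))
    return matched
```

Because the pointer never moves backwards, the whole matching pass is linear in the number of urls, which matters at LAION scale.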
================================================
FILE: sgm/__init__.py
================================================
from .models import AutoencodingEngine, DiffusionEngine
from .util import instantiate_from_config
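instantiate_from_config, re-exported here, is the glue used throughout the configs and model classes below (AutoencodingEngine, for instance, builds its encoder, decoder, loss, and regularizer with it). Its contract is the usual Stable Diffusion convention: a dict with a dotted `target` import path plus optional `params` kwargs. A rough, self-contained sketch of that contract (not the repo's exact implementation):

```python
import importlib

def instantiate_from_config(config):
    # "target" is a dotted import path; "params" are constructor kwargs
    module_path, cls_name = config["target"].rsplit(".", 1)
    cls = getattr(importlib.import_module(module_path), cls_name)
    return cls(**config.get("params", dict()))

# any importable class works, e.g. a stdlib type in place of a model class
frac = instantiate_from_config({"target": "fractions.Fraction",
                                "params": {"numerator": 3, "denominator": 4}})
```

This is what lets the YAML files under `configs/` swap in encoders, losses, and samplers without touching model code.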
================================================
FILE: sgm/lr_scheduler.py
================================================
import numpy as np
class LambdaWarmUpCosineScheduler:
"""
note: use with a base_lr of 1.0
"""
def __init__(
self,
warm_up_steps,
lr_min,
lr_max,
lr_start,
max_decay_steps,
verbosity_interval=0,
):
self.lr_warm_up_steps = warm_up_steps
self.lr_start = lr_start
self.lr_min = lr_min
self.lr_max = lr_max
self.lr_max_decay_steps = max_decay_steps
self.last_lr = 0.0
self.verbosity_interval = verbosity_interval
def schedule(self, n, **kwargs):
if self.verbosity_interval > 0:
if n % self.verbosity_interval == 0:
print(f"current step: {n}, recent lr-multiplier: {self.last_lr}")
if n < self.lr_warm_up_steps:
lr = (
self.lr_max - self.lr_start
) / self.lr_warm_up_steps * n + self.lr_start
self.last_lr = lr
return lr
else:
t = (n - self.lr_warm_up_steps) / (
self.lr_max_decay_steps - self.lr_warm_up_steps
)
t = min(t, 1.0)
lr = self.lr_min + 0.5 * (self.lr_max - self.lr_min) * (
1 + np.cos(t * np.pi)
)
self.last_lr = lr
return lr
def __call__(self, n, **kwargs):
return self.schedule(n, **kwargs)
class LambdaWarmUpCosineScheduler2:
"""
supports repeated iterations, configurable via lists
note: use with a base_lr of 1.0.
"""
def __init__(
self, warm_up_steps, f_min, f_max, f_start, cycle_lengths, verbosity_interval=0
):
assert (
len(warm_up_steps)
== len(f_min)
== len(f_max)
== len(f_start)
== len(cycle_lengths)
)
self.lr_warm_up_steps = warm_up_steps
self.f_start = f_start
self.f_min = f_min
self.f_max = f_max
self.cycle_lengths = cycle_lengths
self.cum_cycles = np.cumsum([0] + list(self.cycle_lengths))
self.last_f = 0.0
self.verbosity_interval = verbosity_interval
    def find_in_interval(self, n):
        interval = 0
        for cl in self.cum_cycles[1:]:
            if n <= cl:
                return interval
            interval += 1
        # n lies beyond the final cycle: clamp to the last interval instead of
        # falling through and returning None
        return interval - 1
def schedule(self, n, **kwargs):
cycle = self.find_in_interval(n)
n = n - self.cum_cycles[cycle]
if self.verbosity_interval > 0:
if n % self.verbosity_interval == 0:
print(
f"current step: {n}, recent lr-multiplier: {self.last_f}, "
f"current cycle {cycle}"
)
if n < self.lr_warm_up_steps[cycle]:
f = (self.f_max[cycle] - self.f_start[cycle]) / self.lr_warm_up_steps[
cycle
] * n + self.f_start[cycle]
self.last_f = f
return f
else:
t = (n - self.lr_warm_up_steps[cycle]) / (
self.cycle_lengths[cycle] - self.lr_warm_up_steps[cycle]
)
t = min(t, 1.0)
f = self.f_min[cycle] + 0.5 * (self.f_max[cycle] - self.f_min[cycle]) * (
1 + np.cos(t * np.pi)
)
self.last_f = f
return f
def __call__(self, n, **kwargs):
return self.schedule(n, **kwargs)
class LambdaLinearScheduler(LambdaWarmUpCosineScheduler2):
def schedule(self, n, **kwargs):
cycle = self.find_in_interval(n)
n = n - self.cum_cycles[cycle]
if self.verbosity_interval > 0:
if n % self.verbosity_interval == 0:
print(
f"current step: {n}, recent lr-multiplier: {self.last_f}, "
f"current cycle {cycle}"
)
if n < self.lr_warm_up_steps[cycle]:
f = (self.f_max[cycle] - self.f_start[cycle]) / self.lr_warm_up_steps[
cycle
] * n + self.f_start[cycle]
self.last_f = f
return f
else:
f = self.f_min[cycle] + (self.f_max[cycle] - self.f_min[cycle]) * (
self.cycle_lengths[cycle] - n
) / (self.cycle_lengths[cycle])
self.last_f = f
return f
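All three schedulers return a learning-rate multiplier rather than an absolute lr, which is why the docstrings say to use a base_lr of 1.0: in practice they are passed as the `lr_lambda` of `torch.optim.lr_scheduler.LambdaLR`, with the optimizer's lr set to the true peak. The warmup-then-cosine shape can be sanity-checked with a dependency-free re-derivation of `LambdaWarmUpCosineScheduler.schedule` (parameter values below are illustrative):

```python
import math

def lr_multiplier(n, warm_up_steps=1000, lr_start=0.0, lr_max=1.0,
                  lr_min=0.01, max_decay_steps=10000):
    """Same piecewise shape as LambdaWarmUpCosineScheduler.schedule:
    linear warmup from lr_start to lr_max, then cosine decay to lr_min."""
    if n < warm_up_steps:
        # linear ramp during warmup
        return (lr_max - lr_start) / warm_up_steps * n + lr_start
    # cosine decay, clamped at max_decay_steps
    t = min((n - warm_up_steps) / (max_decay_steps - warm_up_steps), 1.0)
    return lr_min + 0.5 * (lr_max - lr_min) * (1 + math.cos(t * math.pi))
```

At step 0 this yields lr_start, at the end of warmup exactly lr_max, and past max_decay_steps it stays pinned at lr_min; LambdaLR multiplies the optimizer's configured lr by this value every step.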
================================================
FILE: sgm/models/__init__.py
================================================
from .autoencoder import AutoencodingEngine
from .diffusion import DiffusionEngine
================================================
FILE: sgm/models/autoencoder.py
================================================
import re
from abc import abstractmethod
from contextlib import contextmanager
from typing import Any, Dict, Tuple, Union
import pytorch_lightning as pl
import torch
from omegaconf import ListConfig
from packaging import version
from safetensors.torch import load_file as load_safetensors
from ..modules.diffusionmodules.model import Decoder, Encoder
from ..modules.distributions.distributions import DiagonalGaussianDistribution
from ..modules.ema import LitEma
from ..util import default, get_obj_from_str, instantiate_from_config
class AbstractAutoencoder(pl.LightningModule):
"""
This is the base class for all autoencoders, including image autoencoders, image autoencoders with discriminators,
unCLIP models, etc. Hence, it is fairly general, and specific features
(e.g. discriminator training, encoding, decoding) must be implemented in subclasses.
"""
def __init__(
self,
ema_decay: Union[None, float] = None,
monitor: Union[None, str] = None,
input_key: str = "jpg",
ckpt_path: Union[None, str] = None,
ignore_keys: Union[Tuple, list, ListConfig] = (),
):
super().__init__()
self.input_key = input_key
self.use_ema = ema_decay is not None
if monitor is not None:
self.monitor = monitor
if self.use_ema:
self.model_ema = LitEma(self, decay=ema_decay)
            print(f"Keeping EMAs of {len(list(self.model_ema.buffers()))} buffers.")
if ckpt_path is not None:
self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys)
if version.parse(torch.__version__) >= version.parse("2.0.0"):
self.automatic_optimization = False
def init_from_ckpt(
self, path: str, ignore_keys: Union[Tuple, list, ListConfig] = tuple()
) -> None:
if path.endswith("ckpt"):
sd = torch.load(path, map_location="cpu")["state_dict"]
elif path.endswith("safetensors"):
sd = load_safetensors(path)
else:
raise NotImplementedError
keys = list(sd.keys())
for k in keys:
for ik in ignore_keys:
if re.match(ik, k):
print("Deleting key {} from state_dict.".format(k))
del sd[k]
missing, unexpected = self.load_state_dict(sd, strict=False)
print(
f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys"
)
if len(missing) > 0:
print(f"Missing Keys: {missing}")
if len(unexpected) > 0:
print(f"Unexpected Keys: {unexpected}")
@abstractmethod
def get_input(self, batch) -> Any:
raise NotImplementedError()
def on_train_batch_end(self, *args, **kwargs):
# for EMA computation
if self.use_ema:
self.model_ema(self)
@contextmanager
def ema_scope(self, context=None):
if self.use_ema:
self.model_ema.store(self.parameters())
self.model_ema.copy_to(self)
if context is not None:
print(f"{context}: Switched to EMA weights")
try:
yield None
finally:
if self.use_ema:
self.model_ema.restore(self.parameters())
if context is not None:
print(f"{context}: Restored training weights")
@abstractmethod
def encode(self, *args, **kwargs) -> torch.Tensor:
raise NotImplementedError("encode()-method of abstract base class called")
@abstractmethod
def decode(self, *args, **kwargs) -> torch.Tensor:
raise NotImplementedError("decode()-method of abstract base class called")
def instantiate_optimizer_from_config(self, params, lr, cfg):
print(f"loading >>> {cfg['target']} <<< optimizer from config")
return get_obj_from_str(cfg["target"])(
params, lr=lr, **cfg.get("params", dict())
)
def configure_optimizers(self) -> Any:
raise NotImplementedError()
class AutoencodingEngine(AbstractAutoencoder):
"""
Base class for all image autoencoders that we train, like VQGAN or AutoencoderKL
(we also restore them explicitly as special cases for legacy reasons).
Regularizations such as KL or VQ are moved to the regularizer class.
"""
def __init__(
self,
*args,
encoder_config: Dict,
decoder_config: Dict,
loss_config: Dict,
regularizer_config: Dict,
optimizer_config: Union[Dict, None] = None,
lr_g_factor: float = 1.0,
**kwargs,
):
super().__init__(*args, **kwargs)
# todo: add options to freeze encoder/decoder
self.encoder = instantiate_from_config(encoder_config)
self.decoder = instantiate_from_config(decoder_config)
self.loss = instantiate_from_config(loss_config)
self.regularization = instantiate_from_config(regularizer_config)
self.optimizer_config = default(
optimizer_config, {"target": "torch.optim.Adam"}
)
self.lr_g_factor = lr_g_factor
def get_input(self, batch: Dict) -> torch.Tensor:
# assuming unified data format, dataloader returns a dict.
# image tensors should be scaled to -1 ... 1 and in channels-first format (e.g., bchw instead of bhwc)
return batch[self.input_key]
def get_autoencoder_params(self) -> list:
params = (
list(self.encoder.parameters())
+ list(self.decoder.parameters())
+ list(self.regularization.get_trainable_parameters())
+ list(self.loss.get_trainable_autoencoder_parameters())
)
return params
def get_discriminator_params(self) -> list:
params = list(self.loss.get_trainable_parameters()) # e.g., discriminator
return params
def get_last_layer(self):
return self.decoder.get_last_layer()
def encode(self, x: Any, return_reg_log: bool = False) -> Any:
z = self.encoder(x)
z, reg_log = self.regularization(z)
if return_reg_log:
return z, reg_log
return z
def decode(self, z: Any) -> torch.Tensor:
x = self.decoder(z)
return x
def forward(self, x: Any) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
z, reg_log = self.encode(x, return_reg_log=True)
dec = self.decode(z)
return z, dec, reg_log
def training_step(self, batch, batch_idx, optimizer_idx) -> Any:
x = self.get_input(batch)
z, xrec, regularization_log = self(x)
if optimizer_idx == 0:
# autoencode
aeloss, log_dict_ae = self.loss(
regularization_log,
x,
xrec,
optimizer_idx,
self.global_step,
last_layer=self.get_last_layer(),
split="train",
)
self.log_dict(
log_dict_ae, prog_bar=False, logger=True, on_step=True, on_epoch=True
)
return aeloss
if optimizer_idx == 1:
# discriminator
discloss, log_dict_disc = self.loss(
regularization_log,
x,
xrec,
optimizer_idx,
self.global_step,
last_layer=self.get_last_layer(),
split="train",
)
self.log_dict(
log_dict_disc, prog_bar=False, logger=True, on_step=True, on_epoch=True
)
return discloss
def validation_step(self, batch, batch_idx) -> Dict:
log_dict = self._validation_step(batch, batch_idx)
with self.ema_scope():
log_dict_ema = self._validation_step(batch, batch_idx, postfix="_ema")
log_dict.update(log_dict_ema)
return log_dict
def _validation_step(self, batch, batch_idx, postfix="") -> Dict:
x = self.get_input(batch)
z, xrec, regularization_log = self(x)
aeloss, log_dict_ae = self.loss(
regularization_log,
x,
xrec,
0,
self.global_step,
last_layer=self.get_last_layer(),
split="val" + postfix,
)
discloss, log_dict_disc = self.loss(
regularization_log,
x,
xrec,
1,
self.global_step,
last_layer=self.get_last_layer(),
split="val" + postfix,
)
self.log(f"val{postfix}/rec_loss", log_dict_ae[f"val{postfix}/rec_loss"])
log_dict_ae.update(log_dict_disc)
self.log_dict(log_dict_ae)
return log_dict_ae
def configure_optimizers(self) -> Any:
ae_params = self.get_autoencoder_params()
disc_params = self.get_discriminator_params()
opt_ae = self.instantiate_optimizer_from_config(
ae_params,
default(self.lr_g_factor, 1.0) * self.learning_rate,
self.optimizer_config,
)
opt_disc = self.instantiate_optimizer_from_config(
disc_params, self.learning_rate, self.optimizer_config
)
return [opt_ae, opt_disc], []
@torch.no_grad()
def log_images(self, batch: Dict, **kwargs) -> Dict:
log = dict()
x = self.get_input(batch)
_, xrec, _ = self(x)
log["inputs"] = x
log["reconstructions"] = xrec
with self.ema_scope():
_, xrec_ema, _ = self(x)
log["reconstructions_ema"] = xrec_ema
return log
class AutoencoderKL(AutoencodingEngine):
def __init__(self, embed_dim: int, **kwargs):
ddconfig = kwargs.pop("ddconfig")
ckpt_path = kwargs.pop("ckpt_path", None)
ignore_keys = kwargs.pop("ignore_keys", ())
super().__init__(
encoder_config={"target": "torch.nn.Identity"},
decoder_config={"target": "torch.nn.Identity"},
regularizer_config={"target": "torch.nn.Identity"},
loss_config=kwargs.pop("lossconfig"),
**kwargs,
)
assert ddconfig["double_z"]
self.encoder = Encoder(**ddconfig)
self.decoder = Decoder(**ddconfig)
self.quant_conv = torch.nn.Conv2d(2 * ddconfig["z_channels"], 2 * embed_dim, 1)
self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1)
self.embed_dim = embed_dim
if ckpt_path is not None:
self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys)
def encode(self, x):
assert (
not self.training
), f"{self.__class__.__name__} only supports inference currently"
h = self.encoder(x)
moments = self.quant_conv(h)
posterior = DiagonalGaussianDistribution(moments)
return posterior
def decode(self, z, **decoder_kwargs):
z = self.post_quant_conv(z)
dec = self.decoder(z, **decoder_kwargs)
return dec
class AutoencoderKLInferenceWrapper(AutoencoderKL):
def encode(self, x):
return super().encode(x).sample()
class IdentityFirstStage(AbstractAutoencoder):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def get_input(self, x: Any) -> Any:
return x
def encode(self, x: Any, *args, **kwargs) -> Any:
return x
def decode(self, x: Any, *args, **kwargs) -> Any:
return x
================================================
FILE: sgm/models/diffusion.py
================================================
from contextlib import contextmanager
from typing import Any, Dict, List, Tuple, Union
import pytorch_lightning as pl
import torch
from omegaconf import ListConfig, OmegaConf
from safetensors.torch import load_file as load_safetensors
from torch.optim.lr_scheduler import LambdaLR
from ..modules import UNCONDITIONAL_CONFIG
from ..modules.diffusionmodules.wrappers import OPENAIUNETWRAPPER
from ..modules.ema import LitEma
from ..util import (
default,
disabled_train,
get_obj_from_str,
instantiate_from_config,
log_txt_as_img,
)
class DiffusionEngine(pl.LightningModule):
def __init__(
self,
network_config,
denoiser_config,
first_stage_config,
conditioner_config: Union[None, Dict, ListConfig, OmegaConf] = None,
sampler_config: Union[None, Dict, ListConfig, OmegaConf] = None,
optimizer_config: Union[None, Dict, ListConfig, OmegaConf] = None,
scheduler_config: Union[None, Dict, ListConfig, OmegaConf] = None,
loss_fn_config: Union[None, Dict, ListConfig, OmegaConf] = None,
network_wrapper: Union[None, str] = None,
ckpt_path: Union[None, str] = None,
use_ema: bool = False,
ema_decay_rate: float = 0.9999,
scale_factor: float = 1.0,
disable_first_stage_autocast=False,
input_key: str = "jpg",
log_keys: Union[List, None] = None,
no_cond_log: bool = False,
compile_model: bool = False,
opt_keys: Union[List, None] = None
):
super().__init__()
self.opt_keys = opt_keys
self.log_keys = log_keys
self.input_key = input_key
self.optimizer_config = default(
optimizer_config, {"target": "torch.optim.AdamW"}
)
model = instantiate_from_config(network_config)
self.model = get_obj_from_str(default(network_wrapper, OPENAIUNETWRAPPER))(
model, compile_model=compile_model
)
self.denoiser = instantiate_from_config(denoiser_config)
self.sampler = (
instantiate_from_config(sampler_config)
if sampler_config is not None
else None
)
self.conditioner = instantiate_from_config(
default(conditioner_config, UNCONDITIONAL_CONFIG)
)
self.scheduler_config = scheduler_config
self._init_first_stage(first_stage_config)
self.loss_fn = (
instantiate_from_config(loss_fn_config)
if loss_fn_config is not None
else None
)
self.use_ema = use_ema
if self.use_ema:
self.model_ema = LitEma(self.model, decay=ema_decay_rate)
print(f"Keeping EMAs of {len(list(self.model_ema.buffers()))} buffers.")
self.scale_factor = scale_factor
self.disable_first_stage_autocast = disable_first_stage_autocast
self.no_cond_log = no_cond_log
if ckpt_path is not None:
self.init_from_ckpt(ckpt_path)
def init_from_ckpt(
self,
path: str,
) -> None:
if path.endswith("ckpt"):
sd = torch.load(path, map_location="cpu")["state_dict"]
elif path.endswith("safetensors"):
sd = load_safetensors(path)
else:
raise NotImplementedError(f"Unsupported checkpoint format: {path}")
missing, unexpected = self.load_state_dict(sd, strict=False)
print(
f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys"
)
if len(missing) > 0:
print(f"Missing Keys: {missing}")
if len(unexpected) > 0:
print(f"Unexpected Keys: {unexpected}")
def freeze(self):
for param in self.parameters():
param.requires_grad_(False)
def _init_first_stage(self, config):
model = instantiate_from_config(config).eval()
model.train = disabled_train
for param in model.parameters():
param.requires_grad = False
self.first_stage_model = model
def get_input(self, batch):
# assuming unified data format, dataloader returns a dict.
# image tensors should be scaled to -1 ... 1 and in bchw format
return batch[self.input_key]
@torch.no_grad()
def decode_first_stage(self, z):
z = 1.0 / self.scale_factor * z
with torch.autocast("cuda", enabled=not self.disable_first_stage_autocast):
out = self.first_stage_model.decode(z)
return out
@torch.no_grad()
def encode_first_stage(self, x):
with torch.autocast("cuda", enabled=not self.disable_first_stage_autocast):
z = self.first_stage_model.encode(x)
z = self.scale_factor * z
return z
def forward(self, x, batch):
loss, loss_dict = self.loss_fn(self.model, self.denoiser, self.conditioner, x, batch, self.first_stage_model, self.scale_factor)
return loss, loss_dict
def shared_step(self, batch: Dict) -> Any:
x = self.get_input(batch)
x = self.encode_first_stage(x)
batch["global_step"] = self.global_step
loss, loss_dict = self(x, batch)
return loss, loss_dict
def training_step(self, batch, batch_idx):
loss, loss_dict = self.shared_step(batch)
self.log_dict(
loss_dict, prog_bar=True, logger=True, on_step=True, on_epoch=False
)
self.log(
"global_step",
float(self.global_step),
prog_bar=True,
logger=True,
on_step=True,
on_epoch=False,
)
lr = self.optimizers().param_groups[0]["lr"]
self.log(
"lr_abs", lr, prog_bar=True, logger=True, on_step=True, on_epoch=False
)
return loss
def on_train_start(self, *args, **kwargs):
if self.sampler is None or self.loss_fn is None:
raise ValueError("Sampler and loss function need to be set for training.")
def on_train_batch_end(self, *args, **kwargs):
if self.use_ema:
self.model_ema(self.model)
@contextmanager
def ema_scope(self, context=None):
if self.use_ema:
self.model_ema.store(self.model.parameters())
self.model_ema.copy_to(self.model)
if context is not None:
print(f"{context}: Switched to EMA weights")
try:
yield None
finally:
if self.use_ema:
self.model_ema.restore(self.model.parameters())
if context is not None:
print(f"{context}: Restored training weights")
def instantiate_optimizer_from_config(self, params, lr, cfg):
return get_obj_from_str(cfg["target"])(
params, lr=lr, **cfg.get("params", dict())
)
def configure_optimizers(self):
lr = self.learning_rate
params = []
print("Trainable parameter list: ")
print("-"*20)
for name, param in self.model.named_parameters():
if any(key in name for key in self.opt_keys):
params.append(param)
print(name)
else:
param.requires_grad_(False)
for embedder in self.conditioner.embedders:
if embedder.is_trainable:
for name, param in embedder.named_parameters():
params.append(param)
print(name)
print("-"*20)
opt = self.instantiate_optimizer_from_config(params, lr, self.optimizer_config)
scheduler = LambdaLR(opt, lr_lambda=lambda epoch: 0.95**epoch)
return [opt], [scheduler]
@torch.no_grad()
def sample(
self,
cond: Dict,
uc: Union[Dict, None] = None,
batch_size: int = 16,
shape: Union[None, Tuple, List] = None,
**kwargs,
):
randn = torch.randn(batch_size, *shape).to(self.device)
denoiser = lambda input, sigma, c: self.denoiser(
self.model, input, sigma, c, **kwargs
)
samples = self.sampler(denoiser, randn, cond, uc=uc)
return samples
@torch.no_grad()
def log_conditionings(self, batch: Dict, n: int) -> Dict:
"""
Defines heuristics to log different conditionings.
These can be lists of strings (text-to-image), tensors, ints, ...
"""
image_h, image_w = batch[self.input_key].shape[2:]
log = dict()
for embedder in self.conditioner.embedders:
if (
(self.log_keys is None) or (embedder.input_key in self.log_keys)
) and not self.no_cond_log:
x = batch[embedder.input_key][:n]
if isinstance(x, torch.Tensor):
if x.dim() == 1:
# class-conditional, convert integer to string
x = [str(x[i].item()) for i in range(x.shape[0])]
xc = log_txt_as_img((image_h, image_w), x, size=image_h // 4)
elif x.dim() == 2:
# size and crop cond and the like
x = [
"x".join([str(xx) for xx in x[i].tolist()])
for i in range(x.shape[0])
]
xc = log_txt_as_img((image_h, image_w), x, size=image_h // 20)
else:
raise NotImplementedError()
elif isinstance(x, (List, ListConfig)):
if isinstance(x[0], str):
# strings
xc = log_txt_as_img((image_h, image_w), x, size=image_h // 20)
else:
raise NotImplementedError()
else:
raise NotImplementedError()
log[embedder.input_key] = xc
return log
@torch.no_grad()
def log_images(
self,
batch: Dict,
N: int = 8,
sample: bool = True,
ucg_keys: Union[List[str], None] = None,
**kwargs,
) -> Dict:
conditioner_input_keys = [e.input_key for e in self.conditioner.embedders]
if ucg_keys:
assert all(map(lambda x: x in conditioner_input_keys, ucg_keys)), (
"Each defined ucg key for sampling must be in the provided conditioner input keys, "
f"but we have {ucg_keys} vs. {conditioner_input_keys}"
)
else:
ucg_keys = conditioner_input_keys
log = dict()
x = self.get_input(batch)
c, uc = self.conditioner.get_unconditional_conditioning(
batch,
force_uc_zero_embeddings=ucg_keys
if len(self.conditioner.embedders) > 0
else [],
)
sampling_kwargs = {}
N = min(x.shape[0], N)
x = x.to(self.device)[:N]
log["inputs"] = x
z = self.encode_first_stage(x)
log["reconstructions"] = self.decode_first_stage(z)
log.update(self.log_conditionings(batch, N))
for k in c:
if isinstance(c[k], torch.Tensor):
c[k], uc[k] = map(lambda y: y[k][:N].to(self.device), (c, uc))
if sample:
with self.ema_scope("Plotting"):
samples = self.sample(
c, shape=z.shape[1:], uc=uc, batch_size=N, **sampling_kwargs
)
samples = self.decode_first_stage(samples)
log["samples"] = samples
return log
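`configure_optimizers` above trains only parameters whose name contains one of the configured `opt_keys` substrings and freezes the rest. A stdlib sketch of just that name-filtering logic (the parameter names below are invented for illustration):

```python
def select_trainable(named_params, opt_keys):
    """Split parameter names into trainable (name contains an opt key) and frozen."""
    trainable, frozen = [], []
    for name, _param in named_params:
        if any(key in name for key in opt_keys):
            trainable.append(name)
        else:
            frozen.append(name)
    return trainable, frozen

names = [
    ("model.diffusion_model.attn1.to_q.weight", None),
    ("model.diffusion_model.attn1.to_k.weight", None),
    ("model.diffusion_model.conv_in.weight", None),
]
trainable, frozen = select_trainable(names, opt_keys=["attn"])
```

Substring matching keeps the config terse (one key like `"attn"` selects every attention projection) at the cost of possible over-matching, so keys should be chosen to be unambiguous prefixes or module names.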
================================================
FILE: sgm/modules/__init__.py
================================================
from .encoders.modules import GeneralConditioner
UNCONDITIONAL_CONFIG = {
"target": "sgm.modules.GeneralConditioner",
"params": {"emb_models": []},
}
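`UNCONDITIONAL_CONFIG` follows the repo-wide `{"target": "dotted.path", "params": {...}}` convention that `instantiate_from_config` consumes. A minimal stdlib re-implementation of that pattern, not the repo's own helper, just the idea; the `datetime.timedelta` target is a stand-in since any importable callable works the same way:

```python
import importlib

def get_obj_from_str(string: str):
    """Resolve a dotted path like 'datetime.timedelta' to the actual object."""
    module, name = string.rsplit(".", 1)
    return getattr(importlib.import_module(module), name)

def instantiate_from_config(config: dict):
    """Construct the 'target' callable with the optional 'params' kwargs."""
    return get_obj_from_str(config["target"])(**config.get("params", {}))

obj = instantiate_from_config({"target": "datetime.timedelta", "params": {"days": 2}})
```

This indirection lets YAML configs swap entire components (encoders, losses, samplers) without touching the training code.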
================================================
FILE: sgm/modules/attention.py
================================================
import math
from inspect import isfunction
from typing import Any, Optional
import torch
import torch.nn.functional as F
from einops import rearrange, repeat
from torch import nn, einsum
try:
import xformers
import xformers.ops
XFORMERS_IS_AVAILABLE = True
except ImportError:
XFORMERS_IS_AVAILABLE = False
print("No module 'xformers'. Proceeding without it.")
def exists(val):
return val is not None
def uniq(arr):
return {el: True for el in arr}.keys()
def default(val, d):
if exists(val):
return val
return d() if isfunction(d) else d
def max_neg_value(t):
return -torch.finfo(t.dtype).max
def init_(tensor):
dim = tensor.shape[-1]
std = 1 / math.sqrt(dim)
tensor.uniform_(-std, std)
return tensor
# feedforward
class GEGLU(nn.Module):
def __init__(self, dim_in, dim_out):
super().__init__()
self.proj = nn.Linear(dim_in, dim_out * 2)
def forward(self, x):
x, gate = self.proj(x).chunk(2, dim=-1)
return x * F.gelu(gate)
class FeedForward(nn.Module):
def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.0):
super().__init__()
inner_dim = int(dim * mult)
dim_out = default(dim_out, dim)
project_in = (
nn.Sequential(nn.Linear(dim, inner_dim), nn.GELU())
if not glu
else GEGLU(dim, inner_dim)
)
self.net = nn.Sequential(
project_in, nn.Dropout(dropout), nn.Linear(inner_dim, dim_out)
)
def forward(self, x):
return self.net(x)
def zero_module(module):
"""
Zero out the parameters of a module and return it.
"""
for p in module.parameters():
p.detach().zero_()
return module
def Normalize(in_channels):
return torch.nn.GroupNorm(
num_groups=32, num_channels=in_channels, eps=1e-6, affine=True
)
class LinearAttention(nn.Module):
def __init__(self, dim, heads=4, dim_head=32):
super().__init__()
self.heads = heads
hidden_dim = dim_head * heads
self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias=False)
self.to_out = nn.Conv2d(hidden_dim, dim, 1)
def forward(self, x):
b, c, h, w = x.shape
qkv = self.to_qkv(x)
q, k, v = rearrange(
qkv, "b (qkv heads c) h w -> qkv b heads c (h w)", heads=self.heads, qkv=3
)
k = k.softmax(dim=-1)
context = torch.einsum("bhdn,bhen->bhde", k, v)
out = torch.einsum("bhde,bhdn->bhen", context, q)
out = rearrange(
out, "b heads c (h w) -> b (heads c) h w", heads=self.heads, h=h, w=w
)
return self.to_out(out)
class CrossAttention(nn.Module):
def __init__(
self,
query_dim,
context_dim=None,
heads=8,
dim_head=64,
dropout=0.0
):
super().__init__()
inner_dim = dim_head * heads
context_dim = default(context_dim, query_dim)
self.scale = dim_head**-0.5
self.heads = heads
self.to_q = nn.Linear(query_dim, inner_dim, bias=False)
self.to_k = nn.Linear(context_dim, inner_dim, bias=False)
self.to_v = nn.Linear(context_dim, inner_dim, bias=False)
self.to_out = zero_module(
nn.Sequential(
nn.Linear(inner_dim, query_dim),
nn.Dropout(dropout)
)
)
self.attn_map_cache = None
def forward(
self,
x,
context=None
):
h = self.heads
q = self.to_q(x)
context = default(context, x)
k = self.to_k(context)
v = self.to_v(context)
q, k, v = map(lambda t: rearrange(t, "b n (h d) -> (b h) n d", h=h), (q, k, v))
## old
sim = einsum('b i d, b j d -> b i j', q, k) * self.scale
del q, k
# attention, what we cannot get enough of
if sim.shape[-1] > 1:
sim = sim.softmax(dim=-1) # softmax on token dim
else:
sim = sim.sigmoid() # sigmoid on pixel dim
# save attn_map
if self.attn_map_cache is not None:
bh, n, l = sim.shape
size = int(n**0.5)
self.attn_map_cache["size"] = size
self.attn_map_cache["attn_map"] = sim
out = einsum('b i j, b j d -> b i d', sim, v)
out = rearrange(out, "(b h) n d -> b n (h d)", h=h)
return self.to_out(out)
class MemoryEfficientCrossAttention(nn.Module):
# https://github.com/MatthieuTPHR/diffusers/blob/d80b531ff8060ec1ea982b65a1b8df70f73aa67c/src/diffusers/models/attention.py#L223
def __init__(
self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0.0, **kwargs
):
super().__init__()
# print(
# f"Setting up {self.__class__.__name__}. Query dim is {query_dim}, context_dim is {context_dim} and using "
# f"{heads} heads with a dimension of {dim_head}."
# )
inner_dim = dim_head * heads
context_dim = default(context_dim, query_dim)
self.heads = heads
self.dim_head = dim_head
self.to_q = nn.Linear(query_dim, inner_dim, bias=False)
self.to_k = nn.Linear(context_dim, inner_dim, bias=False)
self.to_v = nn.Linear(context_dim, inner_dim, bias=False)
self.to_out = nn.Sequential(
nn.Linear(inner_dim, query_dim), nn.Dropout(dropout)
)
self.attention_op: Optional[Any] = None
def forward(
self,
x,
context=None,
mask=None,
additional_tokens=None,
n_times_crossframe_attn_in_self=0,
):
if additional_tokens is not None:
# get the number of masked tokens at the beginning of the output sequence
n_tokens_to_mask = additional_tokens.shape[1]
# add additional token
x = torch.cat([additional_tokens, x], dim=1)
q = self.to_q(x)
context = default(context, x)
k = self.to_k(context)
v = self.to_v(context)
if n_times_crossframe_attn_in_self:
# reprogramming cross-frame attention as in https://arxiv.org/abs/2303.13439
assert x.shape[0] % n_times_crossframe_attn_in_self == 0
# n_cp = x.shape[0]//n_times_crossframe_attn_in_self
k = repeat(
k[::n_times_crossframe_attn_in_self],
"b ... -> (b n) ...",
n=n_times_crossframe_attn_in_self,
)
v = repeat(
v[::n_times_crossframe_attn_in_self],
"b ... -> (b n) ...",
n=n_times_crossframe_attn_in_self,
)
b, _, _ = q.shape
q, k, v = map(
lambda t: t.unsqueeze(3)
.reshape(b, t.shape[1], self.heads, self.dim_head)
.permute(0, 2, 1, 3)
.reshape(b * self.heads, t.shape[1], self.dim_head)
.contiguous(),
(q, k, v),
)
# actually compute the attention, what we cannot get enough of
out = xformers.ops.memory_efficient_attention(
q, k, v, attn_bias=None, op=self.attention_op
)
# TODO: Use this directly in the attention operation, as a bias
if exists(mask):
raise NotImplementedError
out = (
out.unsqueeze(0)
.reshape(b, self.heads, out.shape[1], self.dim_head)
.permute(0, 2, 1, 3)
.reshape(b, out.shape[1], self.heads * self.dim_head)
)
if additional_tokens is not None:
# remove additional token
out = out[:, n_tokens_to_mask:]
return self.to_out(out)
class BasicTransformerBlock(nn.Module):
def __init__(
self,
dim,
n_heads,
d_head,
dropout=0.0,
t_context_dim=None,
v_context_dim=None,
gated_ff=True
):
super().__init__()
# self-attention
self.attn1 = MemoryEfficientCrossAttention(
query_dim=dim,
heads=n_heads,
dim_head=d_head,
dropout=dropout,
context_dim=None
)
# textual cross-attention
if t_context_dim is not None and t_context_dim > 0:
self.t_attn = CrossAttention(
query_dim=dim,
context_dim=t_context_dim,
heads=n_heads,
dim_head=d_head,
dropout=dropout
)
self.t_norm = nn.LayerNorm(dim)
# visual cross-attention
if v_context_dim is not None and v_context_dim > 0:
self.v_attn = CrossAttention(
query_dim=dim,
context_dim=v_context_dim,
heads=n_heads,
dim_head=d_head,
dropout=dropout
)
self.v_norm = nn.LayerNorm(dim)
self.norm1 = nn.LayerNorm(dim)
self.norm3 = nn.LayerNorm(dim)
self.ff = FeedForward(dim, dropout=dropout, glu=gated_ff)
def forward(self, x, t_context=None, v_context=None):
x = (
self.attn1(
self.norm1(x),
context=None
)
+ x
)
if hasattr(self, "t_attn"):
x = (
self.t_attn(
self.t_norm(x),
context=t_context
)
+ x
)
if hasattr(self, "v_attn"):
x = (
self.v_attn(
self.v_norm(x),
context=v_context
)
+ x
)
x = self.ff(self.norm3(x)) + x
return x
class SpatialTransformer(nn.Module):
"""
Transformer block for image-like data.
First, project the input (aka embedding)
and reshape to b, t, d.
Then apply standard transformer action.
Finally, reshape to image
NEW: use_linear for more efficiency instead of the 1x1 convs
"""
def __init__(
self,
in_channels,
n_heads,
d_head,
depth=1,
dropout=0.0,
t_context_dim=None,
v_context_dim=None,
use_linear=False
):
super().__init__()
self.in_channels = in_channels
inner_dim = n_heads * d_head
self.norm = Normalize(in_channels)
if not use_linear:
self.proj_in = nn.Conv2d(
in_channels, inner_dim, kernel_size=1, stride=1, padding=0
)
else:
self.proj_in = nn.Linear(in_channels, inner_dim)
self.transformer_blocks = nn.ModuleList(
[
BasicTransformerBlock(
inner_dim,
n_heads,
d_head,
dropout=dropout,
t_context_dim=t_context_dim,
v_context_dim=v_context_dim
)
for d in range(depth)
]
)
if not use_linear:
self.proj_out = zero_module(
nn.Conv2d(inner_dim, in_channels, kernel_size=1, stride=1, padding=0)
)
else:
self.proj_out = zero_module(nn.Linear(inner_dim, in_channels))
self.use_linear = use_linear
def forward(self, x, t_context=None, v_context=None):
b, c, h, w = x.shape
x_in = x
x = self.norm(x)
if not self.use_linear:
x = self.proj_in(x)
x = rearrange(x, "b c h w -> b (h w) c").contiguous()
if self.use_linear:
x = self.proj_in(x)
for i, block in enumerate(self.transformer_blocks):
x = block(x, t_context=t_context, v_context=v_context)
if self.use_linear:
x = self.proj_out(x)
x = rearrange(x, "b (h w) c -> b c h w", h=h, w=w).contiguous()
if not self.use_linear:
x = self.proj_out(x)
return x + x_in
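`CrossAttention.forward` above computes `sim = q @ kᵀ * scale`, a softmax over the key/token axis, then a weighted sum of the values. The same arithmetic for a single head on plain Python lists, with no learned projections (everything here is a simplified stand-in for the tensor version):

```python
import math

def attention(q, k, v, scale):
    """Single-head attention on nested lists: scores = scale * q·kᵀ,
    softmax over the key axis, then a weighted sum of the value rows."""
    out = []
    for qi in q:
        scores = [scale * sum(a * b for a, b in zip(qi, kj)) for kj in k]
        m = max(scores)                      # subtract max for numerical stability
        exps = [math.exp(s - m) for s in scores]
        total = sum(exps)
        weights = [e / total for e in exps]  # softmax over keys
        out.append(
            [sum(w * vj[d] for w, vj in zip(weights, v)) for d in range(len(v[0]))]
        )
    return out

q = [[1.0, 0.0]]
k = [[1.0, 0.0], [0.0, 1.0]]
v = [[1.0], [0.0]]
out = attention(q, k, v, scale=1.0)
```

The query aligned with the first key receives most of the softmax mass, so the output leans toward the first value row; the `dim_head**-0.5` scale in the module plays the role of `scale` here.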
================================================
FILE: sgm/modules/autoencoding/__init__.py
================================================
================================================
FILE: sgm/modules/autoencoding/losses/__init__.py
================================================
from typing import Any, Union
import torch
import torch.nn as nn
from einops import rearrange
from taming.modules.discriminator.model import NLayerDiscriminator, weights_init
from taming.modules.losses.lpips import LPIPS
from taming.modules.losses.vqperceptual import hinge_d_loss, vanilla_d_loss
from ....util import default, instantiate_from_config
def adopt_weight(weight, global_step, threshold=0, value=0.0):
if global_step < threshold:
weight = value
return weight
class LatentLPIPS(nn.Module):
def __init__(
self,
decoder_config,
perceptual_weight=1.0,
latent_weight=1.0,
scale_input_to_tgt_size=False,
scale_tgt_to_input_size=False,
perceptual_weight_on_inputs=0.0,
):
super().__init__()
self.scale_input_to_tgt_size = scale_input_to_tgt_size
self.scale_tgt_to_input_size = scale_tgt_to_input_size
self.init_decoder(decoder_config)
self.perceptual_loss = LPIPS().eval()
self.perceptual_weight = perceptual_weight
self.latent_weight = latent_weight
self.perceptual_weight_on_inputs = perceptual_weight_on_inputs
def init_decoder(self, config):
self.decoder = instantiate_from_config(config)
if hasattr(self.decoder, "encoder"):
del self.decoder.encoder
def forward(self, latent_inputs, latent_predictions, image_inputs, split="train"):
log = dict()
loss = (latent_inputs - latent_predictions) ** 2
log[f"{split}/latent_l2_loss"] = loss.mean().detach()
image_reconstructions = None
if self.perceptual_weight > 0.0:
image_reconstructions = self.decoder.decode(latent_predictions)
image_targets = self.decoder.decode(latent_inputs)
perceptual_loss = self.perceptual_loss(
image_targets.contiguous(), image_reconstructions.contiguous()
)
loss = (
self.latent_weight * loss.mean()
+ self.perceptual_weight * perceptual_loss.mean()
)
log[f"{split}/perceptual_loss"] = perceptual_loss.mean().detach()
if self.perceptual_weight_on_inputs > 0.0:
image_reconstructions = default(
image_reconstructions, self.decoder.decode(latent_predictions)
)
if self.scale_input_to_tgt_size:
image_inputs = torch.nn.functional.interpolate(
image_inputs,
image_reconstructions.shape[2:],
mode="bicubic",
antialias=True,
)
elif self.scale_tgt_to_input_size:
image_reconstructions = torch.nn.functional.interpolate(
image_reconstructions,
image_inputs.shape[2:],
mode="bicubic",
antialias=True,
)
perceptual_loss2 = self.perceptual_loss(
image_inputs.contiguous(), image_reconstructions.contiguous()
)
loss = loss + self.perceptual_weight_on_inputs * perceptual_loss2.mean()
log[f"{split}/perceptual_loss_on_inputs"] = perceptual_loss2.mean().detach()
return loss, log
class GeneralLPIPSWithDiscriminator(nn.Module):
def __init__(
self,
disc_start: int,
logvar_init: float = 0.0,
pixelloss_weight=1.0,
disc_num_layers: int = 3,
disc_in_channels: int = 3,
disc_factor: float = 1.0,
disc_weight: float = 1.0,
perceptual_weight: float = 1.0,
disc_loss: str = "hinge",
scale_input_to_tgt_size: bool = False,
dims: int = 2,
learn_logvar: bool = False,
regularization_weights: Union[None, dict] = None,
):
super().__init__()
self.dims = dims
if self.dims > 2:
print(
f"running with dims={dims}. This means that for perceptual loss calculation, "
f"the LPIPS loss will be applied to each frame independently. "
)
self.scale_input_to_tgt_size = scale_input_to_tgt_size
assert disc_loss in ["hinge", "vanilla"]
self.pixel_weight = pixelloss_weight
self.perceptual_loss = LPIPS().eval()
self.perceptual_weight = perceptual_weight
# output log variance
self.logvar = nn.Parameter(torch.ones(size=()) * logvar_init)
self.learn_logvar = learn_logvar
self.discriminator = NLayerDiscriminator(
input_nc=disc_in_channels, n_layers=disc_num_layers, use_actnorm=False
).apply(weights_init)
self.discriminator_iter_start = disc_start
self.disc_loss = hinge_d_loss if disc_loss == "hinge" else vanilla_d_loss
self.disc_factor = disc_factor
self.discriminator_weight = disc_weight
self.regularization_weights = default(regularization_weights, {})
def get_trainable_parameters(self) -> Any:
return self.discriminator.parameters()
def get_trainable_autoencoder_parameters(self) -> Any:
if self.learn_logvar:
yield self.logvar
yield from ()
def calculate_adaptive_weight(self, nll_loss, g_loss, last_layer=None):
if last_layer is not None:
nll_grads = torch.autograd.grad(nll_loss, last_layer, retain_graph=True)[0]
g_grads = torch.autograd.grad(g_loss, last_layer, retain_graph=True)[0]
else:
nll_grads = torch.autograd.grad(
nll_loss, self.last_layer[0], retain_graph=True
)[0]
g_grads = torch.autograd.grad(
g_loss, self.last_layer[0], retain_graph=True
)[0]
d_weight = torch.norm(nll_grads) / (torch.norm(g_grads) + 1e-4)
d_weight = torch.clamp(d_weight, 0.0, 1e4).detach()
d_weight = d_weight * self.discriminator_weight
return d_weight
def forward(
self,
regularization_log,
inputs,
reconstructions,
optimizer_idx,
global_step,
last_layer=None,
split="train",
weights=None,
):
if self.scale_input_to_tgt_size:
inputs = torch.nn.functional.interpolate(
inputs, reconstructions.shape[2:], mode="bicubic", antialias=True
)
if self.dims > 2:
inputs, reconstructions = map(
lambda x: rearrange(x, "b c t h w -> (b t) c h w"),
(inputs, reconstructions),
)
rec_loss = torch.abs(inputs.contiguous() - reconstructions.contiguous())
if self.perceptual_weight > 0:
p_loss = self.perceptual_loss(
inputs.contiguous(), reconstructions.contiguous()
)
rec_loss = rec_loss + self.perceptual_weight * p_loss
nll_loss = rec_loss / torch.exp(self.logvar) + self.logvar
weighted_nll_loss = nll_loss
if weights is not None:
weighted_nll_loss = weights * nll_loss
weighted_nll_loss = torch.sum(weighted_nll_loss) / weighted_nll_loss.shape[0]
nll_loss = torch.sum(nll_loss) / nll_loss.shape[0]
# now the GAN part
if optimizer_idx == 0:
# generator update
logits_fake = self.discriminator(reconstructions.contiguous())
g_loss = -torch.mean(logits_fake)
if self.disc_factor > 0.0:
try:
d_weight = self.calculate_adaptive_weight(
nll_loss, g_loss, last_layer=last_layer
)
except RuntimeError:
assert not self.training
d_weight = torch.tensor(0.0)
else:
d_weight = torch.tensor(0.0)
disc_factor = adopt_weight(
self.disc_factor, global_step, threshold=self.discriminator_iter_start
)
loss = weighted_nll_loss + d_weight * disc_factor * g_loss
log = dict()
for k in regularization_log:
if k in self.regularization_weights:
loss = loss + self.regularization_weights[k] * regularization_log[k]
log[f"{split}/{k}"] = regularization_log[k].detach().mean()
log.update(
{
"{}/total_loss".format(split): loss.clone().detach().mean(),
"{}/logvar".format(split): self.logvar.detach(),
"{}/nll_loss".format(split): nll_loss.detach().mean(),
"{}/rec_loss".format(split): rec_loss.detach().mean(),
"{}/d_weight".format(split): d_weight.detach(),
"{}/disc_factor".format(split): torch.tensor(disc_factor),
"{}/g_loss".format(split): g_loss.detach().mean(),
}
)
return loss, log
if optimizer_idx == 1:
# second pass for discriminator update
logits_real = self.discriminator(inputs.contiguous().detach())
logits_fake = self.discriminator(reconstructions.contiguous().detach())
disc_factor = adopt_weight(
self.disc_factor, global_step, threshold=self.discriminator_iter_start
)
d_loss = disc_factor * self.disc_loss(logits_real, logits_fake)
log = {
"{}/disc_loss".format(split): d_loss.clone().detach().mean(),
"{}/logits_real".format(split): logits_real.detach().mean(),
"{}/logits_fake".format(split): logits_fake.detach().mean(),
}
return d_loss, log
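`calculate_adaptive_weight` above balances the GAN term against the reconstruction term by the ratio of their gradient norms at the decoder's last layer, clamped to [0, 1e4] and scaled by `disc_weight`. The arithmetic in isolation, with flat lists standing in for gradient tensors (`adaptive_weight` is an illustrative re-derivation, not the module's method):

```python
import math

def adaptive_weight(nll_grads, g_grads, disc_weight=1.0, eps=1e-4, clamp_max=1e4):
    """d_weight = ||grad_nll|| / (||grad_g|| + eps), clamped, scaled by disc_weight."""
    norm = lambda g: math.sqrt(sum(x * x for x in g))
    w = norm(nll_grads) / (norm(g_grads) + eps)
    return min(max(w, 0.0), clamp_max) * disc_weight

# Reconstruction gradients much larger than GAN gradients -> larger GAN weight,
# keeping the two loss terms at comparable magnitude at the last layer.
w = adaptive_weight([3.0, 4.0], [0.0, 1.0])
```

The `eps` in the denominator guards against a vanishing generator gradient, and the clamp caps the weight when the GAN gradient is nearly zero.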
================================================
FILE: sgm/modules/autoencoding/regularizers/__init__.py
================================================
from abc import abstractmethod
from typing import Any, Tuple
import torch
import torch.nn as nn
import torch.nn.functional as F
from ....modules.distributions.distributions import DiagonalGaussianDistribution
class AbstractRegularizer(nn.Module):
def __init__(self):
super().__init__()
def forward(self, z: torch.Tensor) -> Tuple[torch.Tensor, dict]:
raise NotImplementedError()
@abstractmethod
def get_trainable_parameters(self) -> Any:
raise NotImplementedError()
class DiagonalGaussianRegularizer(AbstractRegularizer):
def __init__(self, sample: bool = True):
super().__init__()
self.sample = sample
def get_trainable_parameters(self) -> Any:
yield from ()
def forward(self, z: torch.Tensor) -> Tuple[torch.Tensor, dict]:
log = dict()
posterior = DiagonalGaussianDistribution(z)
if self.sample:
z = posterior.sample()
else:
z = posterior.mode()
kl_loss = posterior.kl()
kl_loss = torch.sum(kl_loss) / kl_loss.shape[0]
log["kl_loss"] = kl_loss
return z, log
def measure_perplexity(predicted_indices, num_centroids):
# src: https://github.com/karpathy/deep-vector-quantization/blob/main/model.py
# eval cluster perplexity. when perplexity == num_embeddings then all clusters are used exactly equally
encodings = (
F.one_hot(predicted_indices, num_centroids).float().reshape(-1, num_centroids)
)
avg_probs = encodings.mean(0)
perplexity = (-(avg_probs * torch.log(avg_probs + 1e-10)).sum()).exp()
cluster_use = torch.sum(avg_probs > 0)
return perplexity, cluster_use
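As a sanity check of the perplexity metric above, the following self-contained sketch (a hypothetical toy example, reproducing the same computation) shows that perfectly uniform cluster usage yields a perplexity equal to the number of centroids:

```python
import torch
import torch.nn.functional as F

def perplexity(predicted_indices: torch.Tensor, num_centroids: int):
    # Same computation as measure_perplexity above.
    encodings = (
        F.one_hot(predicted_indices, num_centroids).float().reshape(-1, num_centroids)
    )
    avg_probs = encodings.mean(0)
    perp = (-(avg_probs * torch.log(avg_probs + 1e-10)).sum()).exp()
    cluster_use = torch.sum(avg_probs > 0)
    return perp, cluster_use

# Uniform usage of 4 centroids -> perplexity ~= 4, all 4 clusters used.
perp, used = perplexity(torch.tensor([0, 1, 2, 3]), 4)
assert abs(perp.item() - 4.0) < 1e-3
assert used.item() == 4
```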
================================================
FILE: sgm/modules/diffusionmodules/__init__.py
================================================
from .denoiser import Denoiser
from .discretizer import Discretization
from .loss import StandardDiffusionLoss
from .model import Model, Encoder, Decoder
from .openaimodel import UnifiedUNetModel
from .sampling import BaseDiffusionSampler
from .wrappers import OpenAIWrapper
================================================
FILE: sgm/modules/diffusionmodules/denoiser.py
================================================
import torch.nn as nn
from ...util import append_dims, instantiate_from_config
class Denoiser(nn.Module):
def __init__(self, weighting_config, scaling_config):
super().__init__()
self.weighting = instantiate_from_config(weighting_config)
self.scaling = instantiate_from_config(scaling_config)
def possibly_quantize_sigma(self, sigma):
return sigma
def possibly_quantize_c_noise(self, c_noise):
return c_noise
def w(self, sigma):
return self.weighting(sigma)
def __call__(self, network, input, sigma, cond):
sigma = self.possibly_quantize_sigma(sigma)
sigma_shape = sigma.shape
sigma = append_dims(sigma, input.ndim)
c_skip, c_out, c_in, c_noise = self.scaling(sigma)
c_noise = self.possibly_quantize_c_noise(c_noise.reshape(sigma_shape))
return network(input * c_in, c_noise, cond) * c_out + input * c_skip
class DiscreteDenoiser(Denoiser):
def __init__(
self,
weighting_config,
scaling_config,
num_idx,
discretization_config,
do_append_zero=False,
quantize_c_noise=True,
flip=True,
):
super().__init__(weighting_config, scaling_config)
sigmas = instantiate_from_config(discretization_config)(
num_idx, do_append_zero=do_append_zero, flip=flip
)
self.register_buffer("sigmas", sigmas)
self.quantize_c_noise = quantize_c_noise
def sigma_to_idx(self, sigma):
dists = sigma - self.sigmas[:, None]
return dists.abs().argmin(dim=0).view(sigma.shape)
def idx_to_sigma(self, idx):
return self.sigmas[idx]
def possibly_quantize_sigma(self, sigma):
return self.idx_to_sigma(self.sigma_to_idx(sigma))
def possibly_quantize_c_noise(self, c_noise):
if self.quantize_c_noise:
return self.sigma_to_idx(c_noise)
else:
return c_noise
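The nearest-neighbor quantization that `DiscreteDenoiser.sigma_to_idx` performs can be illustrated in isolation. This is a minimal sketch using a hypothetical three-entry sigma table:

```python
import torch

sigmas = torch.tensor([0.1, 1.0, 10.0])  # hypothetical discrete sigma table

def sigma_to_idx(sigma: torch.Tensor) -> torch.Tensor:
    # Distance of every table entry to every query sigma,
    # then argmin over the table axis, as in DiscreteDenoiser.
    dists = sigma - sigmas[:, None]
    return dists.abs().argmin(dim=0).view(sigma.shape)

idx = sigma_to_idx(torch.tensor([0.2, 9.0]))
assert idx.tolist() == [0, 2]  # 0.2 snaps to 0.1, 9.0 snaps to 10.0
```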
================================================
FILE: sgm/modules/diffusionmodules/denoiser_scaling.py
================================================
import torch
class EDMScaling:
def __init__(self, sigma_data=0.5):
self.sigma_data = sigma_data
def __call__(self, sigma):
c_skip = self.sigma_data**2 / (sigma**2 + self.sigma_data**2)
c_out = sigma * self.sigma_data / (sigma**2 + self.sigma_data**2) ** 0.5
c_in = 1 / (sigma**2 + self.sigma_data**2) ** 0.5
c_noise = 0.25 * sigma.log()
return c_skip, c_out, c_in, c_noise
class EpsScaling:
def __call__(self, sigma):
c_skip = torch.ones_like(sigma, device=sigma.device)
c_out = -sigma
c_in = 1 / (sigma**2 + 1.0) ** 0.5
c_noise = sigma.clone()
return c_skip, c_out, c_in, c_noise
class VScaling:
def __call__(self, sigma):
c_skip = 1.0 / (sigma**2 + 1.0)
c_out = -sigma / (sigma**2 + 1.0) ** 0.5
c_in = 1.0 / (sigma**2 + 1.0) ** 0.5
c_noise = sigma.clone()
return c_skip, c_out, c_in, c_noise
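The v-prediction coefficients above satisfy two simple identities, `c_skip == c_in**2` and `c_out == -sigma * c_in`, which the following sketch verifies numerically:

```python
import torch

sigma = torch.linspace(0.1, 10.0, 5)
# Coefficients as computed by VScaling.
c_skip = 1.0 / (sigma**2 + 1.0)
c_out = -sigma / (sigma**2 + 1.0) ** 0.5
c_in = 1.0 / (sigma**2 + 1.0) ** 0.5

assert torch.allclose(c_skip, c_in**2)
assert torch.allclose(c_out, -sigma * c_in)
```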
================================================
FILE: sgm/modules/diffusionmodules/denoiser_weighting.py
================================================
import torch
class UnitWeighting:
def __call__(self, sigma):
return torch.ones_like(sigma, device=sigma.device)
class EDMWeighting:
def __init__(self, sigma_data=0.5):
self.sigma_data = sigma_data
def __call__(self, sigma):
return (sigma**2 + self.sigma_data**2) / (sigma * self.sigma_data) ** 2
class VWeighting(EDMWeighting):
def __init__(self):
super().__init__(sigma_data=1.0)
class EpsWeighting:
def __call__(self, sigma):
return sigma**-2.0
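`EDMWeighting` is the reciprocal of the squared `c_out` from `EDMScaling`, i.e. `w(sigma) * c_out(sigma)**2 == 1`, so the weighted loss is unit-scaled in the network's output space. A quick numerical check:

```python
import torch

sigma_data = 0.5
sigma = torch.linspace(0.1, 10.0, 5)
c_out = sigma * sigma_data / (sigma**2 + sigma_data**2) ** 0.5   # as in EDMScaling
w = (sigma**2 + sigma_data**2) / (sigma * sigma_data) ** 2       # as in EDMWeighting
assert torch.allclose(w * c_out**2, torch.ones_like(sigma))
```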
================================================
FILE: sgm/modules/diffusionmodules/discretizer.py
================================================
import torch
import numpy as np
from functools import partial
from abc import abstractmethod
from ...util import append_zero
from ...modules.diffusionmodules.util import make_beta_schedule
def generate_roughly_equally_spaced_steps(
num_substeps: int, max_step: int
) -> np.ndarray:
return np.linspace(max_step - 1, 0, num_substeps, endpoint=False).astype(int)[::-1]
class Discretization:
def __call__(self, n, do_append_zero=True, device="cpu", flip=False):
sigmas = self.get_sigmas(n, device=device)
sigmas = append_zero(sigmas) if do_append_zero else sigmas
return sigmas if not flip else torch.flip(sigmas, (0,))
@abstractmethod
def get_sigmas(self, n, device):
pass
class EDMDiscretization(Discretization):
def __init__(self, sigma_min=0.02, sigma_max=80.0, rho=7.0):
self.sigma_min = sigma_min
self.sigma_max = sigma_max
self.rho = rho
def get_sigmas(self, n, device="cpu"):
ramp = torch.linspace(0, 1, n, device=device)
min_inv_rho = self.sigma_min ** (1 / self.rho)
max_inv_rho = self.sigma_max ** (1 / self.rho)
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** self.rho
return sigmas
class LegacyDDPMDiscretization(Discretization):
def __init__(
self,
linear_start=0.00085,
linear_end=0.0120,
num_timesteps=1000,
):
super().__init__()
self.num_timesteps = num_timesteps
betas = make_beta_schedule(
"linear", num_timesteps, linear_start=linear_start, linear_end=linear_end
)
alphas = 1.0 - betas
self.alphas_cumprod = np.cumprod(alphas, axis=0)
self.to_torch = partial(torch.tensor, dtype=torch.float32)
def get_sigmas(self, n, device="cpu"):
if n < self.num_timesteps:
timesteps = generate_roughly_equally_spaced_steps(n, self.num_timesteps)
alphas_cumprod = self.alphas_cumprod[timesteps]
elif n == self.num_timesteps:
alphas_cumprod = self.alphas_cumprod
else:
            raise ValueError(
                f"n ({n}) must not exceed num_timesteps ({self.num_timesteps})"
            )
to_torch = partial(torch.tensor, dtype=torch.float32, device=device)
sigmas = to_torch((1 - alphas_cumprod) / alphas_cumprod) ** 0.5
return torch.flip(sigmas, (0,))
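The EDM (Karras) rho-schedule computed by `EDMDiscretization.get_sigmas` interpolates linearly in `sigma**(1/rho)` space, producing a monotonically decreasing grid from `sigma_max` to `sigma_min`. A standalone sketch with the default parameters:

```python
import torch

sigma_min, sigma_max, rho, n = 0.02, 80.0, 7.0, 10
ramp = torch.linspace(0, 1, n)
min_inv_rho = sigma_min ** (1 / rho)
max_inv_rho = sigma_max ** (1 / rho)
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho

# Endpoints hit sigma_max / sigma_min; the sequence strictly decreases.
assert torch.isclose(sigmas[0], torch.tensor(sigma_max), rtol=1e-3)
assert torch.isclose(sigmas[-1], torch.tensor(sigma_min), rtol=1e-3)
assert (sigmas[:-1] > sigmas[1:]).all()
```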
================================================
FILE: sgm/modules/diffusionmodules/guiders.py
================================================
from functools import partial
import torch
from ...util import default, instantiate_from_config
class VanillaCFG:
"""
implements parallelized CFG
"""
def __init__(self, scale, dyn_thresh_config=None):
scale_schedule = lambda scale, sigma: scale # independent of step
self.scale_schedule = partial(scale_schedule, scale)
self.dyn_thresh = instantiate_from_config(
default(
dyn_thresh_config,
{
"target": "sgm.modules.diffusionmodules.sampling_utils.NoDynamicThresholding"
},
)
)
def __call__(self, x, sigma):
x_u, x_c = x.chunk(2)
scale_value = self.scale_schedule(sigma)
x_pred = self.dyn_thresh(x_u, x_c, scale_value)
return x_pred
def prepare_inputs(self, x, s, c, uc):
c_out = dict()
for k in c:
if k in ["vector", "t_crossattn", "v_crossattn", "concat"]:
c_out[k] = torch.cat((uc[k], c[k]), 0)
else:
assert c[k] == uc[k]
c_out[k] = c[k]
return torch.cat([x] * 2), torch.cat([s] * 2), c_out
class IdentityGuider:
def __call__(self, x, sigma):
return x
def prepare_inputs(self, x, s, c, uc):
c_out = dict()
for k in c:
c_out[k] = c[k]
return x, s, c_out
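`VanillaCFG` batches the unconditional and conditional branches in one forward pass, then chunks the result; the thresholding helper is assumed to apply the standard combination `x_u + scale * (x_c - x_u)`. A minimal sketch of the batched round trip (the denoiser call is a stand-in):

```python
import torch

def cfg_combine(x_u, x_c, scale):
    # Standard classifier-free guidance combination (assumed behavior
    # of NoDynamicThresholding).
    return x_u + scale * (x_c - x_u)

x = torch.randn(2, 4)              # latent batch
x_in = torch.cat([x] * 2)          # duplicated for the two branches
denoised = x_in * 1.0              # stand-in for a denoiser call
x_u, x_c = denoised.chunk(2)
out = cfg_combine(x_u, x_c, scale=5.0)

assert out.shape == x.shape
# With identical branches, guidance is a no-op regardless of scale.
assert torch.allclose(out, x_u)
```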
================================================
FILE: sgm/modules/diffusionmodules/loss.py
================================================
from typing import List, Optional, Union
import torch
import torch.nn as nn
import torch.nn.functional as F
from omegaconf import ListConfig
from torchvision.utils import save_image
from ...util import append_dims, instantiate_from_config
class StandardDiffusionLoss(nn.Module):
def __init__(
self,
sigma_sampler_config,
type="l2",
offset_noise_level=0.0,
batch2model_keys: Optional[Union[str, List[str], ListConfig]] = None,
):
super().__init__()
assert type in ["l2", "l1"]
self.sigma_sampler = instantiate_from_config(sigma_sampler_config)
self.type = type
self.offset_noise_level = offset_noise_level
if not batch2model_keys:
batch2model_keys = []
if isinstance(batch2model_keys, str):
batch2model_keys = [batch2model_keys]
self.batch2model_keys = set(batch2model_keys)
def __call__(self, network, denoiser, conditioner, input, batch, *args, **kwarg):
cond = conditioner(batch)
additional_model_inputs = {
key: batch[key] for key in self.batch2model_keys.intersection(batch)
}
sigmas = self.sigma_sampler(input.shape[0]).to(input.device)
noise = torch.randn_like(input)
if self.offset_noise_level > 0.0:
noise = noise + self.offset_noise_level * append_dims(
torch.randn(input.shape[0], device=input.device), input.ndim
)
noised_input = input + noise * append_dims(sigmas, input.ndim)
model_output = denoiser(
network, noised_input, sigmas, cond, **additional_model_inputs
)
w = append_dims(denoiser.w(sigmas), input.ndim)
loss = self.get_diff_loss(model_output, input, w)
loss = loss.mean()
loss_dict = {"loss": loss}
return loss, loss_dict
def get_diff_loss(self, model_output, target, w):
if self.type == "l2":
return torch.mean(
(w * (model_output - target) ** 2).reshape(target.shape[0], -1), 1
)
elif self.type == "l1":
return torch.mean(
(w * (model_output - target).abs()).reshape(target.shape[0], -1), 1
)
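The noising step in `__call__` scales a per-sample sigma up to the input's rank via `append_dims` before broadcasting. A small sketch mirroring the assumed behavior of `sgm.util.append_dims` (trailing singleton dimensions):

```python
import torch

def append_dims(x, target_dims):
    # Pad trailing singleton dims so x broadcasts against a target of
    # rank target_dims (mirrors sgm.util.append_dims, an assumption here).
    return x[(...,) + (None,) * (target_dims - x.ndim)]

x = torch.zeros(2, 3, 8, 8)            # clean input
noise = torch.ones_like(x)
sigmas = torch.tensor([0.5, 2.0])      # one sigma per batch element
noised = x + noise * append_dims(sigmas, x.ndim)

assert noised[0].allclose(torch.full_like(noised[0], 0.5))
assert noised[1].allclose(torch.full_like(noised[1], 2.0))
```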
class FullLoss(StandardDiffusionLoss):
def __init__(
self,
seq_len=12,
kernel_size=3,
gaussian_sigma=0.5,
min_attn_size=16,
lambda_local_loss=0.0,
lambda_ocr_loss=0.0,
lambda_style_loss=0.0,
ocr_enabled = False,
style_enabled = False,
predictor_config = None,
*args, **kwarg
):
super().__init__(*args, **kwarg)
self.gaussian_kernel_size = kernel_size
gaussian_kernel = self.get_gaussian_kernel(kernel_size=self.gaussian_kernel_size, sigma=gaussian_sigma, out_channels=seq_len)
self.register_buffer("g_kernel", gaussian_kernel.requires_grad_(False))
self.min_attn_size = min_attn_size
self.lambda_local_loss = lambda_local_loss
self.lambda_ocr_loss = lambda_ocr_loss
self.lambda_style_loss = lambda_style_loss
self.style_enabled = style_enabled
self.ocr_enabled = ocr_enabled
if ocr_enabled:
self.predictor = instantiate_from_config(predictor_config)
def get_gaussian_kernel(self, kernel_size=3, sigma=1, out_channels=3):
# Create a x, y coordinate grid of shape (kernel_size, kernel_size, 2)
x_coord = torch.arange(kernel_size)
x_grid = x_coord.repeat(kernel_size).view(kernel_size, kernel_size)
y_grid = x_grid.t()
xy_grid = torch.stack([x_grid, y_grid], dim=-1).float()
mean = (kernel_size - 1)/2.
variance = sigma**2.
# Calculate the 2-dimensional gaussian kernel which is
# the product of two gaussian distributions for two different
# variables (in this case called x and y)
gaussian_kernel = (1./(2.*torch.pi*variance)) *\
torch.exp(
-torch.sum((xy_grid - mean)**2., dim=-1) /\
(2*variance)
)
# Make sure sum of values in gaussian kernel equals 1.
gaussian_kernel = gaussian_kernel / torch.sum(gaussian_kernel)
# Reshape to 2d depthwise convolutional weight
gaussian_kernel = gaussian_kernel.view(1, 1, kernel_size, kernel_size)
gaussian_kernel = gaussian_kernel.tile(out_channels, 1, 1, 1)
return gaussian_kernel
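The depthwise blur kernel built above is normalized per channel, so the Gaussian smoothing applied to the attention maps preserves total attention mass. A condensed check of the same construction (defaults match `seq_len=12`, `kernel_size=3`, `gaussian_sigma=0.5`):

```python
import torch

def gaussian_kernel(kernel_size=3, sigma=0.5, out_channels=12):
    # Same construction as get_gaussian_kernel above, condensed.
    x = torch.arange(kernel_size)
    x_grid = x.repeat(kernel_size).view(kernel_size, kernel_size)
    xy_grid = torch.stack([x_grid, x_grid.t()], dim=-1).float()
    mean = (kernel_size - 1) / 2.0
    var = sigma**2
    k = torch.exp(-((xy_grid - mean) ** 2).sum(-1) / (2 * var)) / (2 * torch.pi * var)
    k = k / k.sum()  # normalize so each channel sums to 1
    return k.view(1, 1, kernel_size, kernel_size).tile(out_channels, 1, 1, 1)

k = gaussian_kernel()
assert k.shape == (12, 1, 3, 3)
assert torch.allclose(k.sum(dim=(1, 2, 3)), torch.ones(12))
```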
def __call__(self, network, denoiser, conditioner, input, batch, first_stage_model, scaler):
cond = conditioner(batch)
sigmas = self.sigma_sampler(input.shape[0]).to(input.device)
noise = torch.randn_like(input)
if self.offset_noise_level > 0.0:
noise = noise + self.offset_noise_level * append_dims(
torch.randn(input.shape[0], device=input.device), input.ndim
)
noised_input = input + noise * append_dims(sigmas, input.ndim)
model_output = denoiser(network, noised_input, sigmas, cond)
w = append_dims(denoiser.w(sigmas), input.ndim)
diff_loss = self.get_diff_loss(model_output, input, w)
local_loss = self.get_local_loss(network.diffusion_model.attn_map_cache, batch["seg"], batch["seg_mask"])
diff_loss = diff_loss.mean()
local_loss = local_loss.mean()
if self.ocr_enabled:
ocr_loss = self.get_ocr_loss(model_output, batch["r_bbox"], batch["label"], first_stage_model, scaler)
ocr_loss = ocr_loss.mean()
if self.style_enabled:
style_loss = self.get_style_local_loss(network.diffusion_model.attn_map_cache, batch["mask"])
style_loss = style_loss.mean()
loss = diff_loss + self.lambda_local_loss * local_loss
if self.ocr_enabled:
loss += self.lambda_ocr_loss * ocr_loss
if self.style_enabled:
loss += self.lambda_style_loss * style_loss
loss_dict = {
"loss/diff_loss": diff_loss,
"loss/local_loss": local_loss,
"loss/full_loss": loss
}
if self.ocr_enabled:
loss_dict["loss/ocr_loss"] = ocr_loss
if self.style_enabled:
loss_dict["loss/style_loss"] = style_loss
return loss, loss_dict
def get_ocr_loss(self, model_output, r_bbox, label, first_stage_model, scaler):
model_output = 1 / scaler * model_output
model_output_decoded = first_stage_model.decode(model_output)
model_output_crops = []
for i, bbox in enumerate(r_bbox):
m_top, m_bottom, m_left, m_right = bbox
model_output_crops.append(model_output_decoded[i, :, m_top:m_bottom, m_left:m_right])
loss = self.predictor.calc_loss(model_output_crops, label)
return loss
def get_min_local_loss(self, attn_map_cache, mask, seg_mask):
loss = 0
count = 0
for item in attn_map_cache:
name = item["name"]
if not name.endswith("t_attn"): continue
heads = item["heads"]
size = item["size"]
attn_map = item["attn_map"]
if size < self.min_attn_size: continue
seg_l = seg_mask.shape[1]
            bh, n, l = attn_map.shape # bh: batch size * heads / n: pixel length (h*w) / l: token length
attn_map = attn_map.reshape((-1, heads, n, l)) # b, h, n, l
assert seg_l <= l
attn_map = attn_map[..., :seg_l]
attn_map = attn_map.permute(0, 1, 3, 2) # b, h, l, n
attn_map = attn_map.mean(dim = 1) # b, l, n
attn_map = attn_map.reshape((-1, seg_l, size, size)) # b, l, s, s
attn_map = F.conv2d(attn_map, self.g_kernel, padding = self.gaussian_kernel_size//2, groups=seg_l) # gaussian blur on each channel
attn_map = attn_map.reshape((-1, seg_l, n)) # b, l, n
mask_map = F.interpolate(mask, (size, size))
mask_map = mask_map.tile((1, seg_l, 1, 1))
mask_map = mask_map.reshape((-1, seg_l, n)) # b, l, n
p_loss = (mask_map * attn_map).max(dim = -1)[0] # b, l
p_loss = p_loss + (1 - seg_mask) # b, l
p_loss = p_loss.min(dim = -1)[0] # b,
loss += -p_loss
count += 1
loss = loss / count
return loss
def get_local_loss(self, attn_map_cache, seg, seg_mask):
loss = 0
count = 0
for item in attn_map_cache:
name = item["name"]
if not name.endswith("t_attn"): continue
heads = item["heads"]
size = item["size"]
attn_map = item["attn_map"]
if size < self.min_attn_size: continue
seg_l = seg_mask.shape[1]
bh, n, l = attn_map.shape # bh: batch size * heads / n: pixel length(h*w) / l: token length
attn_map = attn_map.reshape((-1, heads, n, l)) # b, h, n, l
assert seg_l <= l
attn_map = attn_map[..., :seg_l]
attn_map = attn_map.permute(0, 1, 3, 2) # b, h, l, n
attn_map = attn_map.mean(dim = 1) # b, l, n
attn_map = attn_map.reshape((-1, seg_l, size, size)) # b, l, s, s
attn_map = F.conv2d(attn_map, self.g_kernel, padding = self.gaussian_kernel_size//2, groups=seg_l) # gaussian blur on each channel
attn_map = attn_map.reshape((-1, seg_l, n)) # b, l, n
seg_map = F.interpolate(seg, (size, size))
seg_map = seg_map.reshape((-1, seg_l, n)) # b, l, n
n_seg_map = 1 - seg_map
p_loss = (seg_map * attn_map).max(dim = -1)[0] # b, l
n_loss = (n_seg_map * attn_map).max(dim = -1)[0] # b, l
p_loss = p_loss * seg_mask # b, l
n_loss = n_loss * seg_mask # b, l
p_loss = p_loss.sum(dim = -1) / seg_mask.sum(dim = -1) # b,
n_loss = n_loss.sum(dim = -1) / seg_mask.sum(dim = -1) # b,
f_loss = n_loss - p_loss # b,
loss += f_loss
count += 1
loss = loss / count
return loss
================================================
FILE: sgm/modules/diffusionmodules/model.py
================================================
# pytorch_diffusion + derived encoder decoder
import math
from typing import Any, Callable, Optional
import numpy as np
import torch
import torch.nn as nn
from einops import rearrange
from packaging import version
try:
import xformers
import xformers.ops
XFORMERS_IS_AVAILABLE = True
except ImportError:
    XFORMERS_IS_AVAILABLE = False
    print("no module 'xformers'. Processing without...")
from ...modules.attention import LinearAttention, MemoryEfficientCrossAttention
def get_timestep_embedding(timesteps, embedding_dim):
"""
This matches the implementation in Denoising Diffusion Probabilistic Models:
From Fairseq.
Build sinusoidal embeddings.
This matches the implementation in tensor2tensor, but differs slightly
from the description in Section 3.5 of "Attention Is All You Need".
"""
assert len(timesteps.shape) == 1
half_dim = embedding_dim // 2
emb = math.log(10000) / (half_dim - 1)
emb = torch.exp(torch.arange(half_dim, dtype=torch.float32) * -emb)
emb = emb.to(device=timesteps.device)
emb = timesteps.float()[:, None] * emb[None, :]
emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
if embedding_dim % 2 == 1: # zero pad
emb = torch.nn.functional.pad(emb, (0, 1, 0, 0))
return emb
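The sinusoidal embedding above concatenates sines and cosines of geometrically spaced frequencies; at timestep 0 the sine half is all zeros and the cosine half all ones. A condensed sketch of the same construction:

```python
import math
import torch

def timestep_embedding(timesteps, dim):
    # Same sinusoidal construction as get_timestep_embedding above
    # (even dim, no padding branch).
    half = dim // 2
    freqs = torch.exp(
        torch.arange(half, dtype=torch.float32) * -(math.log(10000) / (half - 1))
    )
    args = timesteps.float()[:, None] * freqs[None, :]
    return torch.cat([torch.sin(args), torch.cos(args)], dim=1)

emb = timestep_embedding(torch.tensor([0, 10]), 8)
assert emb.shape == (2, 8)
# Timestep 0: all sine terms are 0, all cosine terms are 1.
assert torch.allclose(emb[0, :4], torch.zeros(4))
assert torch.allclose(emb[0, 4:], torch.ones(4))
```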
def nonlinearity(x):
# swish
return x * torch.sigmoid(x)
def Normalize(in_channels, num_groups=32):
return torch.nn.GroupNorm(
num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True
)
class Upsample(nn.Module):
def __init__(self, in_channels, with_conv):
super().__init__()
self.with_conv = with_conv
if self.with_conv:
self.conv = torch.nn.Conv2d(
in_channels, in_channels, kernel_size=3, stride=1, padding=1
)
def forward(self, x):
x = torch.nn.functional.interpolate(x, scale_factor=2.0, mode="nearest")
if self.with_conv:
x = self.conv(x)
return x
class Downsample(nn.Module):
def __init__(self, in_channels, with_conv):
super().__init__()
self.with_conv = with_conv
if self.with_conv:
# no asymmetric padding in torch conv, must do it ourselves
self.conv = torch.nn.Conv2d(
in_channels, in_channels, kernel_size=3, stride=2, padding=0
)
def forward(self, x):
if self.with_conv:
pad = (0, 1, 0, 1)
x = torch.nn.functional.pad(x, pad, mode="constant", value=0)
x = self.conv(x)
else:
x = torch.nn.functional.avg_pool2d(x, kernel_size=2, stride=2)
return x
class ResnetBlock(nn.Module):
def __init__(
self,
*,
in_channels,
out_channels=None,
conv_shortcut=False,
dropout,
temb_channels=512,
):
super().__init__()
self.in_channels = in_channels
out_channels = in_channels if out_channels is None else out_channels
self.out_channels = out_channels
self.use_conv_shortcut = conv_shortcut
self.norm1 = Normalize(in_channels)
self.conv1 = torch.nn.Conv2d(
in_channels, out_channels, kernel_size=3, stride=1, padding=1
)
if temb_channels > 0:
self.temb_proj = torch.nn.Linear(temb_channels, out_channels)
self.norm2 = Normalize(out_channels)
self.dropout = torch.nn.Dropout(dropout)
self.conv2 = torch.nn.Conv2d(
out_channels, out_channels, kernel_size=3, stride=1, padding=1
)
if self.in_channels != self.out_channels:
if self.use_conv_shortcut:
self.conv_shortcut = torch.nn.Conv2d(
in_channels, out_channels, kernel_size=3, stride=1, padding=1
)
else:
self.nin_shortcut = torch.nn.Conv2d(
in_channels, out_channels, kernel_size=1, stride=1, padding=0
)
def forward(self, x, temb):
h = x
h = self.norm1(h)
h = nonlinearity(h)
h = self.conv1(h)
if temb is not None:
h = h + self.temb_proj(nonlinearity(temb))[:, :, None, None]
h = self.norm2(h)
h = nonlinearity(h)
h = self.dropout(h)
h = self.conv2(h)
if self.in_channels != self.out_channels:
if self.use_conv_shortcut:
x = self.conv_shortcut(x)
else:
x = self.nin_shortcut(x)
return x + h
class LinAttnBlock(LinearAttention):
"""to match AttnBlock usage"""
def __init__(self, in_channels):
super().__init__(dim=in_channels, heads=1, dim_head=in_channels)
class AttnBlock(nn.Module):
def __init__(self, in_channels):
super().__init__()
self.in_channels = in_channels
self.norm = Normalize(in_channels)
self.q = torch.nn.Conv2d(
in_channels, in_channels, kernel_size=1, stride=1, padding=0
)
self.k = torch.nn.Conv2d(
in_channels, in_channels, kernel_size=1, stride=1, padding=0
)
self.v = torch.nn.Conv2d(
in_channels, in_channels, kernel_size=1, stride=1, padding=0
)
self.proj_out = torch.nn.Conv2d(
in_channels, in_channels, kernel_size=1, stride=1, padding=0
)
def attention(self, h_: torch.Tensor) -> torch.Tensor:
h_ = self.norm(h_)
q = self.q(h_)
k = self.k(h_)
v = self.v(h_)
b, c, h, w = q.shape
q, k, v = map(
lambda x: rearrange(x, "b c h w -> b 1 (h w) c").contiguous(), (q, k, v)
)
        # compute attention; SDPA's default scale is dim ** -0.5
        h_ = torch.nn.functional.scaled_dot_product_attention(q, k, v)
return rearrange(h_, "b 1 (h w) c -> b c h w", h=h, w=w, c=c, b=b)
def forward(self, x, **kwargs):
h_ = x
h_ = self.attention(h_)
h_ = self.proj_out(h_)
return x + h_
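`AttnBlock` relies on torch's fused kernel; on a small tensor the result can be checked against the explicit `softmax(QK^T / sqrt(d)) V` formulation (a sketch assuming torch >= 2.0):

```python
import torch
import torch.nn.functional as F

torch.manual_seed(0)
# Shapes as in AttnBlock.attention: batch, single head, tokens (h*w), channels.
q, k, v = (torch.randn(1, 1, 16, 8) for _ in range(3))

fused = F.scaled_dot_product_attention(q, k, v)  # default scale = dim ** -0.5
manual = torch.softmax(q @ k.transpose(-2, -1) * q.shape[-1] ** -0.5, dim=-1) @ v
assert torch.allclose(fused, manual, atol=1e-5)
```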
class MemoryEfficientAttnBlock(nn.Module):
"""
Uses xformers efficient implementation,
see https://github.com/MatthieuTPHR/diffusers/blob/d80b531ff8060ec1ea982b65a1b8df70f73aa67c/src/diffusers/models/attention.py#L223
Note: this is a single-head self-attention operation
"""
#
def __init__(self, in_channels):
super().__init__()
self.in_channels = in_channels
self.norm = Normalize(in_channels)
self.q = torch.nn.Conv2d(
in_channels, in_channels, kernel_size=1, stride=1, padding=0
)
self.k = torch.nn.Conv2d(
in_channels, in_channels, kernel_size=1, stride=1, padding=0
)
self.v = torch.nn.Conv2d(
in_channels, in_channels, kernel_size=1, stride=1, padding=0
)
self.proj_out = torch.nn.Conv2d(
in_channels, in_channels, kernel_size=1, stride=1, padding=0
)
self.attention_op: Optional[Any] = None
def attention(self, h_: torch.Tensor) -> torch.Tensor:
h_ = self.norm(h_)
q = self.q(h_)
k = self.k(h_)
v = self.v(h_)
# compute attention
B, C, H, W = q.shape
q, k, v = map(lambda x: rearrange(x, "b c h w -> b (h w) c"), (q, k, v))
q, k, v = map(
lambda t: t.unsqueeze(3)
.reshape(B, t.shape[1], 1, C)
.permute(0, 2, 1, 3)
.reshape(B * 1, t.shape[1], C)
.contiguous(),
(q, k, v),
)
out = xformers.ops.memory_efficient_attention(
q, k, v, attn_bias=None, op=self.attention_op
)
out = (
out.unsqueeze(0)
.reshape(B, 1, out.shape[1], C)
.permute(0, 2, 1, 3)
.reshape(B, out.shape[1], C)
)
return rearrange(out, "b (h w) c -> b c h w", b=B, h=H, w=W, c=C)
def forward(self, x, **kwargs):
h_ = x
h_ = self.attention(h_)
h_ = self.proj_out(h_)
return x + h_
class MemoryEfficientCrossAttentionWrapper(MemoryEfficientCrossAttention):
    def forward(self, x, context=None, mask=None, **unused_kwargs):
        b, c, h, w = x.shape
        # Keep the original (b, c, h, w) input for the residual connection;
        # the previous version overwrote x with the flattened tensor, making
        # the final addition shape-mismatched.
        x_flat = rearrange(x, "b c h w -> b (h w) c")
        out = super().forward(x_flat, context=context, mask=mask)
        out = rearrange(out, "b (h w) c -> b c h w", h=h, w=w, c=c)
        return x + out
def make_attn(in_channels, attn_type="vanilla", attn_kwargs=None):
assert attn_type in [
"vanilla",
"vanilla-xformers",
"memory-efficient-cross-attn",
"linear",
"none",
], f"attn_type {attn_type} unknown"
if (
version.parse(torch.__version__) < version.parse("2.0.0")
and attn_type != "none"
):
assert XFORMERS_IS_AVAILABLE, (
f"We do not support vanilla attention in {torch.__version__} anymore, "
f"as it is too expensive. Please install xformers via e.g. 'pip install xformers==0.0.16'"
)
attn_type = "vanilla-xformers"
print(f"making attention of type '{attn_type}' with {in_channels} in_channels")
if attn_type == "vanilla":
assert attn_kwargs is None
return AttnBlock(in_channels)
elif attn_type == "vanilla-xformers":
print(f"building MemoryEfficientAttnBlock with {in_channels} in_channels...")
return MemoryEfficientAttnBlock(in_channels)
elif type == "memory-efficient-cross-attn":
attn_kwargs["query_dim"] = in_channels
return MemoryEfficientCrossAttentionWrapper(**attn_kwargs)
elif attn_type == "none":
return nn.Identity(in_channels)
else:
return LinAttnBlock(in_channels)
class Model(nn.Module):
def __init__(
self,
*,
ch,
out_ch,
ch_mult=(1, 2, 4, 8),
num_res_blocks,
attn_resolutions,
dropout=0.0,
resamp_with_conv=True,
in_channels,
resolution,
use_timestep=True,
use_linear_attn=False,
attn_type="vanilla",
):
super().__init__()
if use_linear_attn:
attn_type = "linear"
self.ch = ch
self.temb_ch = self.ch * 4
self.num_resolutions = len(ch_mult)
self.num_res_blocks = num_res_blocks
self.resolution = resolution
self.in_channels = in_channels
self.use_timestep = use_timestep
if self.use_timestep:
# timestep embedding
self.temb = nn.Module()
self.temb.dense = nn.ModuleList(
[
torch.nn.Linear(self.ch, self.temb_ch),
torch.nn.Linear(self.temb_ch, self.temb_ch),
]
)
# downsampling
self.conv_in = torch.nn.Conv2d(
in_channels, self.ch, kernel_size=3, stride=1, padding=1
)
curr_res = resolution
in_ch_mult = (1,) + tuple(ch_mult)
self.down = nn.ModuleList()
for i_level in range(self.num_resolutions):
block = nn.ModuleList()
attn = nn.ModuleList()
block_in = ch * in_ch_mult[i_level]
block_out = ch * ch_mult[i_level]
for i_block in range(self.num_res_blocks):
block.append(
ResnetBlock(
in_channels=block_in,
out_channels=block_out,
temb_channels=self.temb_ch,
dropout=dropout,
)
)
block_in = block_out
if curr_res in attn_resolutions:
attn.append(make_attn(block_in, attn_type=attn_type))
down = nn.Module()
down.block = block
down.attn = attn
if i_level != self.num_resolutions - 1:
down.downsample = Downsample(block_in, resamp_with_conv)
curr_res = curr_res // 2
self.down.append(down)
# middle
self.mid = nn.Module()
self.mid.block_1 = ResnetBlock(
in_channels=block_in,
out_channels=block_in,
temb_channels=self.temb_ch,
dropout=dropout,
)
self.mid.attn_1 = make_attn(block_in, attn_type=attn_type)
self.mid.block_2 = ResnetBlock(
in_channels=block_in,
out_channels=block_in,
temb_channels=self.temb_ch,
dropout=dropout,
)
# upsampling
self.up = nn.ModuleList()
for i_level in reversed(range(self.num_resolutions)):
block = nn.ModuleList()
attn = nn.ModuleList()
block_out = ch * ch_mult[i_level]
skip_in = ch * ch_mult[i_level]
for i_block in range(self.num_res_blocks + 1):
if i_block == self.num_res_blocks:
skip_in = ch * in_ch_mult[i_level]
block.append(
ResnetBlock(
in_channels=block_in + skip_in,
out_channels=block_out,
temb_channels=self.temb_ch,
dropout=dropout,
)
)
block_in = block_out
if curr_res in attn_resolutions:
attn.append(make_attn(block_in, attn_type=attn_type))
up = nn.Module()
up.block = block
up.attn = attn
if i_level != 0:
up.upsample = Upsample(block_in, resamp_with_conv)
curr_res = curr_res * 2
self.up.insert(0, up) # prepend to get consistent order
# end
self.norm_out = Normalize(block_in)
self.conv_out = torch.nn.Conv2d(
block_in, out_ch, kernel_size=3, stride=1, padding=1
)
def forward(self, x, t=None, context=None):
# assert x.shape[2] == x.shape[3] == self.resolution
if context is not None:
# assume aligned context, cat along channel axis
x = torch.cat((x, context), dim=1)
if self.use_timestep:
# timestep embedding
assert t is not None
temb = get_timestep_embedding(t, self.ch)
temb = self.temb.dense[0](temb)
temb = nonlinearity(temb)
temb = self.temb.dense[1](temb)
else:
temb = None
# downsampling
hs = [self.conv_in(x)]
for i_level in range(self.num_resolutions):
for i_block in range(self.num_res_blocks):
h = self.down[i_level].block[i_block](hs[-1], temb)
if len(self.down[i_level].attn) > 0:
h = self.down[i_level].attn[i_block](h)
hs.append(h)
if i_level != self.num_resolutions - 1:
hs.append(self.down[i_level].downsample(hs[-1]))
# middle
h = hs[-1]
h = self.mid.block_1(h, temb)
h = self.mid.attn_1(h)
h = self.mid.block_2(h, temb)
# upsampling
for i_level in reversed(range(self.num_resolutions)):
for i_block in range(self.num_res_blocks + 1):
h = self.up[i_level].block[i_block](
torch.cat([h, hs.pop()], dim=1), temb
)
if len(self.up[i_level].attn) > 0:
h = self.up[i_level].attn[i_block](h)
if i_level != 0:
h = self.up[i_level].upsample(h)
# end
h = self.norm_out(h)
h = nonlinearity(h)
h = self.conv_out(h)
return h
def get_last_layer(self):
return self.conv_out.weight
class Encoder(nn.Module):
def __init__(
self,
*,
ch,
out_ch,
ch_mult=(1, 2, 4, 8),
num_res_blocks,
attn_resolutions,
dropout=0.0,
resamp_with_conv=True,
in_channels,
resolution,
z_channels,
double_z=True,
use_linear_attn=False,
attn_type="vanilla",
**ignore_kwargs,
):
super().__init__()
if use_linear_attn:
attn_type = "linear"
self.ch = ch
self.temb_ch = 0
self.num_resolutions = len(ch_mult)
self.num_res_blocks = num_res_blocks
self.resolution = resolution
self.in_channels = in_channels
# downsampling
self.conv_in = torch.nn.Conv2d(
in_channels, self.ch, kernel_size=3, stride=1, padding=1
)
curr_res = resolution
in_ch_mult = (1,) + tuple(ch_mult)
self.in_ch_mult = in_ch_mult
self.down = nn.ModuleList()
for i_level in range(self.num_resolutions):
block = nn.ModuleList()
attn = nn.ModuleList()
block_in = ch * in_ch_mult[i_level]
block_out = ch * ch_mult[i_level]
for i_block in range(self.num_res_blocks):
block.append(
ResnetBlock(
in_channels=block_in,
out_channels=block_out,
temb_channels=self.temb_ch,
dropout=dropout,
)
)
block_in = block_out
if curr_res in attn_resolutions:
attn.append(make_attn(block_in, attn_type=attn_type))
down = nn.Module()
down.block = block
down.attn = attn
if i_level != self.num_resolutions - 1:
down.downsample = Downsample(block_in, resamp_with_conv)
curr_res = curr_res // 2
self.down.append(down)
# middle
self.mid = nn.Module()
self.mid.block_1 = ResnetBlock(
in_channels=block_in,
out_channels=block_in,
temb_channels=self.temb_ch,
dropout=dropout,
)
self.mid.attn_1 = make_attn(block_in, attn_type=attn_type)
self.mid.block_2 = ResnetBlock(
in_channels=block_in,
out_channels=block_in,
temb_channels=self.temb_ch,
dropout=dropout,
)
# end
self.norm_out = Normalize(block_in)
self.conv_out = torch.nn.Conv2d(
block_in,
2 * z_channels if double_z else z_channels,
kernel_size=3,
stride=1,
padding=1,
)
def forward(self, x):
# timestep embedding
temb = None
# downsampling
hs = [self.conv_in(x)]
for i_level in range(self.num_resolutions):
for i_block in range(self.num_res_blocks):
h = self.down[i_level].block[i_block](hs[-1], temb)
if len(self.down[i_level].attn) > 0:
h = self.down[i_level].attn[i_block](h)
hs.append(h)
if i_level != self.num_resolutions - 1:
hs.append(self.down[i_level].downsample(hs[-1]))
# middle
h = hs[-1]
h = self.mid.block_1(h, temb)
h = self.mid.attn_1(h)
h = self.mid.block_2(h, temb)
# end
h = self.norm_out(h)
h = nonlinearity(h)
h = self.conv_out(h)
return h
class Decoder(nn.Module):
def __init__(
self,
*,
ch,
out_ch,
ch_mult=(1, 2, 4, 8),
num_res_blocks,
attn_resolutions,
dropout=0.0,
resamp_with_conv=True,
in_channels,
resolution,
z_channels,
give_pre_end=False,
tanh_out=False,
use_linear_attn=False,
attn_type="vanilla",
**ignorekwargs,
):
super().__init__()
if use_linear_attn:
attn_type = "linear"
self.ch = ch
self.temb_ch = 0
self.num_resolutions = len(ch_mult)
self.num_res_blocks = num_res_blocks
self.resolution = resolution
self.in_channels = in_channels
self.give_pre_end = give_pre_end
self.tanh_out = tanh_out
# compute in_ch_mult, block_in and curr_res at lowest res
in_ch_mult = (1,) + tuple(ch_mult)
block_in = ch * ch_mult[self.num_resolutions - 1]
curr_res = resolution // 2 ** (self.num_resolutions - 1)
self.z_shape = (1, z_channels, curr_res, curr_res)
print(
"Working with z of shape {} = {} dimensions.".format(
self.z_shape, np.prod(self.z_shape)
)
)
make_attn_cls = self._make_attn()
make_resblock_cls = self._make_resblock()
make_conv_cls = self._make_conv()
# z to block_in
self.conv_in = torch.nn.Conv2d(
z_channels, block_in, kernel_size=3, stride=1, padding=1
)
# middle
self.mid = nn.Module()
self.mid.block_1 = make_resblock_cls(
in_channels=block_in,
out_channels=block_in,
temb_channels=self.temb_ch,
dropout=dropout,
)
self.mid.attn_1 = make_attn_cls(block_in, attn_type=attn_type)
self.mid.block_2 = make_resblock_cls(
in_channels=block_in,
out_channels=block_in,
temb_channels=self.temb_ch,
dropout=dropout,
)
# upsampling
self.up = nn.ModuleList()
for i_level in reversed(range(self.num_resolutions)):
block = nn.ModuleList()
attn = nn.ModuleList()
block_out = ch * ch_mult[i_level]
for i_block in range(self.num_res_blocks + 1):
block.append(
make_resblock_cls(
in_channels=block_in,
out_channels=block_out,
temb_channels=self.temb_ch,
dropout=dropout,
)
)
block_in = block_out
if curr_res in attn_resolutions:
attn.append(make_attn_cls(block_in, attn_type=attn_type))
up = nn.Module()
up.block = block
up.attn = attn
if i_level != 0:
up.upsample = Upsample(block_in, resamp_with_conv)
curr_res = curr_res * 2
self.up.insert(0, up) # prepend to get consistent order
# end
self.norm_out = Normalize(block_in)
self.conv_out = make_conv_cls(
block_in, out_ch, kernel_size=3, stride=1, padding=1
)
def _make_attn(self) -> Callable:
return make_attn
def _make_resblock(self) -> Callable:
return ResnetBlock
def _make_conv(self) -> Callable:
return torch.nn.Conv2d
def get_last_layer(self, **kwargs):
return self.conv_out.weight
def forward(self, z, **kwargs):
# assert z.shape[1:] == self.z_shape[1:]
self.last_z_shape = z.shape
# timestep embedding
temb = None
# z to block_in
h = self.conv_in(z)
# middle
h = self.mid.block_1(h, temb, **kwargs)
h = self.mid.attn_1(h, **kwargs)
h = self.mid.block_2(h, temb, **kwargs)
# upsampling
for i_level in reversed(range(self.num_resolutions)):
for i_block in range(self.num_res_blocks + 1):
h = self.up[i_level].block[i_block](h, temb, **kwargs)
if len(self.up[i_level].attn) > 0:
h = self.up[i_level].attn[i_block](h, **kwargs)
if i_level != 0:
h = self.up[i_level].upsample(h)
# end
if self.give_pre_end:
return h
h = self.norm_out(h)
h = nonlinearity(h)
h = self.conv_out(h, **kwargs)
if self.tanh_out:
h = torch.tanh(h)
return h
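The `Decoder` constructor above derives the latent spatial size by halving `resolution` once per level below the finest one (`curr_res = resolution // 2 ** (num_resolutions - 1)`). A minimal pure-Python sketch of that bookkeeping, for sanity-checking configs (`latent_shape` is an illustrative helper, not part of the module):

```python
def latent_shape(resolution, ch_mult, z_channels):
    """Latent z shape as computed in Decoder.__init__: spatial size is
    `resolution` halved once per level below the finest."""
    num_resolutions = len(ch_mult)
    curr_res = resolution // 2 ** (num_resolutions - 1)
    return (1, z_channels, curr_res, curr_res)

print(latent_shape(256, (1, 2, 4, 4), 4))  # (1, 4, 32, 32)
```

With the common SD-style config (`resolution=256`, four levels, `z_channels=4`) this reproduces the `z of shape (1, 4, 32, 32)` message printed by the constructor.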
================================================
FILE: sgm/modules/diffusionmodules/openaimodel.py
================================================
from abc import abstractmethod
from typing import Iterable
import numpy as np
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange
from ...modules.attention import SpatialTransformer
from ...modules.diffusionmodules.util import (
avg_pool_nd,
conv_nd,
linear,
normalization,
timestep_embedding,
zero_module,
)
from ...util import default, exists
class Timestep(nn.Module):
def __init__(self, dim):
super().__init__()
self.dim = dim
def forward(self, t):
return timestep_embedding(t, self.dim)
class TimestepBlock(nn.Module):
"""
Any module where forward() takes timestep embeddings as a second argument.
"""
@abstractmethod
def forward(self, x, emb):
"""
Apply the module to `x` given `emb` timestep embeddings.
"""
class TimestepEmbedSequential(nn.Sequential, TimestepBlock):
"""
A sequential module that passes timestep embeddings to the children that
    support them as an extra input.
"""
def forward(
self,
x,
emb,
t_context=None,
v_context=None
):
for layer in self:
if isinstance(layer, TimestepBlock):
x = layer(x, emb)
elif isinstance(layer, SpatialTransformer):
x = layer(x, t_context, v_context)
else:
x = layer(x)
return x
class Upsample(nn.Module):
"""
An upsampling layer with an optional convolution.
:param channels: channels in the inputs and outputs.
:param use_conv: a bool determining if a convolution is applied.
:param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then
upsampling occurs in the inner-two dimensions.
"""
def __init__(
self, channels, use_conv, dims=2, out_channels=None, padding=1, third_up=False
):
super().__init__()
self.channels = channels
self.out_channels = out_channels or channels
self.use_conv = use_conv
self.dims = dims
self.third_up = third_up
if use_conv:
self.conv = conv_nd(
dims, self.channels, self.out_channels, 3, padding=padding
)
def forward(self, x):
assert x.shape[1] == self.channels
if self.dims == 3:
t_factor = 1 if not self.third_up else 2
x = F.interpolate(
x,
(t_factor * x.shape[2], x.shape[3] * 2, x.shape[4] * 2),
mode="nearest",
)
else:
x = F.interpolate(x, scale_factor=2, mode="nearest")
if self.use_conv:
x = self.conv(x)
return x
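In the 2-D case, `Upsample.forward` is just nearest-neighbor interpolation with `scale_factor=2` followed by an optional 3x3 conv. A numpy sketch of the interpolation step (assumes numpy; the real code uses `F.interpolate(..., mode="nearest")`):

```python
import numpy as np

def nearest_upsample_2x(x):
    """Nearest-neighbor 2x upsampling over the last two axes, mirroring
    F.interpolate(x, scale_factor=2, mode="nearest") in Upsample.forward."""
    return x.repeat(2, axis=-2).repeat(2, axis=-1)

x = np.arange(4).reshape(1, 1, 2, 2)
print(nearest_upsample_2x(x).shape)  # (1, 1, 4, 4)
```

Each input pixel simply becomes a 2x2 block; the conv (when `use_conv`) then smooths the blocky result.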
class Downsample(nn.Module):
"""
A downsampling layer with an optional convolution.
:param channels: channels in the inputs and outputs.
:param use_conv: a bool determining if a convolution is applied.
:param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then
downsampling occurs in the inner-two dimensions.
"""
def __init__(
self, channels, use_conv, dims=2, out_channels=None, padding=1, third_down=False
):
super().__init__()
self.channels = channels
self.out_channels = out_channels or channels
self.use_conv = use_conv
self.dims = dims
stride = 2 if dims != 3 else ((1, 2, 2) if not third_down else (2, 2, 2))
if use_conv:
self.op = conv_nd(
dims,
self.channels,
self.out_channels,
3,
stride=stride,
padding=padding,
)
else:
assert self.channels == self.out_channels
self.op = avg_pool_nd(dims, kernel_size=stride, stride=stride)
def forward(self, x):
assert x.shape[1] == self.channels
return self.op(x)
class ResBlock(TimestepBlock):
"""
A residual block that can optionally change the number of channels.
"""
def __init__(
self,
channels,
emb_channels,
dropout,
out_channels=None,
use_conv=False,
use_scale_shift_norm=False,
dims=2,
up=False,
down=False,
kernel_size=3,
exchange_temb_dims=False,
skip_t_emb=False
):
super().__init__()
self.channels = channels
self.emb_channels = emb_channels
self.dropout = dropout
self.out_channels = out_channels or channels
self.use_conv = use_conv
self.use_scale_shift_norm = use_scale_shift_norm
self.exchange_temb_dims = exchange_temb_dims
if isinstance(kernel_size, Iterable):
padding = [k // 2 for k in kernel_size]
else:
padding = kernel_size // 2
self.in_layers = nn.Sequential(
normalization(channels),
nn.SiLU(),
conv_nd(dims, channels, self.out_channels, kernel_size, padding=padding),
)
self.updown = up or down
if up:
self.h_upd = Upsample(channels, False, dims)
self.x_upd = Upsample(channels, False, dims)
elif down:
self.h_upd = Downsample(channels, False, dims)
self.x_upd = Downsample(channels, False, dims)
else:
self.h_upd = self.x_upd = nn.Identity()
self.skip_t_emb = skip_t_emb
self.emb_out_channels = (
2 * self.out_channels if use_scale_shift_norm else self.out_channels
)
if self.skip_t_emb:
print(f"Skipping timestep embedding in {self.__class__.__name__}")
assert not self.use_scale_shift_norm
self.emb_layers = None
self.exchange_temb_dims = False
else:
self.emb_layers = nn.Sequential(
nn.SiLU(),
linear(
emb_channels,
self.emb_out_channels,
),
)
self.out_layers = nn.Sequential(
normalization(self.out_channels),
nn.SiLU(),
nn.Dropout(p=dropout),
zero_module(
conv_nd(
dims,
self.out_channels,
self.out_channels,
kernel_size,
padding=padding,
)
),
)
if self.out_channels == channels:
self.skip_connection = nn.Identity()
elif use_conv:
self.skip_connection = conv_nd(
dims, channels, self.out_channels, kernel_size, padding=padding
)
else:
self.skip_connection = conv_nd(dims, channels, self.out_channels, 1)
def forward(self, x, emb):
if self.updown:
in_rest, in_conv = self.in_layers[:-1], self.in_layers[-1]
h = in_rest(x)
h = self.h_upd(h)
x = self.x_upd(x)
h = in_conv(h)
else:
h = self.in_layers(x)
if self.skip_t_emb:
emb_out = th.zeros_like(h)
else:
emb_out = self.emb_layers(emb).type(h.dtype)
while len(emb_out.shape) < len(h.shape):
emb_out = emb_out[..., None]
if self.use_scale_shift_norm:
out_norm, out_rest = self.out_layers[0], self.out_layers[1:]
scale, shift = th.chunk(emb_out, 2, dim=1)
h = out_norm(h) * (1 + scale) + shift
h = out_rest(h)
else:
if self.exchange_temb_dims:
emb_out = rearrange(emb_out, "b t c ... -> b c t ...")
h = h + emb_out
h = self.out_layers(h)
return self.skip_connection(x) + h
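When `use_scale_shift_norm` is on, the timestep embedding is split into a per-channel `(scale, shift)` pair and applied after normalization, i.e. `h = norm(h) * (1 + scale) + shift`. A numpy sketch of just that conditioning step (normalization omitted for brevity; `scale_shift` is an illustrative name):

```python
import numpy as np

def scale_shift(h, emb_out):
    """FiLM-style conditioning as in ResBlock.forward with use_scale_shift_norm:
    emb_out is split in two along the channel axis into (scale, shift)."""
    scale, shift = np.split(emb_out, 2, axis=1)
    return h * (1 + scale) + shift

h = np.ones((1, 2, 1, 1))
emb = np.array([1.0, 2.0, 3.0, 4.0]).reshape(1, 4, 1, 1)  # scale=[1,2], shift=[3,4]
print(scale_shift(h, emb).reshape(-1))  # [5. 7.]
```

This is why `emb_out_channels` is `2 * out_channels` in that mode: half the projection drives the scale, half the shift.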
# plotting dependencies for the attention-map visualization in
# UnifiedUNetModel.save_attn_map below
import seaborn as sns
import matplotlib.pyplot as plt
class UnifiedUNetModel(nn.Module):
    """UNet backbone with two cross-attention contexts (t_context, v_context)
    and an optional zero-initialized control branch that consumes the extra
    `ctrl_channels` of the input."""
def __init__(
self,
in_channels,
ctrl_channels,
model_channels,
out_channels,
num_res_blocks,
attention_resolutions,
dropout=0,
channel_mult=(1, 2, 4, 8),
save_attn_type=None,
save_attn_layers=[],
conv_resample=True,
dims=2,
use_label=None,
num_heads=-1,
num_head_channels=-1,
num_heads_upsample=-1,
use_scale_shift_norm=False,
resblock_updown=False,
transformer_depth=1,
t_context_dim=None,
v_context_dim=None,
num_attention_blocks=None,
use_linear_in_transformer=False,
adm_in_channels=None,
transformer_depth_middle=None
):
super().__init__()
if num_heads_upsample == -1:
num_heads_upsample = num_heads
if num_heads == -1:
assert (
num_head_channels != -1
), "Either num_heads or num_head_channels has to be set"
if num_head_channels == -1:
assert (
num_heads != -1
), "Either num_heads or num_head_channels has to be set"
self.in_channels = in_channels
self.ctrl_channels = ctrl_channels
self.model_channels = model_channels
self.out_channels = out_channels
transformer_depth = len(channel_mult) * [transformer_depth]
transformer_depth_middle = default(transformer_depth_middle, transformer_depth[-1])
self.num_res_blocks = len(channel_mult) * [num_res_blocks]
self.attention_resolutions = attention_resolutions
self.dropout = dropout
self.channel_mult = channel_mult
self.conv_resample = conv_resample
self.use_label = use_label
self.num_heads = num_heads
self.num_head_channels = num_head_channels
self.num_heads_upsample = num_heads_upsample
time_embed_dim = model_channels * 4
self.time_embed = nn.Sequential(
linear(model_channels, time_embed_dim),
nn.SiLU(),
linear(time_embed_dim, time_embed_dim),
)
if self.use_label is not None:
self.label_emb = nn.Sequential(
nn.Sequential(
linear(adm_in_channels, time_embed_dim),
nn.SiLU(),
linear(time_embed_dim, time_embed_dim),
)
)
self.input_blocks = nn.ModuleList(
[
TimestepEmbedSequential(
conv_nd(dims, in_channels, model_channels, 3, padding=1)
)
]
)
if self.ctrl_channels > 0:
self.ctrl_block = TimestepEmbedSequential(
conv_nd(dims, ctrl_channels, 16, 3, padding=1),
nn.SiLU(),
conv_nd(dims, 16, 16, 3, padding=1),
nn.SiLU(),
conv_nd(dims, 16, 32, 3, padding=1),
nn.SiLU(),
conv_nd(dims, 32, 32, 3, padding=1),
nn.SiLU(),
conv_nd(dims, 32, 96, 3, padding=1),
nn.SiLU(),
conv_nd(dims, 96, 96, 3, padding=1),
nn.SiLU(),
conv_nd(dims, 96, 256, 3, padding=1),
nn.SiLU(),
zero_module(conv_nd(dims, 256, model_channels, 3, padding=1))
)
self._feature_size = model_channels
input_block_chans = [model_channels]
ch = model_channels
ds = 1
for level, mult in enumerate(channel_mult):
for nr in range(self.num_res_blocks[level]):
layers = [
ResBlock(
ch,
time_embed_dim,
dropout,
out_channels=mult * model_channels,
dims=dims,
use_scale_shift_norm=use_scale_shift_norm
)
]
ch = mult * model_channels
if ds in attention_resolutions:
if num_head_channels == -1:
dim_head = ch // num_heads
else:
num_heads = ch // num_head_channels
dim_head = num_head_channels
if (
not exists(num_attention_blocks)
or nr < num_attention_blocks[level]
):
layers.append(
SpatialTransformer(
ch,
num_heads,
dim_head,
depth=transformer_depth[level],
t_context_dim=t_context_dim,
v_context_dim=v_context_dim,
use_linear=use_linear_in_transformer
)
)
self.input_blocks.append(TimestepEmbedSequential(*layers))
self._feature_size += ch
input_block_chans.append(ch)
if level != len(channel_mult) - 1:
out_ch = ch
self.input_blocks.append(
TimestepEmbedSequential(
ResBlock(
ch,
time_embed_dim,
dropout,
out_channels=out_ch,
dims=dims,
use_scale_shift_norm=use_scale_shift_norm,
down=True
)
if resblock_updown
else Downsample(
ch, conv_resample, dims=dims, out_channels=out_ch
)
)
)
ch = out_ch
input_block_chans.append(ch)
ds *= 2
self._feature_size += ch
if num_head_channels == -1:
dim_head = ch // num_heads
else:
num_heads = ch // num_head_channels
dim_head = num_head_channels
self.middle_block = TimestepEmbedSequential(
ResBlock(
ch,
time_embed_dim,
dropout,
dims=dims,
use_scale_shift_norm=use_scale_shift_norm
),
SpatialTransformer( # always uses a self-attn
ch,
num_heads,
dim_head,
depth=transformer_depth_middle,
t_context_dim=t_context_dim,
v_context_dim=v_context_dim,
use_linear=use_linear_in_transformer
),
ResBlock(
ch,
time_embed_dim,
dropout,
dims=dims,
use_scale_shift_norm=use_scale_shift_norm
)
)
self._feature_size += ch
self.output_blocks = nn.ModuleList([])
for level, mult in list(enumerate(channel_mult))[::-1]:
for i in range(self.num_res_blocks[level] + 1):
ich = input_block_chans.pop()
layers = [
ResBlock(
ch + ich,
time_embed_dim,
dropout,
out_channels=model_channels * mult,
dims=dims,
use_scale_shift_norm=use_scale_shift_norm
)
]
ch = model_channels * mult
if ds in attention_resolutions:
if num_head_channels == -1:
dim_head = ch // num_heads
else:
num_heads = ch // num_head_channels
dim_head = num_head_channels
if (
not exists(num_attention_blocks)
or i < num_attention_blocks[level]
):
layers.append(
SpatialTransformer(
ch,
num_heads,
dim_head,
depth=transformer_depth[level],
t_context_dim=t_context_dim,
v_context_dim=v_context_dim,
use_linear=use_linear_in_transformer
)
)
if level and i == self.num_res_blocks[level]:
out_ch = ch
layers.append(
ResBlock(
ch,
time_embed_dim,
dropout,
out_channels=out_ch,
dims=dims,
use_scale_shift_norm=use_scale_shift_norm,
up=True
)
if resblock_updown
else Upsample(ch, conv_resample, dims=dims, out_channels=out_ch)
)
ds //= 2
self.output_blocks.append(TimestepEmbedSequential(*layers))
self._feature_size += ch
self.out = nn.Sequential(
normalization(ch),
nn.SiLU(),
zero_module(conv_nd(dims, model_channels, out_channels, 3, padding=1))
)
# cache attn map
        self.attn_type = save_attn_type if save_attn_type is not None else []
self.attn_layers = save_attn_layers
self.attn_map_cache = []
for name, module in self.named_modules():
if any([name.endswith(attn_type) for attn_type in self.attn_type]):
item = {"name": name, "heads": module.heads, "size": None, "attn_map": None}
self.attn_map_cache.append(item)
module.attn_map_cache = item
def clear_attn_map(self):
for item in self.attn_map_cache:
if item["attn_map"] is not None:
del item["attn_map"]
item["attn_map"] = None
def save_attn_map(self, attn_type="t_attn", save_name="temp", tokens=""):
attn_maps = []
for item in self.attn_map_cache:
name = item["name"]
if any([name.startswith(block) for block in self.attn_layers]) and name.endswith(attn_type):
heads = item["heads"]
attn_maps.append(item["attn_map"].detach().cpu())
attn_map = th.stack(attn_maps, dim=0)
attn_map = th.mean(attn_map, dim=0)
        # attn_map: (b*heads) x n x l
        bh, n, l = attn_map.shape  # bh: batch size * heads, n: pixel count (h*w), l: token length
        attn_map = attn_map.reshape((-1, heads, n, l)).mean(dim=1)  # average over heads
        b = attn_map.shape[0]
        h = w = int(n**0.5)
        attn_map = attn_map.permute(0, 2, 1).reshape((b, l, h, w)).numpy()
attn_map_i = attn_map[-1]
l = attn_map_i.shape[0]
fig = plt.figure(figsize=(12, 8), dpi=300)
for j in range(12):
if j >= l: break
ax = fig.add_subplot(3, 4, j+1)
sns.heatmap(attn_map_i[j], square=True, xticklabels=False, yticklabels=False)
if j < len(tokens):
ax.set_title(tokens[j])
fig.savefig(f"temp/attn_map/attn_map_{save_name}.png")
plt.close()
return attn_map_i
def forward(self, x, timesteps=None, t_context=None, v_context=None, y=None, **kwargs):
assert (y is not None) == (
self.use_label is not None
), "must specify y if and only if the model is class-conditional"
self.clear_attn_map()
hs = []
t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False)
emb = self.time_embed(t_emb)
if self.use_label is not None:
assert y.shape[0] == x.shape[0]
emb = emb + self.label_emb(y)
h = x
if self.ctrl_channels > 0:
in_h, add_h = th.split(h, [self.in_channels, self.ctrl_channels], dim=1)
for i, module in enumerate(self.input_blocks):
if self.ctrl_channels > 0 and i == 0:
h = module(in_h, emb, t_context, v_context) + self.ctrl_block(add_h, emb, t_context, v_context)
else:
h = module(h, emb, t_context, v_context)
hs.append(h)
h = self.middle_block(h, emb, t_context, v_context)
for i, module in enumerate(self.output_blocks):
h = th.cat([h, hs.pop()], dim=1)
h = module(h, emb, t_context, v_context)
h = h.type(x.dtype)
return self.out(h)
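The `forward` above first maps raw timesteps through `timestep_embedding` before the learned `time_embed` MLP. A numpy sketch of the standard sinusoidal embedding that function implements (see `diffusionmodules/util.py`; exact half-split layout here is illustrative):

```python
import math
import numpy as np

def sinusoidal_embedding(timesteps, dim, max_period=10000):
    """Sinusoidal timestep features: geometrically spaced frequencies,
    concatenated cos/sin halves, as in timestep_embedding."""
    half = dim // 2
    freqs = np.exp(-math.log(max_period) * np.arange(half) / half)
    args = np.asarray(timesteps, dtype=np.float64)[:, None] * freqs[None, :]
    return np.concatenate([np.cos(args), np.sin(args)], axis=-1)

emb = sinusoidal_embedding([0, 50], dim=8)
print(emb.shape)  # (2, 8)
```

At `t = 0` the cos half is all ones and the sin half all zeros, which gives the network a well-defined "clean image" anchor.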
================================================
FILE: sgm/modules/diffusionmodules/sampling.py
================================================
"""
Partially ported from https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/sampling.py
"""
from typing import Dict, Union
import imageio
import torch
import numpy as np
import torch.nn.functional as F
from omegaconf import ListConfig, OmegaConf
from tqdm import tqdm
from ...modules.diffusionmodules.sampling_utils import (
get_ancestral_step,
linear_multistep_coeff,
to_d,
to_neg_log_sigma,
to_sigma,
)
from ...util import append_dims, default, instantiate_from_config
from torchvision.utils import save_image
DEFAULT_GUIDER = {"target": "sgm.modules.diffusionmodules.guiders.IdentityGuider"}
class BaseDiffusionSampler:
def __init__(
self,
discretization_config: Union[Dict, ListConfig, OmegaConf],
num_steps: Union[int, None] = None,
guider_config: Union[Dict, ListConfig, OmegaConf, None] = None,
verbose: bool = False,
device: str = "cuda",
):
self.num_steps = num_steps
self.discretization = instantiate_from_config(discretization_config)
self.guider = instantiate_from_config(
default(
guider_config,
DEFAULT_GUIDER,
)
)
self.verbose = verbose
self.device = device
def prepare_sampling_loop(self, x, cond, uc=None, num_steps=None):
sigmas = self.discretization(
self.num_steps if num_steps is None else num_steps, device=self.device
)
uc = default(uc, cond)
x *= torch.sqrt(1.0 + sigmas[0] ** 2.0)
num_sigmas = len(sigmas)
s_in = x.new_ones([x.shape[0]])
return x, s_in, sigmas, num_sigmas, cond, uc
def denoise(self, x, model, sigma, cond, uc):
denoised = model.denoiser(model.model, *self.guider.prepare_inputs(x, sigma, cond, uc))
denoised = self.guider(denoised, sigma)
return denoised
def get_sigma_gen(self, num_sigmas, init_step=0):
sigma_generator = range(init_step, num_sigmas - 1)
if self.verbose:
print("#" * 30, " Sampling setting ", "#" * 30)
print(f"Sampler: {self.__class__.__name__}")
print(f"Discretization: {self.discretization.__class__.__name__}")
print(f"Guider: {self.guider.__class__.__name__}")
sigma_generator = tqdm(
sigma_generator,
total=num_sigmas-1-init_step,
desc=f"Sampling with {self.__class__.__name__} for {num_sigmas-1-init_step} steps",
)
return sigma_generator
class SingleStepDiffusionSampler(BaseDiffusionSampler):
def sampler_step(self, sigma, next_sigma, denoiser, x, cond, uc, *args, **kwargs):
raise NotImplementedError
def euler_step(self, x, d, dt):
return x + dt * d
class EDMSampler(SingleStepDiffusionSampler):
def __init__(
self, s_churn=0.0, s_tmin=0.0, s_tmax=float("inf"), s_noise=1.0, *args, **kwargs
):
super().__init__(*args, **kwargs)
self.s_churn = s_churn
self.s_tmin = s_tmin
self.s_tmax = s_tmax
self.s_noise = s_noise
def sampler_step(self, sigma, next_sigma, denoiser, x, cond, uc=None, gamma=0.0):
sigma_hat = sigma * (gamma + 1.0)
if gamma > 0:
eps = torch.randn_like(x) * self.s_noise
x = x + eps * append_dims(sigma_hat**2 - sigma**2, x.ndim) ** 0.5
denoised = self.denoise(x, denoiser, sigma_hat, cond, uc)
d = to_d(x, sigma_hat, denoised)
dt = append_dims(next_sigma - sigma_hat, x.ndim)
euler_step = self.euler_step(x, d, dt)
x = self.possible_correction_step(
euler_step, x, d, dt, next_sigma, denoiser, cond, uc
)
return x
def __call__(self, denoiser, x, cond, uc=None, num_steps=None):
x, s_in, sigmas, num_sigmas, cond, uc = self.prepare_sampling_loop(
x, cond, uc, num_steps
)
for i in self.get_sigma_gen(num_sigmas):
gamma = (
min(self.s_churn / (num_sigmas - 1), 2**0.5 - 1)
if self.s_tmin <= sigmas[i] <= self.s_tmax
else 0.0
)
x = self.sampler_step(
s_in * sigmas[i],
s_in * sigmas[i + 1],
denoiser,
x,
cond,
uc,
gamma,
)
return x
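The `gamma` computed in the loop above is the EDM "churn" amount: extra noise is injected only while `sigma` lies in `[s_tmin, s_tmax]`, and the total churn budget `s_churn` is spread across the steps, capped at `sqrt(2) - 1`. A pure-Python restatement for clarity (`churn_gamma` is an illustrative name):

```python
import math

def churn_gamma(sigma, num_sigmas, s_churn, s_tmin=0.0, s_tmax=float("inf")):
    """Per-step churn used by EDMSampler: nonzero only inside [s_tmin, s_tmax],
    with the total s_churn spread over the (num_sigmas - 1) steps."""
    if s_tmin <= sigma <= s_tmax:
        return min(s_churn / (num_sigmas - 1), math.sqrt(2) - 1)
    return 0.0

print(churn_gamma(1.0, num_sigmas=11, s_churn=0.5))  # 0.05
print(churn_gamma(1.0, num_sigmas=11, s_churn=0.0))  # 0.0
```

With `gamma > 0` the step first lifts the noise level to `sigma_hat = sigma * (1 + gamma)` by adding fresh noise, then denoises from there, which can correct errors accumulated on earlier steps.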
class AncestralSampler(SingleStepDiffusionSampler):
def __init__(self, eta=1.0, s_noise=1.0, *args, **kwargs):
super().__init__(*args, **kwargs)
self.eta = eta
self.s_noise = s_noise
self.noise_sampler = lambda x: torch.randn_like(x)
def ancestral_euler_step(self, x, denoised, sigma, sigma_down):
d = to_d(x, sigma, denoised)
dt = append_dims(sigma_down - sigma, x.ndim)
return self.euler_step(x, d, dt)
def ancestral_step(self, x, sigma, next_sigma, sigma_up):
x = torch.where(
append_dims(next_sigma, x.ndim) > 0.0,
x + self.noise_sampler(x) * self.s_noise * append_dims(sigma_up, x.ndim),
x,
)
return x
def __call__(self, denoiser, x, cond, uc=None, num_steps=None):
x, s_in, sigmas, num_sigmas, cond, uc = self.prepare_sampling_loop(
x, cond, uc, num_steps
)
for i in self.get_sigma_gen(num_sigmas):
x = self.sampler_step(
s_in * sigmas[i],
s_in * sigmas[i + 1],
denoiser,
x,
cond,
uc,
)
return x
class LinearMultistepSampler(BaseDiffusionSampler):
def __init__(
self,
order=4,
*args,
**kwargs,
):
super().__init__(*args, **kwargs)
self.order = order
def __call__(self, denoiser, x, cond, uc=None, num_steps=None, **kwargs):
x, s_in, sigmas, num_sigmas, cond, uc = self.prepare_sampling_loop(
x, cond, uc, num_steps
)
ds = []
sigmas_cpu = sigmas.detach().cpu().numpy()
for i in self.get_sigma_gen(num_sigmas):
sigma = s_in * sigmas[i]
denoised = denoiser(
*self.guider.prepare_inputs(x, sigma, cond, uc), **kwargs
)
denoised = self.guider(denoised, sigma)
d = to_d(x, sigma, denoised)
ds.append(d)
if len(ds) > self.order:
ds.pop(0)
cur_order = min(i + 1, self.order)
coeffs = [
linear_multistep_coeff(cur_order, sigmas_cpu, i, j)
for j in range(cur_order)
]
x = x + sum(coeff * d for coeff, d in zip(coeffs, reversed(ds)))
return x
class EulerEDMSampler(EDMSampler):
def possible_correction_step(
self, euler_step, x, d, dt, next_sigma, denoiser, cond, uc
):
return euler_step
def get_c_noise(self, x, model, sigma):
sigma = model.denoiser.possibly_quantize_sigma(sigma)
sigma_shape = sigma.shape
sigma = append_dims(sigma, x.ndim)
c_skip, c_out, c_in, c_noise = model.denoiser.scaling(sigma)
c_noise = model.denoiser.possibly_quantize_c_noise(c_noise.reshape(sigma_shape))
return c_noise
def attend_and_excite(self, x, model, sigma, cond, batch, alpha, iter_enabled, thres, max_iter=20):
# calc timestep
c_noise = self.get_c_noise(x, model, sigma)
x = x.clone().detach().requires_grad_(True) # https://github.com/yuval-alaluf/Attend-and-Excite/blob/main/pipeline_attend_and_excite.py#L288
iters = 0
while True:
model_output = model.model(x, c_noise, cond)
local_loss = model.loss_fn.get_min_local_loss(model.model.diffusion_model.attn_map_cache, batch["mask"], batch["seg_mask"])
grad = torch.autograd.grad(local_loss.requires_grad_(True), [x], retain_graph=True)[0]
x = x - alpha * grad
iters += 1
if not iter_enabled or local_loss <= thres or iters > max_iter:
break
return x
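`attend_and_excite` repeatedly nudges the latent against the gradient of a localized attention loss until the loss drops below `thres` or the iteration budget runs out. The same control flow on a toy 1-D quadratic (pure Python, no autograd; `refine` and its lambdas are stand-ins, not the repo's loss):

```python
def refine(x, grad_fn, loss_fn, alpha, thres, max_iter=20):
    """Mirrors the attend_and_excite loop: evaluate the loss, take a gradient
    step, and stop once the loss reaches `thres` or max_iter is exceeded."""
    iters = 0
    while True:
        loss = loss_fn(x)
        x = x - alpha * grad_fn(x)
        iters += 1
        if loss <= thres or iters > max_iter:
            break
    return x

# minimize (x - 3)^2 down to a loss of 0.01
x = refine(0.0, grad_fn=lambda x: 2 * (x - 3), loss_fn=lambda x: (x - 3) ** 2,
           alpha=0.1, thres=0.01, max_iter=100)
```

Note the real method re-enables gradients on a detached latent clone first, so only the latent (not the model weights) receives updates.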
def save_segment_map(self, attn_maps, tokens=None, save_name=None):
sections = []
for i in range(len(tokens)):
attn_map = attn_maps[i]
sections.append(attn_map)
section = np.stack(sections)
np.save(f"./temp/seg_map/seg_{save_name}.npy", section)
def get_init_noise(self, cfgs, model, cond, batch, uc=None):
H, W = batch["target_size_as_tuple"][0]
shape = (cfgs.batch_size, cfgs.channel, int(H) // cfgs.factor, int(W) // cfgs.factor)
randn = torch.randn(shape).to(torch.device("cuda", index=cfgs.gpu))
x = randn.clone()
xs = []
self.verbose = False
for _ in range(cfgs.noise_iters):
x, s_in, sigmas, num_sigmas, cond, uc = self.prepare_sampling_loop(
x, cond, uc, num_steps=2
)
superv = {
"mask": batch["mask"] if "mask" in batch else None,
"seg_mask": batch["seg_mask"] if "seg_mask" in batch else None
}
local_losses = []
for i in self.get_sigma_gen(num_sigmas):
gamma = (
min(self.s_churn / (num_sigmas - 1), 2**0.5 - 1)
if self.s_tmin <= sigmas[i] <= self.s_tmax
else 0.0
)
x, inter, local_loss = self.sampler_step(
s_in * sigmas[i],
s_in * sigmas[i + 1],
model,
x,
cond,
superv,
uc,
gamma,
save_loss=True
)
local_losses.append(local_loss.item())
xs.append((randn, local_losses[-1]))
randn = torch.randn(shape).to(torch.device("cuda", index=cfgs.gpu))
x = randn.clone()
self.verbose = True
        xs.sort(key=lambda x: x[-1])  # ascending by init local loss
if len(xs) > 0:
print(f"Init local loss: Best {xs[0][1]} Worst {xs[-1][1]}")
x = xs[0][0]
return x
def sampler_step(self, sigma, next_sigma, model, x, cond, batch=None, uc=None,
gamma=0.0, alpha=0, iter_enabled=False, thres=None, update=False,
name=None, save_loss=False, save_attn=False, save_inter=False):
sigma_hat = sigma * (gamma + 1.0)
if gamma > 0:
eps = torch.randn_like(x) * self.s_noise
x = x + eps * append_dims(sigma_hat**2 - sigma**2, x.ndim) ** 0.5
if update:
x = self.attend_and_excite(x, model, sigma_hat, cond, batch, alpha, iter_enabled, thres)
denoised = self.denoise(x, model, sigma_hat, cond, uc)
denoised_decode = model.decode_first_stage(denoised) if save_inter else None
if save_loss:
local_loss = model.loss_fn.get_min_local_loss(model.model.diffusion_model.attn_map_cache, batch["mask"], batch["seg_mask"])
local_loss = local_loss[local_loss.shape[0]//2:]
else:
local_loss = torch.zeros(1)
if save_attn:
attn_map = model.model.diffusion_model.save_attn_map(save_name=name, tokens=batch["label"][0])
self.save_segment_map(attn_map, tokens=batch["label"][0], save_name=name)
d = to_d(x, sigma_hat, denoised)
dt = append_dims(next_sigma - sigma_hat, x.ndim)
euler_step = self.euler_step(x, d, dt)
return euler_step, denoised_decode, local_loss
def __call__(self, model, x, cond, batch=None, uc=None, num_steps=None, init_step=0,
name=None, aae_enabled=False, detailed=False):
x, s_in, sigmas, num_sigmas, cond, uc = self.prepare_sampling_loop(
x, cond, uc, num_steps
)
name = batch["name"][0]
inters = []
local_losses = []
scales = np.linspace(start=1.0, stop=0, num=num_sigmas)
iter_lst = np.linspace(start=5, stop=25, num=6, dtype=np.int32)
thres_lst = np.linspace(start=-0.5, stop=-0.8, num=6)
for i in self.get_sigma_gen(num_sigmas, init_step=init_step):
gamma = (
min(self.s_churn / (num_sigmas - 1), 2**0.5 - 1)
if self.s_tmin <= sigmas[i] <= self.s_tmax
else 0.0
)
alpha = 20 * np.sqrt(scales[i])
update = aae_enabled
save_loss = aae_enabled
save_attn = detailed and (i == (num_sigmas-1)//2)
save_inter = aae_enabled
if i in iter_lst:
iter_enabled = True
thres = thres_lst[list(iter_lst).index(i)]
else:
iter_enabled = False
thres = 0.0
x, inter, local_loss = self.sampler_step(
s_in * sigmas[i],
s_in * sigmas[i + 1],
model,
x,
cond,
batch,
uc,
gamma,
alpha=alpha,
iter_enabled=iter_enabled,
thres=thres,
update=update,
name=name,
save_loss=save_loss,
save_attn=save_attn,
save_inter=save_inter
)
local_losses.append(local_loss.item())
if inter is not None:
inter = torch.clamp((inter + 1.0) / 2.0, min=0.0, max=1.0)[0]
inter = inter.cpu().numpy().transpose(1, 2, 0) * 255
inters.append(inter.astype(np.uint8))
print(f"Local losses: {local_losses}")
if len(inters) > 0:
imageio.mimsave(f"./temp/inters/{name}.gif", inters, 'GIF', duration=0.02)
return x
class HeunEDMSampler(EDMSampler):
def possible_correction_step(
self, euler_step, x, d, dt, next_sigma, denoiser, cond, uc
):
if torch.sum(next_sigma) < 1e-14:
# Save a network evaluation if all noise levels are 0
return euler_step
else:
denoised = self.denoise(euler_step, denoiser, next_sigma, cond, uc)
d_new = to_d(euler_step, next_sigma, denoised)
d_prime = (d + d_new) / 2.0
# apply correction if noise level is not 0
x = torch.where(
append_dims(next_sigma, x.ndim) > 0.0, x + d_prime * dt, euler_step
)
return x
class EulerAncestralSampler(AncestralSampler):
def sampler_step(self, sigma, next_sigma, denoiser, x, cond, uc):
sigma_down, sigma_up = get_ancestral_step(sigma, next_sigma, eta=self.eta)
denoised = self.denoise(x, denoiser, sigma, cond, uc)
x = self.ancestral_euler_step(x, denoised, sigma, sigma_down)
x = self.ancestral_step(x, sigma, next_sigma, sigma_up)
return x
class DPMPP2SAncestralSampler(AncestralSampler):
def get_variables(self, sigma, sigma_down):
t, t_next = [to_neg_log_sigma(s) for s in (sigma, sigma_down)]
h = t_next - t
s = t + 0.5 * h
return h, s, t, t_next
def get_mult(self, h, s, t, t_next):
mult1 = to_sigma(s) / to_sigma(t)
mult2 = (-0.5 * h).expm1()
mult3 = to_sigma(t_next) / to_sigma(t)
mult4 = (-h).expm1()
return mult1, mult2, mult3, mult4
def sampler_step(self, sigma, next_sigma, denoiser, x, cond, uc=None, **kwargs):
sigma_down, sigma_up = get_ancestral_step(sigma, next_sigma, eta=self.eta)
denoised = self.denoise(x, denoiser, sigma, cond, uc)
x_euler = self.ancestral_euler_step(x, denoised, sigma, sigma_down)
if torch.sum(sigma_down) < 1e-14:
# Save a network evaluation if all noise levels are 0
x = x_euler
else:
h, s, t, t_next = self.get_variables(sigma, sigma_down)
mult = [
append_dims(mult, x.ndim) for mult in self.get_mult(h, s, t, t_next)
]
x2 = mult[0] * x - mult[1] * denoised
denoised2 = self.denoise(x2, denoiser, to_sigma(s), cond, uc)
x_dpmpp2s = mult[2] * x - mult[3] * denoised2
# apply correction if noise level is not 0
x = torch.where(append_dims(sigma_down, x.ndim) > 0.0, x_dpmpp2s, x_euler)
x = self.ancestral_step(x, sigma, next_sigma, sigma_up)
return x
class DPMPP2MSampler(BaseDiffusionSampler):
def get_variables(self, sigma, next_sigma, previous_sigma=None):
t, t_next = [to_neg_log_sigma(s) for s in (sigma, next_sigma)]
h = t_next - t
if previous_sigma is not None:
h_last = t - to_neg_log_sigma(previous_sigma)
r = h_last / h
return h, r, t, t_next
else:
return h, None, t, t_next
def get_mult(self, h, r, t, t_next, previous_sigma):
mult1 = to_sigma(t_next) / to_sigma(t)
mult2 = (-h).expm1()
if previous_sigma is not None:
mult3 = 1 + 1 / (2 * r)
mult4 = 1 / (2 * r)
return mult1, mult2, mult3, mult4
else:
return mult1, mult2
def sampler_step(
self,
old_denoised,
previous_sigma,
sigma,
next_sigma,
denoiser,
x,
cond,
uc=None,
):
denoised = self.denoise(x, denoiser, sigma, cond, uc)
h, r, t, t_next = self.get_variables(sigma, next_sigma, previous_sigma)
mult = [
append_dims(mult, x.ndim)
for mult in self.get_mult(h, r, t, t_next, previous_sigma)
]
x_standard = mult[0] * x - mult[1] * denoised
if old_denoised is None or torch.sum(next_sigma) < 1e-14:
# Save a network evaluation if all noise levels are 0 or on the first step
return x_standard, denoised
else:
denoised_d = mult[2] * denoised - mult[3] * old_denoised
x_advanced = mult[0] * x - mult[1] * denoised_d
# apply correction if noise level is not 0 and not first step
x = torch.where(
append_dims(next_sigma, x.ndim) > 0.0, x_advanced, x_standard
)
return x, denoised
def __call__(self, denoiser, x, cond, uc=None, num_steps=None, init_step=0, **kwargs):
x, s_in, sigmas, num_sigmas, cond, uc = self.prepare_sampling_loop(
x, cond, uc, num_steps
)
old_denoised = None
for i in self.get_sigma_gen(num_sigmas, init_step=init_step):
x, old_denoised = self.sampler_step(
old_denoised,
None if i == 0 else s_in * sigmas[i - 1],
s_in * sigmas[i],
s_in * sigmas[i + 1],
denoiser,
x,
cond,
uc=uc,
)
return x
================================================
FILE: sgm/modules/diffusionmodules/sampling_utils.py
================================================
import torch
from scipy import integrate
from ...util import append_dims
class NoDynamicThresholding:
def __call__(self, uncond, cond, scale):
return uncond + scale * (cond - uncond)
def linear_multistep_coeff(order, t, i, j, epsrel=1e-4):
if order - 1 > i:
raise ValueError(f"Order {order} too high for step {i}")
def fn(tau):
prod = 1.0
for k in range(order):
if j == k:
continue
prod *= (tau - t[i - k]) / (t[i - j] - t[i - k])
return prod
return integrate.quad(fn, t[i], t[i + 1], epsrel=epsrel)[0]
def get_ancestral_step(sigma_from, sigma_to, eta=1.0):
if not eta:
return sigma_to, 0.0
sigma_up = torch.minimum(
sigma_to,
eta
* (sigma_to**2 * (sigma_from**2 - sigma_to**2) / sigma_from**2) ** 0.5,
)
sigma_down = (sigma_to**2 - sigma_up**2) ** 0.5
return sigma_down, sigma_up
def to_d(x, sigma, denoised):
return (x - denoised) / append_dims(sigma, x.ndim)
def to_neg_log_sigma(sigma):
return sigma.log().neg()
def to_sigma(neg_log_sigma):
return neg_log_sigma.neg().exp()
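`get_ancestral_step` above splits the move from `sigma_from` down to `sigma_to` into a deterministic part (`sigma_down`) and a fresh-noise part (`sigma_up`) such that `sigma_down^2 + sigma_up^2 == sigma_to^2`. A pure-Python restatement of the same formula (scalar version for illustration):

```python
import math

def ancestral_step(sigma_from, sigma_to, eta=1.0):
    """Scalar version of get_ancestral_step: sigma_down^2 + sigma_up^2
    equals sigma_to^2, with eta scaling the stochastic share."""
    if not eta:
        return sigma_to, 0.0
    sigma_up = min(
        sigma_to,
        eta * (sigma_to**2 * (sigma_from**2 - sigma_to**2) / sigma_from**2) ** 0.5,
    )
    sigma_down = (sigma_to**2 - sigma_up**2) ** 0.5
    return sigma_down, sigma_up

down, up = ancestral_step(2.0, 1.0)
print(round(down, 3), round(up, 3))  # 0.5 0.866
```

With `eta=0` the step becomes fully deterministic (plain Euler to `sigma_to`); `eta=1` is the standard ancestral split.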
================================================
FILE: sgm/modules/diffusionmodules/sigma_sampling.py
================================================
import torch
from ...util import default, instantiate_from_config
class EDMSampling:
def __init__(self, p_mean=-1.2, p_std=1.2):
self.p_mean = p_mean
self.p_std = p_std
def __call__(self, n_samples, rand=None):
log_sigma = self.p_mean + self.p_std * default(rand, torch.randn((n_samples,)))
return log_sigma.exp()
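`EDMSampling` draws training noise levels log-normally (as in Karras et al.'s EDM): `sigma = exp(p_mean + p_std * eps)` with `eps ~ N(0, 1)`. A scalar sketch:

```python
import math
import random

def edm_sigma(p_mean=-1.2, p_std=1.2, eps=None):
    # log-normal sigma sampling: eps ~ N(0, 1) unless supplied
    if eps is None:
        eps = random.gauss(0.0, 1.0)
    return math.exp(p_mean + p_std * eps)

# with eps = 0 the sample sits at the median, exp(p_mean)
assert abs(edm_sigma(eps=0.0) - math.exp(-1.2)) < 1e-12
# sampled sigmas are always strictly positive
assert all(edm_sigma() > 0 for _ in range(100))
```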
class DiscreteSampling:
def __init__(self, discretization_config, num_idx, do_append_zero=False, flip=True):
self.num_idx = num_idx
self.sigmas = instantiate_from_config(discretization_config)(
num_idx, do_append_zero=do_append_zero, flip=flip
)
def idx_to_sigma(self, idx):
return self.sigmas[idx]
def __call__(self, n_samples, rand=None):
idx = default(
rand,
torch.randint(0, self.num_idx, (n_samples,)),
)
return self.idx_to_sigma(idx)
================================================
FILE: sgm/modules/diffusionmodules/util.py
================================================
"""
adapted from
https://github.com/openai/improved-diffusion/blob/main/improved_diffusion/gaussian_diffusion.py
and
https://github.com/lucidrains/denoising-diffusion-pytorch/blob/7706bdfc6f527f58d33f84b7b522e61e6e3164b3/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py
and
https://github.com/openai/guided-diffusion/blob/0ba878e517b276c45d1195eb29f6f5f72659a05b/guided_diffusion/nn.py
thanks!
"""
import math
import torch
import torch.nn as nn
from einops import repeat
def make_beta_schedule(
schedule,
n_timestep,
linear_start=1e-4,
linear_end=2e-2,
):
if schedule == "linear":
betas = (
torch.linspace(
linear_start**0.5, linear_end**0.5, n_timestep, dtype=torch.float64
)
** 2
)
return betas.numpy()
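The "linear" schedule above is linear in sqrt(beta) space, then squared. A stdlib-only sketch of the same arithmetic:

```python
import math

def linear_betas(n_timestep, linear_start=1e-4, linear_end=2e-2):
    # linspace in sqrt-space, then square, matching make_beta_schedule("linear", ...)
    s, e = math.sqrt(linear_start), math.sqrt(linear_end)
    return [(s + (e - s) * i / (n_timestep - 1)) ** 2 for i in range(n_timestep)]

betas = linear_betas(1000)
# endpoints hit linear_start and linear_end, and betas increase monotonically
assert abs(betas[0] - 1e-4) < 1e-12 and abs(betas[-1] - 2e-2) < 1e-12
assert all(b1 < b2 for b1, b2 in zip(betas, betas[1:]))
```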
def extract_into_tensor(a, t, x_shape):
b, *_ = t.shape
out = a.gather(-1, t)
return out.reshape(b, *((1,) * (len(x_shape) - 1)))
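`extract_into_tensor` gathers one schedule value per batch element and reshapes it to `(b, 1, 1, ...)` so it broadcasts against `x_shape`. The lookup itself, sketched with plain lists:

```python
def extract_scalar(schedule, t):
    # per-batch-element lookup: out[b] = schedule[t[b]]
    # (the torch version then reshapes to (b, 1, ..., 1) for broadcasting)
    return [schedule[ti] for ti in t]

alphas = [0.9, 0.8, 0.7, 0.6]
assert extract_scalar(alphas, [0, 3, 1]) == [0.9, 0.6, 0.8]
```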
def mixed_checkpoint(func, inputs: dict, params, flag):
"""
Evaluate a function without caching intermediate activations, allowing for
reduced memory at the expense of extra compute in the backward pass. This differs
from the original checkpoint function borrowed from
https://github.com/openai/guided-diffusion/blob/0ba878e517b276c45d1195eb29f6f5f72659a05b/guided_diffusion/nn.py
in that it also works with non-tensor inputs.
:param func: the function to evaluate.
:param inputs: the argument dictionary to pass to `func`.
:param params: a sequence of parameters `func` depends on but does not
explicitly take as arguments.
:param flag: if False, disable gradient checkpointing.
"""
if flag:
tensor_keys = [key for key in inputs if isinstance(inputs[key], torch.Tensor)]
tensor_inputs = [
inputs[key] for key in inputs if isinstance(inputs[key], torch.Tensor)
]
non_tensor_keys = [
key for key in inputs if not isinstance(inputs[key], torch.Tensor)
]
non_tensor_inputs = [
inputs[key] for key in inputs if not isinstance(inputs[key], torch.Tensor)
]
args = tuple(tensor_inputs) + tuple(non_tensor_inputs) + tuple(params)
return MixedCheckpointFunction.apply(
func,
len(tensor_inputs),
len(non_tensor_inputs),
tensor_keys,
non_tensor_keys,
*args,
)
else:
return func(**inputs)
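`mixed_checkpoint` flattens the keyword dict into a positional tuple ordered tensors-first, so the autograd function can tell which arguments need gradients and which should be passed through. The partition/reassembly step, sketched without torch (names are illustrative):

```python
def partition_inputs(inputs, is_tensor):
    # split a kwargs dict into (tensor_keys, tensor_vals, other_keys, other_vals),
    # preserving insertion order, like mixed_checkpoint does before *args packing
    t_keys = [k for k in inputs if is_tensor(inputs[k])]
    n_keys = [k for k in inputs if not is_tensor(inputs[k])]
    return t_keys, [inputs[k] for k in t_keys], n_keys, [inputs[k] for k in n_keys]

inputs = {"x": [1.0], "mask": [0], "mode": "train", "scale": 2}
tk, tv, nk, nv = partition_inputs(inputs, lambda v: isinstance(v, list))
assert tk == ["x", "mask"] and nk == ["mode", "scale"]
# inside forward(), dict(zip(keys, vals)) restores the original kwargs
assert {**dict(zip(tk, tv)), **dict(zip(nk, nv))} == inputs
```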
class MixedCheckpointFunction(torch.autograd.Function):
@staticmethod
def forward(
ctx,
run_function,
length_tensors,
length_non_tensors,
tensor_keys,
non_tensor_keys,
*args,
):
ctx.end_tensors = length_tensors
ctx.end_non_tensors = length_tensors + length_non_tensors
ctx.gpu_autocast_kwargs = {
"enabled": torch.is_autocast_enabled(),
"dtype": torch.get_autocast_gpu_dtype(),
"cache_enabled": torch.is_autocast_cache_enabled(),
}
assert (
len(tensor_keys) == length_tensors
and len(non_tensor_keys) == length_non_tensors
)
ctx.input_tensors = {
key: val for (key, val) in zip(tensor_keys, list(args[: ctx.end_tensors]))
}
ctx.input_non_tensors = {
key: val
for (key, val) in zip(
non_tensor_keys, list(args[ctx.end_tensors : ctx.end_non_tensors])
)
}
ctx.run_function = run_function
ctx.input_params = list(args[ctx.end_non_tensors :])
with torch.no_grad():
output_tensors = ctx.run_function(
**ctx.input_tensors, **ctx.input_non_tensors
)
return output_tensors
@staticmethod
def backward(ctx, *output_grads):
# additional_args = {key: ctx.input_tensors[key] for key in ctx.input_tensors if not isinstance(ctx.input_tensors[key],torch.Tensor)}
ctx.input_tensors = {
key: ctx.input_tensors[key].detach().requires_grad_(True)
for key in ctx.input_tensors
}
with torch.enable_grad(), torch.cuda.amp.autocast(**ctx.gpu_autocast_kwargs):
# Fixes a bug where the first op in run_function modifies the
# Tensor storage in place, which is not allowed for detach()'d
# Tensors.
shallow_copies = {
key: ctx.input_tensors[key].view_as(ctx.input_tensors[key])
for key in ctx.input_tensors
}
# shallow_copies.update(additional_args)
output_tensors = ctx.run_function(**shallow_copies, **ctx.input_non_tensors)
input_grads = torch.autograd.grad(
output_tensors,
list(ctx.input_tensors.values()) + ctx.input_params,
output_grads,
allow_unused=True,
)
del ctx.input_tensors
del ctx.input_params
del output_tensors
return (
(None, None, None, None, None)
+ input_grads[: ctx.end_tensors]
+ (None,) * (ctx.end_non_tensors - ctx.end_tensors)
+ input_grads[ctx.end_tensors :]
)
def checkpoint(func, inputs, params, flag):
"""
Evaluate a function without caching intermediate activations, allowing for
reduced memory at the expense of extra compute in the backward pass.
:param func: the function to evaluate.
:param inputs: the argument sequence to pass to `func`.
:param params: a sequence of parameters `func` depends on but does not
explicitly take as arguments.
:param flag: if False, disable gradient checkpointing.
"""
if flag:
args = tuple(inputs) + tuple(params)
return CheckpointFunction.apply(func, len(inputs), *args)
else:
return func(*inputs)
class CheckpointFunction(torch.autograd.Function):
@staticmethod
def forward(ctx, run_function, length, *args):
ctx.run_function = run_function
ctx.input_tensors = list(args[:length])
ctx.input_params = list(args[length:])
ctx.gpu_autocast_kwargs = {
"enabled": torch.is_autocast_enabled(),
"dtype": torch.get_autocast_gpu_dtype(),
"cache_enabled": torch.is_autocast_cache_enabled(),
}
with torch.no_grad():
output_tensors = ctx.run_function(*ctx.input_tensors)
return output_tensors
@staticmethod
def backward(ctx, *output_grads):
ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors]
with torch.enable_grad(), torch.cuda.amp.autocast(**ctx.gpu_autocast_kwargs):
# Fixes a bug where the first op in run_function modifies the
# Tensor storage in place, which is not allowed for detach()'d
# Tensors.
shallow_copies = [x.view_as(x) for x in ctx.input_tensors]
output_tensors = ctx.run_function(*shallow_copies)
input_grads = torch.autograd.grad(
output_tensors,
ctx.input_tensors + ctx.input_params,
output_grads,
allow_unused=True,
)
del ctx.input_tensors
del ctx.input_params
del output_tensors
return (None, None) + input_grads
def timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False):
"""
Create sinusoidal timestep embeddings.
:param timesteps: a 1-D Tensor of N indices, one per batch element.
These may be fractional.
:param dim: the dimension of the output.
:param max_period: controls the minimum frequency of the embeddings.
:return: an [N x dim] Tensor of positional embeddings.
"""
if not repeat_only:
half = dim // 2
freqs = torch.exp(
-math.log(max_period)
* torch.arange(start=0, end=half, dtype=torch.float32)
/ half
).to(device=timesteps.device)
args = timesteps[:, None].float() * freqs[None]
embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
if dim % 2:
embedding = torch.cat(
[embedding, torch.zeros_like(embedding[:, :1])], dim=-1
)
else:
embedding = repeat(timesteps, "b -> b d", d=dim)
return embedding
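A stdlib sketch of the sinusoidal embedding above for a single (possibly fractional) timestep: a geometric frequency ladder, then concatenated cosine and sine halves, zero-padded when `dim` is odd:

```python
import math

def sin_embedding(t, dim, max_period=10000):
    # geometric frequency ladder, then [cos | sin] halves (zero-padded if dim is odd)
    half = dim // 2
    freqs = [math.exp(-math.log(max_period) * k / half) for k in range(half)]
    emb = [math.cos(t * f) for f in freqs] + [math.sin(t * f) for f in freqs]
    if dim % 2:
        emb.append(0.0)
    return emb

e = sin_embedding(0.0, 4)
assert e == [1.0, 1.0, 0.0, 0.0]        # t = 0: all cosines 1, all sines 0
assert len(sin_embedding(3.5, 5)) == 5  # odd dim is zero-padded to length
```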
def zero_module(module):
"""
Zero out the parameters of a module and return it.
"""
for p in module.parameters():
p.detach().zero_()
return module
def scale_module(module, scale):
"""
Scale the parameters of a module and return it.
"""
for p in module.parameters():
p.detach().mul_(scale)
return module
def mean_flat(tensor):
"""
Take the mean over all non-batch dimensions.
"""
return tensor.mean(dim=list(range(1, len(tensor.shape))))
def normalization(channels):
"""
Make a standard normalization layer.
:param channels: number of input channels.
:return: an nn.Module for normalization.
"""
return GroupNorm32(32, channels)
# PyTorch 1.7 has SiLU, but we support PyTorch 1.5.
class SiLU(nn.Module):
def forward(self, x):
return x * torch.sigmoid(x)
class GroupNorm32(nn.GroupNorm):
def forward(self, x):
return super().forward(x.float()).type(x.dtype)
def conv_nd(dims, *args, **kwargs):
"""
Create a 1D, 2D, or 3D convolution module.
"""
if dims == 1:
return nn.Conv1d(*args, **kwargs)
elif dims == 2:
return nn.Conv2d(*args, **kwargs)
elif dims == 3:
return nn.Conv3d(*args, **kwargs)
raise ValueError(f"unsupported dimensions: {dims}")
def linear(*args, **kwargs):
"""
Create a linear module.
"""
return nn.Linear(*args, **kwargs)
def avg_pool_nd(dims, *args, **kwargs):
"""
Create a 1D, 2D, or 3D average pooling module.
"""
if dims == 1:
return nn.AvgPool1d(*args, **kwargs)
elif dims == 2:
return nn.AvgPool2d(*args, **kwargs)
elif dims == 3:
return nn.AvgPool3d(*args, **kwargs)
raise ValueError(f"unsupported dimensions: {dims}")
================================================
FILE: sgm/modules/diffusionmodules/wrappers.py
================================================
import torch
import torch.nn as nn
from packaging import version
OPENAIUNETWRAPPER = "sgm.modules.diffusionmodules.wrappers.OpenAIWrapper"
class IdentityWrapper(nn.Module):
def __init__(self, diffusion_model, compile_model: bool = False):
super().__init__()
compile = (
torch.compile
if (version.parse(torch.__version__) >= version.parse("2.0.0"))
and compile_model
else lambda x: x
)
self.diffusion_model = compile(diffusion_model)
def forward(self, *args, **kwargs):
return self.diffusion_model(*args, **kwargs)
class OpenAIWrapper(IdentityWrapper):
def forward(
self, x: torch.Tensor, t: torch.Tensor, c: dict, **kwargs
) -> torch.Tensor:
x = torch.cat((x, c.get("concat", torch.Tensor([]).type_as(x))), dim=1)
return self.diffusion_model(
x,
timesteps=t,
t_context=c.get("t_crossattn", None),
v_context=c.get("v_crossattn", None),
y=c.get("vector", None),
**kwargs
)
================================================
FILE: sgm/modules/distributions/__init__.py
================================================
================================================
FILE: sgm/modules/distributions/distributions.py
================================================
import torch
import numpy as np
class AbstractDistribution:
def sample(self):
raise NotImplementedError()
def mode(self):
raise NotImplementedError()
class DiracDistribution(AbstractDistribution):
def __init__(self, value):
self.value = value
def sample(self):
return self.value
def mode(self):
return self.value
class DiagonalGaussianDistribution(object):
def __init__(self, parameters, deterministic=False):
self.parameters = parameters
self.mean, self.logvar = torch.chunk(parameters, 2, dim=1)
self.logvar = torch.clamp(self.logvar, -30.0, 20.0)
self.deterministic = deterministic
self.std = torch.exp(0.5 * self.logvar)
self.var = torch.exp(self.logvar)
if self.deterministic:
self.var = self.std = torch.zeros_like(self.mean).to(
device=self.parameters.device
)
def sample(self):
x = self.mean + self.std * torch.randn(self.mean.shape).to(
device=self.parameters.device
)
return x
def kl(self, other=None):
if self.deterministic:
return torch.Tensor([0.0])
else:
if other is None:
return 0.5 * torch.sum(
torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar,
dim=[1, 2, 3],
)
else:
return 0.5 * torch.sum(
torch.pow(self.mean - other.mean, 2) / other.var
+ self.var / other.var
- 1.0
- self.logvar
+ other.logvar,
dim=[1, 2, 3],
)
def nll(self, sample, dims=[1, 2, 3]):
if self.deterministic:
return torch.Tensor([0.0])
logtwopi = np.log(2.0 * np.pi)
return 0.5 * torch.sum(
logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var,
dim=dims,
)
def mode(self):
return self.mean
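`kl()` with `other=None` is the closed-form KL divergence to a standard normal, summed over the non-batch dims. Per latent element, the formula sketched with scalars:

```python
import math

def kl_to_standard_normal(mean, logvar):
    # KL( N(mean, exp(logvar)) || N(0, 1) ) for a single latent element
    return 0.5 * (mean**2 + math.exp(logvar) - 1.0 - logvar)

assert kl_to_standard_normal(0.0, 0.0) == 0.0  # N(0,1) vs N(0,1): zero divergence
assert kl_to_standard_normal(1.0, 0.0) == 0.5  # shifting the mean costs mean^2 / 2
```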
def normal_kl(mean1, logvar1, mean2, logvar2):
"""
source: https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/losses.py#L12
Compute the KL divergence between two gaussians.
Shapes are automatically broadcasted, so batches can be compared to
scalars, among other use cases.
"""
tensor = None
for obj in (mean1, logvar1, mean2, logvar2):
if isinstance(obj, torch.Tensor):
tensor = obj
break
assert tensor is not None, "at least one argument must be a Tensor"
# Force variances to be Tensors. Broadcasting helps convert scalars to
# Tensors, but it does not work for torch.exp().
logvar1, logvar2 = [
x if isinstance(x, torch.Tensor) else torch.tensor(x).to(tensor)
for x in (logvar1, logvar2)
]
return 0.5 * (
-1.0
+ logvar2
- logvar1
+ torch.exp(logvar1 - logvar2)
+ ((mean1 - mean2) ** 2) * torch.exp(-logvar2)
)
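The general two-Gaussian KL formula in `normal_kl`, evaluated with scalars:

```python
import math

def normal_kl_scalar(mean1, logvar1, mean2, logvar2):
    # KL( N(mean1, exp(logvar1)) || N(mean2, exp(logvar2)) ), per element
    return 0.5 * (
        -1.0
        + logvar2
        - logvar1
        + math.exp(logvar1 - logvar2)
        + (mean1 - mean2) ** 2 * math.exp(-logvar2)
    )

assert normal_kl_scalar(0.0, 0.0, 0.0, 0.0) == 0.0  # identical gaussians
# with mean2 = logvar2 = 0 this reduces to the KL-to-standard-normal case
assert normal_kl_scalar(1.0, 0.0, 0.0, 0.0) == 0.5
```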
================================================
FILE: sgm/modules/ema.py
================================================
import torch
from torch import nn
class LitEma(nn.Module):
def __init__(self, model, decay=0.9999, use_num_updates=True):
super().__init__()
if decay < 0.0 or decay > 1.0:
raise ValueError("Decay must be between 0 and 1")
self.m_name2s_name = {}
self.register_buffer("decay", torch.tensor(decay, dtype=torch.float32))
self.register_buffer(
"num_updates",
torch.tensor(0, dtype=torch.int)
if use_num_updates
else torch.tensor(-1, dtype=torch.int),
)
for name, p in model.named_parameters():
if p.requires_grad:
# remove as '.'-character is not allowed in buffers
s_name = name.replace(".", "")
self.m_name2s_name.update({name: s_name})
self.register_buffer(s_name, p.clone().detach().data)
self.collected_params = []
def reset_num_updates(self):
del self.num_updates
self.register_buffer("num_updates", torch.tensor(0, dtype=torch.int))
def forward(self, model):
decay = self.decay
if self.num_updates >= 0:
self.num_updates += 1
decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates))
one_minus_decay = 1.0 - decay
with torch.no_grad():
m_param = dict(model.named_parameters())
shadow_params = dict(self.named_buffers())
for key in m_param:
if m_param[key].requires_grad:
sname = self.m_name2s_name[key]
shadow_params[sname] = shadow_params[sname].type_as(m_param[key])
shadow_params[sname].sub_(
one_minus_decay * (shadow_params[sname] - m_param[key])
)
else:
assert key not in self.m_name2s_name
def copy_to(self, model):
m_param = dict(model.named_parameters())
shadow_params = dict(self.named_buffers())
for key in m_param:
if m_param[key].requires_grad:
m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data)
else:
assert key not in self.m_name2s_name
def store(self, parameters):
"""
Save the current parameters for restoring later.
Args:
parameters: Iterable of `torch.nn.Parameter`; the parameters to be
temporarily stored.
"""
self.collected_params = [param.clone() for param in parameters]
def restore(self, parameters):
"""
Restore the parameters stored with the `store` method.
Useful to validate the model with EMA parameters without affecting the
original optimization process. Store the parameters before the
`copy_to` method. After validation (or model saving), use this to
restore the former parameters.
Args:
parameters: Iterable of `torch.nn.Parameter`; the parameters to be
updated with the stored parameters.
"""
for c_param, param in zip(self.collected_params, parameters):
param.data.copy_(c_param.data)
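`LitEma.forward` warms up the decay via `min(decay, (1 + n) / (10 + n))` so early shadow weights track the live weights quickly, then lerps the shadow toward the live parameter. A scalar sketch of that update (assuming `n` is the post-increment update count):

```python
def ema_step(shadow, param, decay=0.9999, num_updates=None):
    # warm-up: cap the decay at (1+n)/(10+n) so early shadows move fast
    if num_updates is not None:
        decay = min(decay, (1 + num_updates) / (10 + num_updates))
    # shadow -= (1 - decay) * (shadow - param), i.e. a lerp toward param
    return shadow - (1.0 - decay) * (shadow - param)

# first update: decay capped at 2/11, shadow jumps most of the way
assert abs(ema_step(0.0, 1.0, num_updates=1) - 9 / 11) < 1e-12
# late in training the cap no longer binds and the shadow barely moves
assert abs(ema_step(0.0, 1.0, num_updates=10**6) - (1 - 0.9999)) < 1e-9
```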
================================================
FILE: sgm/modules/encoders/__init__.py
================================================
================================================
FILE: sgm/modules/encoders/modules.py
================================================
from contextlib import nullcontext
from functools import partial
from typing import Dict, List, Optional, Tuple, Union
import kornia
import numpy as np
import open_clip
import torch
import torch.nn as nn
from einops import rearrange, repeat
from omegaconf import ListConfig
from torch.utils.checkpoint import checkpoint
from transformers import (
ByT5Tokenizer,
CLIPTextModel,
CLIPTokenizer,
CLIPVisionModel,
T5EncoderModel,
T5Tokenizer,
)
from ...modules.autoencoding.regularizers import DiagonalGaussianRegularizer
from ...modules.diffusionmodules.model import Encoder
from ...modules.diffusionmodules.openaimodel import Timestep
from ...modules.diffusionmodules.util import extract_into_tensor, make_beta_schedule
from ...modules.distributions.distributions import DiagonalGaussianDistribution
from ...util import (
autocast,
count_params,
default,
disabled_train,
expand_dims_like,
instantiate_from_config,
)
import math
import string
import pytorch_lightning as pl
from torchvision import transforms
from timm.models.vision_transformer import VisionTransformer
from safetensors.torch import load_file as load_safetensors
from torchvision.utils import save_image
# disable warning
from transformers import logging
logging.set_verbosity_error()
class AbstractEmbModel(nn.Module):
def __init__(self):
super().__init__()
self._is_trainable = None
self._ucg_rate = None
self._input_key = None
self._emb_key = None
@property
def is_trainable(self) -> bool:
return self._is_trainable
@property
def ucg_rate(self) -> Union[float, torch.Tensor]:
return self._ucg_rate
@property
def input_key(self) -> str:
return self._input_key
@property
def emb_key(self) -> str:
return self._emb_key
@is_trainable.setter
def is_trainable(self, value: bool):
self._is_trainable = value
@ucg_rate.setter
def ucg_rate(self, value: Union[float, torch.Tensor]):
self._ucg_rate = value
@input_key.setter
def input_key(self, value: str):
self._input_key = value
@emb_key.setter
def emb_key(self, value: str):
self._emb_key = value
@is_trainable.deleter
def is_trainable(self):
del self._is_trainable
@ucg_rate.deleter
def ucg_rate(self):
del self._ucg_rate
@input_key.deleter
def input_key(self):
del self._input_key
@emb_key.deleter
def emb_key(self):
del self._emb_key
class GeneralConditioner(nn.Module):
OUTPUT_DIM2KEYS = {2: "vector", 3: "crossattn", 4: "concat", 5: "concat"}
KEY2CATDIM = {"vector": 1, "crossattn": 2, "concat": 1}
def __init__(self, emb_models: Union[List, ListConfig]):
super().__init__()
embedders = []
for n, embconfig in enumerate(emb_models):
embedder = instantiate_from_config(embconfig)
assert isinstance(
embedder, AbstractEmbModel
), f"embedder model {embedder.__class__.__name__} has to inherit from AbstractEmbModel"
embedder.is_trainable = embconfig.get("is_trainable", False)
embedder.ucg_rate = embconfig.get("ucg_rate", 0.0)
if not embedder.is_trainable:
embedder.train = disabled_train
embedder.freeze()
print(
f"Initialized embedder #{n}: {embedder.__class__.__name__} "
f"with {count_params(embedder, False)} params. Trainable: {embedder.is_trainable}"
)
if "emb_key" in embconfig:
embedder.emb_key = embconfig["emb_key"]
if "input_key" in embconfig:
embedder.input_key = embconfig["input_key"]
elif "input_keys" in embconfig:
embedder.input_keys = embconfig["input_keys"]
else:
raise KeyError(
f"need either 'input_key' or 'input_keys' for embedder {embedder.__class__.__name__}"
)
embedder.legacy_ucg_val = embconfig.get("legacy_ucg_value", None)
if embedder.legacy_ucg_val is not None:
embedder.ucg_prng = np.random.RandomState()
embedders.append(embedder)
self.embedders = nn.ModuleList(embedders)
def possibly_get_ucg_val(self, embedder: AbstractEmbModel, batch: Dict) -> Dict:
assert embedder.legacy_ucg_val is not None
p = embedder.ucg_rate
val = embedder.legacy_ucg_val
for i in range(len(batch[embedder.input_key])):
if embedder.ucg_prng.choice(2, p=[1 - p, p]):
batch[embedder.input_key][i] = val
return batch
def forward(
self, batch: Dict, force_zero_embeddings: Optional[List] = None
) -> Dict:
output = dict()
if force_zero_embeddings is None:
force_zero_embeddings = []
for embedder in self.embedders:
embedding_context = nullcontext if embedder.is_trainable else torch.no_grad
with embedding_context():
if hasattr(embedder, "input_key") and (embedder.input_key is not None):
if embedder.legacy_ucg_val is not None:
batch = self.possibly_get_ucg_val(embedder, batch)
emb_out = embedder(batch[embedder.input_key])
elif hasattr(embedder, "input_keys"):
emb_out = embedder(*[batch[k] for k in embedder.input_keys])
assert isinstance(
emb_out, (torch.Tensor, list, tuple)
), f"encoder outputs must be tensors or a sequence, but got {type(emb_out)}"
if not isinstance(emb_out, (list, tuple)):
emb_out = [emb_out]
for emb in emb_out:
if embedder.emb_key is not None:
out_key = embedder.emb_key
else:
out_key = self.OUTPUT_DIM2KEYS[emb.dim()]
if embedder.ucg_rate > 0.0 and embedder.legacy_ucg_val is None:
emb = (
expand_dims_like(
torch.bernoulli(
(1.0 - embedder.ucg_rate)
* torch.ones(emb.shape[0], device=emb.device)
),
emb,
)
* emb
)
if (
hasattr(embedder, "input_key")
and embedder.input_key in force_zero_embeddings
):
emb = torch.zeros_like(emb)
if out_key in output:
output[out_key] = torch.cat(
(output[out_key], emb), self.KEY2CATDIM[out_key]
)
else:
output[out_key] = emb
return output
def get_unconditional_conditioning(
self, batch_c, batch_uc=None, force_uc_zero_embeddings=None
):
if force_uc_zero_embeddings is None:
force_uc_zero_embeddings = []
ucg_rates = list()
for embedder in self.embedders:
ucg_rates.append(embedder.ucg_rate)
embedder.ucg_rate = 0.0
c = self(batch_c)
uc = self(batch_c if batch_uc is None else batch_uc, force_uc_zero_embeddings)
for embedder, rate in zip(self.embedders, ucg_rates):
embedder.ucg_rate = rate
return c, uc
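`GeneralConditioner.forward` implements unconditional-guidance dropout by multiplying each embedding with a per-sample `Bernoulli(1 - ucg_rate)` mask. The edge cases of the rate, sketched with a seeded stdlib RNG (names are illustrative):

```python
import random

def ucg_mask(batch_size, ucg_rate, seed=0):
    # per-sample Bernoulli(1 - ucg_rate): 1 keeps the conditioning, 0 drops it
    rng = random.Random(seed)
    return [1 if rng.random() < (1.0 - ucg_rate) else 0 for _ in range(batch_size)]

assert ucg_mask(8, 0.0) == [1] * 8  # rate 0: conditioning always kept
assert ucg_mask(8, 1.0) == [0] * 8  # rate 1: always dropped (unconditional)
```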
class InceptionV3(nn.Module):
"""Wrapper around the https://github.com/mseitzer/pytorch-fid inception
port with an additional squeeze at the end"""
def __init__(self, normalize_input=False, **kwargs):
super().__init__()
from pytorch_fid import inception
kwargs["resize_input"] = True
self.model = inception.InceptionV3(normalize_input=normalize_input, **kwargs)
def forward(self, inp):
# inp = kornia.geometry.resize(inp, (299, 299),
# interpolation='bicubic',
# align_corners=False,
# antialias=True)
# inp = inp.clamp(min=-1, max=1)
outp = self.model(inp)
if len(outp) == 1:
return outp[0].squeeze()
return outp
class IdentityEncoder(AbstractEmbModel):
def encode(self, x):
return x
def freeze(self):
return
def forward(self, x):
return x
class ClassEmbedder(AbstractEmbModel):
def __init__(self, embed_dim, n_classes=1000, add_sequence_dim=False):
super().__init__()
self.embedding = nn.Embedding(n_classes, embed_dim)
self.n_classes = n_classes
self.add_sequence_dim = add_sequence_dim
def forward(self, c):
c = self.embedding(c)
if self.add_sequence_dim:
c = c[:, None, :]
return c
def get_unconditional_conditioning(self, bs, device="cuda"):
uc_class = (
self.n_classes - 1
) # 1000 classes --> 0 ... 999, one extra class for ucg (class 1000)
uc = torch.ones((bs,), device=device) * uc_class
uc = {self.input_key: uc.long()}
return uc
class ClassEmbedderForMultiCond(ClassEmbedder):
def forward(self, batch, key=None, disable_dropout=False):
out = batch
key = default(key, self.input_key)
islist = isinstance(batch[key], list)
if islist:
batch[key] = batch[key][0]
c_out = super().forward(batch[key])
out[key] = [c_out] if islist else c_out
return out
class FrozenT5Embedder(AbstractEmbModel):
"""Uses the T5 transformer encoder for text"""
def __init__(
self, version="google/t5-v1_1-xxl", device="cuda", max_length=77, freeze=True
): # another option is google/t5-v1_1-xl
super().__init__()
self.tokenizer = T5Tokenizer.from_pretrained(version)
self.transformer = T5EncoderModel.from_pretrained(version)
self.device = device
self.max_length = max_length
if freeze:
self.freeze()
def freeze(self):
self.transformer = self.transformer.eval()
for param in self.parameters():
param.requires_grad = False
# @autocast
def forward(self, text):
batch_encoding = self.tokenizer(
text,
truncation=True,
max_length=self.max_length,
return_length=True,
return_overflowing_tokens=False,
padding="max_length",
return_tensors="pt",
)
tokens = batch_encoding["input_ids"].to(self.device)
with torch.autocast("cuda", enabled=False):
outputs = self.transformer(input_ids=tokens)
z = outputs.last_hidden_state
return z
def encode(self, text):
return self(text)
class FrozenByT5Embedder(AbstractEmbModel):
"""
Uses the ByT5 transformer encoder for text. Is character-aware.
"""
def __init__(
self, version="google/byt5-base", device="cuda", max_length=77, freeze=True, *args, **kwargs
): # others are google/byt5-small and google/byt5-large
super().__init__(*args, **kwargs)
self.tokenizer = ByT5Tokenizer.from_pretrained(version)
self.transformer = T5EncoderModel.from_pretrained(version)
self.device = device
self.max_length = max_length
if freeze:
self.freeze()
def freeze(self):
self.transformer = self.transformer.eval()
for param in self.parameters():
param.requires_grad = False
def forward(self, text):
batch_encoding = self.tokenizer(
text,
truncation=True,
max_length=self.max_length,
return_length=True,
return_overflowing_tokens=False,
padding="max_length",
return_tensors="pt",
)
tokens = batch_encoding["input_ids"].to(next(self.parameters()).device)
with torch.autocast("cuda", enabled=False):
outputs = self.transformer(input_ids=tokens)
z = outputs.last_hidden_state # (b, l, 1536) for byt5-base
return z
def encode(self, text):
return self(text)
class FrozenCLIPEmbedder(AbstractEmbModel):
"""Uses the CLIP transformer encoder for text (from huggingface)"""
LAYERS = ["last", "pooled", "hidden"]
def __init__(
self,
version="openai/clip-vit-large-patch14",
device="cuda",
max_length=77,
freeze=True,
layer="last",
layer_idx=None,
always_return_pooled=False,
): # clip-vit-base-patch32
super().__init__()
assert layer in self.LAYERS
self.tokenizer = CLIPTokenizer.from_pretrained(version)
self.transformer = CLIPTextModel.from_pretrained(version)
self.device = device
self.max_length = max_length
if freeze:
self.freeze()
self.layer = layer
self.layer_idx = layer_idx
self.return_pooled = always_return_pooled
if layer == "hidden":
assert layer_idx is not None
assert 0 <= abs(layer_idx) <= 12
def freeze(self):
self.transformer = self.transformer.eval()
for param in self.parameters():
param.requires_grad = False
@autocast
def forward(self, text):
batch_encoding = self.tokenizer(
text,
truncation=True,
max_length=self.max_length,
return_length=True,
return_overflowing_tokens=False,
padding="max_length",
return_tensors="pt",
)
device = next(self.transformer.parameters()).device
tokens = batch_encoding["input_ids"].to(device)
outputs = self.transformer(
input_ids=tokens, output_hidden_states=self.layer == "hidden"
)
if self.layer == "last":
z = outputs.last_hidden_state
elif self.layer == "pooled":
z = outputs.pooler_output[:, None, :]
else:
z = outputs.hidden_states[self.layer_idx]
if self.return_pooled:
return z, outputs.pooler_output
return z
def encode(self, text):
return self(text)
class FrozenOpenCLIPEmbedder2(AbstractEmbModel):
"""
Uses the OpenCLIP transformer encoder for text
"""
LAYERS = ["pooled", "last", "penultimate"]
def __init__(
self,
arch="ViT-H-14",
version="laion2b_s32b_b79k",
device="cuda",
max_length=77,
freeze=True,
layer="last",
always_return_pooled=False,
legacy=True,
):
super().__init__()
assert layer in self.LAYERS
model, _, _ = open_clip.create_model_and_transforms(
arch,
device=torch.device("cpu"),
pretrained=version,
)
del model.visual
self.model = model
self.device = device
self.max_length = max_length
self.return_pooled = always_return_pooled
if freeze:
self.freeze()
self.layer = layer
if self.layer == "last":
self.layer_idx = 0
elif self.layer == "penultimate":
self.layer_idx = 1
else:
raise NotImplementedError()
self.legacy = legacy
def freeze(self):
self.model = self.model.eval()
for param in self.parameters():
param.requires_grad = False
@autocast
def forward(self, text):
device = next(self.model.parameters()).device
tokens = open_clip.tokenize(text)
z = self.encode_with_transformer(tokens.to(device))
if not self.return_pooled and self.legacy:
return z
if self.return_pooled:
assert not self.legacy
return z[self.layer], z["pooled"]
return z[self.layer]
def encode_with_transformer(self, text):
x = self.model.token_embedding(text) # [batch_size, n_ctx, d_model]
x = x + self.model.positional_embedding
x = x.permute(1, 0, 2) # NLD -> LND
x = self.text_transformer_forward(x, attn_mask=self.model.attn_mask)
if self.legacy:
x = x[self.layer]
x = self.model.ln_final(x)
return x
else:
# x is a dict and will stay a dict
o = x["last"]
o = self.model.ln_final(o)
pooled = self.pool(o, text)
x["pooled"] = pooled
return x
def pool(self, x, text):
# take features from the eot embedding (eot_token is the highest number in each sequence)
x = (
x[torch.arange(x.shape[0]), text.argmax(dim=-1)]
@ self.model.text_projection
)
return x
def text_transformer_forward(self, x: torch.Tensor, attn_mask=None):
outputs = {}
for i, r in enumerate(self.model.transformer.resblocks):
if i == len(self.model.transformer.resblocks) - 1:
outputs["penultimate"] = x.permute(1, 0, 2) # LND -> NLD
if (
self.model.transformer.grad_checkpointing
and not torch.jit.is_scripting()
):
x = checkpoint(r, x, attn_mask)
else:
x = r(x, attn_mask=attn_mask)
outputs["last"] = x.permute(1, 0, 2) # LND -> NLD
return outputs
def encode(self, text):
return self(text)
class FrozenOpenCLIPEmbedder(AbstractEmbModel):
LAYERS = [
# "pooled",
"last",
"penultimate",
]
def __init__(
self,
arch="ViT-H-14",
version="laion2b_s32b_b79k",
device="cuda",
max_length=77,
freeze=True,
layer="last",
):
super().__init__()
assert layer in self.LAYERS
model, _, _ = open_clip.create_model_and_transforms(
arch, device=torch.device("cpu"), pretrained=version
)
del model.visual
self.model = model
self.device = device
self.max_length = max_length
if freeze:
self.freeze()
self.layer = layer
if self.layer == "last":
self.layer_idx = 0
elif self.layer == "penultimate":
self.layer_idx = 1
else:
raise NotImplementedError()
def freeze(self):
self.model = self.model.eval()
for param in self.parameters():
param.requires_grad = False
def forward(self, text):
device = next(self.model.parameters()).device
tokens = open_clip.tokenize(text)
z = self.encode_with_transformer(tokens.to(device))
return z
def encode_with_transformer(self, text):
x = self.model.token_embedding(text) # [batch_size, n_ctx, d_model]
x = x + self.model.positional_embedding
x = x.permute(1, 0, 2) # NLD -> LND
x = self.text_transformer_forward(x, attn_mask=self.model.attn_mask)
x = x.permute(1, 0, 2) # LND -> NLD
x = self.model.ln_final(x)
return x
def text_transformer_forward(self, x: torch.Tensor, attn_mask=None):
for i, r in enumerate(self.model.transformer.resblocks):
if i == len(self.model.transformer.resblocks) - self.layer_idx:
break
if (
self.model.transformer.grad_checkpointing
and not torch.jit.is_scripting()
):
x = checkpoint(r, x, attn_mask)
else:
x = r(x, attn_mask=attn_mask)
return x
def encode(self, text):
return self(text)
class FrozenOpenCLIPImageEmbedder(AbstractEmbModel):
"""
Uses the OpenCLIP vision transformer encoder for images
"""
def __init__(
self,
arch="ViT-H-14",
version="laion2b_s32b_b79k",
device="cuda",
max_length=77,
freeze=True,
antialias=True,
ucg_rate=0.0,
unsqueeze_dim=False,
repeat_to_max_len=False,
num_image_crops=0,
output_tokens=False,
):
super().__init__()
model, _, _ = open_clip.create_model_and_transforms(
arch,
device=torch.device("cpu"),
pretrained=version,
)
del model.transformer
self.model = model
self.max_crops = num_image_crops
self.pad_to_max_len = self.max_crops > 0
self.repeat_to_max_len = repeat_to_max_len and (not self.pad_to_max_len)
self.device = device
self.max_length = max_length
if freeze:
self.freeze()
self.antialias = antialias
self.register_buffer(
"mean", torch.Tensor([0.48145466, 0.4578275, 0.40821073]), persistent=False
)
self.register_buffer(
"std", torch.Tensor([0.26862954, 0.26130258, 0.27577711]), persistent=False
)
self.ucg_rate = ucg_rate
self.unsqueeze_dim = unsqueeze_dim
self.stored_batch = None
self.model.visual.output_tokens = output_tokens
self.output_tokens = output_tokens
    def preprocess(self, x):
        # resize to the CLIP input resolution
        x = kornia.geometry.resize(
            x,
            (224, 224),
            interpolation="bicubic",
            align_corners=True,
            antialias=self.antialias,
        )
        # map from [-1, 1] to [0, 1]
        x = (x + 1.0) / 2.0
        # renormalize according to clip
        x = kornia.enhance.normalize(x, self.mean, self.std)
return x
def freeze(self):
self.model = self.model.eval()
for param in self.parameters():
param.requires_grad = False
@autocast
def forward(self, image, no_dropout=False):
z = self.encode_with_vision_transformer(image)
tokens = None
if self.output_tokens:
z, tokens = z[0], z[1]
z = z.to(image.dtype)
# if self.ucg_rate > 0.0 and not no_dropout and not (self.max_crops > 0):
# z = (
# torch.bernoulli(
# (1.0 - self.ucg_rate) * torch.ones(z.shape[0], device=z.device)
# )[:, None]
# * z
# )
# if tokens is not None:
# tokens = (
# expand_dims_like(
# torch.bernoulli(
# (1.0 - self.ucg_rate)
# * torch.ones(tokens.shape[0], device=tokens.device)
# ),
# tokens,
# )
# * tokens
# )
if self.unsqueeze_dim:
z = z[:, None, :]
if self.output_tokens:
assert not self.repeat_to_max_len
assert not self.pad_to_max_len
return tokens, z
if self.repeat_to_max_len:
if z.dim() == 2:
z_ = z[:, None, :]
else:
z_ = z
return repeat(z_, "b 1 d -> b n d", n=self.max_length), z
elif self.pad_to_max_len:
assert z.dim() == 3
z_pad = torch.cat(
(
z,
torch.zeros(
z.shape[0],
self.max_length - z.shape[1],
z.shape[2],
device=z.device,
),
),
1,
)
return z_pad, z_pad[:, 0, ...]
return z
def encode_with_vision_transformer(self, img):
# if self.max_crops > 0:
# img = self.preprocess_by_cropping(img)
if img.dim() == 5:
assert self.max_crops == img.shape[1]
img = rearrange(img, "b n c h w -> (b n) c h w")
img = self.preprocess(img)
if not self.output_tokens:
assert not self.model.visual.output_tokens
x = self.model.visual(img)
tokens = None
else:
assert self.model.visual.output_tokens
x, tokens = self.model.visual(img)
if self.max_crops > 0:
x = rearrange(x, "(b n) d -> b n d", n=self.max_crops)
# drop out between 0 and all along the sequence axis
x = (
torch.bernoulli(
(1.0 - self.ucg_rate)
* torch.ones(x.shape[0], x.shape[1], 1, device=x.device)
)
* x
)
if tokens is not None:
tokens = rearrange(tokens, "(b n) t d -> b t (n d)", n=self.max_crops)
print(
f"You are running very experimental token-concat in {self.__class__.__name__}. "
f"Check what you are doing, and then remove this message."
)
if self.output_tokens:
return x, tokens
return x
    def encode(self, img):
        return self(img)
class FrozenCLIPT5Encoder(AbstractEmbModel):
def __init__(
self,
clip_version="openai/clip-vit-large-patch14",
t5_version="google/t5-v1_1-xl",
device="cuda",
clip_max_length=77,
t5_max_length=77,
):
super().__init__()
self.clip_encoder = FrozenCLIPEmbedder(
clip_version, device, max_length=clip_max_length
)
self.t5_encoder = FrozenT5Embedder(t5_version, device, max_length=t5_max_length)
print(
f"{self.clip_encoder.__class__.__name__} has {count_params(self.clip_encoder) * 1.e-6:.2f} M parameters, "
f"{self.t5_encoder.__class__.__name__} comes with {count_params(self.t5_encoder) * 1.e-6:.2f} M params."
)
def encode(self, text):
return self(text)
def forward(self, text):
clip_z = self.clip_encoder.encode(text)
t5_z = self.t5_encoder.encode(text)
return [clip_z, t5_z]
class SpatialRescaler(AbstractEmbModel):
def __init__(
self,
n_stages=1,
method="bilinear",
multiplier=0.5,
in_channels=3,
out_channels=None,
bias=False,
wrap_video=False,
kernel_size=1,
remap_output=False,
):
super().__init__()
self.n_stages = n_stages
assert self.n_stages >= 0
assert method in [
"nearest",
"linear",
"bilinear",
"trilinear",
"bicubic",
"area",
]
self.multiplier = multiplier
self.interpolator = partial(torch.nn.functional.interpolate, mode=method)
self.remap_output = out_channels is not None or remap_output
if self.remap_output:
print(
f"Spatial Rescaler mapping from {in_channels} to {out_channels} channels after resizing."
)
self.channel_mapper = nn.Conv2d(
in_channels,
out_channels,
kernel_size=kernel_size,
bias=bias,
padding=kernel_size // 2,
)
self.wrap_video = wrap_video
def freeze(self):
pass
def forward(self, x):
if self.wrap_video and x.ndim == 5:
B, C, T, H, W = x.shape
x = rearrange(x, "b c t h w -> b t c h w")
x = rearrange(x, "b t c h w -> (b t) c h w")
for stage in range(self.n_stages):
x = self.interpolator(x, scale_factor=self.multiplier)
if self.wrap_video:
x = rearrange(x, "(b t) c h w -> b t c h w", b=B, t=T, c=C)
x = rearrange(x, "b t c h w -> b c t h w")
if self.remap_output:
x = self.channel_mapper(x)
return x
def encode(self, x):
return self(x)
class LowScaleEncoder(nn.Module):
def __init__(
self,
model_config,
linear_start,
linear_end,
timesteps=1000,
max_noise_level=250,
output_size=64,
scale_factor=1.0,
):
super().__init__()
self.max_noise_level = max_noise_level
self.model = instantiate_from_config(model_config)
self.augmentation_schedule = self.register_schedule(
timesteps=timesteps, linear_start=linear_start, linear_end=linear_end
)
self.out_size = output_size
self.scale_factor = scale_factor
def register_schedule(
self,
beta_schedule="linear",
timesteps=1000,
linear_start=1e-4,
linear_end=2e-2,
cosine_s=8e-3,
):
betas = make_beta_schedule(
beta_schedule,
timesteps,
linear_start=linear_start,
linear_end=linear_end,
cosine_s=cosine_s,
)
alphas = 1.0 - betas
alphas_cumprod = np.cumprod(alphas, axis=0)
alphas_cumprod_prev = np.append(1.0, alphas_cumprod[:-1])
(timesteps,) = betas.shape
self.num_timesteps = int(timesteps)
self.linear_start = linear_start
self.linear_end = linear_end
assert (
alphas_cumprod.shape[0] == self.num_timesteps
), "alphas have to be defined for each timestep"
to_torch = partial(torch.tensor, dtype=torch.float32)
self.register_buffer("betas", to_torch(betas))
self.register_buffer("alphas_cumprod", to_torch(alphas_cumprod))
self.register_buffer("alphas_cumprod_prev", to_torch(alphas_cumprod_prev))
# calculations for diffusion q(x_t | x_{t-1}) and others
self.register_buffer("sqrt_alphas_cumprod", to_torch(np.sqrt(alphas_cumprod)))
self.register_buffer(
"sqrt_one_minus_alphas_cumprod", to_torch(np.sqrt(1.0 - alphas_cumprod))
)
self.register_buffer(
"log_one_minus_alphas_cumprod", to_torch(np.log(1.0 - alphas_cumprod))
)
self.register_buffer(
"sqrt_recip_alphas_cumprod", to_torch(np.sqrt(1.0 / alphas_cumprod))
)
self.register_buffer(
"sqrt_recipm1_alphas_cumprod", to_torch(np.sqrt(1.0 / alphas_cumprod - 1))
)
def q_sample(self, x_start, t, noise=None):
noise = default(noise, lambda: torch.randn_like(x_start))
return (
extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start
+ extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape)
* noise
)
def forward(self, x):
z = self.model.encode(x)
if isinstance(z, DiagonalGaussianDistribution):
z = z.sample()
z = z * self.scale_factor
noise_level = torch.randint(
0, self.max_noise_level, (x.shape[0],), device=x.device
).long()
z = self.q_sample(z, noise_level)
if self.out_size is not None:
z = torch.nn.functional.interpolate(z, size=self.out_size, mode="nearest")
# z = z.repeat_interleave(2, -2).repeat_interleave(2, -1)
return z, noise_level
def decode(self, z):
z = z / self.scale_factor
return self.model.decode(z)
class ConcatTimestepEmbedderND(AbstractEmbModel):
"""embeds each dimension independently and concatenates them"""
def __init__(self, outdim):
super().__init__()
self.timestep = Timestep(outdim)
self.outdim = outdim
def freeze(self):
self.eval()
def forward(self, x):
if x.ndim == 1:
x = x[:, None]
assert len(x.shape) == 2
b, dims = x.shape[0], x.shape[1]
x = rearrange(x, "b d -> (b d)")
emb = self.timestep(x)
emb = rearrange(emb, "(b d) d2 -> b (d d2)", b=b, d=dims, d2=self.outdim)
return emb
class GaussianEncoder(Encoder, AbstractEmbModel):
def __init__(
self, weight: float = 1.0, flatten_output: bool = True, *args, **kwargs
):
super().__init__(*args, **kwargs)
self.posterior = DiagonalGaussianRegularizer()
self.weight = weight
self.flatten_output = flatten_output
def forward(self, x) -> Tuple[Dict, torch.Tensor]:
z = super().forward(x)
z, log = self.posterior(z)
log["loss"] = log["kl_loss"]
log["weight"] = self.weight
if self.flatten_output:
            z = rearrange(z, "b c h w -> b (h w) c")
return log, z
class LatentEncoder(AbstractEmbModel):
def __init__(self, scale_factor, config, *args, **kwargs):
super().__init__(*args, **kwargs)
self.scale_factor = scale_factor
self.model = instantiate_from_config(config).eval()
self.model.train = disabled_train
def freeze(self):
for param in self.model.parameters():
param.requires_grad = False
def forward(self, x):
z = self.model.encode(x)
z = self.scale_factor * z
return z
class ViTSTREncoder(VisionTransformer):
    '''
    ViTSTREncoder is a ViT encoder that loads pretrained ViTSTR weights
    '''
def __init__(self, size=224, ckpt_path=None, freeze=True, *args, **kwargs):
super().__init__(*args, **kwargs)
self.grayscale = transforms.Grayscale()
self.resize = transforms.Resize((size, size), transforms.InterpolationMode.BICUBIC, antialias=True)
self.character = string.printable[:-6]
self.reset_classifier(num_classes=len(self.character)+2)
if ckpt_path is not None:
self.load_state_dict(torch.load(ckpt_path, map_location="cpu"), strict=False)
if freeze:
self.freeze()
def reset_classifier(self, num_classes):
self.num_classes = num_classes
self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity()
def freeze(self):
for param in self.parameters():
param.requires_grad_(False)
def forward_features(self, x):
B = x.shape[0]
x = self.patch_embed(x)
cls_tokens = self.cls_token.expand(B, -1, -1) # stole cls_tokens impl from Phil Wang, thanks
x = torch.cat((cls_tokens, x), dim=1)
x = x + self.pos_embed
x = self.pos_drop(x)
for blk in self.blocks:
x = blk(x)
x = self.norm(x)
return x
def forward(self, x):
x = self.forward_features(x)
return x
def encode(self, x):
return self(x)
class PositionalEncoding(nn.Module):
def __init__(self, d_model, dropout=0.1, max_len=5000):
super(PositionalEncoding, self).__init__()
self.dropout = nn.Dropout(p=dropout)
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
self.register_buffer('pe', pe)
def forward(self, x):
x = x + torch.tile(self.pe[None, ...].to(x.device), (x.shape[0], 1, 1))
return self.dropout(x)
class LabelEncoder(AbstractEmbModel, pl.LightningModule):
def __init__(self, max_len, emb_dim, n_heads=8, n_trans_layers=12, ckpt_path=None, trainable=False,
lr=1e-4, lambda_cls=0.1, lambda_pos=0.1, clip_dim=1024, visual_len=197, visual_dim=768, visual_config=None, *args, **kwargs):
super().__init__(*args, **kwargs)
self.max_len = max_len
self.emd_dim = emb_dim
self.n_heads = n_heads
self.n_trans_layers = n_trans_layers
self.character = string.printable[:-6]
self.num_cls = len(self.character) + 1
self.label_embedding = nn.Embedding(self.num_cls, self.emd_dim)
self.pos_embedding = PositionalEncoding(d_model=self.emd_dim, max_len=self.max_len)
transformer_block = nn.TransformerEncoderLayer(d_model=self.emd_dim, nhead=self.n_heads, batch_first=True)
self.encoder = nn.TransformerEncoder(transformer_block, num_layers=self.n_trans_layers)
if ckpt_path is not None:
self.load_state_dict(torch.load(ckpt_path, map_location="cpu")["state_dict"], strict=False)
if trainable:
self.logit_scale = nn.Parameter(torch.ones([]) * np.log(1 / 0.07))
self.visual_encoder = instantiate_from_config(visual_config)
self.learning_rate = lr
self.clip_dim = clip_dim
self.visual_len = visual_len
self.visual_dim = visual_dim
self.lambda_cls = lambda_cls
self.lambda_pos = lambda_pos
self.cls_head = nn.Sequential(*[
nn.InstanceNorm1d(self.max_len),
nn.Linear(self.emd_dim, self.emd_dim),
nn.GELU(),
nn.Linear(self.emd_dim, self.num_cls)
])
self.pos_head = nn.Sequential(*[
nn.InstanceNorm1d(self.max_len),
nn.Linear(self.emd_dim, self.max_len, bias=False)
])
self.text_head = nn.Sequential(*[
nn.InstanceNorm1d(self.max_len),
nn.Linear(self.emd_dim, self.clip_dim, bias=False),
nn.Conv1d(in_channels=self.max_len, out_channels=1, kernel_size=1)
])
self.visual_head = nn.Sequential(*[
nn.InstanceNorm1d(self.visual_len),
nn.Linear(self.visual_dim, self.clip_dim, bias=False),
nn.Conv1d(in_channels=self.visual_len, out_channels=1, kernel_size=1)
])
def freeze(self):
for param in self.parameters():
param.requires_grad = False
def get_index(self, labels):
indexes = []
for label in labels:
assert len(label) <= self.max_len
index = [self.character.find(c)+1 for c in label]
index = index + [0] * (self.max_len - len(index))
indexes.append(index)
return torch.tensor(indexes, device=next(self.parameters()).device)
def get_embeddings(self, x):
emb = self.label_embedding(x)
emb = self.pos_embedding(emb)
out = self.encoder(emb)
return out
def forward(self, labels):
idx = self.get_index(labels)
out = self.get_embeddings(idx)
return out
def get_loss(self, text_out, visual_out, clip_target, cls_out, pos_out, cls_target, pos_target):
text_out = text_out / text_out.norm(dim=1, keepdim=True) # b, 1024
visual_out = visual_out / visual_out.norm(dim=1, keepdim=True) # b, 1024
logit_scale = self.logit_scale.exp()
logits_per_image = logit_scale * visual_out @ text_out.T # b, b
logits_per_text = logits_per_image.T # b, b
clip_loss_image = nn.functional.cross_entropy(logits_per_image, clip_target)
clip_loss_text = nn.functional.cross_entropy(logits_per_text, clip_target)
clip_loss = (clip_loss_image + clip_loss_text) / 2
cls_loss = nn.functional.cross_entropy(cls_out.permute(0,2,1), cls_target)
pos_loss = nn.functional.cross_entropy(pos_out.permute(0,2,1), pos_target)
return clip_loss, cls_loss, pos_loss, logits_per_text
def training_step(self, batch, batch_idx):
text = batch["text"]
image = batch["image"]
idx = self.get_index(text)
text_emb = self.get_embeddings(idx) # b, l, d
visual_emb = self.visual_encoder(image) # b, n, d
cls_out = self.cls_head(text_emb) # b, l, c
pos_out = self.pos_head(text_emb) # b, l, p
text_out = self.text_head(text_emb).squeeze(1) # b, 1024
visual_out = self.visual_head(visual_emb).squeeze(1) # b, 1024
cls_target = idx # b, c
pos_target = torch.arange(start=0, end=self.max_len, step=1)
pos_target = pos_target[None].tile((idx.shape[0], 1)).to(cls_target) # b, c
clip_target = torch.arange(0, idx.shape[0], 1).to(cls_target) # b,
clip_loss, cls_loss, pos_loss, logits_per_text = self.get_loss(text_out, visual_out, clip_target, cls_out, pos_out, cls_target, pos_target)
loss = clip_loss + self.lambda_cls * cls_loss + self.lambda_pos * pos_loss
loss_dict = {}
loss_dict["loss/clip_loss"] = clip_loss
loss_dict["loss/cls_loss"] = cls_loss
loss_dict["loss/pos_loss"] = pos_loss
loss_dict["loss/full_loss"] = loss
clip_idx = torch.max(logits_per_text, dim=-1).indices # b,
clip_acc = (clip_idx == clip_target).to(dtype=torch.float32).mean()
cls_idx = torch.max(cls_out, dim=-1).indices # b, l
cls_acc = (cls_idx == cls_target).to(dtype=torch.float32).mean()
pos_idx = torch.max(pos_out, dim=-1).indices # b, l
pos_acc = (pos_idx == pos_target).to(dtype=torch.float32).mean()
loss_dict["acc/clip_acc"] = clip_acc
loss_dict["acc/cls_acc"] = cls_acc
loss_dict["acc/pos_acc"] = pos_acc
self.log_dict(loss_dict, prog_bar=True, batch_size=len(text),
logger=True, on_step=True, on_epoch=True, sync_dist=True)
return loss
def configure_optimizers(self):
lr = self.learning_rate
opt = torch.optim.AdamW(filter(lambda p: p.requires_grad, self.parameters()), lr=lr)
return opt
================================================
FILE: sgm/modules/predictors/model.py
================================================
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision.utils import save_image
class ParseqPredictor(nn.Module):
def __init__(self, ckpt_path=None, freeze=True, *args, **kwargs):
super().__init__(*args, **kwargs)
self.parseq = torch.hub.load('./src/parseq', 'parseq', source='local').eval()
self.parseq.load_state_dict(torch.load(ckpt_path, map_location="cpu"))
self.parseq_transform = transforms.Compose([
transforms.Resize(self.parseq.hparams.img_size, transforms.InterpolationMode.BICUBIC, antialias=True),
transforms.Normalize(0.5, 0.5)
])
if freeze:
self.freeze()
def freeze(self):
for param in self.parseq.parameters():
param.requires_grad_(False)
def forward(self, x):
x = torch.cat([self.parseq_transform(t[None]) for t in x])
logits = self.parseq(x.to(next(self.parameters()).device))
return logits
def img2txt(self, x):
pred = self(x)
label, confidence = self.parseq.tokenizer.decode(pred)
return label
def calc_loss(self, x, label):
preds = self(x) # (B, l, C) l=26, C=95
gt_ids = self.parseq.tokenizer.encode(label).to(preds.device) # (B, l_trun)
losses = []
for pred, gt_id in zip(preds, gt_ids):
eos_id = (gt_id == 0).nonzero().item()
gt_id = gt_id[1: eos_id]
pred = pred[:eos_id-1, :]
ce_loss = nn.functional.cross_entropy(pred.permute(1, 0)[None], gt_id[None])
            ce_loss = torch.clamp(ce_loss, max=1.0)
losses.append(ce_loss[None])
loss = torch.cat(losses)
return loss
================================================
FILE: sgm/util.py
================================================
import functools
import importlib
import os
from functools import partial
from inspect import isfunction
import fsspec
import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont
from safetensors.torch import load_file as load_safetensors
def disabled_train(self, mode=True):
"""Overwrite model.train with this function to make sure train/eval mode
does not change anymore."""
return self
def get_string_from_tuple(s):
    try:
        # Check if the string starts and ends with parentheses
        if s[0] == "(" and s[-1] == ")":
            # Convert the string representation to a tuple
            t = eval(s)
            # If it parsed to a tuple, return its first element
            if isinstance(t, tuple):
                return t[0]
    except Exception:
        pass
    return s
def is_power_of_two(n):
    """
    Return True if n is a power of 2, otherwise False.

    A positive power of 2 has exactly one bit set; subtracting 1 flips that
    bit and sets every bit below it, so the two values share no bits and
    n & (n - 1) == 0 holds exactly for powers of 2 (given n > 0).

    >>> is_power_of_two(16)
    True
    >>> is_power_of_two(18)
    False
    """
    if n <= 0:
        return False
    return (n & (n - 1)) == 0
def autocast(f, enabled=True):
def do_autocast(*args, **kwargs):
with torch.cuda.amp.autocast(
enabled=enabled,
dtype=torch.get_autocast_gpu_dtype(),
cache_enabled=torch.is_autocast_cache_enabled(),
):
return f(*args, **kwargs)
return do_autocast
def load_partial_from_config(config):
return partial(get_obj_from_str(config["target"]), **config.get("params", dict()))
def log_txt_as_img(wh, xc, size=10):
# wh a tuple of (width, height)
# xc a list of captions to plot
b = len(xc)
txts = list()
for bi in range(b):
txt = Image.new("RGB", wh, color="white")
draw = ImageDraw.Draw(txt)
font = ImageFont.truetype("data/DejaVuSans.ttf", size=size)
nc = int(40 * (wh[0] / 256))
if isinstance(xc[bi], list):
text_seq = xc[bi][0]
else:
text_seq = xc[bi]
lines = "\n".join(
text_seq[start : start + nc] for start in range(0, len(text_seq), nc)
)
try:
draw.text((0, 0), lines, fill="black", font=font)
except UnicodeEncodeError:
            print("Can't encode string for logging. Skipping.")
txt = np.array(txt).transpose(2, 0, 1) / 127.5 - 1.0
txts.append(txt)
txts = np.stack(txts)
txts = torch.tensor(txts)
return txts
def partialclass(cls, *args, **kwargs):
class NewCls(cls):
__init__ = functools.partialmethod(cls.__init__, *args, **kwargs)
return NewCls
def make_path_absolute(path):
fs, p = fsspec.core.url_to_fs(path)
if fs.protocol == "file":
return os.path.abspath(p)
return path
def ismap(x):
if not isinstance(x, torch.Tensor):
return False
return (len(x.shape) == 4) and (x.shape[1] > 3)
def isimage(x):
if not isinstance(x, torch.Tensor):
return False
return (len(x.shape) == 4) and (x.shape[1] == 3 or x.shape[1] == 1)
def isheatmap(x):
if not isinstance(x, torch.Tensor):
return False
return x.ndim == 2
def isneighbors(x):
if not isinstance(x, torch.Tensor):
return False
return x.ndim == 5 and (x.shape[2] == 3 or x.shape[2] == 1)
def exists(x):
return x is not None
def expand_dims_like(x, y):
while x.dim() != y.dim():
x = x.unsqueeze(-1)
return x
def default(val, d):
if exists(val):
return val
return d() if isfunction(d) else d
def mean_flat(tensor):
"""
https://github.com/openai/guided-diffusion/blob/27c20a8fab9cb472df5d6bdd6c8d11c8f430b924/guided_diffusion/nn.py#L86
Take the mean over all non-batch dimensions.
"""
return tensor.mean(dim=list(range(1, len(tensor.shape))))
def count_params(model, verbose=False):
total_params = sum(p.numel() for p in model.parameters())
if verbose:
print(f"{model.__class__.__name__} has {total_params * 1.e-6:.2f} M params.")
return total_params
def instantiate_from_config(config):
    if "target" not in config:
if config == "__is_first_stage__":
return None
elif config == "__is_unconditional__":
return None
raise KeyError("Expected key `target` to instantiate.")
return get_obj_from_str(config["target"])(**config.get("params", dict()))
def get_obj_from_str(string, reload=False, invalidate_cache=True):
module, cls = string.rsplit(".", 1)
if invalidate_cache:
importlib.invalidate_caches()
if reload:
module_imp = importlib.import_module(module)
importlib.reload(module_imp)
return getattr(importlib.import_module(module, package=None), cls)
def append_zero(x):
return torch.cat([x, x.new_zeros([1])])
def append_dims(x, target_dims):
"""Appends dimensions to the end of a tensor until it has target_dims dimensions."""
dims_to_append = target_dims - x.ndim
if dims_to_append < 0:
raise ValueError(
            f"input has {x.ndim} dims but target_dims is {target_dims}, which is fewer than the input's"
)
return x[(...,) + (None,) * dims_to_append]
def load_model_from_config(config, ckpt, verbose=True, freeze=True):
print(f"Loading model from {ckpt}")
if ckpt.endswith("ckpt"):
pl_sd = torch.load(ckpt, map_location="cpu")
if "global_step" in pl_sd:
print(f"Global Step: {pl_sd['global_step']}")
sd = pl_sd["state_dict"]
elif ckpt.endswith("safetensors"):
sd = load_safetensors(ckpt)
else:
raise NotImplementedError
model = instantiate_from_config(config.model)
m, u = model.load_state_dict(sd, strict=False)
if len(m) > 0 and verbose:
print("missing keys:")
print(m)
if len(u) > 0 and verbose:
print("unexpected keys:")
print(u)
if freeze:
for param in model.parameters():
param.requires_grad = False
model.eval()
return model
================================================
FILE: src/parseq/.gitignore
================================================
# Output directories
outputs/
multirun/
ray_results/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
.python-version
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# IDE
.idea/
================================================
FILE: src/parseq/Datasets.md
================================================
We use various synthetic and real datasets. More info is in Appendix F of the supplementary material. Some preprocessing scripts are included in [`tools/`](tools).
| Dataset | Type | Remarks |
|:-------:|:-----:|:--------|
| [MJSynth](https://www.robots.ox.ac.uk/~vgg/data/text/) | synthetic | Case-sensitive annotations were extracted from the image filenames |
| [SynthText](https://www.robots.ox.ac.uk/~vgg/data/scenetext/) | synthetic | Processed with [`crop_by_word_bb_syn90k.py`](https://github.com/FangShancheng/ABINet/blob/main/tools/crop_by_word_bb_syn90k.py) |
| [IC13](https://rrc.cvc.uab.es/?ch=2) | real | Three archives: 857, 1015, 1095 (full) |
| [IC15](https://rrc.cvc.uab.es/?ch=4) | real | Two archives: 1811, 2077 (full) |
| [CUTE80](http://cs-chan.com/downloads_cute80_dataset.html) | real | \[1\] |
| [IIIT5k](https://cvit.iiit.ac.in/research/projects/cvit-projects/the-iiit-5k-word-dataset) | real | \[1\] |
| [SVT](http://vision.ucsd.edu/~kai/svt/) | real | \[1\] |
| [SVTP](https://openaccess.thecvf.com/content_iccv_2013/html/Phan_Recognizing_Text_with_2013_ICCV_paper.html) | real | \[1\] |
| [ArT](https://rrc.cvc.uab.es/?ch=14) | real | \[2\] |
| [LSVT](https://rrc.cvc.uab.es/?ch=16) | real | \[2\] |
| [MLT19](https://rrc.cvc.uab.es/?ch=15) | real | \[2\] |
| [RCTW17](https://rctw.vlrlab.net/dataset.html) | real | \[2\] |
| [ReCTS](https://rrc.cvc.uab.es/?ch=12) | real | \[2\] |
| [Uber-Text](https://s3-us-west-2.amazonaws.com/uber-common-public/ubertext/index.html) | real | \[2\] |
| [COCO-Text v1.4](https://rrc.cvc.uab.es/?ch=5) | real | Processed with [`coco_text_converter.py`](tools/coco_text_converter.py) |
| [COCO-Text v2.0](https://bgshih.github.io/cocotext/) | real | Processed with [`coco_2_converter.py`](tools/coco_2_converter.py) |
| [OpenVINO](https://proceedings.mlr.press/v157/krylov21a.html) | real | [Annotations](https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text/) for a subset of [Open Images](https://github.com/cvdfoundation/open-images-dataset). Processed with [`openvino_converter.py`](tools/openvino_converter.py). |
| [TextOCR](https://textvqa.org/textocr/) | real | Annotations for a subset of Open Images. Processed with [`textocr_converter.py`](tools/textocr_converter.py). A _horizontal_ version can be generated by passing `--rectify_pose`. |
\[1\] Case-sensitive annotations from [Long and Yao](https://github.com/Jyouhou/Case-Sensitive-Scene-Text-Recognition-Datasets) + [our corrections](https://github.com/baudm/Case-Sensitive-Scene-Text-Recognition-Datasets). Processed with [case_sensitive_str_datasets_converter.py](tools/case_sensitive_str_datasets_converter.py)
\[2\] Archives used as-is from [Baek et al.](https://github.com/ku21fan/STR-Fewer-Labels/blob/main/data.md) They are included in the dataset release for convenience. Please refer to their work for more info about the datasets.
The preprocessed archives are available here: [val + test + most of train](https://drive.google.com/drive/folders/1NYuoi7dfJVgo-zUJogh8UQZgIMpLviOE), [TextOCR + OpenVINO](https://drive.google.com/drive/folders/1D9z_YJVa6f-O0juni-yG5jcwnhvYw-qC)
The expected filesystem structure is as follows:
```
data
├── test
│ ├── ArT
│ ├── COCOv1.4
│ ├── CUTE80
│ ├── IC13_1015
│ ├── IC13_1095 # Full IC13 test set. Typically not used for benchmarking but provided here for convenience.
│ ├── IC13_857
│ ├── IC15_1811
│ ├── IC15_2077
│ ├── IIIT5k
│ ├── SVT
│ ├── SVTP
│ └── Uber
├── train
│ ├── real
│ │ ├── ArT
│ │ │ ├── train
│ │ │ └── val
│ │ ├── COCOv2.0
│ │ │ ├── train
│ │ │ └── val
│ │ ├── LSVT
│ │ │ ├── test
│ │ │ ├── train
│ │ │ └── val
│ │ ├── MLT19
│ │ │ ├── test
│ │ │ ├── train
│ │ │ └── val
│ │ ├── OpenVINO
│ │ │ ├── train_1
│ │ │ ├── train_2
│ │ │ ├── train_5
│ │ │ ├── train_f
│ │ │ └── validation
│ │ ├── RCTW17
│ │ │ ├── test
│ │ │ ├── train
│ │ │ └── val
│ │ ├── ReCTS
│ │ │ ├── test
│ │ │ ├── train
│ │ │ └── val
│ │ ├── TextOCR
│ │ │ ├── train
│ │ │ └── val
│ │ └── Uber
│ │ ├── train
│ │ └── val
│ └── synth
│ ├── MJ
│ │ ├── test
│ │ ├── train
│ │ └── val
│ └── ST
└── val
├── IC13
├── IC15
├── IIIT5k
└── SVT
```
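After downloading the archives, a quick spot-check of the layout can save a failed training run. The sketch below is illustrative only: the `data` root and the specific directories it probes are assumptions drawn from the tree above, not an exhaustive list.

```python
from pathlib import Path

# A few representative directories from the expected layout (not exhaustive).
EXPECTED = [
    "test/IIIT5k",
    "train/real/TextOCR/train",
    "train/synth/MJ/train",
    "val/IC13",
]

def check_layout(root="data"):
    """Return the subset of EXPECTED directories missing under `root`."""
    root = Path(root)
    return [d for d in EXPECTED if not (root / d).is_dir()]

if __name__ == "__main__":
    for d in check_layout():
        print(f"missing: {d}")
```

Run it from the repository root; an empty result means the spot-checked paths are in place.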
================================================
FILE: src/parseq/LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: src/parseq/NOTICE
================================================
Scene Text Recognition Model Hub
Copyright 2022 Darwin Bautista
The Initial Developer of strhub/models/abinet (sans system.py) is
Fang et al. (https://github.com/FangShancheng/ABINet).
Copyright 2021-2022 USTC
The Initial Developer of strhub/models/crnn (sans system.py) is
Jieru Mei (https://github.com/meijieru/crnn.pytorch).
Copyright 2017-2022 Jieru Mei
The Initial Developer of strhub/models/trba (sans system.py) is
Jeonghun Baek (https://github.com/clovaai/deep-text-recognition-benchmark).
Copyright 2019-2022 NAVER Corp.
The Initial Developer of strhub/models/vitstr (sans system.py) is
Rowel Atienza (https://github.com/roatienza/deep-text-recognition-benchmark).
Copyright 2021-2022 Rowel Atienza
================================================
FILE: src/parseq/README.md
================================================
# Scene Text Recognition with Permuted Autoregressive Sequence Models
[License](https://github.com/baudm/parseq/blob/main/LICENSE)
[arXiv](https://arxiv.org/abs/2207.06966)
[ECCV 2022](https://www.ecva.net/papers/eccv_2022/papers_ECCV/html/556_ECCV_2022_paper.php)
[Hugging Face demo](https://huggingface.co/spaces/baudm/PARSeq-OCR)
[**Darwin Bautista**](https://github.com/baudm) and [**Rowel Atienza**](https://github.com/roatienza)
Electrical and Electronics Engineering Institute
University of the Philippines, Diliman
[Method](#method-tldr) | [Sample Results](#sample-results) | [Getting Started](#getting-started) | [FAQ](#frequently-asked-questions) | [Training](#training) | [Evaluation](#evaluation) | [Citation](#citation)
Scene Text Recognition (STR) models use language context to be more robust against noisy or corrupted images. Recent approaches like ABINet use a standalone or external Language Model (LM) for prediction refinement. In this work, we show that the external LM—which requires upfront allocation of dedicated compute capacity—is inefficient for STR due to its poor performance vs cost characteristics. We propose a more efficient approach using **p**ermuted **a**uto**r**egressive **seq**uence (PARSeq) models. View our ECCV [poster](https://drive.google.com/file/d/19luOT_RMqmafLMhKQQHBnHNXV7fOCRfw/view) and [presentation](https://drive.google.com/file/d/11VoZW4QC5tbMwVIjKB44447uTiuCJAAD/view) for a brief overview.

**NOTE:** _P-S and P-Ti are shorthands for PARSeq-S and PARSeq-Ti, respectively._
### Method tl;dr
Our main insight is that with an ensemble of autoregressive (AR) models, we could unify the current STR decoding methods (context-aware AR and context-free non-AR) and the bidirectional (cloze) refinement model:

A single Transformer can realize different models by merely varying its attention masks. This characteristic coupled with Permutation Language Modeling allows for a _unified_ STR model capable of context-free and context-aware inference, as well as iterative prediction refinement using bidirectional context **without** requiring a standalone language model. PARSeq can be considered an ensemble of AR models with shared architecture and weights:
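As an illustrative sketch (not the repository's implementation), the three decoding behaviors described above can be expressed as nothing more than different boolean attention masks over the same output positions; `decoding_mask` below is a hypothetical helper written for this illustration:

```python
import numpy as np


def decoding_mask(T: int, mode: str) -> np.ndarray:
    """Boolean mask M where M[i, j] = True means output position i may attend
    to token j. Illustrates how one Transformer can realize different STR
    decoders purely by varying its attention mask."""
    if mode == 'ar':       # context-aware autoregressive: attend to previous tokens only
        return np.tril(np.ones((T, T), dtype=bool), k=-1)
    if mode == 'nar':      # context-free non-autoregressive: no token context at all
        return np.zeros((T, T), dtype=bool)
    if mode == 'cloze':    # bidirectional (cloze) refinement: all tokens except itself
        return ~np.eye(T, dtype=bool)
    raise ValueError(f'unknown mode: {mode}')


# Position 2 of a 4-token output:
decoding_mask(4, 'ar')[2]     # may attend to tokens 0 and 1 only
decoding_mask(4, 'cloze')[2]  # may attend to tokens 0, 1 and 3
```

The sketch only shows the masking idea; PARSeq additionally trains with many random permutations of the factorization order, so the same shared weights serve all of these masks at inference time.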

### Sample Results
| Input Image | PARSeq-S<sub>A</sub> | ABINet | TRBA | ViTSTR-S | CRNN |
|:-----------:|:--------------------:|:-----------------:|:-----------------:|:-----------------:|:-----------------:|
| (image) | CHEWBACCA | CHEWBA**GG**A | CHEWBACCA | CHEWBACCA | CHEW**U**ACCA |
| (image) | Chevro**l** | Chevro\_ | Chevro\_ | Chevr\_\_ | Chevr\_\_ |
| (image) | SALMON | SALMON | SALMON | SALMON | SA\_MON |
| (image) | Verbandst**e**ffe | Verbandst**e**ffe | Verbandst**ell**e | Verbandst**e**ffe | Verbands**le**ffe |
| (image) | Kappa | Kappa | Ka**s**pa | Kappa | Ka**ad**a |
| (image) | 3rdAve | 3=-Ave | 3rdAve | 3rdAve | **Coke** |
**NOTE:** _Bold letters and underscores indicate wrong and missing character predictions, respectively._
## Getting Started
This repository contains the reference implementation for PARSeq and reproduced models (collectively referred to as _Scene Text Recognition Model Hub_). See `NOTICE` for copyright information.
The majority of the code is licensed under the Apache License v2.0 (see `LICENSE`), while the ABINet and CRNN sources are released under the BSD and MIT licenses, respectively (see the corresponding `LICENSE` files for details).
### Demo
An [interactive Gradio demo](https://huggingface.co/spaces/baudm/PARSeq-OCR) is hosted on Hugging Face Spaces; it uses the pretrained weights released here.
### Installation
Requires Python >= 3.7 and PyTorch >= 1.10. Tested with Python 3.9 and PyTorch 1.10.
```bash
$ pip install -r requirements.txt
$ pip install -e .
```
### Datasets
Download the [datasets](Datasets.md) from the following links:
1. [LMDB archives](https://drive.google.com/drive/folders/1NYuoi7dfJVgo-zUJogh8UQZgIMpLviOE) for MJSynth, SynthText, IIIT5k, SVT, SVTP, IC13, IC15, CUTE80, ArT, RCTW17, ReCTS, LSVT, MLT19, COCO-Text, and Uber-Text.
2. [LMDB archives](https://drive.google.com/drive/folders/1D9z_YJVa6f-O0juni-yG5jcwnhvYw-qC) for TextOCR and OpenVINO.
### Pretrained Models via Torch Hub
Available models are: `abinet`, `crnn`, `trba`, `vitstr`, `parseq_tiny`, and `parseq`.
```python
import torch
from PIL import Image
from strhub.data.module import SceneTextDataModule
# Load model and image transforms
parseq = torch.hub.load('baudm/parseq', 'parseq', pretrained=True).eval()
img_transform = SceneTextDataModule.get_transform(parseq.hparams.img_size)
img = Image.open('/path/to/image.png').convert('RGB')
# Preprocess. Model expects a batch of images with shape: (B, C, H, W)
img = img_transform(img).unsqueeze(0)
logits = parseq(img)
logits.shape # torch.Size([1, 26, 95]), 94 characters + [EOS] symbol
# Greedy decoding
pred = logits.softmax(-1)
label, confidence = parseq.tokenizer.decode(pred)
print('Decoded label = {}'.format(label[0]))
```
## Frequently Asked Questions
- How do I train on a new language? See Issues [#5](https://github.com/baudm/parseq/issues/5) and [#9](https://github.com/baudm/parseq/issues/9).
- Can you export to TorchScript or ONNX? Yes, see Issue [#12](https://github.com/baudm/parseq/issues/12#issuecomment-1267842315).
- How do I test on my own dataset? See Issue [#27](https://github.com/baudm/parseq/issues/27).
- How do I finetune and/or create a custom dataset? See Issue [#7](https://github.com/baudm/parseq/issues/7).
- What is `val_NED`? See Issue [#10](https://github.com/baudm/parseq/issues/10).
## Training
The training script can train any supported model. You can override any configuration using the command line. Please refer to [Hydra](https://hydra.cc) docs for more info about the syntax. Use `./train.py --help` to see the default configuration.
Sample commands for different training configurations:
### Finetune using pretrained weights
```bash
./train.py pretrained=parseq-tiny # Not all experiments have pretrained weights
```
### Train a model variant/preconfigured experiment
The base model configurations are in `configs/model/`, while variations are stored in `configs/experiment/`.
```bash
./train.py +experiment=parseq-tiny # Some examples: abinet-sv, trbc
```
### Specify the character set for training
```bash
./train.py charset=94_full # Other options: 36_lowercase or 62_mixed-case. See configs/charset/
```
### Specify the training dataset
```bash
./train.py dataset=real # Other option: synth. See configs/dataset/
```
### Change general model training parameters
```bash
./train.py model.img_size=[32,128] model.max_label_length=25 model.batch_size=384
```
### Change data-related training parameters
```bash
./train.py data.root_dir=data data.num_workers=2 data.augment=true
```
### Change `pytorch_lightning.Trainer` parameters
```bash
./train.py trainer.max_epochs=20 trainer.gpus=2 +trainer.accelerator=gpu
```
Note that you can pass any [Trainer parameter](https://pytorch-lightning.readthedocs.io/en/stable/common/trainer.html),
you just need to prefix it with `+` if it is not originally specified in `configs/main.yaml`.
### Resume training from checkpoint (experimental)
```bash
./train.py +experiment=<experiment> ckpt_path=outputs/<model>/<timestamp>/checkpoints/<checkpoint>.ckpt
```
## Evaluation
The test script, `test.py`, can be used to evaluate any model trained with this project. For more info, see `./test.py --help`.
PARSeq runtime parameters can be passed using the format `param:type=value`. For example, PARSeq NAR decoding can be invoked via `./test.py parseq.ckpt refine_iters:int=2 decode_ar:bool=false`.
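The `param:type=value` syntax above can be understood with a minimal re-implementation (a hedged sketch of the behavior only; the actual parser used by the scripts is `strhub.models.utils.parse_model_args`):

```python
def parse_model_args(args):
    """Parse runtime overrides of the form 'param:type=value',
    e.g. 'refine_iters:int=2' -> {'refine_iters': 2}.
    Illustrative sketch; not the repository's actual implementation."""
    casts = {
        'int': int,
        'float': float,
        'str': str,
        'bool': lambda v: v.lower() in ('1', 'true'),
    }
    kwargs = {}
    for arg in args:
        name_type, value = arg.split('=', 1)        # 'refine_iters:int', '2'
        name, _, typename = name_type.partition(':')  # 'refine_iters', 'int'
        kwargs[name] = casts.get(typename, str)(value)
    return kwargs


parse_model_args(['refine_iters:int=2', 'decode_ar:bool=false'])
# -> {'refine_iters': 2, 'decode_ar': False}
```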
Sample commands for reproducing results:
### Lowercase alphanumeric comparison on benchmark datasets (Table 6)
```bash
./test.py outputs/<model>/<timestamp>/checkpoints/last.ckpt # or use the released weights: ./test.py pretrained=parseq
```
**Sample output:**
| Dataset | # samples | Accuracy | 1 - NED | Confidence | Label Length |
|:---------:|----------:|---------:|--------:|-----------:|-------------:|
| IIIT5k | 3000 | 99.00 | 99.79 | 97.09 | 5.09 |
| SVT | 647 | 97.84 | 99.54 | 95.87 | 5.86 |
| IC13_1015 | 1015 | 98.13 | 99.43 | 97.19 | 5.31 |
| IC15_2077 | 2077 | 89.22 | 96.43 | 91.91 | 5.33 |
| SVTP | 645 | 96.90 | 99.36 | 94.37 | 5.86 |
| CUTE80 | 288 | 98.61 | 99.80 | 96.43 | 5.53 |
| **Combined** | **7672** | **95.95** | **98.78** | **95.34** | **5.33** |
--------------------------------------------------------------------------
### Benchmark using different evaluation character sets (Table 4)
```bash
./test.py outputs/<model>/<timestamp>/checkpoints/last.ckpt # lowercase alphanumeric (36-character set)
./test.py outputs/<model>/<timestamp>/checkpoints/last.ckpt --cased # mixed-case alphanumeric (62-character set)
./test.py outputs/<model>/<timestamp>/checkpoints/last.ckpt --cased --punctuation # mixed-case alphanumeric + punctuation (94-character set)
```
### Lowercase alphanumeric comparison on more challenging datasets (Table 5)
```bash
./test.py outputs/<model>/<timestamp>/checkpoints/last.ckpt --new
```
### Benchmark Model Compute Requirements (Figure 5)
```bash
./bench.py model=parseq model.decode_ar=false model.refine_iters=3
model(x)
Median: 14.87 ms
IQR: 0.33 ms (14.78 to 15.12)
7 measurements, 10 runs per measurement, 1 thread
| module | #parameters | #flops | #activations |
|:----------------------|:--------------|:---------|:---------------|
| model | 23.833M | 3.255G | 8.214M |
| encoder | 21.381M | 2.88G | 7.127M |
| decoder | 2.368M | 0.371G | 1.078M |
| head | 36.575K | 3.794M | 9.88K |
| text_embed.embedding | 37.248K | 0 | 0 |
```
### Latency Measurements vs Output Label Length (Appendix I)
```bash
./bench.py model=parseq model.decode_ar=false model.refine_iters=3 +range=true
```
### Orientation robustness benchmark (Appendix J)
```bash
./test.py outputs/<model>/<timestamp>/checkpoints/last.ckpt --cased --punctuation # no rotation
./test.py outputs/<model>/<timestamp>/checkpoints/last.ckpt --cased --punctuation --rotation 90
./test.py outputs/<model>/<timestamp>/checkpoints/last.ckpt --cased --punctuation --rotation 180
./test.py outputs/<model>/<timestamp>/checkpoints/last.ckpt --cased --punctuation --rotation 270
```
### Using trained models to read text from images (Appendix L)
```bash
./read.py outputs/<model>/<timestamp>/checkpoints/last.ckpt --images demo_images/* # Or use ./read.py pretrained=parseq
Additional keyword arguments: {}
demo_images/art-01107.jpg: CHEWBACCA
demo_images/coco-1166773.jpg: Chevrol
demo_images/cute-184.jpg: SALMON
demo_images/ic13_word_256.png: Verbandsteffe
demo_images/ic15_word_26.png: Kaopa
demo_images/uber-27491.jpg: 3rdAve
# use NAR decoding + 2 refinement iterations for PARSeq
./read.py pretrained=parseq refine_iters:int=2 decode_ar:bool=false --images demo_images/*
```
## Tuning
We use [Ray Tune](https://www.ray.io/ray-tune) for automated parameter tuning of the learning rate. See `./tune.py --help`. Extend `tune.py` to support tuning of other hyperparameters.
```bash
./tune.py tune.num_samples=20 # find optimum LR for PARSeq's default config using 20 trials
./tune.py +experiment=tune_abinet-lm # find the optimum learning rate for ABINet's language model
```
## Citation
```bibtex
@InProceedings{bautista2022parseq,
title={Scene Text Recognition with Permuted Autoregressive Sequence Models},
author={Bautista, Darwin and Atienza, Rowel},
booktitle={European Conference on Computer Vision},
pages={178--196},
month={10},
year={2022},
publisher={Springer Nature Switzerland},
address={Cham},
doi={10.1007/978-3-031-19815-1_11},
url={https://doi.org/10.1007/978-3-031-19815-1_11}
}
```
================================================
FILE: src/parseq/bench.py
================================================
#!/usr/bin/env python3
# Scene Text Recognition Model Hub
# Copyright 2022 Darwin Bautista
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import torch
from torch.utils import benchmark
from fvcore.nn import FlopCountAnalysis, ActivationCountAnalysis, flop_count_table
import hydra
from omegaconf import DictConfig

@torch.inference_mode()
@hydra.main(config_path='configs', config_name='bench', version_base='1.2')
def main(config: DictConfig):
    # For consistent behavior
    os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
    torch.backends.cudnn.benchmark = False
    torch.use_deterministic_algorithms(True)

    device = config.get('device', 'cuda')

    h, w = config.data.img_size
    x = torch.rand(1, 3, h, w, device=device)
    model = hydra.utils.instantiate(config.model).eval().to(device)

    if config.get('range', False):
        for i in range(1, 26, 4):
            timer = benchmark.Timer(
                stmt='model(x, len)',
                globals={'model': model, 'x': x, 'len': i})
            print(timer.blocked_autorange(min_run_time=1))
    else:
        timer = benchmark.Timer(
            stmt='model(x)',
            globals={'model': model, 'x': x})
        flops = FlopCountAnalysis(model, x)
        acts = ActivationCountAnalysis(model, x)
        print(timer.blocked_autorange(min_run_time=1))
        print(flop_count_table(flops, 1, acts, False))


if __name__ == '__main__':
    main()
================================================
FILE: src/parseq/configs/bench.yaml
================================================
# Disable any logging or output
defaults:
  - main
  - _self_
  - override hydra/job_logging: disabled

hydra:
  output_subdir: null
  run:
    dir: .
================================================
FILE: src/parseq/configs/charset/36_lowercase.yaml
================================================
# @package _global_
model:
  charset_train: "0123456789abcdefghijklmnopqrstuvwxyz"
================================================
FILE: src/parseq/configs/charset/62_mixed-case.yaml
================================================
# @package _global_
model:
  charset_train: "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
================================================
FILE: src/parseq/configs/charset/94_full.yaml
================================================
# @package _global_
model:
  charset_train: "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
================================================
FILE: src/parseq/configs/dataset/real.yaml
================================================
# @package _global_
data:
  train_dir: real
================================================
FILE: src/parseq/configs/dataset/synth.yaml
================================================
# @package _global_
data:
  train_dir: synth
  num_workers: 3

trainer:
  limit_train_batches: 0.20496 # to match the steps per epoch of `real`
================================================
FILE: src/parseq/configs/experiment/abinet-sv.yaml
================================================
# @package _global_
defaults:
  - override /model: abinet

model:
  name: abinet-sv
  v_num_layers: 2
  v_attention: attention
================================================
FILE: src/parseq/configs/experiment/abinet.yaml
================================================
# @package _global_
defaults:
  - override /model: abinet
================================================
FILE: src/parseq/configs/experiment/crnn.yaml
================================================
# @package _global_
defaults:
  - override /model: crnn

data:
  num_workers: 5
================================================
FILE: src/parseq/configs/experiment/parseq-patch16-224.yaml
================================================
# @package _global_
defaults:
  - override /model: parseq

model:
  img_size: [ 224, 224 ] # [ height, width ]
  patch_size: [ 16, 16 ] # [ height, width ]
================================================
FILE: src/parseq/configs/experiment/parseq-tiny.yaml
================================================
# @package _global_
defaults:
  - override /model: parseq

model:
  name: parseq-tiny
  embed_dim: 192
  enc_num_heads: 3
  dec_num_heads: 6
================================================
FILE: src/parseq/configs/experiment/parseq.yaml
================================================
# @package _global_
defaults:
  - override /model: parseq
================================================
FILE: src/parseq/configs/experiment/trba.yaml
================================================
# @package _global_
defaults:
  - override /model: trba

data:
  num_workers: 3
================================================
FILE: src/parseq/configs/experiment/trbc.yaml
================================================
# @package _global_
defaults:
  - override /model: trba

model:
  name: trbc
  _target_: strhub.models.trba.system.TRBC
  lr: 1e-4

data:
  num_workers: 3
================================================
FILE: src/parseq/configs/experiment/tune_abinet-lm.yaml
================================================
# @package _global_
defaults:
  - override /model: abinet

model:
  name: abinet-lm
  lm_only: true

data:
  augment: false
  num_workers: 3

tune:
  gpus_per_trial: 0.5
  lr:
    min: 1e-5
    max: 1e-3
================================================
FILE: src/parseq/configs/experiment/vitstr.yaml
================================================
# @package _global_
defaults:
  - override /model: vitstr

model:
  img_size: [ 32, 128 ] # [ height, width ]
  patch_size: [ 4, 8 ] # [ height, width ]
================================================
FILE: src/parseq/configs/main.yaml
================================================
defaults:
  - _self_
  - model: parseq
  - charset: 94_full
  - dataset: real

model:
  _convert_: all
  img_size: [ 32, 128 ] # [ height, width ]
  max_label_length: 25
  # The ordering in charset_train matters. It determines the token IDs assigned to each character.
  charset_train: ???
  # For charset_test, ordering doesn't matter.
  charset_test: "0123456789abcdefghijklmnopqrstuvwxyz"
  batch_size: 384
  weight_decay: 0.0
  warmup_pct: 0.075 # equivalent to 1.5 epochs of warm-up

data:
  _target_: strhub.data.module.SceneTextDataModule
  root_dir: data
  train_dir: ???
  batch_size: ${model.batch_size}
  img_size: ${model.img_size}
  charset_train: ${model.charset_train}
  charset_test: ${model.charset_test}
  max_label_length: ${model.max_label_length}
  remove_whitespace: true
  normalize_unicode: true
  augment: true
  num_workers: 2

trainer:
  _target_: pytorch_lightning.Trainer
  _convert_: all
  val_check_interval: 1000
  #max_steps: 169680 # 20 epochs x 8484 steps (for batch size = 384, real data)
  max_epochs: 20
  gradient_clip_val: 20
  gpus: 2

ckpt_path: null
pretrained: null

hydra:
  output_subdir: config
  run:
    dir: outputs/${model.name}/${now:%Y-%m-%d}_${now:%H-%M-%S}
  sweep:
    dir: multirun/${model.name}/${now:%Y-%m-%d}_${now:%H-%M-%S}
    subdir: ${hydra.job.override_dirname}
================================================
FILE: src/parseq/configs/model/abinet.yaml
================================================
name: abinet
_target_: strhub.models.abinet.system.ABINet
# Shared Transformer configuration
d_model: 512
nhead: 8
d_inner: 2048
activation: relu
dropout: 0.1
# Architecture
v_backbone: transformer
v_num_layers: 3
v_attention: position
v_attention_mode: nearest
l_num_layers: 4
l_use_self_attn: false
# Training
lr: 3.4e-4
l_lr: 3e-4
iter_size: 3
a_loss_weight: 1.
v_loss_weight: 1.
l_loss_weight: 1.
l_detach: true
================================================
FILE: src/parseq/configs/model/crnn.yaml
================================================
name: crnn
_target_: strhub.models.crnn.system.CRNN
# Architecture
hidden_size: 256
leaky_relu: false
# Training
lr: 5.1e-4
================================================
FILE: src/parseq/configs/model/parseq.yaml
================================================
name: parseq
_target_: strhub.models.parseq.system.PARSeq
# Data
patch_size: [ 4, 8 ] # [ height, width ]
# Architecture
embed_dim: 384
enc_num_heads: 6
enc_mlp_ratio: 4
enc_depth: 12
dec_num_heads: 12
dec_mlp_ratio: 4
dec_depth: 1
# Training
lr: 7e-4
perm_num: 6
perm_forward: true
perm_mirrored: true
dropout: 0.1
# Decoding mode (test)
decode_ar: true
refine_iters: 1
================================================
FILE: src/parseq/configs/model/trba.yaml
================================================
name: trba
_target_: strhub.models.trba.system.TRBA
# Architecture
num_fiducial: 20
output_channel: 512
hidden_size: 256
# Training
lr: 6.9e-4
================================================
FILE: src/parseq/configs/model/vitstr.yaml
================================================
name: vitstr
_target_: strhub.models.vitstr.system.ViTSTR
# Data
img_size: [ 224, 224 ] # [ height, width ]
patch_size: [ 16, 16 ] # [ height, width ]
# Architecture
embed_dim: 384
num_heads: 6
# Training
lr: 8.9e-4
================================================
FILE: src/parseq/configs/tune.yaml
================================================
defaults:
  - main
  - _self_

trainer:
  gpus: 1 # tuning with DDP is not yet supported

tune:
  num_samples: 10
  gpus_per_trial: 1
  lr:
    min: 1e-4
    max: 2e-3
  resume_dir: null

hydra:
  run:
    dir: ray_results/${model.name}/${now:%Y-%m-%d}_${now:%H-%M-%S}
================================================
FILE: src/parseq/hubconf.py
================================================
from strhub.models.utils import create_model
dependencies = ['torch', 'pytorch_lightning', 'timm']

def parseq_tiny(pretrained: bool = False, decode_ar: bool = True, refine_iters: int = 1, **kwargs):
    """
    PARSeq tiny model (img_size=128x32, patch_size=8x4, d_model=192)
    @param pretrained: (bool) Use pretrained weights
    @param decode_ar: (bool) use AR decoding
    @param refine_iters: (int) number of refinement iterations to use
    """
    return create_model('parseq-tiny', pretrained, decode_ar=decode_ar, refine_iters=refine_iters, **kwargs)


def parseq(pretrained: bool = False, decode_ar: bool = True, refine_iters: int = 1, **kwargs):
    """
    PARSeq base model (img_size=128x32, patch_size=8x4, d_model=384)
    @param pretrained: (bool) Use pretrained weights
    @param decode_ar: (bool) use AR decoding
    @param refine_iters: (int) number of refinement iterations to use
    """
    return create_model('parseq', pretrained, decode_ar=decode_ar, refine_iters=refine_iters, **kwargs)


def abinet(pretrained: bool = False, iter_size: int = 3, **kwargs):
    """
    ABINet model (img_size=128x32)
    @param pretrained: (bool) Use pretrained weights
    @param iter_size: (int) number of refinement iterations to use
    """
    return create_model('abinet', pretrained, iter_size=iter_size, **kwargs)


def trba(pretrained: bool = False, **kwargs):
    """
    TRBA model (img_size=128x32)
    @param pretrained: (bool) Use pretrained weights
    """
    return create_model('trba', pretrained, **kwargs)


def vitstr(pretrained: bool = False, **kwargs):
    """
    ViTSTR small model (img_size=128x32, patch_size=8x4, d_model=384)
    @param pretrained: (bool) Use pretrained weights
    """
    return create_model('vitstr', pretrained, **kwargs)


def crnn(pretrained: bool = False, **kwargs):
    """
    CRNN model (img_size=128x32)
    @param pretrained: (bool) Use pretrained weights
    """
    return create_model('crnn', pretrained, **kwargs)
================================================
FILE: src/parseq/read.py
================================================
#!/usr/bin/env python3
# Scene Text Recognition Model Hub
# Copyright 2022 Darwin Bautista
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import torch
from PIL import Image
from strhub.data.module import SceneTextDataModule
from strhub.models.utils import load_from_checkpoint, parse_model_args
@torch.inference_mode()
def main():
parser = argparse.ArgumentParser()
parser.add_argument('checkpoint', help="Model checkpoint (or 'pretrained=<model_id>')")
parser.add_argument('--images', nargs='+', help='Images to read')
parser.add_argument('--device', default='cuda')
args, unknown = parser.parse_known_args()
kwargs = parse_model_args(unknown)
print(f'Additional keyword arguments: {kwargs}')
model = load_from_checkpoint(args.checkpoint, **kwargs).eval().to(args.device)
img_transform = SceneTextDataModule.get_transform(model.hparams.img_size)
for fname in args.images:
# Load image and prepare for input
image = Image.open(fname).convert('RGB')
image = img_transform(image).unsqueeze(0).to(args.device)
p = model(image).softmax(-1)
pred, p = model.tokenizer.decode(p)
print(f'{fname}: {pred[0]}')
if __name__ == '__main__':
main()
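`read.py` forwards any flags it does not recognize to the model via `parse_known_args` plus `parse_model_args`. A minimal sketch of that pattern, using a hypothetical one-line parser in place of `parse_model_args` (whose exact behavior is not shown in this file):

```python
# Sketch of read.py's argument flow: known args are parsed normally,
# unknown `--key=value` flags are collected and turned into model kwargs.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('checkpoint', help="Model checkpoint or a pretrained model id")
parser.add_argument('--images', nargs='+', help='Images to read')

# Simulated command line: python read.py pretrained=parseq --images a.png --refine_iters=2
args, unknown = parser.parse_known_args(
    ['pretrained=parseq', '--images', 'a.png', '--refine_iters=2'])

# Hypothetical stand-in for strhub's parse_model_args: strip leading dashes
# and split each unknown flag at the first '='.
kwargs = dict(tok.lstrip('-').split('=', 1) for tok in unknown)
```

Note that `nargs='+'` stops consuming at the next option-like token, so `--refine_iters=2` lands in `unknown` rather than in `args.images`.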
================================================
FILE: src/parseq/requirements.txt
================================================
torch>=1.10.2
torchvision>=0.11.3
pytorch-lightning~=1.6.5
timm~=0.6.5
nltk~=3.7.0
lmdb~=1.3.0
Pillow~=9.2.0
imgaug~=0.4.0
hydra-core~=1.2.0
fvcore~=0.1.5.post20220512
ray[tune]~=1.13.0
ax-platform~=0.2.5.1
PyYAML~=6.0.0
tqdm~=4.64.0
================================================
FILE: src/parseq/setup.cfg
================================================
[tool:pytest]
norecursedirs =
.git
dist
build
addopts =
--strict
--doctest-modules
--durations=0
[coverage:report]
exclude_lines =
pragma: no-cover
pass
[flake8]
max-line-length = 120
exclude = .tox,*.egg,build,temp
select = E,W,F
doctests = True
verbose = 2
# https://pep8.readthedocs.io/en/latest/intro.html#error-codes
format = pylint
# see: https://www.flake8rules.com/
ignore =
E731 # Do not assign a lambda expression, use a def
W504 # Line break occurred after a binary operator
F401 # Module imported but unused
F841 # Local variable name is assigned to but never used
W605 # Invalid escape sequence 'x'
# setup.cfg or tox.ini
[check-manifest]
ignore =
*.yml
.github
.github/*
[metadata]
license_file = LICENSE
description-file = README.md
# long_description = file:README.md
# long_description_content_type = text/markdown
================================================
FILE: src/parseq/setup.py
================================================
#!/usr/bin/env python
from setuptools import setup, find_packages
setup(
name='strhub',
version='1.1.0',
description='Scene Text Recognition Model Hub: A collection of deep learning models for Scene Text Recognition',
author='Darwin Bautista',
author_email='baudm@users.noreply.github.com',
url='https://github.com/baudm/parseq',
install_requires=['torch~=1.12.1', 'pytorch-lightning~=1.6.5', 'timm~=0.6.5'],
packages=find_packages(),
)
================================================
FILE: src/parseq/strhub/__init__.py
================================================
================================================
FILE: src/parseq/strhub/data/__init__.py
================================================
================================================
FILE: src/parseq/strhub/data/aa_overrides.py
================================================
# Scene Text Recognition Model Hub
# Copyright 2022 Darwin Bautista
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Extends default ops to accept optional parameters."""
from functools import partial
from timm.data.auto_augment import _LEVEL_DENOM, _randomly_negate, LEVEL_TO_ARG, NAME_TO_OP, rotate
def rotate_expand(img, degrees, **kwargs):
"""Rotate operation with expand=True to avoid cutting off the characters"""
kwargs['expand'] = True
return rotate(img, degrees, **kwargs)
def _level_to_arg(level, hparams, key, default):
magnitude = hparams.get(key, default)
level = (level / _LEVEL_DENOM) * magnitude
level = _randomly_negate(level)
return level,
def apply():
# Overrides
NAME_TO_OP.update({
'Rotate': rotate_expand
})
LEVEL_TO_ARG.update({
'Rotate': partial(_level_to_arg, key='rotate_deg', default=30.),
'ShearX': partial(_level_to_arg, key='shear_x_pct', default=0.3),
'ShearY': partial(_level_to_arg, key='shear_y_pct', default=0.3),
'TranslateXRel': partial(_level_to_arg, key='translate_x_pct', default=0.45),
'TranslateYRel': partial(_level_to_arg, key='translate_y_pct', default=0.45),
})
================================================
FILE: src/parseq/strhub/data/augment.py
================================================
# Scene Text Recognition Model Hub
# Copyright 2022 Darwin Bautista
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import partial
import imgaug.augmenters as iaa
import numpy as np
from PIL import ImageFilter, Image
from timm.data import auto_augment
from strhub.data import aa_overrides
aa_overrides.apply()
_OP_CACHE = {}
def _get_op(key, factory):
try:
op = _OP_CACHE[key]
except KeyError:
op = factory()
_OP_CACHE[key] = op
return op
def _get_param(level, img, max_dim_factor, min_level=1):
max_level = max(min_level, max_dim_factor * max(img.size))
return round(min(level, max_level))
def gaussian_blur(img, radius, **__):
radius = _get_param(radius, img, 0.02)
key = 'gaussian_blur_' + str(radius)
op = _get_op(key, lambda: ImageFilter.GaussianBlur(radius))
return img.filter(op)
def motion_blur(img, k, **__):
k = _get_param(k, img, 0.08, 3) | 1 # bin to odd values
key = 'motion_blur_' + str(k)
op = _get_op(key, lambda: iaa.MotionBlur(k))
return Image.fromarray(op(image=np.asarray(img)))
def gaussian_noise(img, scale, **_):
scale = _get_param(scale, img, 0.25) | 1 # bin to odd values
key = 'gaussian_noise_' + str(scale)
op = _get_op(key, lambda: iaa.AdditiveGaussianNoise(scale=scale))
return Image.fromarray(op(image=np.asarray(img)))
def poisson_noise(img, lam, **_):
lam = _get_param(lam, img, 0.2) | 1 # bin to odd values
key = 'poisson_noise_' + str(lam)
op = _get_op(key, lambda: iaa.AdditivePoissonNoise(lam))
return Image.fromarray(op(image=np.asarray(img)))
def _level_to_arg(level, _hparams, max):
level = max * level / auto_augment._LEVEL_DENOM
return level,
_RAND_TRANSFORMS = auto_augment._RAND_INCREASING_TRANSFORMS.copy()
_RAND_TRANSFORMS.remove('SharpnessIncreasing') # remove, interferes with *blur ops
_RAND_TRANSFORMS.extend([
'GaussianBlur',
# 'MotionBlur',
# 'GaussianNoise',
'PoissonNoise'
])
auto_augment.LEVEL_TO_ARG.update({
'GaussianBlur': partial(_level_to_arg, max=4),
'MotionBlur': partial(_level_to_arg, max=20),
'GaussianNoise': partial(_level_to_arg, max=0.1 * 255),
'PoissonNoise': partial(_level_to_arg, max=40)
})
auto_augment.NAME_TO_OP.update({
'GaussianBlur': gaussian_blur,
'MotionBlur': motion_blur,
'GaussianNoise': gaussian_noise,
'PoissonNoise': poisson_noise
})
def rand_augment_transform(magnitude=5, num_layers=3):
# These are tuned for magnitude=5, which means that effective magnitudes are half of these values.
hparams = {
'rotate_deg': 30,
'shear_x_pct': 0.9,
'shear_y_pct': 0.2,
'translate_x_pct': 0.10,
'translate_y_pct': 0.30
}
ra_ops = auto_augment.rand_augment_ops(magnitude, hparams, transforms=_RAND_TRANSFORMS)
# Supply weights to disable replacement in random selection (i.e. avoid applying the same op twice)
choice_weights = [1. / len(ra_ops) for _ in range(len(ra_ops))]
return auto_augment.RandAugment(ra_ops, num_layers, choice_weights)
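Two small numeric conventions recur in this file: `_level_to_arg` linearly rescales the shared 0..10 RandAugment magnitude into each op's own range, and `k | 1` forces kernel-size-like parameters to be odd. A standalone sketch of both (assuming timm's `_LEVEL_DENOM` is 10, as in the versions pinned here):

```python
def level_to_arg(level, max_val, level_denom=10):
    # Mirrors _level_to_arg: map the shared 0..level_denom magnitude
    # onto the op-specific range [0, max_val].
    return max_val * level / level_denom


def to_odd(k):
    # Mirrors the `k | 1` trick: setting the lowest bit forces an odd
    # value, e.g. for blur kernel sizes that must be odd.
    return k | 1
```

So at the default `magnitude=5`, an op registered with `max=20` (like `MotionBlur` above) receives an effective argument of 10, i.e. half its maximum, matching the comment in `rand_augment_transform`.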
================================================
FILE: src/parseq/strhub/data/dataset.py
================================================
# Scene Text Recognition Model Hub
# Copyright 2022 Darwin Bautista
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import glob
import io
import logging
import unicodedata
from pathlib import Path, PurePath
from typing import Callable, Optional, Union
import lmdb
from PIL import Image
from torch.utils.data import Dataset, ConcatDataset
from strhub.data.utils import CharsetAdapter
log = logging.getLogger(__name__)
def build_tree_dataset(root: Union[PurePath, str], *args, **kwargs):
try:
kwargs.pop('root') # prevent 'root' from being passed via kwargs
except KeyError:
pass
root = Path(root).absolute()
log.info(f'dataset root:\t{root}')
datasets = []
for mdb in glob.glob(str(root / '**/data.mdb'), recursive=True):
mdb = Path(mdb)
ds_name = str(mdb.parent.relative_to(root))
ds_root = str(mdb.parent.absolute())
dataset = LmdbDataset(ds_root, *args, **kwargs)
log.info(f'\tlmdb:\t{ds_name}\tnum samples: {len(dataset)}')
datasets.append(dataset)
return ConcatDataset(datasets)
class LmdbDataset(Dataset):
"""Dataset interface to an LMDB database.
It supports both labelled and unlabelled datasets. For unlabelled datasets, the image index itself is returned
as the label. Unicode characters are normalized by default. Case-sensitivity is inferred from the charset.
Labels are transformed according to the charset.
"""
def __init__(self, root: str, charset: str, max_label_len: int, min_image_dim: int = 0,
remove_whitespace: bool = True, normalize_unicode: bool = True,
unlabelled: bool = False, transform: Optional[Callable] = None):
self._env = None
self.root = root
self.unlabelled = unlabelled
self.transform = transform
self.labels = []
self.filtered_index_list = []
self.num_samples = self._preprocess_labels(charset, remove_whitespace, normalize_unicode,
max_label_len, min_image_dim)
def __del__(self):
if self._env is not None:
self._env.close()
self._env = None
def _create_env(self):
return lmdb.open(self.root, max_readers=1, readonly=True, create=False,
readahead=False, meminit=False, lock=False)
@property
def env(self):
if self._env is None:
self._env = self._create_env()
return self._env
def _preprocess_labels(self, charset, remove_whitespace, normalize_unicode, max_label_len, min_image_dim):
charset_adapter = CharsetAdapter(charset)
with self._create_env() as env, env.begin() as txn:
num_samples = int(txn.get('num-samples'.encode()))
if self.unlabelled:
return num_samples
for index in range(num_samples):
index += 1 # lmdb starts with 1
label_key = f'label-{index:09d}'.encode()
label = txn.get(label_key).decode()
# Normally, whitespace is removed from the labels.
if remove_whitespace:
label = ''.join(label.split())
# Normalize unicode composites (if any) and convert to compatible ASCII characters
if normalize_unicode:
label = unicodedata.normalize('NFKD', label).encode('ascii', 'ignore').decode()
# Filter by length before removing unsupported characters. The original label might be too long.
if len(label) > max_label_len:
continue
label = charset_adapter(label)
# We filter out samples which don't contain any supported characters
if not label:
continue
# Filter images that are too small.
if min_image_dim > 0:
img_key = f'image-{index:09d}'.encode()
buf = io.BytesIO(txn.get(img_key))
w, h = Image.open(buf).size
if w < min_image_dim or h < min_image_dim:
continue
self.labels.append(label)
self.filtered_index_list.append(index)
return len(self.labels)
def __len__(self):
return self.num_samples
def __getitem__(self, index):
if self.unlabelled:
label = index
else:
label = self.labels[index]
index = self.filtered_index_list[index]
img_key = f'image-{index:09d}'.encode()
with self.env.begin() as txn:
imgbuf = txn.get(img_key)
buf = io.BytesIO(imgbuf)
img = Image.open(buf).convert('RGB')
if self.transform is not None:
img = self.transform(img)
return img, label
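`LmdbDataset` deliberately opens its LMDB environment lazily through the `env` property rather than in `__init__`: the handle is created on first access, so each DataLoader worker process opens its own environment instead of inheriting (and unsafely sharing) a pickled one. A minimal sketch of the pattern with a counter standing in for `lmdb.open`:

```python
class LazyResource:
    """Sketch of the lazy-open pattern used by LmdbDataset.env: the
    underlying handle is created on first access, not at construction."""

    def __init__(self):
        self._env = None
        self.open_count = 0  # instrumentation for the demo only

    def _create_env(self):
        # Stand-in for lmdb.open(self.root, readonly=True, ...)
        self.open_count += 1
        return object()

    @property
    def env(self):
        if self._env is None:
            self._env = self._create_env()
        return self._env
```

Repeated accesses reuse the cached handle, and an instance that is never read from never opens the resource at all.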
================================================
FILE: src/parseq/strhub/data/module.py
================================================
# Scene Text Recognition Model Hub
# Copyright 2022 Darwin Bautista
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pathlib import PurePath
from typing import Optional, Callable, Sequence, Tuple
import pytorch_lightning as pl
from torch.utils.data import DataLoader
from torchvision import transforms as T
from .dataset import build_tree_dataset, LmdbDataset
class SceneTextDataModule(pl.LightningDataModule):
TEST_BENCHMARK_SUB = ('IIIT5k', 'SVT', 'IC13_857', 'IC15_1811', 'SVTP', 'CUTE80')
TEST_BENCHMARK = ('IIIT5k', 'SVT', 'IC13_1015', 'IC15_2077', 'SVTP', 'CUTE80')
TEST_NEW = ('ArT', 'COCOv1.4', 'Uber')
TEST_ALL = tuple(set(TEST_BENCHMARK_SUB + TEST_BENCHMARK + TEST_NEW))
def __init__(self, root_dir: str, train_dir: str, img_size: Sequence[int], max_label_length: int,
charset_train: str, charset_test: str, batch_size: int, num_workers: int, augment: bool,
remove_whitespace: bool = True, normalize_unicode: bool = True,
min_image_dim: int = 0, rotation: int = 0, collate_fn: Optional[Callable] = None):
super().__init__()
self.root_dir = root_dir
self.train_dir = train_dir
self.img_size = tuple(img_size)
self.max_label_length = max_label_length
self.charset_train = charset_train
self.charset_test = charset_test
self.batch_size = batch_size
self.num_workers = num_workers
self.augment = augment
self.remove_whitespace = remove_whitespace
self.normalize_unicode = normalize_unicode
self.min_image_dim = min_image_dim
self.rotation = rotation
self.collate_fn = collate_fn
self._train_dataset = None
self._val_dataset = None
@staticmethod
def get_transform(img_size: Tuple[int], augment: bool = False, rotation: int = 0):
transforms = []
if augment:
from .augment import rand_augment_transform
transforms.append(rand_augment_transform())
if rotation:
transforms.append(lambda img: img.rotate(rotation, expand=True))
transforms.extend([
T.Resize(img_size, T.InterpolationMode.BICUBIC),
T.ToTensor(),
T.Normalize(0.5, 0.5)
])
return T.Compose(transforms)
@property
def train_dataset(self):
if self._train_dataset is None:
transform = self.get_transform(self.img_size, self.augment)
root = PurePath(self.root_dir, 'train', self.train_dir)
self._train_dataset = build_tree_dataset(root, self.charset_train, self.max_label_length,
self.min_image_dim, self.remove_whitespace, self.normalize_unicode,
transform=transform)
return self._train_dataset
@property
def val_dataset(self):
if self._val_dataset is None:
transform = self.get_transform(self.img_size)
root = PurePath(self.root_dir, 'val')
self._val_dataset = build_tree_dataset(root, self.charset_test, self.max_label_length,
self.min_image_dim, self.remove_whitespace, self.normalize_unicode,
transform=transform)
return self._val_dataset
def train_dataloader(self):
return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True,
num_workers=self.num_workers, persistent_workers=self.num_workers > 0,
pin_memory=True, collate_fn=self.collate_fn)
def val_dataloader(self):
return DataLoader(self.val_dataset, batch_size=self.batch_size,
num_workers=self.num_workers, persistent_workers=self.num_workers > 0,
pin_memory=True, collate_fn=self.collate_fn)
def test_dataloaders(self, subset):
transform = self.get_transform(self.img_size, rotation=self.rotation)
root = PurePath(self.root_dir, 'test')
datasets = {s: LmdbDataset(str(root / s), self.charset_test, self.max_label_length,
self.min_image_dim, self.remove_whitespace, self.normalize_unicode,
transform=transform) for s in subset}
return {k: DataLoader(v, batch_size=self.batch_size, num_workers=self.num_workers,
pin_memory=True, collate_fn=self.collate_fn)
for k, v in datasets.items()}
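The final `T.Normalize(0.5, 0.5)` step in `get_transform` maps `ToTensor`'s `[0, 1]` pixel range to `[-1, 1]`, which is the input range the recognition models expect. The arithmetic, written out as plain Python:

```python
def normalize(x, mean=0.5, std=0.5):
    # T.Normalize(0.5, 0.5) applies (x - mean) / std per channel,
    # taking values in [0, 1] to [-1, 1].
    return (x - mean) / std
```

Black (0.0) maps to -1.0, white (1.0) to 1.0, and mid-gray (0.5) to 0.0.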
================================================
FILE: src/parseq/strhub/data/utils.py
================================================
# Scene Text Recognition Model Hub
# Copyright 2022 Darwin Bautista
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from abc import ABC, abstractmethod
from itertools import groupby
from typing import List, Optional, Tuple
import torch
from torch import Tensor
from torch.nn.utils.rnn import pad_sequence
class CharsetAdapter:
"""Transforms labels according to the target charset."""
def __init__(self, target_charset) -> None:
super().__init__()
self.lowercase_only = target_charset == target_charset.lower()
self.uppercase_only = target_charset == target_charset.upper()
self.unsupported = f'[^{re.escape(target_charset)}]'
def __call__(self, label):
if self.lowercase_only:
label = label.lower()
elif self.uppercase_only:
label = label.upper()
# Remove unsupported characters
label = re.sub(self.unsupported, '', label)
return label
class BaseTokenizer(ABC):
def __init__(self, charset: str, specials_first: tuple = (), specials_last: tuple = ()) -> None:
self._itos = specials_first + tuple(charset) + specials_last
self._stoi = {s: i for i, s in enumerate(self._itos)}
def __len__(self):
return len(self._itos)
def _tok2ids(self, tokens: str) -> List[int]:
return [self._stoi[s] for s in tokens]
def _ids2tok(self, token_ids: List[int], join: bool = True) -> str:
tokens = [self._itos[i] for i in token_ids]
return ''.join(tokens) if join else tokens
@abstractmethod
def encode(self, labels: List[str], device: Optional[torch.device] = None) -> Tensor:
"""Encode a batch of labels to a representation suitable for the model.
Args:
labels: List of labels. Each can be of arbitrary length.
device: Create tensor on this device.
Returns:
Batched tensor representation padded to the max label length. Shape: N, L
"""
raise NotImplementedError
@abstractmethod
def _filter(self, probs: Tensor, ids: Tensor) -> Tuple[Tensor, List[int]]:
"""Internal method which performs the necessary filtering prior to decoding."""
raise NotImplementedError
def decode(self, token_dists: Tensor, raw: bool = False) -> Tuple[List[str], List[Tensor]]:
"""Decode a batch of token distributions.
Args:
token_dists: softmax probabilities over the token distribution. Shape: N, L, C
raw: return unprocessed labels (will return list of list of strings)
Returns:
list of string labels (arbitrary length) and
their corresponding sequence probabilities as a list of Tensors
"""
batch_tokens = []
batch_probs = []
for dist in token_dists:
probs, ids = dist.max(-1) # greedy selection
if not raw:
probs, ids = self._filter(probs, ids)
tokens = self._ids2tok(ids, not raw)
batch_tokens.append(tokens)
batch_probs.append(probs)
return batch_tokens, batch_probs
class Tokenizer(BaseTokenizer):
BOS = '[B]'
EOS = '[E]'
PAD = '[P]'
def __init__(self, charset: str) -> None:
specials_first = (self.EOS,)
specials_last = (self.BOS, self.PAD)
super().__init__(charset, specials_first, specials_last)
self.eos_id, self.bos_id, self.pad_id = [self._stoi[s] for s in specials_first + specials_last]
def encode(self, labels: List[str], device: Optional[torch.device] = None) -> Tensor:
batch = [torch.as_tensor([self.bos_id] + self._tok2ids(y) + [self.eos_id], dtype=torch.long, device=device)
for y in labels]
return pad_sequence(batch, batch_first=True, padding_value=self.pad_id)
def _filter(self, probs: Tensor, ids: Tensor) -> Tuple[Tensor, List[int]]:
ids = ids.tolist()
try:
eos_idx = ids.index(self.eos_id)
except ValueError:
eos_idx = len(ids) # Nothing to truncate.
# Truncate after EOS
ids = ids[:eos_idx]
probs = probs[:eos_idx + 1] # but include prob. for EOS (if it exists)
return probs, ids
class CTCTokenizer(BaseTokenizer):
BLANK = '[B]'
def __init__(self, charset: str) -> None:
# BLANK uses index == 0 by default
super().__init__(charset, specials_first=(self.BLANK,))
self.blank_id = self._stoi[self.BLANK]
def encode(self, labels: List[str], device: Optional[torch.device] = None) -> Tensor:
# We use a padded representation since we don't want to use CUDNN's CTC implementation
batch = [torch.as_tensor(self._tok2ids(y), dtype=torch.long, device=device) for y in labels]
return pad_sequence(batch, batch_first=True, padding_value=self.blank_id)
def _filter(self, probs: Tensor, ids: Tensor) -> Tuple[Tensor, List[int]]:
# Best path decoding:
ids = list(zip(*groupby(ids.tolist())))[0] # Remove duplicate tokens
ids = [x for x in ids if x != self.blank_id] # Remove BLANKs
# `probs` is just pass-through since all positions are considered part of the path
return probs, ids
================================================
FILE: src/parseq/strhub/models/__init__.py
================================================
================================================
FILE: src/parseq/strhub/models/abinet/LICENSE
================================================
ABINet for non-commercial purposes
Copyright (c) 2021, USTC
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: src/parseq/strhub/models/abinet/__init__.py
================================================
r"""
Fang, Shancheng, Hongtao Xie, Yuxin Wang, Zhendong Mao, and Yongdong Zhang.
"Read Like Humans: Autonomous, Bidirectional and Iterative Language Modeling for Scene Text Recognition."
In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (pp. 7098-7107), 2021.
https://arxiv.org/abs/2103.06495
All source files, except `system.py`, are based on the implementation listed below,
and hence are released under the license of the original.
Source: https://github.com/FangShancheng/ABINet
License: 2-clause BSD License (see included LICENSE file)
"""
================================================
FILE: src/parseq/strhub/models/abinet/attention.py
================================================
import torch
import torch.nn as nn
from .transformer import PositionalEncoding
class Attention(nn.Module):
def __init__(self, in_channels=512, max_length=25, n_feature=256):
super().__init__()
self.max_length = max_length
self.f0_embedding = nn.Embedding(max_length, in_channels)
self.w0 = nn.Linear(max_length, n_feature)
self.wv = nn.Linear(in_channels, in_channels)
self.we = nn.Linear(in_channels, max_length)
self.active = nn.Tanh()
self.softmax = nn.Softmax(dim=2)
def forward(self, enc_output):
enc_output = enc_output.permute(0, 2, 3, 1).flatten(1, 2)
reading_order = torch.arange(self.max_length, dtype=torch.long, device=enc_output.device)
reading_order = reading_order.unsqueeze(0).expand(enc_output.size(0), -1) # (S,) -> (B, S)
reading_order_embed = self.f0_embedding(reading_order) # b,25,512
t = self.w0(reading_order_embed.permute(0, 2, 1)) # b,512,256
t = self.active(t.permute(0, 2, 1) + self.wv(enc_output)) # b,256,512
attn = self.we(t) # b,256,25
attn = self.softmax(attn.permute(0, 2, 1)) # b,25,256
g_output = torch.bmm(attn, enc_output) # b,25,512
return g_output, attn.view(*attn.shape[:2], 8, 32)
def encoder_layer(in_c, out_c, k=3, s=2, p=1):
return nn.Sequential(nn.Conv2d(in_c, out_c, k, s, p),
nn.BatchNorm2d(out_c),
nn.ReLU(True))
def decoder_layer(in_c, out_c, k=3, s=1, p=1, mode='nearest', scale_factor=None, size=None):
align_corners = None if mode == 'nearest' else True
return nn.Sequential(nn.Upsample(size=size, scale_factor=scale_factor,
mode=mode, align_corners=align_corners),
nn.Conv2d(in_c, out_c, k, s, p),
nn.BatchNorm2d(out_c),
nn.ReLU(True))
class PositionAttention(nn.Module):
def __init__(self, max_length, in_channels=512, num_channels=64,
h=8, w=32, mode='nearest', **kwargs):
super().__init__()
self.max_length = max_length
self.k_encoder = nn.Sequential(
encoder_layer(in_channels, num_channels, s=(1, 2)),
encoder_layer(num_channels, num_channels, s=(2, 2)),
encoder_layer(num_channels, num_channels, s=(2, 2)),
encoder_layer(num_channels, num_channels, s=(2, 2))
)
self.k_decoder = nn.Sequential(
decoder_layer(num_channels, num_channels, scale_factor=2, mode=mode),
decoder_layer(num_channels, num_channels, scale_factor=2, mode=mode),
decoder_layer(num_channels, num_channels, scale_factor=2, mode=mode),
decoder_layer(num_channels, in_channels, size=(h, w), mode=mode)
)
self.pos_encoder = PositionalEncoding(in_channels, dropout=0., max_len=max_length)
self.project = nn.Linear(in_channels, in_channels)
def forward(self, x):
N, E, H, W = x.size()
k, v = x, x # (N, E, H, W)
# calculate key vector
features = []
for i in range(0, len(self.k_encoder)):
k = self.k_encoder[i](k)
features.append(k)
for i in range(0, len(self.k_decoder) - 1):
k = self.k_decoder[i](k)
k = k + features[len(self.k_decoder) - 2 - i]
k = self.k_decoder[-1](k)
# calculate query vector
# TODO q=f(q,k)
zeros = x.new_zeros((self.max_length, N, E)) # (T, N, E)
q = self.pos_encoder(zeros) # (T, N, E)
q = q.permute(1, 0, 2) # (N, T, E)
q = self.project(q) # (N, T, E)
# calculate attention
attn_scores = torch.bmm(q, k.flatten(2, 3)) # (N, T, (H*W))
attn_scores = attn_scores / (E ** 0.5)
attn_scores = torch.softmax(attn_scores, dim=-1)
v = v.permute(0, 2, 3, 1).view(N, -1, E) # (N, (H*W), E)
attn_vecs = torch.bmm(attn_scores, v) # (N, T, E)
return attn_vecs, attn_scores.view(N, -1, H, W)
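The core of `PositionAttention.forward` is scaled dot-product attention: `softmax(q @ k^T / sqrt(E)) @ v`. A pure-Python sketch on small nested lists, independent of torch, to make the arithmetic concrete:

```python
import math


def softmax(row):
    # Numerically stable softmax over one row of scores.
    m = max(row)
    exps = [math.exp(v - m) for v in row]
    s = sum(exps)
    return [e / s for e in exps]


def scaled_dot_attention(q, k, v, dim):
    # softmax(q @ k^T / sqrt(dim)) @ v, as in PositionAttention.forward.
    # q: (T, E) queries, k/v: (S, E) keys and values, as nested lists.
    scores = [[sum(qi * ki for qi, ki in zip(qr, kr)) / math.sqrt(dim)
               for kr in k] for qr in q]
    weights = [softmax(r) for r in scores]
    return [[sum(w * vr[j] for w, vr in zip(wr, v)) for j in range(len(v[0]))]
            for wr in weights]
```

With one-hot values, each output row is exactly the attention weights, so a query aligned with the first key puts more mass on the first value.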
================================================
FILE: src/parseq/strhub/models/abinet/backbone.py
================================================
import torch.nn as nn
from torch.nn import TransformerEncoderLayer, TransformerEncoder
from .resnet import resnet45
from .transformer import PositionalEncoding
class ResTranformer(nn.Module):
def __init__(self, d_model=512, nhead=8, d_inner=2048, dropout=0.1, activation='relu', backbone_ln=2):
super().__init__()
self.resnet = resnet45()
self.pos_encoder = PositionalEncoding(d_model, max_len=8 * 32)
encoder_layer = TransformerEncoderLayer(d_model=d_model, nhead=nhead,
dim_feedforward=d_inner, dropout=dropout, activation=activation)
self.transformer = TransformerEncoder(encoder_layer, backbone_ln)
def forward(self, images):
feature = self.resnet(images)
n, c, h, w = feature.shape
feature = feature.view(n, c, -1).permute(2, 0, 1)
feature = self.pos_encoder(feature)
feature = self.transformer(feature)
feature = feature.permute(1, 2, 0).view(n, c, h, w)
return feature
================================================
FILE: src/parseq/strhub/models/abinet/model.py
================================================
import torch
import torch.nn as nn
class Model(nn.Module):
def __init__(self, dataset_max_length: int, null_label: int):
super().__init__()
self.max_length = dataset_max_length + 1 # additional stop token
self.null_label = null_label
def _get_length(self, logit, dim=-1):
""" Greed decoder to obtain length from logit"""
out = (logit.argmax(dim=-1) == self.null_label)
abn = out.any(dim)
out = ((out.cumsum(dim) == 1) & out).max(dim)[1]
out = out + 1 # additional end token
out = torch.where(abn, out, out.new_tensor(logit.shape[1], device=out.device))
return out
@staticmethod
def _get_padding_mask(length, max_length):
length = length.unsqueeze(-1)
grid = torch.arange(0, max_length, device=length.device).unsqueeze(0)
return grid >= length
@staticmethod
def _get_location_mask(sz, device=None):
mask = torch.eye(sz, device=device)
mask = mask.float().masked_fill(mask == 1, float('-inf'))
return mask
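`_get_padding_mask` builds a boolean mask by broadcasting a position grid against per-sample lengths: position `p` is masked (True) when `p >= length`. The same logic in plain Python, without torch:

```python
def padding_mask(lengths, max_length):
    # Mirrors Model._get_padding_mask: True marks padded positions,
    # i.e. positions at or beyond each sample's length.
    return [[pos >= n for pos in range(max_length)] for n in lengths]
```

For lengths `[2, 4]` with `max_length=4`, the first row masks positions 2 and 3 while the second row is all unmasked.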
================================================
FILE: src/parseq/strhub/models/abinet/model_abinet_iter.py
================================================
import torch
from torch import nn
from .model_alignment import BaseAlignment
from .model_language import BCNLanguage
from .model_vision import BaseVision
class ABINetIterModel(nn.Module):
def __init__(self, dataset_max_length, null_label, num_classes, iter_size=1,
d_model=512, nhead=8, d_inner=2048, dropout=0.1, activation='relu',
v_loss_weight=1., v_attention='position', v_attention_mode='nearest',
v_backbone='transformer', v_num_layers=2,
l_loss_weight=1., l_num_layers=4, l_detach=True, l_use_self_attn=False,
a_loss_weight=1.):
super().__init__()
self.iter_size = iter_size
self.vision = BaseVision(dataset_max_length, null_label, num_classes, v_attention, v_attention_mode,
v_loss_weight, d_model, nhead, d_inner, dropout, activation, v_backbone, v_num_layers)
self.language = BCNLanguage(dataset_max_length, null_label, num_classes, d_model, nhead, d_inner, dropout,
activation, l_num_layers, l_detach, l_use_self_attn, l_loss_weight)
self.alignment = BaseAlignment(dataset_max_length, null_label, num_classes, d_model, a_loss_weight)
def forward(self, images):
v_res = self.vision(images)
a_res = v_res
all_l_res, all_a_res = [], []
for _ in range(self.iter_size):
tokens = torch.softmax(a_res['logits'], dim=-1)
lengths = a_res['pt_lengths']
            lengths.clamp_(2, self.language.max_length)  # TODO: move to language model
l_res = self.language(tokens, lengths)
all_l_res.append(l_res)
a_res = self.alignment(l_res['feature'], v_res['feature'])
all_a_res.append(a_res)
if self.training:
return all_a_res, all_l_res, v_res
else:
return a_res, all_l_res[-1], v_res
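The forward pass is a fixed number of refinement rounds: the aligned prediction is softened into tokens, corrected by the language model, then re-fused with the (fixed) vision features. A toy sketch of that control flow, with the three submodels replaced by hypothetical scalar callables:

```python
def iterative_refine(vision_out, language, alignment, iter_size=3):
    # vision_out stays fixed across rounds; only the aligned estimate is refined
    a = vision_out
    all_l, all_a = [], []
    for _ in range(iter_size):
        l = language(a)               # language model corrects the current estimate
        a = alignment(l, vision_out)  # fuse the correction with vision evidence
        all_l.append(l)
        all_a.append(a)
    return a, all_l, all_a

# toy submodels: "language" nudges toward 1.0, "alignment" averages
final, _, _ = iterative_refine(
    0.0,
    language=lambda a: a + 0.5 * (1.0 - a),
    alignment=lambda l, v: 0.5 * (l + v),
)
```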
================================================
FILE: src/parseq/strhub/models/abinet/model_alignment.py
================================================
import torch
import torch.nn as nn
from .model import Model
class BaseAlignment(Model):
def __init__(self, dataset_max_length, null_label, num_classes, d_model=512, loss_weight=1.0):
super().__init__(dataset_max_length, null_label)
self.loss_weight = loss_weight
self.w_att = nn.Linear(2 * d_model, d_model)
self.cls = nn.Linear(d_model, num_classes)
def forward(self, l_feature, v_feature):
"""
Args:
            l_feature: (N, T, E) where T is length, N is batch size and E is dim of model
v_feature: (N, T, E) shape the same as l_feature
"""
f = torch.cat((l_feature, v_feature), dim=2)
f_att = torch.sigmoid(self.w_att(f))
output = f_att * v_feature + (1 - f_att) * l_feature
logits = self.cls(output) # (N, T, C)
pt_lengths = self._get_length(logits)
return {'logits': logits, 'pt_lengths': pt_lengths, 'loss_weight': self.loss_weight,
'name': 'alignment'}
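BaseAlignment is a learned gate: a sigmoid over the concatenated features decides, per element, how much to trust vision versus language. A scalar sketch of the blend, with the linear layer reduced to a hypothetical precomputed gate logit:

```python
import math

def fuse(l_feat, v_feat, gate_logit):
    # gate -> 1 favors the vision feature, gate -> 0 the language feature
    gate = 1.0 / (1.0 + math.exp(-gate_logit))
    return gate * v_feat + (1.0 - gate) * l_feat
```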
================================================
FILE: src/parseq/strhub/models/abinet/model_language.py
================================================
import torch.nn as nn
from torch.nn import TransformerDecoder
from .model import Model
from .transformer import PositionalEncoding, TransformerDecoderLayer
class BCNLanguage(Model):
def __init__(self, dataset_max_length, null_label, num_classes, d_model=512, nhead=8, d_inner=2048, dropout=0.1,
activation='relu', num_layers=4, detach=True, use_self_attn=False, loss_weight=1.0,
global_debug=False):
super().__init__(dataset_max_length, null_label)
self.detach = detach
self.loss_weight = loss_weight
self.proj = nn.Linear(num_classes, d_model, False)
self.token_encoder = PositionalEncoding(d_model, max_len=self.max_length)
self.pos_encoder = PositionalEncoding(d_model, dropout=0, max_len=self.max_length)
decoder_layer = TransformerDecoderLayer(d_model, nhead, d_inner, dropout,
activation, self_attn=use_self_attn, debug=global_debug)
self.model = TransformerDecoder(decoder_layer, num_layers)
self.cls = nn.Linear(d_model, num_classes)
def forward(self, tokens, lengths):
"""
Args:
tokens: (N, T, C) where T is length, N is batch size and C is classes number
lengths: (N,)
"""
if self.detach:
tokens = tokens.detach()
embed = self.proj(tokens) # (N, T, E)
embed = embed.permute(1, 0, 2) # (T, N, E)
embed = self.token_encoder(embed) # (T, N, E)
padding_mask = self._get_padding_mask(lengths, self.max_length)
zeros = embed.new_zeros(*embed.shape)
        query = self.pos_encoder(zeros)
        location_mask = self._get_location_mask(self.max_length, tokens.device)
        output = self.model(query, embed,
tgt_key_padding_mask=padding_mask,
memory_mask=location_mask,
memory_key_padding_mask=padding_mask) # (T, N, E)
output = output.permute(1, 0, 2) # (N, T, E)
logits = self.cls(output) # (N, T, C)
pt_lengths = self._get_length(logits)
res = {'feature': output, 'logits': logits, 'pt_lengths': pt_lengths,
'loss_weight': self.loss_weight, 'name': 'language'}
return res
================================================
FILE: src/parseq/strhub/models/abinet/model_vision.py
================================================
from torch import nn
from .attention import PositionAttention, Attention
from .backbone import ResTranformer
from .model import Model
from .resnet import resnet45
class BaseVision(Model):
def __init__(self, dataset_max_length, null_label, num_classes,
attention='position', attention_mode='nearest', loss_weight=1.0,
d_model=512, nhead=8, d_inner=2048, dropout=0.1, activation='relu',
backbone='transformer', backbone_ln=2):
super().__init__(dataset_max_length, null_label)
self.loss_weight = loss_weight
self.out_channels = d_model
if backbone == 'transformer':
self.backbone = ResTranformer(d_model, nhead, d_inner, dropout, activation, backbone_ln)
else:
self.backbone = resnet45()
if attention == 'position':
self.attention = PositionAttention(
max_length=self.max_length,
mode=attention_mode
)
elif attention == 'attention':
self.attention = Attention(
max_length=self.max_length,
n_feature=8 * 32,
)
else:
raise ValueError(f'invalid attention: {attention}')
self.cls = nn.Linear(self.out_channels, num_classes)
def forward(self, images):
features = self.backbone(images) # (N, E, H, W)
attn_vecs, attn_scores = self.attention(features) # (N, T, E), (N, T, H, W)
logits = self.cls(attn_vecs) # (N, T, C)
pt_lengths = self._get_length(logits)
return {'feature': attn_vecs, 'logits': logits, 'pt_lengths': pt_lengths,
'attn_scores': attn_scores, 'loss_weight': self.loss_weight, 'name': 'vision'}
================================================
FILE: src/parseq/strhub/models/abinet/resnet.py
================================================
import math
from typing import Optional, Callable
import torch.nn as nn
from torchvision.models import resnet
class BasicBlock(resnet.BasicBlock):
def __init__(self, inplanes: int, planes: int, stride: int = 1, downsample: Optional[nn.Module] = None,
groups: int = 1, base_width: int = 64, dilation: int = 1,
norm_layer: Optional[Callable[..., nn.Module]] = None) -> None:
super().__init__(inplanes, planes, stride, downsample, groups, base_width, dilation, norm_layer)
self.conv1 = resnet.conv1x1(inplanes, planes)
self.conv2 = resnet.conv3x3(planes, planes, stride)
class ResNet(nn.Module):
def __init__(self, block, layers):
super().__init__()
self.inplanes = 32
self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1,
bias=False)
self.bn1 = nn.BatchNorm2d(32)
self.relu = nn.ReLU(inplace=True)
self.layer1 = self._make_layer(block, 32, layers[0], stride=2)
self.layer2 = self._make_layer(block, 64, layers[1], stride=1)
self.layer3 = self._make_layer(block, 128, layers[2], stride=2)
self.layer4 = self._make_layer(block, 256, layers[3], stride=1)
self.layer5 = self._make_layer(block, 512, layers[4], stride=1)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.layer5(x)
return x
def resnet45():
return ResNet(BasicBlock, [3, 4, 6, 6, 3])
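Only layer1 and layer3 downsample (stride 2); the stem and remaining layers are stride 1, so the feature map is 1/4 of the input in each dimension. For the usual 32x128 STR crop that gives an 8x32 map, which matches `n_feature=8 * 32` in BaseVision's Attention branch. Quick shape arithmetic (hypothetical helper):

```python
import math

def resnet45_feature_hw(h, w, layer_strides=(2, 1, 2, 1, 1)):
    # stem conv is stride 1; each layer's first block downsamples by its stride
    # (3x3 conv, padding 1, stride s => ceil division)
    for s in layer_strides:
        h = math.ceil(h / s)
        w = math.ceil(w / s)
    return h, w
```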
================================================
FILE: src/parseq/strhub/models/abinet/system.py
================================================
# Scene Text Recognition Model Hub
# Copyright 2022 Darwin Bautista
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import math
from typing import Any, Tuple, List, Optional
import torch
import torch.nn.functional as F
from torch import Tensor, nn
from torch.optim import AdamW
from torch.optim.lr_scheduler import OneCycleLR
from pytorch_lightning.utilities.types import STEP_OUTPUT
from timm.optim.optim_factory import param_groups_weight_decay
from strhub.models.base import CrossEntropySystem
from strhub.models.utils import init_weights
from .model_abinet_iter import ABINetIterModel as Model
log = logging.getLogger(__name__)
class ABINet(CrossEntropySystem):
def __init__(self, charset_train: str, charset_test: str, max_label_length: int,
batch_size: int, lr: float, warmup_pct: float, weight_decay: float,
iter_size: int, d_model: int, nhead: int, d_inner: int, dropout: float, activation: str,
v_loss_weight: float, v_attention: str, v_attention_mode: str, v_backbone: str, v_num_layers: int,
l_loss_weight: float, l_num_layers: int, l_detach: bool, l_use_self_attn: bool,
l_lr: float, a_loss_weight: float, lm_only: bool = False, **kwargs) -> None:
super().__init__(charset_train, charset_test, batch_size, lr, warmup_pct, weight_decay)
self.scheduler = None
self.save_hyperparameters()
self.max_label_length = max_label_length
        self.num_classes = len(self.tokenizer) - 2  # We don't predict <bos> nor <pad>
self.model = Model(max_label_length, self.eos_id, self.num_classes, iter_size, d_model, nhead, d_inner,
dropout, activation, v_loss_weight, v_attention, v_attention_mode, v_backbone, v_num_layers,
l_loss_weight, l_num_layers, l_detach, l_use_self_attn, a_loss_weight)
self.model.apply(init_weights)
# FIXME: doesn't support resumption from checkpoint yet
self._reset_alignment = True
self._reset_optimizers = True
self.l_lr = l_lr
self.lm_only = lm_only
# Train LM only. Freeze other submodels.
if lm_only:
self.l_lr = lr # for tuning
self.model.vision.requires_grad_(False)
self.model.alignment.requires_grad_(False)
@property
def _pretraining(self):
# In the original work, VM was pretrained for 8 epochs while full model was trained for an additional 10 epochs.
total_steps = self.trainer.estimated_stepping_batches * self.trainer.accumulate_grad_batches
return self.global_step < (8 / (8 + 10)) * total_steps
@torch.jit.ignore
def no_weight_decay(self):
return {'model.language.proj.weight'}
def _add_weight_decay(self, model: nn.Module, skip_list=()):
if self.weight_decay:
return param_groups_weight_decay(model, self.weight_decay, skip_list)
else:
return [{'params': model.parameters()}]
def configure_optimizers(self):
agb = self.trainer.accumulate_grad_batches
# Linear scaling so that the effective learning rate is constant regardless of the number of GPUs used with DDP.
lr_scale = agb * math.sqrt(self.trainer.num_devices) * self.batch_size / 256.
lr = lr_scale * self.lr
l_lr = lr_scale * self.l_lr
params = []
params.extend(self._add_weight_decay(self.model.vision))
params.extend(self._add_weight_decay(self.model.alignment))
# We use a different learning rate for the LM.
for p in self._add_weight_decay(self.model.language, ('proj.weight',)):
p['lr'] = l_lr
params.append(p)
max_lr = [p.get('lr', lr) for p in params]
optim = AdamW(params, lr)
self.scheduler = OneCycleLR(optim, max_lr, self.trainer.estimated_stepping_batches,
pct_start=self.warmup_pct, cycle_momentum=False)
return {'optimizer': optim, 'lr_scheduler': {'scheduler': self.scheduler, 'interval': 'step'}}
def forward(self, images: Tensor, max_length: Optional[int] = None) -> Tensor:
max_length = self.max_label_length if max_length is None else min(max_length, self.max_label_length)
logits = self.model.forward(images)[0]['logits']
return logits[:, :max_length + 1] # truncate
def calc_loss(self, targets, *res_lists) -> Tensor:
total_loss = 0
for res_list in res_lists:
loss = 0
if isinstance(res_list, dict):
res_list = [res_list]
for res in res_list:
logits = res['logits'].flatten(end_dim=1)
loss += F.cross_entropy(logits, targets.flatten(), ignore_index=self.pad_id)
loss /= len(res_list)
self.log('loss_' + res_list[0]['name'], loss)
total_loss += res_list[0]['loss_weight'] * loss
return total_loss
def on_train_batch_start(self, batch: Any, batch_idx: int) -> None:
if not self._pretraining and self._reset_optimizers:
log.info('Pretraining ends. Updating base LRs.')
self._reset_optimizers = False
# Make base_lr the same for all groups
base_lr = self.scheduler.base_lrs[0] # base_lr of group 0 - VM
self.scheduler.base_lrs = [base_lr] * len(self.scheduler.base_lrs)
def _prepare_inputs_and_targets(self, labels):
# Use dummy label to ensure sequence length is constant.
dummy = ['0' * self.max_label_length]
targets = self.tokenizer.encode(dummy + list(labels), self.device)[1:]
        targets = targets[:, 1:]  # remove <bos>. Unused here.
# Inputs are padded with eos_id
inputs = torch.where(targets == self.pad_id, self.eos_id, targets)
inputs = F.one_hot(inputs, self.num_classes).float()
lengths = torch.as_tensor(list(map(len, labels)), device=self.device) + 1 # +1 for eos
return inputs, lengths, targets
def training_step(self, batch, batch_idx) -> STEP_OUTPUT:
images, labels = batch
inputs, lengths, targets = self._prepare_inputs_and_targets(labels)
if self.lm_only:
l_res = self.model.language(inputs, lengths)
loss = self.calc_loss(targets, l_res)
# Pretrain submodels independently first
elif self._pretraining:
# Vision
v_res = self.model.vision(images)
# Language
l_res = self.model.language(inputs, lengths)
# We also train the alignment model to 'satisfy' DDP requirements (all parameters should be used).
# We'll reset its parameters prior to joint training.
a_res = self.model.alignment(l_res['feature'].detach(), v_res['feature'].detach())
loss = self.calc_loss(targets, v_res, l_res, a_res)
else:
# Reset alignment model's parameters once prior to full model training.
if self._reset_alignment:
log.info('Pretraining ends. Resetting alignment model.')
self._reset_alignment = False
self.model.alignment.apply(init_weights)
all_a_res, all_l_res, v_res = self.model.forward(images)
loss = self.calc_loss(targets, v_res, all_l_res, all_a_res)
self.log('loss', loss)
return loss
def forward_logits_loss(self, images: Tensor, labels: List[str]) -> Tuple[Tensor, Tensor, int]:
if self.lm_only:
inputs, lengths, targets = self._prepare_inputs_and_targets(labels)
l_res = self.model.language(inputs, lengths)
loss = self.calc_loss(targets, l_res)
loss_numel = (targets != self.pad_id).sum()
return l_res['logits'], loss, loss_numel
else:
return super().forward_logits_loss(images, labels)
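configure_optimizers scales the base LR linearly with the effective per-device batch (including gradient accumulation) and with the square root of the device count, normalized to a reference batch of 256. As a standalone calculation (hypothetical helper mirroring the `lr_scale` expression):

```python
import math

def scaled_lr(base_lr, batch_size, num_devices=1, accumulate_grad_batches=1):
    # mirrors: lr_scale = agb * sqrt(num_devices) * batch_size / 256.
    lr_scale = accumulate_grad_batches * math.sqrt(num_devices) * batch_size / 256.0
    return lr_scale * base_lr
```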
================================================
FILE: src/parseq/strhub/models/abinet/transformer.py
================================================
import math
import torch
import torch.nn.functional as F
from torch import nn
from torch.nn.modules.transformer import _get_activation_fn
class TransformerDecoderLayer(nn.Module):
r"""TransformerDecoderLayer is made up of self-attn, multi-head-attn and feedforward network.
This standard decoder layer is based on the paper "Attention Is All You Need".
Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez,
Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Advances in
Neural Information Processing Systems, pages 6000-6010. Users may modify or implement
in a different way during application.
Args:
d_model: the number of expected features in the input (required).
nhead: the number of heads in the multiheadattention models (required).
dim_feedforward: the dimension of the feedforward network model (default=2048).
dropout: the dropout value (default=0.1).
activation: the activation function of intermediate layer, relu or gelu (default=relu).
Examples::
        >>> decoder_layer = TransformerDecoderLayer(d_model=512, nhead=8)
>>> memory = torch.rand(10, 32, 512)
>>> tgt = torch.rand(20, 32, 512)
>>> out = decoder_layer(tgt, memory)
"""
def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1,
activation="relu", self_attn=True, siamese=False, debug=False):
super().__init__()
self.has_self_attn, self.siamese = self_attn, siamese
self.debug = debug
if self.has_self_attn:
self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
self.norm1 = nn.LayerNorm(d_model)
self.dropout1 = nn.Dropout(dropout)
self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
# Implementation of Feedforward model
self.linear1 = nn.Linear(d_model, dim_feedforward)
self.dropout = nn.Dropout(dropout)
self.linear2 = nn.Linear(dim_feedforward, d_model)
self.norm2 = nn.LayerNorm(d_model)
self.norm3 = nn.LayerNorm(d_model)
self.dropout2 = nn.Dropout(dropout)
self.dropout3 = nn.Dropout(dropout)
if self.siamese:
self.multihead_attn2 = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
self.activation = _get_activation_fn(activation)
def __setstate__(self, state):
if 'activation' not in state:
state['activation'] = F.relu
super().__setstate__(state)
def forward(self, tgt, memory, tgt_mask=None, memory_mask=None,
tgt_key_padding_mask=None, memory_key_padding_mask=None,
memory2=None, memory_mask2=None, memory_key_padding_mask2=None):
# type: (Tensor, Tensor, Optional[Tensor], Optional[Tensor], Optional[Tensor], Optional[Tensor]) -> Tensor
r"""Pass the inputs (and mask) through the decoder layer.
Args:
tgt: the sequence to the decoder layer (required).
memory: the sequence from the last layer of the encoder (required).
tgt_mask: the mask for the tgt sequence (optional).
memory_mask: the mask for the memory sequence (optional).
tgt_key_padding_mask: the mask for the tgt keys per batch (optional).
memory_key_padding_mask: the mask for the memory keys per batch (optional).
Shape:
see the docs in Transformer class.
"""
if self.has_self_attn:
tgt2, attn = self.self_attn(tgt, tgt, tgt, attn_mask=tgt_mask,
key_padding_mask=tgt_key_padding_mask)
tgt = tgt + self.dropout1(tgt2)
tgt = self.norm1(tgt)
if self.debug: self.attn = attn
tgt2, attn2 = self.multihead_attn(tgt, memory, memory, attn_mask=memory_mask,
key_padding_mask=memory_key_padding_mask)
if self.debug: self.attn2 = attn2
if self.siamese:
tgt3, attn3 = self.multihead_attn2(tgt, memory2, memory2, attn_mask=memory_mask2,
key_padding_mask=memory_key_padding_mask2)
tgt = tgt + self.dropout2(tgt3)
if self.debug: self.attn3 = attn3
tgt = tgt + self.dropout2(tgt2)
tgt = self.norm2(tgt)
tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
tgt = tgt + self.dropout3(tgt2)
tgt = self.norm3(tgt)
return tgt
class PositionalEncoding(nn.Module):
r"""Inject some information about the relative or absolute position of the tokens
in the sequence. The positional encodings have the same dimension as
the embeddings, so that the two can be summed. Here, we use sine and cosine
functions of different frequencies.
.. math::
\text{PosEncoder}(pos, 2i) = sin(pos/10000^(2i/d_model))
\text{PosEncoder}(pos, 2i+1) = cos(pos/10000^(2i/d_model))
        \text{where pos is the word position and i is the embed idx}
Args:
d_model: the embed dim (required).
dropout: the dropout value (default=0.1).
max_len: the max. length of the incoming sequence (default=5000).
Examples:
>>> pos_encoder = PositionalEncoding(d_model)
"""
def __init__(self, d_model, dropout=0.1, max_len=5000):
super().__init__()
self.dropout = nn.Dropout(p=dropout)
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
pe = pe.unsqueeze(0).transpose(0, 1)
self.register_buffer('pe', pe)
def forward(self, x):
r"""Inputs of forward function
Args:
x: the sequence fed to the positional encoder model (required).
Shape:
x: [sequence length, batch size, embed dim]
output: [sequence length, batch size, embed dim]
Examples:
>>> output = pos_encoder(x)
"""
x = x + self.pe[:x.size(0), :]
return self.dropout(x)
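The encoding pairs sin/cos at geometrically spaced frequencies. A per-position sketch computing one row of the `pe` buffer (hypothetical helper):

```python
import math

def positional_encoding(pos, d_model):
    # even indices get sin, odd indices get cos, at frequency 1/10000^(i/d_model)
    pe = [0.0] * d_model
    for i in range(0, d_model, 2):
        angle = pos / (10000 ** (i / d_model))
        pe[i] = math.sin(angle)
        if i + 1 < d_model:
            pe[i + 1] = math.cos(angle)
    return pe
```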
================================================
FILE: src/parseq/strhub/models/base.py
================================================
# Scene Text Recognition Model Hub
# Copyright 2022 Darwin Bautista
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Optional, Tuple, List
from nltk import edit_distance
import pytorch_lightning as pl
import torch
import torch.nn.functional as F
from pytorch_lightning.utilities.types import STEP_OUTPUT
from timm.optim import create_optimizer_v2
from torch import Tensor
from torch.optim import Optimizer
from torch.optim.lr_scheduler import OneCycleLR
from strhub.data.utils import CharsetAdapter, CTCTokenizer, Tokenizer, BaseTokenizer
@dataclass
class BatchResult:
num_samples: int
correct: int
ned: float
confidence: float
label_length: int
loss: Tensor
loss_numel: int
class BaseSystem(pl.LightningModule, ABC):
def __init__(self, tokenizer: BaseTokenizer, charset_test: str,
batch_size: int, lr: float, warmup_pct: float, weight_decay: float) -> None:
super().__init__()
self.tokenizer = tokenizer
self.charset_adapter = CharsetAdapter(charset_test)
self.batch_size = batch_size
self.lr = lr
self.warmup_pct = warmup_pct
self.weight_decay = weight_decay
@abstractmethod
def forward(self, images: Tensor, max_length: Optional[int] = None) -> Tensor:
"""Inference
Args:
images: Batch of images. Shape: N, Ch, H, W
max_length: Max sequence length of the output. If None, will use default.
Returns:
logits: N, L, C (L = sequence length, C = number of classes, typically len(charset_train) + num specials)
"""
raise NotImplementedError
@abstractmethod
def forward_logits_loss(self, images: Tensor, labels: List[str]) -> Tuple[Tensor, Tensor, int]:
"""Like forward(), but also computes the loss (calls forward() internally).
Args:
images: Batch of images. Shape: N, Ch, H, W
labels: Text labels of the images
Returns:
logits: N, L, C (L = sequence length, C = number of classes, typically len(charset_train) + num specials)
loss: mean loss for the batch
loss_numel: number of elements the loss was calculated from
"""
raise NotImplementedError
def configure_optimizers(self):
agb = self.trainer.accumulate_grad_batches
# Linear scaling so that the effective learning rate is constant regardless of the number of GPUs used with DDP.
lr_scale = agb * math.sqrt(self.trainer.num_devices) * self.batch_size / 256.
lr = lr_scale * self.lr
optim = create_optimizer_v2(self, 'adamw', lr, self.weight_decay)
sched = OneCycleLR(optim, lr, self.trainer.estimated_stepping_batches, pct_start=self.warmup_pct,
cycle_momentum=False)
return {'optimizer': optim, 'lr_scheduler': {'scheduler': sched, 'interval': 'step'}}
def optimizer_zero_grad(self, epoch: int, batch_idx: int, optimizer: Optimizer, optimizer_idx: int):
optimizer.zero_grad(set_to_none=True)
def _eval_step(self, batch, validation: bool) -> Optional[STEP_OUTPUT]:
images, labels = batch
correct = 0
total = 0
ned = 0
confidence = 0
label_length = 0
if validation:
logits, loss, loss_numel = self.forward_logits_loss(images, labels)
else:
# At test-time, we shouldn't specify a max_label_length because the test-time charset used
# might be different from the train-time charset. max_label_length in eval_logits_loss() is computed
# based on the transformed label, which could be wrong if the actual gt label contains characters existing
# in the train-time charset but not in the test-time charset. For example, "aishahaleyes.blogspot.com"
# is exactly 25 characters, but if processed by CharsetAdapter for the 36-char set, it becomes 23 characters
# long only, which sets max_label_length = 23. This will cause the model prediction to be truncated.
logits = self.forward(images)
loss = loss_numel = None # Only used for validation; not needed at test-time.
probs = logits.softmax(-1)
preds, probs = self.tokenizer.decode(probs)
for pred, prob, gt in zip(preds, probs, labels):
confidence += prob.prod().item()
pred = self.charset_adapter(pred)
# Follow ICDAR 2019 definition of N.E.D.
ned += edit_distance(pred, gt) / max(len(pred), len(gt))
if pred == gt:
correct += 1
total += 1
label_length += len(pred)
return dict(output=BatchResult(total, correct, ned, confidence, label_length, loss, loss_numel))
@staticmethod
def _aggregate_results(outputs) -> Tuple[float, float, float]:
if not outputs:
return 0., 0., 0.
total_loss = 0
total_loss_numel = 0
total_n_correct = 0
total_norm_ED = 0
total_size = 0
for result in outputs:
result = result['output']
total_loss += result.loss_numel * result.loss
total_loss_numel += result.loss_numel
total_n_correct += result.correct
total_norm_ED += result.ned
total_size += result.num_samples
acc = total_n_correct / total_size
ned = (1 - total_norm_ED / total_size)
loss = total_loss / total_loss_numel
return acc, ned, loss
def validation_step(self, batch, batch_idx) -> Optional[STEP_OUTPUT]:
return self._eval_step(batch, True)
def validation_epoch_end(self, outputs) -> None:
acc, ned, loss = self._aggregate_results(outputs)
self.log('val_accuracy', 100 * acc, sync_dist=True)
self.log('val_NED', 100 * ned, sync_dist=True)
self.log('val_loss', loss, sync_dist=True)
self.log('hp_metric', acc, sync_dist=True)
def test_step(self, batch, batch_idx) -> Optional[STEP_OUTPUT]:
return self._eval_step(batch, False)
class CrossEntropySystem(BaseSystem):
def __init__(self, charset_train: str, charset_test: str,
batch_size: int, lr: float, warmup_pct: float, weight_decay: float) -> None:
tokenizer = Tokenizer(charset_train)
super().__init__(tokenizer, charset_test, batch_size, lr, warmup_pct, weight_decay)
self.bos_id = tokenizer.bos_id
self.eos_id = tokenizer.eos_id
self.pad_id = tokenizer.pad_id
def forward_logits_loss(self, images: Tensor, labels: List[str]) -> Tuple[Tensor, Tensor, int]:
targets = self.tokenizer.encode(labels, self.device)
        targets = targets[:, 1:]  # Discard <bos>
        max_len = targets.shape[1] - 1  # exclude <eos> from count
logits = self.forward(images, max_len)
loss = F.cross_entropy(logits.flatten(end_dim=1), targets.flatten(), ignore_index=self.pad_id)
loss_numel = (targets != self.pad_id).sum()
return logits, loss, loss_numel
class CTCSystem(BaseSystem):
def __init__(self, charset_train: str, charset_test: str,
batch_size: int, lr: float, warmup_pct: float, weight_decay: float) -> None:
tokenizer = CTCTokenizer(charset_train)
super().__init__(tokenizer, charset_test, batch_size, lr, warmup_pct, weight_decay)
self.blank_id = tokenizer.blank_id
def forward_logits_loss(self, images: Tensor, labels: List[str]) -> Tuple[Tensor, Tensor, int]:
targets = self.tokenizer.encode(labels, self.device)
logits = self.forward(images)
log_probs = logits.log_softmax(-1).transpose(0, 1) # swap batch and seq. dims
T, N, _ = log_probs.shape
input_lengths = torch.full(size=(N,), fill_value=T, dtype=torch.long, device=self.device)
target_lengths = torch.as_tensor(list(map(len, labels)), dtype=torch.long, device=self.device)
loss = F.ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=self.blank_id, zero_infinity=True)
return logits, loss, N
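_eval_step accumulates the ICDAR 2019-style normalized edit distance alongside exact-match accuracy. A self-contained sketch with a plain Levenshtein implementation (the repo itself uses `nltk.edit_distance`; function names here are hypothetical):

```python
def edit_distance(a, b):
    # classic dynamic-programming Levenshtein distance
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        cur = [i]
        for j, cb in enumerate(b, 1):
            cur.append(min(prev[j] + 1,                  # deletion
                           cur[j - 1] + 1,               # insertion
                           prev[j - 1] + (ca != cb)))    # substitution
        prev = cur
    return prev[-1]

def batch_metrics(pred_gt_pairs):
    # returns (accuracy, 1 - mean normalized edit distance)
    correct = sum(p == g for p, g in pred_gt_pairs)
    ned = sum(edit_distance(p, g) / max(len(p), len(g)) for p, g in pred_gt_pairs)
    n = len(pred_gt_pairs)
    return correct / n, 1.0 - ned / n
```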
================================================
FILE: src/parseq/strhub/models/crnn/LICENSE
================================================
The MIT License (MIT)
Copyright (c) 2017 Jieru Mei
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: src/parseq/strhub/models/crnn/__init__.py
================================================
r"""
Shi, Baoguang, Xiang Bai, and Cong Yao.
"An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition."
IEEE transactions on pattern analysis and machine intelligence 39, no. 11 (2016): 2298-2304.
https://arxiv.org/abs/1507.05717
All source files, except `system.py`, are based on the implementation listed below,
and hence are released under the license of the original.
Source: https://github.com/meijieru/crnn.pytorch
License: MIT License (see included LICENSE file)
"""
================================================
FILE: src/parseq/strhub/models/crnn/model.py
================================================
import torch.nn as nn
from strhub.models.modules import BidirectionalLSTM
class CRNN(nn.Module):
def __init__(self, img_h, nc, nclass, nh, leaky_relu=False):
super().__init__()
assert img_h % 16 == 0, 'img_h has to be a multiple of 16'
ks = [3, 3, 3, 3, 3, 3, 2]
ps = [1, 1, 1, 1, 1, 1, 0]
ss = [1, 1, 1, 1, 1, 1, 1]
nm = [64, 128, 256, 256, 512, 512, 512]
cnn = nn.Sequential()
def convRelu(i, batchNormalization=False):
nIn = nc if i == 0 else nm[i - 1]
nOut = nm[i]
cnn.add_module('conv{0}'.format(i),
nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i], bias=not batchNormalization))
if batchNormalization:
cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut))
if leaky_relu:
cnn.add_module('relu{0}'.format(i),
nn.LeakyReLU(0.2, inplace=True))
else:
cnn.add_module('relu{0}'.format(i), nn.ReLU(True))
convRelu(0)
cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2)) # 64x16x64
convRelu(1)
cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2)) # 128x8x32
convRelu(2, True)
convRelu(3)
cnn.add_module('pooling{0}'.format(2),
nn.MaxPool2d((2, 2), (2, 1), (0, 1))) # 256x4x16
convRelu(4, True)
convRelu(5)
cnn.add_module('pooling{0}'.format(3),
nn.MaxPool2d((2, 2), (2, 1), (0, 1))) # 512x2x16
convRelu(6, True) # 512x1x16
self.cnn = cnn
self.rnn = nn.Sequential(
BidirectionalLSTM(512, nh, nh),
BidirectionalLSTM(nh, nh, nclass))
def forward(self, input):
# conv features
conv = self.cnn(input)
b, c, h, w = conv.size()
assert h == 1, 'the height of conv must be 1'
conv = conv.squeeze(2)
conv = conv.transpose(1, 2) # [b, w, c]
# rnn features
output = self.rnn(conv)
return output
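The pooling comments trace the height collapse 32 -> 16 -> 8 -> 4 -> 2 -> 1 (hence the `h == 1` assertion), while the last two pools keep stride 1 in width. Shape arithmetic for the resulting sequence length, derived from the layer parameters above (hypothetical helper):

```python
def crnn_seq_len(img_w):
    # pool1 and pool2: 2x2, stride 2 -> halve the width twice
    w = img_w // 2
    w = w // 2
    # pool3 and pool4: kernel (2,2), stride (2,1), padding (0,1) -> width + 1 each
    w = w + 1
    w = w + 1
    # final conv: kernel 2, stride 1, no padding -> width - 1
    return w - 1
```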
================================================
FILE: src/parseq/strhub/models/crnn/system.py
================================================
# Scene Text Recognition Model Hub
# Copyright 2022 Darwin Bautista
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Sequence, Optional
from pytorch_lightning.utilities.types import STEP_OUTPUT
from torch import Tensor
from strhub.models.base import CTCSystem
from strhub.models.utils import init_weights
from .model import CRNN as Model
class CRNN(CTCSystem):
def __init__(self, charset_train: str, charset_test: str, max_label_length: int,
batch_size: int, lr: float, warmup_pct: float, weight_decay: float,
img_size: Sequence[int], hidden_size: int, leaky_relu: bool, **kwargs) -> None:
super().__init__(charset_train, charset_test, batch_size, lr, warmup_pct, weight_decay)
self.save_hyperparameters()
self.model = Model(img_size[0], 3, len(self.tokenizer), hidden_size, leaky_relu)
self.model.apply(init_weights)
def forward(self, images: Tensor, max_length: Optional[int] = None) -> Tensor:
return self.model.forward(images)
def training_step(self, batch, batch_idx) -> STEP_OUTPUT:
images, labels = batch
loss = self.forward_logits_loss(images, labels)[1]
self.log('loss', loss)
return loss
================================================
FILE: src/parseq/strhub/models/modules.py
================================================
r"""Shared modules used by CRNN and TRBA"""
from torch import nn
class BidirectionalLSTM(nn.Module):
"""Ref: https://github.com/clovaai/deep-text-recognition-benchmark/blob/master/modules/sequence_modeling.py"""
def __init__(self, input_size, hidden_size, output_size):
super().__init__()
self.rnn = nn.LSTM(input_size, hidden_size, bidirectional=True, batch_first=True)
self.linear = nn.Linear(hidden_size * 2, output_size)
def forward(self, input):
"""
input : visual feature [batch_size x T x input_size], T = num_steps.
output : contextual feature [batch_size x T x output_size]
"""
recurrent, _ = self.rnn(input) # batch_size x T x input_size -> batch_size x T x (2*hidden_size)
output = self.linear(recurrent) # batch_size x T x output_size
return output
================================================
FILE: src/parseq/strhub/models/parseq/__init__.py
================================================
================================================
FILE: src/parseq/strhub/models/parseq/modules.py
================================================
# Scene Text Recognition Model Hub
# Copyright 2022 Darwin Bautista
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
from typing import Optional
import torch
from torch import nn as nn, Tensor
from torch.nn import functional as F
from torch.nn.modules import transformer
from timm.models.vision_transformer import VisionTransformer, PatchEmbed
class DecoderLayer(nn.Module):
"""A Transformer decoder layer supporting two-stream attention (XLNet)
This implements a pre-LN decoder, as opposed to the post-LN default in PyTorch."""
def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation='gelu',
layer_norm_eps=1e-5):
super().__init__()
self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=True)
self.cross_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=True)
# Implementation of Feedforward model
self.linear1 = nn.Linear(d_model, dim_feedforward)
self.dropout = nn.Dropout(dropout)
self.linear2 = nn.Linear(dim_feedforward, d_model)
self.norm1 = nn.LayerNorm(d_model, eps=layer_norm_eps)
self.norm2 = nn.LayerNorm(d_model, eps=layer_norm_eps)
self.norm_q = nn.LayerNorm(d_model, eps=layer_norm_eps)
self.norm_c = nn.LayerNorm(d_model, eps=layer_norm_eps)
self.dropout1 = nn.Dropout(dropout)
self.dropout2 = nn.Dropout(dropout)
self.dropout3 = nn.Dropout(dropout)
self.activation = transformer._get_activation_fn(activation)
def __setstate__(self, state):
if 'activation' not in state:
state['activation'] = F.gelu
super().__setstate__(state)
def forward_stream(self, tgt: Tensor, tgt_norm: Tensor, tgt_kv: Tensor, memory: Tensor, tgt_mask: Optional[Tensor],
tgt_key_padding_mask: Optional[Tensor]):
"""Forward pass for a single stream (i.e. content or query)
tgt_norm is just a LayerNorm'd tgt. Added as a separate parameter for efficiency.
Both tgt_kv and memory are expected to be LayerNorm'd too.
memory is LayerNorm'd by ViT.
"""
tgt2, sa_weights = self.self_attn(tgt_norm, tgt_kv, tgt_kv, attn_mask=tgt_mask,
key_padding_mask=tgt_key_padding_mask)
tgt = tgt + self.dropout1(tgt2)
tgt2, ca_weights = self.cross_attn(self.norm1(tgt), memory, memory)
tgt = tgt + self.dropout2(tgt2)
tgt2 = self.linear2(self.dropout(self.activation(self.linear1(self.norm2(tgt)))))
tgt = tgt + self.dropout3(tgt2)
return tgt, sa_weights, ca_weights
def forward(self, query, content, memory, query_mask: Optional[Tensor] = None, content_mask: Optional[Tensor] = None,
content_key_padding_mask: Optional[Tensor] = None, update_content: bool = True):
query_norm = self.norm_q(query)
content_norm = self.norm_c(content)
query = self.forward_stream(query, query_norm, content_norm, memory, query_mask, content_key_padding_mask)[0]
if update_content:
content = self.forward_stream(content, content_norm, content_norm, memory, content_mask,
content_key_padding_mask)[0]
return query, content
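The docstring above notes this is a pre-LN decoder, unlike PyTorch's post-LN default. The difference is only where normalization sits relative to the residual connection. A minimal scalar sketch (toy `toy_norm` and `toy_sublayer` functions stand in for LayerNorm and attention/FFN; they are illustrative, not the real modules):

```python
def toy_norm(x: float) -> float:
    return x / 2.0  # stand-in for LayerNorm

def toy_sublayer(x: float) -> float:
    return x + 1.0  # stand-in for attention / feed-forward

def pre_ln(x: float) -> float:
    # Normalize the input first, then add the residual (as in DecoderLayer above:
    # tgt = tgt + dropout(sublayer(norm(tgt)))).
    return x + toy_sublayer(toy_norm(x))

def post_ln(x: float) -> float:
    # Add the residual first, then normalize (PyTorch nn.TransformerDecoderLayer default).
    return toy_norm(x + toy_sublayer(x))

print(pre_ln(4.0), post_ln(4.0))  # 7.0 vs 4.5
```

Pre-LN keeps an unnormalized residual path from input to output, which generally stabilizes training of deep Transformers without warmup-sensitive tuning.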
class Decoder(nn.Module):
__constants__ = ['norm']
def __init__(self, decoder_layer, num_layers, norm):
super().__init__()
self.layers = transformer._get_clones(decoder_layer, num_layers)
self.num_layers = num_layers
self.norm = norm
def forward(self, query, content, memory, query_mask: Optional[Tensor] = None, content_mask: Optional[Tensor] = None,
content_key_padding_mask: Optional[Tensor] = None):
for i, mod in enumerate(self.layers):
last = i == len(self.layers) - 1
query, content = mod(query, content, memory, query_mask, content_mask, content_key_padding_mask,
update_content=not last)
query = self.norm(query)
return query
class Encoder(VisionTransformer):
def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4.,
qkv_bias=True, drop_rate=0., attn_drop_rate=0., drop_path_rate=0., embed_layer=PatchEmbed):
super().__init__(img_size, patch_size, in_chans, embed_dim=embed_dim, depth=depth, num_heads=num_heads,
mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, drop_rate=drop_rate, attn_drop_rate=attn_drop_rate,
drop_path_rate=drop_path_rate, embed_layer=embed_layer,
num_classes=0, global_pool='', class_token=False) # these disable the classifier head
def forward(self, x):
# Return all tokens
return self.forward_features(x)
class TokenEmbedding(nn.Module):
def __init__(self, charset_size: int, embed_dim: int):
super().__init__()
self.embedding = nn.Embedding(charset_size, embed_dim)
self.embed_dim = embed_dim
def forward(self, tokens: torch.Tensor):
return math.sqrt(self.embed_dim) * self.embedding(tokens)
================================================
FILE: src/parseq/strhub/models/parseq/system.py
================================================
# Scene Text Recognition Model Hub
# Copyright 2022 Darwin Bautista
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
from functools import partial
from itertools import permutations
from typing import Sequence, Any, Optional
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from pytorch_lightning.utilities.types import STEP_OUTPUT
from timm.models.helpers import named_apply
from strhub.models.base import CrossEntropySystem
from strhub.models.utils import init_weights
from .modules import DecoderLayer, Decoder, Encoder, TokenEmbedding
class PARSeq(CrossEntropySystem):
def __init__(self, charset_train: str, charset_test: str, max_label_length: int,
batch_size: int, lr: float, warmup_pct: float, weight_decay: float,
img_size: Sequence[int], patch_size: Sequence[int], embed_dim: int,
enc_num_heads: int, enc_mlp_ratio: int, enc_depth: int,
dec_num_heads: int, dec_mlp_ratio: int, dec_depth: int,
perm_num: int, perm_forward: bool, perm_mirrored: bool,
decode_ar: bool, refine_iters: int, dropout: float, **kwargs: Any) -> None:
super().__init__(charset_train, charset_test, batch_size, lr, warmup_pct, weight_decay)
self.save_hyperparameters()
self.max_label_length = max_label_length
self.decode_ar = decode_ar
self.refine_iters = refine_iters
self.encoder = Encoder(img_size, patch_size, embed_dim=embed_dim, depth=enc_depth, num_heads=enc_num_heads,
mlp_ratio=enc_mlp_ratio)
decoder_layer = DecoderLayer(embed_dim, dec_num_heads, embed_dim * dec_mlp_ratio, dropout)
self.decoder = Decoder(decoder_layer, num_layers=dec_depth, norm=nn.LayerNorm(embed_dim))
# Perm/attn mask stuff
self.rng = np.random.default_rng()
self.max_gen_perms = perm_num // 2 if perm_mirrored else perm_num
self.perm_forward = perm_forward
self.perm_mirrored = perm_mirrored
# We don't predict [BOS] nor [PAD]
self.head = nn.Linear(embed_dim, len(self.tokenizer) - 2)
self.text_embed = TokenEmbedding(len(self.tokenizer), embed_dim)
# +1 for [EOS]
self.pos_queries = nn.Parameter(torch.Tensor(1, max_label_length + 1, embed_dim))
self.dropout = nn.Dropout(p=dropout)
# Encoder has its own init.
named_apply(partial(init_weights, exclude=['encoder']), self)
nn.init.trunc_normal_(self.pos_queries, std=.02)
@torch.jit.ignore
def no_weight_decay(self):
param_names = {'text_embed.embedding.weight', 'pos_queries'}
enc_param_names = {'encoder.' + n for n in self.encoder.no_weight_decay()}
return param_names.union(enc_param_names)
def encode(self, img: torch.Tensor):
return self.encoder(img)
def decode(self, tgt: torch.Tensor, memory: torch.Tensor, tgt_mask: Optional[Tensor] = None,
tgt_padding_mask: Optional[Tensor] = None, tgt_query: Optional[Tensor] = None,
tgt_query_mask: Optional[Tensor] = None):
N, L = tgt.shape
# [BOS] stands for the null context. We only supply position information for characters after [BOS].
null_ctx = self.text_embed(tgt[:, :1])
tgt_emb = self.pos_queries[:, :L - 1] + self.text_embed(tgt[:, 1:])
tgt_emb = self.dropout(torch.cat([null_ctx, tgt_emb], dim=1))
if tgt_query is None:
tgt_query = self.pos_queries[:, :L].expand(N, -1, -1)
tgt_query = self.dropout(tgt_query)
return self.decoder(tgt_query, tgt_emb, memory, tgt_query_mask, tgt_mask, tgt_padding_mask)
def forward(self, images: Tensor, max_length: Optional[int] = None) -> Tensor:
testing = max_length is None
max_length = self.max_label_length if max_length is None else min(max_length, self.max_label_length)
bs = images.shape[0]
# +1 for [EOS] at end of sequence.
num_steps = max_length + 1
memory = self.encode(images)
# Query positions up to `num_steps`
pos_queries = self.pos_queries[:, :num_steps].expand(bs, -1, -1)
# Special case for the forward permutation. Faster than using `generate_attn_masks()`
tgt_mask = query_mask = torch.triu(torch.full((num_steps, num_steps), float('-inf'), device=self._device), 1)
if self.decode_ar:
tgt_in = torch.full((bs, num_steps), self.pad_id, dtype=torch.long, device=self._device)
tgt_in[:, 0] = self.bos_id
logits = []
for i in range(num_steps):
j = i + 1 # next token index
# Efficient decoding:
# Input the context up to the ith token. We use only one query (at position = i) at a time.
# This works because of the lookahead masking effect of the canonical (forward) AR context.
# Past tokens have no access to future tokens, hence are fixed once computed.
tgt_out = self.decode(tgt_in[:, :j], memory, tgt_mask[:j, :j], tgt_query=pos_queries[:, i:j],
tgt_query_mask=query_mask[i:j, :j])
# the next token probability is in the output's ith token position
p_i = self.head(tgt_out)
logits.append(p_i)
if j < num_steps:
# greedy decode. add the next token index to the target input
tgt_in[:, j] = p_i.squeeze().argmax(-1)
# Efficient batch decoding: If all output words have at least one EOS token, end decoding.
if testing and (tgt_in == self.eos_id).any(dim=-1).all():
break
logits = torch.cat(logits, dim=1)
else:
# No prior context, so input is just [BOS]. We query all positions.
tgt_in = torch.full((bs, 1), self.bos_id, dtype=torch.long, device=self._device)
tgt_out = self.decode(tgt_in, memory, tgt_query=pos_queries)
logits = self.head(tgt_out)
if self.refine_iters:
# For iterative refinement, we always use a 'cloze' mask.
# We can derive it from the AR forward mask by unmasking the token context to the right.
query_mask[torch.triu(torch.ones(num_steps, num_steps, dtype=torch.bool, device=self._device), 2)] = 0
bos = torch.full((bs, 1), self.bos_id, dtype=torch.long, device=self._device)
for i in range(self.refine_iters):
# Prior context is the previous output.
tgt_in = torch.cat([bos, logits[:, :-1].argmax(-1)], dim=1)
tgt_padding_mask = ((tgt_in == self.eos_id).int().cumsum(-1) > 0) # mask tokens beyond the first EOS token.
tgt_out = self.decode(tgt_in, memory, tgt_mask, tgt_padding_mask,
tgt_query=pos_queries, tgt_query_mask=query_mask[:, :tgt_in.shape[1]])
logits = self.head(tgt_out)
return logits
def gen_tgt_perms(self, tgt):
"""Generate shared permutations for the whole batch.
This works because the same attention mask can be used for the shorter sequences
because of the padding mask.
"""
# We don't permute the position of BOS, we permute EOS separately
max_num_chars = tgt.shape[1] - 2
# Special handling for 1-character sequences
if max_num_chars == 1:
return torch.arange(3, device=self._device).unsqueeze(0)
perms = [torch.arange(max_num_chars, device=self._device)] if self.perm_forward else []
# Additional permutations if needed
max_perms = math.factorial(max_num_chars)
if self.perm_mirrored:
max_perms //= 2
num_gen_perms = min(self.max_gen_perms, max_perms)
# For 4-char sequences and shorter, we generate all permutations and sample from the pool to avoid collisions
# Note that this code path might NEVER get executed since the labels in a mini-batch typically exceed 4 chars.
if max_num_chars < 5:
# Pool of permutations to sample from. We only need the first half (if complementary option is selected)
# Special handling for max_num_chars == 4 which correctly divides the pool into the flipped halves
if max_num_chars == 4 and self.perm_mirrored:
selector = [0, 3, 4, 6, 9, 10, 12, 16, 17, 18, 19, 21]
else:
selector = list(range(max_perms))
perm_pool = torch.as_tensor(list(permutations(range(max_num_chars), max_num_chars)), device=self._device)[selector]
# If the forward permutation is always selected, no need to add it to the pool for sampling
if self.perm_forward:
perm_pool = perm_pool[1:]
perms = torch.stack(perms)
if len(perm_pool):
i = self.rng.choice(len(perm_pool), size=num_gen_perms - len(perms), replace=False)
perms = torch.cat([perms, perm_pool[i]])
else:
perms.extend([torch.randperm(max_num_chars, device=self._device) for _ in range(num_gen_perms - len(perms))])
perms = torch.stack(perms)
if self.perm_mirrored:
# Add complementary pairs
comp = perms.flip(-1)
# Stack in such a way that the pairs are next to each other.
perms = torch.stack([perms, comp]).transpose(0, 1).reshape(-1, max_num_chars)
# NOTE:
# The only meaningful way of permuting the EOS position is by moving it one character position at a time.
# However, since the number of permutations = T! and number of EOS positions = T + 1, the number of possible EOS
# positions will always be much less than the number of permutations (unless a low perm_num is set).
# Thus, it would be simpler to just train EOS using the full and null contexts rather than trying to evenly
# distribute it across the chosen number of permutations.
# Add position indices of BOS and EOS
bos_idx = perms.new_zeros((len(perms), 1))
eos_idx = perms.new_full((len(perms), 1), max_num_chars + 1)
perms = torch.cat([bos_idx, perms + 1, eos_idx], dim=1)
# Special handling for the reverse direction. This does two things:
# 1. Reverse context for the characters
# 2. Null context for [EOS] (required for learning to predict [EOS] in NAR mode)
if len(perms) > 1:
perms[1, 1:] = max_num_chars + 1 - torch.arange(max_num_chars + 1, device=self._device)
return perms
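When `perm_mirrored` is set, `gen_tgt_perms` interleaves each sampled permutation with its reverse so complementary pairs sit next to each other. A pure-Python sketch of that pairing (plain lists instead of tensors; function and parameter names are illustrative):

```python
import random

def mirrored_perms(num_chars: int, num_perms: int, seed: int = 0) -> list:
    rng = random.Random(seed)
    # Forward permutation first (mirrors the perm_forward option), then random ones.
    perms = [list(range(num_chars))]
    while len(perms) < num_perms:
        p = list(range(num_chars))
        rng.shuffle(p)
        perms.append(p)
    # Interleave each permutation with its reverse, as
    # torch.stack([perms, comp]).transpose(0, 1).reshape(...) does above.
    paired = []
    for p in perms:
        paired.append(p)
        paired.append(p[::-1])
    return paired

perms = mirrored_perms(5, 3)
```

Training on a permutation and its mirror gives each character both a left-to-right and a right-to-left context for the same label at little extra sampling cost.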
def generate_attn_masks(self, perm):
"""Generate attention masks given a sequence permutation (includes pos. for bos and eos tokens)
:param perm: the permutation sequence. i = 0 is always the BOS
:return: lookahead attention masks
"""
sz = perm.shape[0]
mask = torch.zeros((sz, sz), device=self._device)
for i in range(sz):
query_idx = perm[i]
masked_keys = perm[i + 1:]
mask[query_idx, masked_keys] = float('-inf')
content_mask = mask[:-1, :-1].clone()
mask[torch.eye(sz, dtype=torch.bool, device=self._device)] = float('-inf') # mask "self"
query_mask = mask[1:, :-1]
return content_mask, query_mask
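The mask construction in `generate_attn_masks` can be replayed with plain lists: each position may attend only to positions that come *earlier* in the permutation, and the query mask additionally blocks "self". This sketch mirrors the tensor code above on a toy permutation:

```python
NEG_INF = float('-inf')

def attn_masks(perm: list) -> tuple:
    sz = len(perm)
    mask = [[0.0] * sz for _ in range(sz)]
    for i in range(sz):
        q = perm[i]
        for key in perm[i + 1:]:  # keys that come later in the permutation
            mask[q][key] = NEG_INF
    content = [row[:-1] for row in mask[:-1]]  # copied BEFORE masking the diagonal
    for j in range(sz):
        mask[j][j] = NEG_INF  # queries never attend to themselves
    query = [row[:-1] for row in mask[1:]]
    return content, query

# Canonical forward permutation over positions for [BOS] + 3 chars + [EOS]:
content, query = attn_masks([0, 1, 2, 3, 4])
```

For the forward permutation this reproduces the usual causal triangle; for an arbitrary permutation the same loop yields the matching permuted-language-model mask.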
def training_step(self, batch, batch_idx) -> STEP_OUTPUT:
images, labels = batch
tgt = self.tokenizer.encode(labels, self._device)
# Encode the source sequence (i.e. the image codes)
memory = self.encode(images)
# Prepare the target sequences (input and output)
tgt_perms = self.gen_tgt_perms(tgt)
tgt_in = tgt[:, :-1]
tgt_out = tgt[:, 1:]
# The [EOS] token is not depended upon by any other token in any permutation ordering
tgt_padding_mask = (tgt_in == self.pad_id) | (tgt_in == self.eos_id)
loss = 0
loss_numel = 0
n = (tgt_out != self.pad_id).sum().item()
for i, perm in enumerate(tgt_perms):
tgt_mask, query_mask = self.generate_attn_masks(perm)
out = self.decode(tgt_in, memory, tgt_mask, tgt_padding_mask, tgt_query_mask=query_mask)
logits = self.head(out).flatten(end_dim=1)
loss += n * F.cross_entropy(logits, tgt_out.flatten(), ignore_index=self.pad_id)
loss_numel += n
# After the second iteration (i.e. done with canonical and reverse orderings),
# remove the [EOS] tokens for the succeeding perms
if i == 1:
tgt_out = torch.where(tgt_out == self.eos_id, self.pad_id, tgt_out)
n = (tgt_out != self.pad_id).sum().item()
loss /= loss_numel
self.log('loss', loss)
return loss
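`training_step` averages the per-permutation cross-entropies weighted by `n`, the number of non-padding target tokens, which shrinks after the first two (canonical and reverse) orderings once [EOS] targets are converted to padding. The accumulation reduces to a plain weighted mean, sketched here with made-up loss values:

```python
def weighted_perm_loss(losses: list, counts: list) -> float:
    """Weighted mean matching: loss += n * ce; loss_numel += n; loss /= loss_numel."""
    total = sum(n * l for l, n in zip(losses, counts))
    numel = sum(counts)
    return total / numel

# E.g. two full-context perms over 12 target tokens, then two perms
# scored over only 10 tokens after the [EOS] targets are dropped:
loss = weighted_perm_loss([2.0, 2.2, 1.8, 1.6], [12, 12, 10, 10])
```

The weighting ensures every target token contributes equally to the gradient regardless of which permutation it was scored under.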
================================================
FILE: src/parseq/strhub/models/trba/__init__.py
================================================
r"""
Baek, Jeonghun, Geewook Kim, Junyeop Lee, Sungrae Park, Dongyoon Han, Sangdoo Yun, Seong Joon Oh, and Hwalsuk Lee.
"What is wrong with scene text recognition model comparisons? dataset and model analysis."
In Proceedings of the IEEE/CVF International Conference on Computer Vision, pp. 4715-4723. 2019.
https://arxiv.org/abs/1904.01906
All source files, except `system.py`, are based on the implementation listed below,
and hence are released under the license of the original.
Source: https://github.com/clovaai/deep-text-recognition-benchmark
License: Apache License 2.0 (see LICENSE file in project root)
"""
================================================
FILE: src/parseq/strhub/models/trba/feature_extraction.py
================================================
import torch.nn as nn
from torchvision.models.resnet import BasicBlock
class ResNet_FeatureExtractor(nn.Module):
""" FeatureExtractor of FAN (http://openaccess.thecvf.com/content_ICCV_2017/papers/Cheng_Focusing_Attention_Towards_ICCV_2017_paper.pdf) """
def __init__(self, input_channel, output_channel=512):
super().__init__()
self.ConvNet = ResNet(input_channel, output_channel, BasicBlock, [1, 2, 5, 3])
def forward(self, input):
return self.ConvNet(input)
class ResNet(nn.Module):
def __init__(self, input_channel, output_channel, block, layers):
super().__init__()
self.output_channel_block = [int(output_channel / 4), int(output_channel / 2), output_channel, output_channel]
self.inplanes = int(output_channel / 8)
self.conv0_1 = nn.Conv2d(input_channel, int(output_channel / 16),
kernel_size=3, stride=1, padding=1, bias=False)
self.bn0_1 = nn.BatchNorm2d(int(output_channel / 16))
self.conv0_2 = nn.Conv2d(int(output_channel / 16), self.inplanes,
kernel_size=3, stride=1, padding=1, bias=False)
self.bn0_2 = nn.BatchNorm2d(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
self.layer1 = self._make_layer(block, self.output_channel_block[0], layers[0])
self.conv1 = nn.Conv2d(self.output_channel_block[0], self.output_channel_block[
0], kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(self.output_channel_block[0])
self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
self.layer2 = self._make_layer(block, self.output_channel_block[1], layers[1], stride=1)
self.conv2 = nn.Conv2d(self.output_channel_block[1], self.output_channel_block[
1], kernel_size=3, stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(self.output_channel_block[1])
self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=(2, 1), padding=(0, 1))
self.layer3 = self._make_layer(block, self.output_channel_block[2], layers[2], stride=1)
self.conv3 = nn.Conv2d(self.output_channel_block[2], self.output_channel_block[
2], kernel_size=3, stride=1, padding=1, bias=False)
self.bn3 = nn.BatchNorm2d(self.output_channel_block[2])
self.layer4 = self._make_layer(block, self.output_channel_block[3], layers[3], stride=1)
self.conv4_1 = nn.Conv2d(self.output_channel_block[3], self.output_channel_block[
3], kernel_size=2, stride=(2, 1), padding=(0, 1), bias=False)
self.bn4_1 = nn.BatchNorm2d(self.output_channel_block[3])
self.conv4_2 = nn.Conv2d(self.output_channel_block[3], self.output_channel_block[
3], kernel_size=2, stride=1, padding=0, bias=False)
self.bn4_2 = nn.BatchNorm2d(self.output_channel_block[3])
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv0_1(x)
x = self.bn0_1(x)
x = self.relu(x)
x = self.conv0_2(x)
x = self.bn0_2(x)
x = self.relu(x)
x = self.maxpool1(x)
x = self.layer1(x)
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool2(x)
x = self.layer2(x)
x = self.conv2(x)
x = self.bn2(x)
x = self.relu(x)
x = self.maxpool3(x)
x = self.layer3(x)
x = self.conv3(x)
x = self.bn3(x)
x = self.relu(x)
x = self.layer4(x)
x = self.conv4_1(x)
x = self.bn4_1(x)
x = self.relu(x)
x = self.conv4_2(x)
x = self.bn4_2(x)
x = self.relu(x)
return x
================================================
FILE: src/parseq/strhub/models/trba/model.py
================================================
import torch.nn as nn
from strhub.models.modules import BidirectionalLSTM
from .feature_extraction import ResNet_FeatureExtractor
from .prediction import Attention
from .transformation import TPS_SpatialTransformerNetwork
class TRBA(nn.Module):
def __init__(self, img_h, img_w, num_class, num_fiducial=20, input_channel=3, output_channel=512, hidden_size=256,
use_ctc=False):
super().__init__()
""" Transformation """
self.Transformation = TPS_SpatialTransformerNetwork(
F=num_fiducial, I_size=(img_h, img_w), I_r_size=(img_h, img_w),
I_channel_num=input_channel)
""" FeatureExtraction """
self.FeatureExtraction = ResNet_FeatureExtractor(input_channel, output_channel)
self.FeatureExtraction_output = output_channel
self.AdaptiveAvgPool = nn.AdaptiveAvgPool2d((None, 1)) # Transform final (imgH/16-1) -> 1
""" Sequence modeling"""
self.SequenceModeling = nn.Sequential(
BidirectionalLSTM(self.FeatureExtraction_output, hidden_size, hidden_size),
BidirectionalLSTM(hidden_size, hidden_size, hidden_size))
self.SequenceModeling_output = hidden_size
""" Prediction """
if use_ctc:
self.Prediction = nn.Linear(self.SequenceModeling_output, num_class)
else:
self.Prediction = Attention(self.SequenceModeling_output, hidden_size, num_class)
def forward(self, image, max_label_length, text=None):
""" Transformation stage """
image = self.Transformation(image)
""" Feature extraction stage """
visual_feature = self.FeatureExtraction(image)
visual_feature = visual_feature.permute(0, 3, 1, 2) # [b, c, h, w] -> [b, w, c, h]
visual_feature = self.AdaptiveAvgPool(visual_feature) # [b, w, c, h] -> [b, w, c, 1]
visual_feature = visual_feature.squeeze(3) # [b, w, c, 1] -> [b, w, c]
""" Sequence modeling stage """
contextual_feature = self.SequenceModeling(visual_feature) # [b, num_steps, hidden_size]
""" Prediction stage """
if isinstance(self.Prediction, Attention):
prediction = self.Prediction(contextual_feature.contiguous(), text, max_label_length)
else:
prediction = self.Prediction(contextual_feature.contiguous()) # CTC
return prediction # [b, num_steps, num_class]
================================================
FILE: src/parseq/strhub/models/trba/prediction.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
class Attention(nn.Module):
def __init__(self, input_size, hidden_size, num_class, num_char_embeddings=256):
super().__init__()
self.attention_cell = AttentionCell(input_size, hidden_size, num_char_embeddings)
self.hidden_size = hidden_size
self.num_class = num_class
self.generator = nn.Linear(hidden_size, num_class)
self.char_embeddings = nn.Embedding(num_class, num_char_embeddings)
def forward(self, batch_H, text, max_label_length=25):
"""
input:
batch_H : contextual_feature H = hidden state of encoder. [batch_size x num_steps x input_size]
text : the text-index of each image. [batch_size x (max_length+1)]. +1 for [SOS] token. text[:, 0] = [SOS].
output: probability distribution at each step [batch_size x num_steps x num_class]
"""
batch_size = batch_H.size(0)
num_steps = max_label_length + 1 # +1 for [EOS] at end of sentence.
output_hiddens = batch_H.new_zeros((batch_size, num_steps, self.hidden_size), dtype=torch.float)
hidden = (batch_H.new_zeros((batch_size, self.hidden_size), dtype=torch.float),
batch_H.new_zeros((batch_size, self.hidden_size), dtype=torch.float))
if self.training:
for i in range(num_steps):
char_embeddings = self.char_embeddings(text[:, i])
# hidden : decoder's hidden s_{t-1}, batch_H : encoder's hidden H, char_embeddings : f(y_{t-1})
hidden, alpha = self.attention_cell(hidden, batch_H, char_embeddings)
output_hiddens[:, i, :] = hidden[0] # LSTM hidden index (0: hidden, 1: Cell)
probs = self.generator(output_hiddens)
else:
targets = text[0].expand(batch_size) # should be filled with the [SOS] token
probs = batch_H.new_zeros((batch_size, num_steps, self.num_class), dtype=torch.float)
for i in range(num_steps):
char_embeddings = self.char_embeddings(targets)
hidden, alpha = self.attention_cell(hidden, batch_H, char_embeddings)
probs_step = self.generator(hidden[0])
probs[:, i, :] = probs_step
_, next_input = probs_step.max(1)
targets = next_input
return probs # batch_size x num_steps x num_class
class AttentionCell(nn.Module):
def __init__(self, input_size, hidden_size, num_embeddings):
super().__init__()
self.i2h = nn.Linear(input_size, hidden_size, bias=False)
self.h2h = nn.Linear(hidden_size, hidden_size) # either i2i or h2h should have bias
self.score = nn.Linear(hidden_size, 1, bias=False)
self.rnn = nn.LSTMCell(input_size + num_embeddings, hidden_size)
self.hidden_size = hidden_size
def forward(self, prev_hidden, batch_H, char_embeddings):
# [batch_size x num_encoder_step x num_channel] -> [batch_size x num_encoder_step x hidden_size]
batch_H_proj = self.i2h(batch_H)
prev_hidden_proj = self.h2h(prev_hidden[0]).unsqueeze(1)
e = self.score(torch.tanh(batch_H_proj + prev_hidden_proj)) # batch_size x num_encoder_step x 1
alpha = F.softmax(e, dim=1)
context = torch.bmm(alpha.permute(0, 2, 1), batch_H).squeeze(1) # batch_size x num_channel
concat_context = torch.cat([context, char_embeddings], 1) # batch_size x (num_channel + num_embedding)
cur_hidden = self.rnn(concat_context, prev_hidden)
return cur_hidden, alpha
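`AttentionCell` scores each encoder step with additive (Bahdanau-style) attention: e_j = v . tanh(W_i h_j + W_h s), softmaxes over steps, and takes the weighted sum as the context vector. A dependency-free sketch over 1-D features (scalar toy weights stand in for the linear layers):

```python
import math

def additive_attention(enc: list, dec: float,
                       w_i: float = 1.0, w_h: float = 1.0, v: float = 1.0):
    # Scores e_j = v * tanh(w_i * h_j + w_h * s); scalar stand-ins for i2h/h2h/score.
    scores = [v * math.tanh(w_i * h + w_h * dec) for h in enc]
    m = max(scores)
    exps = [math.exp(s - m) for s in scores]
    z = sum(exps)
    alpha = [e / z for e in exps]  # softmax over encoder steps
    context = sum(a * h for a, h in zip(alpha, enc))  # weighted sum of features
    return alpha, context

alpha, context = additive_attention([0.5, -1.0, 2.0], 0.1)
```

In the real module the context is then concatenated with the previous character embedding and fed to the `LSTMCell`, giving the decoder a glimpse of the most relevant encoder columns at each step.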
================================================
FILE: src/parseq/strhub/models/trba/system.py
================================================
# Scene Text Recognition Model Hub
# Copyright 2022 Darwin Bautista
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import partial
from typing import Sequence, Any, Optional
import torch
import torch.nn.functional as F
from pytorch_lightning.utilities.types import STEP_OUTPUT
from timm.models.helpers import named_apply
from torch import Tensor
from strhub.models.base import CrossEntropySystem, CTCSystem
from strhub.models.utils import init_weights
from .model import TRBA as Model
class TRBA(CrossEntropySystem):
def __init__(self, charset_train: str, charset_test: str, max_label_length: int,
batch_size: int, lr: float, warmup_pct: float, weight_decay: float,
img_size: Sequence[int], num_fiducial: int, output_channel: int, hidden_size: int,
**kwargs: Any) -> None:
super().__init__(charset_train, charset_test, batch_size, lr, warmup_pct, weight_decay)
self.save_hyperparameters()
self.max_label_length = max_label_length
img_h, img_w = img_size
self.model = Model(img_h, img_w, len(self.tokenizer), num_fiducial,
output_channel=output_channel, hidden_size=hidden_size, use_ctc=False)
named_apply(partial(init_weights, exclude=['Transformation.LocalizationNetwork.localization_fc2']), self.model)
@torch.jit.ignore
def no_weight_decay(self):
return {'model.Prediction.char_embeddings.weight'}
def forward(self, images: Tensor, max_length: Optional[int] = None) -> Tensor:
max_length = self.max_label_length if max_length is None else min(max_length, self.max_label_length)
text = images.new_full([1], self.bos_id, dtype=torch.long)
return self.model.forward(images, max_length, text)
def training_step(self, batch, batch_idx) -> STEP_OUTPUT:
images, labels = batch
encoded = self.tokenizer.encode(labels, self.device)
inputs = encoded[:, :-1] # remove [EOS]
targets = encoded[:, 1:] # remove [BOS]
max_length = encoded.shape[1] - 2 # exclude [BOS] and [EOS] from count
logits = self.model.forward(images, max_length, inputs)
loss = F.cross_entropy(logits.flatten(end_dim=1), targets.flatten(), ignore_index=self.pad_id)
self.log('loss', loss)
return loss
class TRBC(CTCSystem):
def __init__(self, charset_train: str, charset_test: str, max_label_length: int,
batch_size: int, lr: float, warmup_pct: float, weight_decay: float,
img_size: Sequence[int], num_fiducial: int, output_channel: int, hidden_size: int,
**kwargs: Any) -> None:
super().__init__(charset_train, charset_test, batch_size, lr, warmup_pct, weight_decay)
self.save_hyperparameters()
self.max_label_length = max_label_length
img_h, img_w = img_size
self.model = Model(img_h, img_w, len(self.tokenizer), num_fiducial,
output_channel=output_channel, hidden_size=hidden_size, use_ctc=True)
named_apply(partial(init_weights, exclude=['Transformation.LocalizationNetwork.localization_fc2']), self.model)
def forward(self, images: Tensor, max_length: Optional[int] = None) -> Tensor:
# max_label_length is unused in CTC prediction
return self.model.forward(images, None)
def training_step(self, batch, batch_idx) -> STEP_OUTPUT:
images, labels = batch
loss = self.forward_logits_loss(images, labels)[1]
self.log('loss', loss)
return loss
================================================
FILE: src/parseq/strhub/models/trba/transformation.py
================================================
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
class TPS_SpatialTransformerNetwork(nn.Module):
""" Rectification Network of RARE, namely TPS based STN """
def __init__(self, F, I_size, I_r_size, I_channel_num=1):
""" Based on RARE TPS
input:
batch_I: Batch Input Image [batch_size x I_channel_num x I_height x I_width]
I_size : (height, width) of the input image I
I_r_size : (height, width) of the rectified image I_r
I_channel_num : the number of channels of the input image I
output:
batch_I_r: rectified image [batch_size x I_channel_num x I_r_height x I_r_width]
"""
super().__init__()
self.F = F
self.I_size = I_size
self.I_r_size = I_r_size # = (I_r_height, I_r_width)
self.I_channel_num = I_channel_num
self.LocalizationNetwork = LocalizationNetwork(self.F, self.I_channel_num)
self.GridGenerator = GridGenerator(self.F, self.I_r_size)
def forward(self, batch_I):
batch_C_prime = self.LocalizationNetwork(batch_I) # batch_size x K x 2
# batch_size x n (= I_r_width x I_r_height) x 2
build_P_prime = self.GridGenerator.build_P_prime(batch_C_prime)
build_P_prime_reshape = build_P_prime.reshape([build_P_prime.size(0), self.I_r_size[0], self.I_r_size[1], 2])
if torch.__version__ > "1.2.0":
batch_I_r = F.grid_sample(batch_I, build_P_prime_reshape, padding_mode='border', align_corners=True)
else:
batch_I_r = F.grid_sample(batch_I, build_P_prime_reshape, padding_mode='border')
return batch_I_r
class LocalizationNetwork(nn.Module):
""" Localization Network of RARE, which predicts C' (K x 2) from I (I_width x I_height) """
def __init__(self, F, I_channel_num):
super().__init__()
self.F = F
self.I_channel_num = I_channel_num
self.conv = nn.Sequential(
nn.Conv2d(in_channels=self.I_channel_num, out_channels=64, kernel_size=3, stride=1, padding=1,
bias=False), nn.BatchNorm2d(64), nn.ReLU(True),
nn.MaxPool2d(2, 2), # batch_size x 64 x I_height/2 x I_width/2
nn.Conv2d(64, 128, 3, 1, 1, bias=False), nn.BatchNorm2d(128), nn.ReLU(True),
nn.MaxPool2d(2, 2), # batch_size x 128 x I_height/4 x I_width/4
nn.Conv2d(128, 256, 3, 1, 1, bias=False), nn.BatchNorm2d(256), nn.ReLU(True),
nn.MaxPool2d(2, 2), # batch_size x 256 x I_height/8 x I_width/8
nn.Conv2d(256, 512, 3, 1, 1, bias=False), nn.BatchNorm2d(512), nn.ReLU(True),
nn.AdaptiveAvgPool2d(1) # batch_size x 512
)
self.localization_fc1 = nn.Sequential(nn.Linear(512, 256), nn.ReLU(True))
self.localization_fc2 = nn.Linear(256, self.F * 2)
# Init fc2 in LocalizationNetwork
self.localization_fc2.weight.data.fill_(0)
""" see RARE paper Fig. 6 (a) """
ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
ctrl_pts_y_top = np.linspace(0.0, -1.0, num=int(F / 2))
ctrl_pts_y_bottom = np.linspace(1.0, 0.0, num=int(F / 2))
ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
initial_bias = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
self.localization_fc2.bias.data = torch.from_numpy(initial_bias).float().view(-1)
def forward(self, batch_I):
"""
input: batch_I : Batch Input Image [batch_size x I_channel_num x I_height x I_width]
output: batch_C_prime : Predicted coordinates of fiducial points for input batch [batch_size x F x 2]
"""
batch_size = batch_I.size(0)
features = self.conv(batch_I).view(batch_size, -1)
batch_C_prime = self.localization_fc2(self.localization_fc1(features)).view(batch_size, self.F, 2)
return batch_C_prime
class GridGenerator(nn.Module):
""" Grid Generator of RARE, which produces P_prime by multipling T with P """
def __init__(self, F, I_r_size):
""" Generate P_hat and inv_delta_C for later """
super().__init__()
self.eps = 1e-6
self.I_r_height, self.I_r_width = I_r_size
self.F = F
self.C = self._build_C(self.F) # F x 2
self.P = self._build_P(self.I_r_width, self.I_r_height)
# num_gpu = torch.cuda.device_count()
# if num_gpu > 1:
# for multi-gpu, you may need register buffer
self.register_buffer("inv_delta_C", torch.tensor(
self._build_inv_delta_C(self.F, self.C)).float()) # F+3 x F+3
self.register_buffer("P_hat", torch.tensor(self._build_P_hat(self.F, self.C, self.P)).float()) # n x F+3
# else:
# # for fine-tuning with different image width, you may use below instead of self.register_buffer
# self.inv_delta_C = torch.tensor(self._build_inv_delta_C(self.F, self.C)).float() # F+3 x F+3
# self.P_hat = torch.tensor(self._build_P_hat(self.F, self.C, self.P)).float() # n x F+3
def _build_C(self, F):
""" Return coordinates of fiducial points in I_r; C """
ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
ctrl_pts_y_top = -1 * np.ones(int(F / 2))
ctrl_pts_y_bottom = np.ones(int(F / 2))
ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
C = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
return C # F x 2
def _build_inv_delta_C(self, F, C):
""" Return inv_delta_C which is needed to calculate T """
hat_C = np.zeros((F, F), dtype=float) # F x F
for i in range(0, F):
for j in range(i, F):
r = np.linalg.norm(C[i] - C[j])
hat_C[i, j] = r
hat_C[j, i] = r
np.fill_diagonal(hat_C, 1)
hat_C = (hat_C ** 2) * np.log(hat_C)
# print(C.shape, hat_C.shape)
delta_C = np.concatenate( # F+3 x F+3
[
np.concatenate([np.ones((F, 1)), C, hat_C], axis=1), # F x F+3
np.concatenate([np.zeros((2, 3)), np.transpose(C)], axis=1), # 2 x F+3
np.concatenate([np.zeros((1, 3)), np.ones((1, F))], axis=1) # 1 x F+3
],
axis=0
)
inv_delta_C = np.linalg.inv(delta_C)
return inv_delta_C # F+3 x F+3
def _build_P(self, I_r_width, I_r_height):
I_r_grid_x = (np.arange(-I_r_width, I_r_width, 2) + 1.0) / I_r_width # self.I_r_width
I_r_grid_y = (np.arange(-I_r_height, I_r_height, 2) + 1.0) / I_r_height # self.I_r_height
P = np.stack( # self.I_r_width x self.I_r_height x 2
np.meshgrid(I_r_grid_x, I_r_grid_y),
axis=2
)
return P.reshape([-1, 2]) # n (= self.I_r_width x self.I_r_height) x 2
def _build_P_hat(self, F, C, P):
n = P.shape[0] # n (= self.I_r_width x self.I_r_height)
P_tile = np.tile(np.expand_dims(P, axis=1), (1, F, 1)) # n x 2 -> n x 1 x 2 -> n x F x 2
C_tile = np.expand_dims(C, axis=0) # 1 x F x 2
P_diff = P_tile - C_tile # n x F x 2
rbf_norm = np.linalg.norm(P_diff, ord=2, axis=2, keepdims=False) # n x F
rbf = np.multiply(np.square(rbf_norm), np.log(rbf_norm + self.eps)) # n x F
P_hat = np.concatenate([np.ones((n, 1)), P, rbf], axis=1)
return P_hat # n x F+3
def build_P_prime(self, batch_C_prime):
""" Generate Grid from batch_C_prime [batch_size x F x 2] """
batch_size = batch_C_prime.size(0)
batch_inv_delta_C = self.inv_delta_C.repeat(batch_size, 1, 1)
batch_P_hat = self.P_hat.repeat(batch_size, 1, 1)
batch_C_prime_with_zeros = torch.cat((batch_C_prime, batch_C_prime.new_zeros(
(batch_size, 3, 2), dtype=torch.float)), dim=1) # batch_size x F+3 x 2
batch_T = torch.bmm(batch_inv_delta_C, batch_C_prime_with_zeros) # batch_size x F+3 x 2
batch_P_prime = torch.bmm(batch_P_hat, batch_T) # batch_size x n x 2
return batch_P_prime # batch_size x n x 2
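As a quick sanity check on the TPS system solved in `_build_inv_delta_C` above, the following standalone sketch (not part of the repository; `F = 6` is an arbitrary small choice) rebuilds the fiducial points `C` and the `(F+3) x (F+3)` matrix `delta_C`, and verifies that it is invertible:

```python
import numpy as np

F = 6  # number of fiducial points (assumed even, as in the module above)

# Fiducial points C: top and bottom rows of control points in [-1, 1]^2
ctrl_pts_x = np.linspace(-1.0, 1.0, F // 2)
ctrl_pts_top = np.stack([ctrl_pts_x, -np.ones(F // 2)], axis=1)
ctrl_pts_bottom = np.stack([ctrl_pts_x, np.ones(F // 2)], axis=1)
C = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)  # F x 2

# TPS radial basis matrix: hat_C[i, j] = r^2 * log(r), r = ||C_i - C_j||
r = np.linalg.norm(C[:, None] - C[None, :], axis=2)
np.fill_diagonal(r, 1.0)  # log(1) = 0, so diagonal entries vanish
hat_C = (r ** 2) * np.log(r)

# Full (F+3) x (F+3) system matrix, mirroring _build_inv_delta_C
delta_C = np.concatenate([
    np.concatenate([np.ones((F, 1)), C, hat_C], axis=1),  # F x F+3
    np.concatenate([np.zeros((2, 3)), C.T], axis=1),      # 2 x F+3
    np.concatenate([np.zeros((1, 3)), np.ones((1, F))], axis=1),  # 1 x F+3
], axis=0)

inv_delta_C = np.linalg.inv(delta_C)
print(delta_C.shape)  # (9, 9) for F = 6
print(np.allclose(delta_C @ inv_delta_C, np.eye(F + 3)))  # True
```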
================================================
FILE: src/parseq/strhub/models/utils.py
================================================
from pathlib import PurePath
from typing import Sequence
import torch
from torch import nn
import yaml
class InvalidModelError(RuntimeError):
"""Exception raised for any model-related error (creation, loading)"""
_WEIGHTS_URL = {
'parseq-tiny': 'https://github.com/baudm/parseq/releases/download/v1.0.0/parseq_tiny-e7a21b54.pt',
'parseq': 'https://github.com/baudm/parseq/releases/download/v1.0.0/parseq-bb5792a6.pt',
'abinet': 'https://github.com/baudm/parseq/releases/download/v1.0.0/abinet-1d1e373e.pt',
'trba': 'https://github.com/baudm/parseq/releases/download/v1.0.0/trba-cfaed284.pt',
'vitstr': 'https://github.com/baudm/parseq/releases/download/v1.0.0/vitstr-26d0fcf4.pt',
'crnn': 'https://github.com/baudm/parseq/releases/download/v1.0.0/crnn-679d0e31.pt',
}
def _get_config(experiment: str, **kwargs):
"""Emulates hydra config resolution"""
root = PurePath(__file__).parents[2]
with open(root / 'configs/main.yaml', 'r') as f:
config = yaml.load(f, yaml.Loader)['model']
with open(root / f'configs/charset/94_full.yaml', 'r') as f:
config.update(yaml.load(f, yaml.Loader)['model'])
with open(root / f'configs/experiment/{experiment}.yaml', 'r') as f:
exp = yaml.load(f, yaml.Loader)
# Apply base model config
model = exp['defaults'][0]['override /model']
with open(root / f'configs/model/{model}.yaml', 'r') as f:
config.update(yaml.load(f, yaml.Loader))
# Apply experiment config
if 'model' in exp:
config.update(exp['model'])
config.update(kwargs)
# Workaround for now: manually cast the lr to the correct type.
config['lr'] = float(config['lr'])
return config
def _get_model_class(key):
if 'abinet' in key:
from .abinet.system import ABINet as ModelClass
elif 'crnn' in key:
from .crnn.system import CRNN as ModelClass
elif 'parseq' in key:
from .parseq.system import PARSeq as ModelClass
elif 'trba' in key:
from .trba.system import TRBA as ModelClass
elif 'trbc' in key:
from .trba.system import TRBC as ModelClass
elif 'vitstr' in key:
from .vitstr.system import ViTSTR as ModelClass
else:
raise InvalidModelError("Unable to find model class for '{}'".format(key))
return ModelClass
def get_pretrained_weights(experiment):
try:
url = _WEIGHTS_URL[experiment]
except KeyError:
raise InvalidModelError("No pretrained weights found for '{}'".format(experiment)) from None
return torch.hub.load_state_dict_from_url(url=url, map_location='cpu', check_hash=True)
def create_model(experiment: str, pretrained: bool = False, **kwargs):
try:
config = _get_config(experiment, **kwargs)
except FileNotFoundError:
raise InvalidModelError("No configuration found for '{}'".format(experiment)) from None
ModelClass = _get_model_class(experiment)
model = ModelClass(**config)
if pretrained:
model.load_state_dict(get_pretrained_weights(experiment))
return model
def load_from_checkpoint(checkpoint_path: str, **kwargs):
if checkpoint_path.startswith('pretrained='):
model_id = checkpoint_path.split('=', maxsplit=1)[1]
model = create_model(model_id, True, **kwargs)
else:
ModelClass = _get_model_class(checkpoint_path)
model = ModelClass.load_from_checkpoint(checkpoint_path, **kwargs)
return model
def parse_model_args(args):
kwargs = {}
arg_types = {t.__name__: t for t in [int, float, str]}
arg_types['bool'] = lambda v: v.lower() == 'true' # special handling for bool
for arg in args:
name, value = arg.split('=', maxsplit=1)
name, arg_type = name.split(':', maxsplit=1)
kwargs[name] = arg_types[arg_type](value)
return kwargs
def init_weights(module: nn.Module, name: str = '', exclude: Sequence[str] = ()):
"""Initialize the weights using the typical initialization schemes used in SOTA models."""
if any(map(name.startswith, exclude)):
return
if isinstance(module, nn.Linear):
nn.init.trunc_normal_(module.weight, std=.02)
if module.bias is not None:
nn.init.zeros_(module.bias)
elif isinstance(module, nn.Embedding):
nn.init.trunc_normal_(module.weight, std=.02)
if module.padding_idx is not None:
module.weight.data[module.padding_idx].zero_()
elif isinstance(module, nn.Conv2d):
nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
if module.bias is not None:
nn.init.zeros_(module.bias)
elif isinstance(module, (nn.LayerNorm, nn.BatchNorm2d, nn.GroupNorm)):
nn.init.ones_(module.weight)
nn.init.zeros_(module.bias)
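The `parse_model_args` helper above maps `name:type=value` CLI tokens to a typed kwargs dict. A standalone check (the function body is copied verbatim from the file above; the example arguments are hypothetical):

```python
def parse_model_args(args):
    kwargs = {}
    arg_types = {t.__name__: t for t in [int, float, str]}
    arg_types['bool'] = lambda v: v.lower() == 'true'  # special handling for bool
    for arg in args:
        name, value = arg.split('=', maxsplit=1)
        name, arg_type = name.split(':', maxsplit=1)
        kwargs[name] = arg_types[arg_type](value)
    return kwargs

print(parse_model_args(['lr:float=3e-4', 'pretrained:bool=True', 'depth:int=12']))
# {'lr': 0.0003, 'pretrained': True, 'depth': 12}
```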
================================================
FILE: src/parseq/strhub/models/vitstr/__init__.py
================================================
r"""
Atienza, Rowel. "Vision Transformer for Fast and Efficient Scene Text Recognition."
In International Conference on Document Analysis and Recognition (ICDAR). 2021.
https://arxiv.org/abs/2105.08582
All source files, except `system.py`, are based on the implementation listed below,
and hence are released under the license of the original.
Source: https://github.com/roatienza/deep-text-recognition-benchmark
License: Apache License 2.0 (see LICENSE file in project root)
"""
================================================
FILE: src/parseq/strhub/models/vitstr/model.py
================================================
"""
Implementation of ViTSTR based on timm VisionTransformer.
TODO:
1) distilled deit backbone
2) base deit backbone
Copyright 2021 Rowel Atienza
"""
from timm.models.vision_transformer import VisionTransformer
class ViTSTR(VisionTransformer):
"""
ViTSTR is basically a ViT that uses DeiT weights.
Modified head to support a sequence of characters prediction for STR.
"""
def forward(self, x, seqlen: int = 25):
x = self.forward_features(x)
x = x[:, :seqlen]
# batch, seqlen, embsize
b, s, e = x.size()
x = x.reshape(b * s, e)
x = self.head(x).view(b, s, self.num_classes)
return x
================================================
FILE: src/parseq/strhub/models/vitstr/system.py
================================================
# Scene Text Recognition Model Hub
# Copyright 2022 Darwin Bautista
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Sequence, Any, Optional
import torch
from pytorch_lightning.utilities.types import STEP_OUTPUT
from torch import Tensor
from strhub.models.base import CrossEntropySystem
from strhub.models.utils import init_weights
from .model import ViTSTR as Model
class ViTSTR(CrossEntropySystem):
def __init__(self, charset_train: str, charset_test: str, max_label_length: int,
batch_size: int, lr: float, warmup_pct: float, weight_decay: float,
img_size: Sequence[int], patch_size: Sequence[int], embed_dim: int, num_heads: int,
**kwargs: Any) -> None:
super().__init__(charset_train, charset_test, batch_size, lr, warmup_pct, weight_decay)
self.save_hyperparameters()
self.max_label_length = max_label_length
# We don't predict <bos> nor <pad>
self.model = Model(img_size=img_size, patch_size=patch_size, depth=12, mlp_ratio=4, qkv_bias=True,
embed_dim=embed_dim, num_heads=num_heads, num_classes=len(self.tokenizer) - 2)
# Non-zero weight init for the head
self.model.head.apply(init_weights)
@torch.jit.ignore
def no_weight_decay(self):
return {'model.' + n for n in self.model.no_weight_decay()}
def forward(self, images: Tensor, max_length: Optional[int] = None) -> Tensor:
max_length = self.max_label_length if max_length is None else min(max_length, self.max_label_length)
logits = self.model.forward(images, max_length + 2) # +2 tokens for [GO] and [s]
# Truncate to conform to other models. [GO] in ViTSTR is actually used as the padding (therefore, ignored).
# First position corresponds to the class token, which is unused and ignored in the original work.
logits = logits[:, 1:]
return logits
def training_step(self, batch, batch_idx) -> STEP_OUTPUT:
images, labels = batch
loss = self.forward_logits_loss(images, labels)[1]
self.log('loss', loss)
return loss
================================================
FILE: src/parseq/test.py
================================================
#!/usr/bin/env python3
# Scene Text Recognition Model Hub
# Copyright 2022 Darwin Bautista
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import string
import sys
from dataclasses import dataclass
from typing import List
import torch
from tqdm import tqdm
from strhub.data.module import SceneTextDataModule
from strhub.models.utils import load_from_checkpoint, parse_model_args
@dataclass
class Result:
dataset: str
num_samples: int
accuracy: float
ned: float
confidence: float
label_length: float
def print_results_table(results: List[Result], file=None):
w = max(map(len, map(getattr, results, ['dataset'] * len(results))))
w = max(w, len('Dataset'), len('Combined'))
print('| {:<{w}} | # samples | Accuracy | 1 - NED | Confidence | Label Length |'.format('Dataset', w=w), file=file)
print('|:{:-<{w}}:|----------:|---------:|--------:|-----------:|-------------:|'.format('----', w=w), file=file)
c = Result('Combined', 0, 0, 0, 0, 0)
for res in results:
c.num_samples += res.num_samples
c.accuracy += res.num_samples * res.accuracy
c.ned += res.num_samples * res.ned
c.confidence += res.num_samples * res.confidence
c.label_length += res.num_samples * res.label_length
print(f'| {res.dataset:<{w}} | {res.num_samples:>9} | {res.accuracy:>8.2f} | {res.ned:>7.2f} '
f'| {res.confidence:>10.2f} | {res.label_length:>12.2f} |', file=file)
c.accuracy /= c.num_samples
c.ned /= c.num_samples
c.confidence /= c.num_samples
c.label_length /= c.num_samples
print('|-{:-<{w}}-|-----------|----------|---------|------------|--------------|'.format('----', w=w), file=file)
print(f'| {c.dataset:<{w}} | {c.num_samples:>9} | {c.accuracy:>8.2f} | {c.ned:>7.2f} '
f'| {c.confidence:>10.2f} | {c.label_length:>12.2f} |', file=file)
@torch.inference_mode()
def main():
parser = argparse.ArgumentParser()
parser.add_argument('checkpoint', help="Model checkpoint (or 'pretrained=<model_id>')")
parser.add_argument('--data_root', default='data')
parser.add_argument('--batch_size', type=int, default=512)
parser.add_argument('--num_workers', type=int, default=4)
parser.add_argument('--cased', action='store_true', default=False, help='Cased comparison')
parser.add_argument('--punctuation', action='store_true', default=False, help='Check punctuation')
parser.add_argument('--new', action='store_true', default=False, help='Evaluate on new benchmark datasets')
parser.add_argument('--rotation', type=int, default=0, help='Angle of rotation (counter clockwise) in degrees.')
parser.add_argument('--device', default='cuda')
args, unknown = parser.parse_known_args()
kwargs = parse_model_args(unknown)
charset_test = string.digits + string.ascii_lowercase
if args.cased:
charset_test += string.ascii_uppercase
if args.punctuation:
charset_test += string.punctuation
kwargs.update({'charset_test': charset_test})
print(f'Additional keyword arguments: {kwargs}')
model = load_from_checkpoint(args.checkpoint, **kwargs).eval().to(args.device)
hp = model.hparams
datamodule = SceneTextDataModule(args.data_root, '_unused_', hp.img_size, hp.max_label_length, hp.charset_train,
hp.charset_test, args.batch_size, args.num_workers, False, rotation=args.rotation)
test_set = SceneTextDataModule.TEST_BENCHMARK_SUB + SceneTextDataModule.TEST_BENCHMARK
if args.new:
test_set += SceneTextDataModule.TEST_NEW
test_set = sorted(set(test_set))
results = {}
max_width = max(map(len, test_set))
for name, dataloader in datamodule.test_dataloaders(test_set).items():
total = 0
correct = 0
ned = 0
confidence = 0
label_length = 0
for imgs, labels in tqdm(iter(dataloader), desc=f'{name:>{max_width}}'):
res = model.test_step((imgs.to(model.device), labels), -1)['output']
total += res.num_samples
correct += res.correct
ned += res.ned
confidence += res.confidence
label_length += res.label_length
accuracy = 100 * correct / total
mean_ned = 100 * (1 - ned / total)
mean_conf = 100 * confidence / total
mean_label_length = label_length / total
results[name] = Result(name, total, accuracy, mean_ned, mean_conf, mean_label_length)
result_groups = {
'Benchmark (Subset)': SceneTextDataModule.TEST_BENCHMARK_SUB,
'Benchmark': SceneTextDataModule.TEST_BENCHMARK
}
if args.new:
result_groups.update({'New': SceneTextDataModule.TEST_NEW})
with open(args.checkpoint + '.log.txt', 'w') as f:
for out in [f, sys.stdout]:
for group, subset in result_groups.items():
print(f'{group} set:', file=out)
print_results_table([results[s] for s in subset], out)
print('\n', file=out)
if __name__ == '__main__':
main()
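`print_results_table` above folds the per-dataset rows into a sample-weighted `Combined` row (each metric is scaled by `num_samples` and divided by the total). A minimal sketch of that weighting, using made-up dataset names and numbers:

```python
from dataclasses import dataclass

@dataclass
class Result:
    dataset: str
    num_samples: int
    accuracy: float

# Hypothetical per-dataset results, for illustration only
results = [Result('IIIT5k', 3000, 97.0), Result('SVT', 647, 93.0)]

total = sum(r.num_samples for r in results)
# Sample-weighted mean, not a plain average of the two accuracies
combined_acc = sum(r.num_samples * r.accuracy for r in results) / total
print(round(combined_acc, 2))  # 96.29
```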
================================================
FILE: src/parseq/tools/art_converter.py
================================================
#!/usr/bin/env python3
import json
with open('train_task2_labels.json', 'r', encoding='utf8') as f:
d = json.load(f)
with open('gt.txt', 'w', encoding='utf8') as f:
for k, v in d.items():
if len(v) != 1:
print('error', v)
v = v[0]
if v['language'].lower() != 'latin':
# print('Skipping non-Latin:', v)
continue
if v['illegibility']:
# print('Skipping unreadable:', v)
continue
label = v['transcription'].strip()
if not label:
# print('Skipping blank label')
continue
if '#' in label and label != 'LocaL#3':
# print('Skipping corrupted label')
continue
f.write('\t'.join(['train_task2_images/' + k + '.jpg', label]) + '\n')
================================================
FILE: src/parseq/tools/case_sensitive_str_datasets_converter.py
================================================
#!/usr/bin/env python3
import os.path
import sys
from pathlib import Path
d = sys.argv[1]
p = Path(d)
gt = []
num_samples = len(list(p.glob('label/*.txt')))
ext = 'jpg' if p.joinpath('IMG', '1.jpg').is_file() else 'png'
for i in range(1, num_samples + 1):
img = p.joinpath('IMG', f'{i}.{ext}')
name = os.path.splitext(img.name)[0]
with open(p.joinpath('label', f'{i}.txt'), 'r') as f:
label = f.readline()
gt.append((os.path.join('IMG', img.name), label))
with open(d + '/lmdb.txt', 'w', encoding='utf-8') as f:
for line in gt:
fname, label = line
fname = fname.strip()
label = label.strip()
f.write('\t'.join([fname, label]) + '\n')
================================================
FILE: src/parseq/tools/coco_2_converter.py
================================================
#!/usr/bin/env python3
import argparse
import html
import math
import os
import os.path as osp
from functools import partial
import mmcv
from PIL import Image
from mmocr.utils.fileio import list_to_file
def parse_args():
parser = argparse.ArgumentParser(
description='Generate training and validation set of TextOCR '
'by cropping box image.')
parser.add_argument('root_path', help='Root dir path of TextOCR')
parser.add_argument(
'n_proc', default=1, type=int, help='Number of processes to run')
args = parser.parse_args()
return args
def process_img(args, src_image_root, dst_image_root):
# Dirty hack for multiprocessing
img_idx, img_info, anns = args
src_img = Image.open(osp.join(src_image_root, 'train2014', img_info['file_name']))
src_w, src_h = src_img.size
labels = []
for ann_idx, ann in enumerate(anns):
text_label = html.unescape(ann['utf8_string'].strip())
# Ignore empty labels
if not text_label or ann['class'] != 'machine printed' or ann['language'] != 'english' or \
ann['legibility'] != 'legible':
continue
# Some labels and images with '#' in the middle are actually good, but some aren't, so we just filter them all.
if text_label != '#' and '#' in text_label:
continue
# Some labels use '*' to denote unreadable characters
if text_label.startswith('*') or text_label.endswith('*'):
continue
pad = 2
x, y, w, h = ann['bbox']
x, y = max(0, math.floor(x) - pad), max(0, math.floor(y) - pad)
w, h = math.ceil(w), math.ceil(h)
x2, y2 = min(src_w, x + w + 2 * pad), min(src_h, y + h + 2 * pad)
dst_img = src_img.crop((x, y, x2, y2))
dst_img_name = f'img_{img_idx}_{ann_idx}.jpg'
dst_img_path = osp.join(dst_image_root, dst_img_name)
# Preserve JPEG quality
dst_img.save(dst_img_path, qtables=src_img.quantization)
labels.append(f'{osp.basename(dst_image_root)}/{dst_img_name}'
f' {text_label}')
src_img.close()
return labels
def convert_textocr(root_path,
dst_image_path,
dst_label_filename,
annotation_filename,
img_start_idx=0,
nproc=1):
annotation_path = osp.join(root_path, annotation_filename)
if not osp.exists(annotation_path):
raise Exception(
f'{annotation_path} does not exist, please check and try again.')
src_image_root = root_path
# outputs
dst_label_file = osp.join(root_path, dst_label_filename)
dst_image_root = osp.join(root_path, dst_image_path)
os.makedirs(dst_image_root, exist_ok=True)
annotation = mmcv.load(annotation_path)
split = 'train' if 'train' in dst_label_filename else 'val'
process_img_with_path = partial(
process_img,
src_image_root=src_image_root,
dst_image_root=dst_image_root)
tasks = []
for img_idx, img_info in enumerate(annotation['imgs'].values()):
if img_info['set'] != split:
continue
ann_ids = annotation['imgToAnns'][str(img_info['id'])]
anns = [annotation['anns'][str(ann_id)] for ann_id in ann_ids]
tasks.append((img_idx + img_start_idx, img_info, anns))
labels_list = mmcv.track_parallel_progress(
process_img_with_path, tasks, keep_order=True, nproc=nproc)
final_labels = []
for label_list in labels_list:
final_labels += label_list
list_to_file(dst_label_file, final_labels)
return len(annotation['imgs'])
def main():
args = parse_args()
root_path = args.root_path
print('Processing training set...')
num_train_imgs = convert_textocr(
root_path=root_path,
dst_image_path='image',
dst_label_filename='train_label.txt',
annotation_filename='cocotext.v2.json',
nproc=args.n_proc)
print('Processing validation set...')
convert_textocr(
root_path=root_path,
dst_image_path='image_val',
dst_label_filename='val_label.txt',
annotation_filename='cocotext.v2.json',
img_start_idx=num_train_imgs,
nproc=args.n_proc)
print('Finish')
if __name__ == '__main__':
main()
================================================
FILE: src/parseq/tools/coco_text_converter.py
================================================
#!/usr/bin/env python3
for s in ['train', 'val']:
with open('{}_words_gt.txt'.format(s), 'r', encoding='utf8') as f:
d = f.readlines()
with open('{}_lmdb.txt'.format(s), 'w', encoding='utf8') as f:
for line in d:
try:
fname, label = line.split(',', maxsplit=1)
except ValueError:
continue
fname = '{}_words/{}.jpg'.format(s, fname.strip())
label = label.strip().strip('|')
f.write('\t'.join([fname, label]) + '\n')
================================================
FILE: src/parseq/tools/create_lmdb_dataset.py
================================================
#!/usr/bin/env python3
""" a modified version of CRNN torch repository https://github.com/bgshih/crnn/blob/master/tool/create_dataset.py """
import io
import os
import fire
import lmdb
import numpy as np
from PIL import Image
def checkImageIsValid(imageBin):
if imageBin is None:
return False
img = Image.open(io.BytesIO(imageBin)).convert('RGB')
return np.prod(img.size) > 0
def writeCache(env, cache):
with env.begin(write=True) as txn:
for k, v in cache.items():
txn.put(k, v)
def createDataset(inputPath, gtFile, outputPath, checkValid=True):
"""
Create LMDB dataset for training and evaluation.
ARGS:
inputPath : input folder path where starts imagePath
outputPath : LMDB output path
gtFile : list of image path and label
checkValid : if true, check the validity of every image
"""
os.makedirs(outputPath, exist_ok=True)
env = lmdb.open(outputPath, map_size=1099511627776)
cache = {}
cnt = 1
with open(gtFile, 'r', encoding='utf-8') as f:
data = f.readlines()
nSamples = len(data)
for i, line in enumerate(data):
imagePath, label = line.strip().split(maxsplit=1)
imagePath = os.path.join(inputPath, imagePath)
with open(imagePath, 'rb') as f:
imageBin = f.read()
if checkValid:
try:
img = Image.open(io.BytesIO(imageBin)).convert('RGB')
except IOError as e:
with open(outputPath + '/error_image_log.txt', 'a') as log:
log.write('error occurred at {}-th image: {}, {}\n'.format(i, imagePath, e))
continue
if np.prod(img.size) == 0:
print('%s is not a valid image' % imagePath)
continue
imageKey = 'image-%09d'.encode() % cnt
labelKey = 'label-%09d'.encode() % cnt
cache[imageKey] = imageBin
cache[labelKey] = label.encode()
if cnt % 1000 == 0:
writeCache(env, cache)
cache = {}
print('Written %d / %d' % (cnt, nSamples))
cnt += 1
nSamples = cnt - 1
cache['num-samples'.encode()] = str(nSamples).encode()
writeCache(env, cache)
env.close()
print('Created dataset with %d samples' % nSamples)
if __name__ == '__main__':
fire.Fire(createDataset)
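The LMDB keys written by `createDataset` follow a fixed zero-padded, 1-indexed scheme (`image-%09d` / `label-%09d`). The bytes formatting used above behaves like this (the counter value 42 is arbitrary):

```python
cnt = 42  # sample counter; createDataset starts counting at 1
image_key = 'image-%09d'.encode() % cnt
label_key = 'label-%09d'.encode() % cnt
print(image_key, label_key)  # b'image-000000042' b'label-000000042'
```

Readers such as `filter_lmdb.py` below rely on exactly this key layout, so the zero padding must stay at 9 digits.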
================================================
FILE: src/parseq/tools/filter_lmdb.py
================================================
#!/usr/bin/env python3
import io
import os
from argparse import ArgumentParser
import numpy as np
import lmdb
from PIL import Image
def main():
parser = ArgumentParser()
parser.add_argument('inputs', nargs='+', help='Path to input LMDBs')
parser.add_argument('--output', help='Path to output LMDB')
parser.add_argument('--min_image_dim', type=int, default=8)
args = parser.parse_args()
os.makedirs(args.output, exist_ok=True)
with lmdb.open(args.output, map_size=1099511627776) as env_out:
in_samples = 0
out_samples = 0
samples_per_chunk = 1000
for lmdb_in in args.inputs:
with lmdb.open(lmdb_in, readonly=True, max_readers=1, lock=False) as env_in:
with env_in.begin() as txn:
num_samples = int(txn.get('num-samples'.encode()))
in_samples += num_samples
chunks = np.array_split(range(num_samples), num_samples // samples_per_chunk)
for chunk in chunks:
cache = {}
with env_in.begin() as txn:
for index in chunk:
index += 1 # lmdb starts at 1
image_key = f'image-{index:09d}'.encode()
image_bin = txn.get(image_key)
img = Image.open(io.BytesIO(image_bin))
w, h = img.size
if w < args.min_image_dim or h < args.min_image_dim:
print(f'Skipping: {index}, w = {w}, h = {h}')
continue
out_samples += 1 # increment. start at 1
label_key = f'label-{index:09d}'.encode()
out_label_key = f'label-{out_samples:09d}'.encode()
out_image_key = f'image-{out_samples:09d}'.encode()
cache[out_label_key] = txn.get(label_key)
cache[out_image_key] = image_bin
with env_out.begin(write=True) as txn:
for k, v in cache.items():
txn.put(k, v)
print(f'Written samples from {chunk[0]} to {chunk[-1]}')
with env_out.begin(write=True) as txn:
txn.put('num-samples'.encode(), str(out_samples).encode())
print(f'Written {out_samples} samples to {args.output} out of {in_samples} input samples.')
if __name__ == '__main__':
main()
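The chunking in `main` above relies on `np.array_split`, which splits the index range into roughly equal parts (and, as a caveat, raises if `num_samples // samples_per_chunk` is 0, i.e. for inputs smaller than one chunk). A small sketch of the split behaviour with toy numbers:

```python
import numpy as np

# 10 samples in chunks of ~4 -> 10 // 4 = 2 roughly-equal chunks
chunks = np.array_split(range(10), 10 // 4)
print([list(c) for c in chunks])  # [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]
```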
================================================
FILE: src/parseq/tools/lsvt_converter.py
================================================
#!/usr/bin/env python3
import argparse
import os
import os.path as osp
import re
from functools import partial

import mmcv
import numpy as np
from PIL import Image

from mmocr.utils.fileio import list_to_file


def parse_args():
    parser = argparse.ArgumentParser(
        description='Generate training set of LSVT '
        'by cropping box image.')
    parser.add_argument('root_path', help='Root dir path of LSVT')
    parser.add_argument(
        'n_proc', default=1, type=int, help='Number of processes to run')
    args = parser.parse_args()
    return args


def process_img(args, src_image_root, dst_image_root):
    # Dirty hack for multiprocessing
    img_idx, img_info, anns = args
    try:
        src_img = Image.open(osp.join(src_image_root, 'train_full_images_0/{}.jpg'.format(img_info)))
    except IOError:
        src_img = Image.open(osp.join(src_image_root, 'train_full_images_1/{}.jpg'.format(img_info)))
    blacklist = ['LOFTINESS*']
    whitelist = ['#Find YOUR Fun#', 'Story #', '*0#']
    labels = []
    for ann_idx, ann in enumerate(anns):
        text_label = ann['transcription']
        # Ignore illegible words or words with non-Latin characters
        if ann['illegibility'] or re.findall(r'[\u4e00-\u9fff]+', text_label) or text_label in blacklist or \
                ('#' in text_label and text_label not in whitelist):
            continue
        points = np.asarray(ann['points'])
        x1, y1 = points.min(axis=0)
        x2, y2 = points.max(axis=0)
        dst_img = src_img.crop((x1, y1, x2, y2))
        dst_img_name = f'img_{img_idx}_{ann_idx}.jpg'
        dst_img_path = osp.join(dst_image_root, dst_img_name)
        # Preserve JPEG quality
        dst_img.save(dst_img_path, qtables=src_img.quantization)
        labels.append(f'{osp.basename(dst_image_root)}/{dst_img_name}'
                      f' {text_label}')
    src_img.close()
    return labels


def convert_lsvt(root_path,
                 dst_image_path,
                 dst_label_filename,
                 annotation_filename,
                 img_start_idx=0,
                 nproc=1):
    annotation_path = osp.join(root_path, annotation_filename)
    if not osp.exists(annotation_path):
        raise Exception(
            f'{annotation_path} does not exist, please check and try again.')
    src_image_root = root_path

    # outputs
    dst_label_file = osp.join(root_path, dst_label_filename)
    dst_image_root = osp.join(root_path, dst_image_path)
    os.makedirs(dst_image_root, exist_ok=True)

    annotation = mmcv.load(annotation_path)

    process_img_with_path = partial(
        process_img,
        src_image_root=src_image_root,
        dst_image_root=dst_image_root)
    tasks = []
    for img_idx, (img_info, anns) in enumerate(annotation.items()):
        tasks.append((img_idx + img_start_idx, img_info, anns))
    labels_list = mmcv.track_parallel_progress(
        process_img_with_path, tasks, keep_order=True, nproc=nproc)
    final_labels = []
    for label_list in labels_list:
        final_labels += label_list
    list_to_file(dst_label_file, final_labels)
    return len(annotation)


def main():
    args = parse_args()
    root_path = args.root_path
    print('Processing training set...')
    convert_lsvt(
        root_path=root_path,
        dst_image_path='image_train',
        dst_label_filename='train_label.txt',
        annotation_filename='train_full_labels.json',
        nproc=args.n_proc)
    print('Finish')


if __name__ == '__main__':
    main()
================================================
FILE: src/parseq/tools/mlt19_converter.py
================================================
#!/usr/bin/env python3
import sys

root = sys.argv[1]

with open(root + '/gt.txt', 'r') as f:
    d = f.readlines()

with open(root + '/lmdb.txt', 'w') as f:
    for line in d:
        img, script, label = line.split(',', maxsplit=2)
        label = label.strip()
        if label and script in ['Latin', 'Symbols']:
            f.write('\t'.join([img, label]) + '\n')
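The converter above keeps only Latin/Symbols entries with a non-empty label, and `maxsplit=2` ensures commas inside the transcription survive. The same transformation on in-memory lines (the sample rows are made up for illustration):

```python
def mlt19_to_lmdb_lines(gt_lines):
    # Keep Latin/Symbols entries with non-empty labels; emit "img\tlabel" pairs.
    out = []
    for line in gt_lines:
        img, script, label = line.split(',', maxsplit=2)
        label = label.strip()
        if label and script in ['Latin', 'Symbols']:
            out.append('\t'.join([img, label]))
    return out


gt = [
    'img_1.jpg,Latin,hello\n',
    'img_2.jpg,Arabic,word\n',    # non-Latin script: dropped
    'img_3.jpg,Latin,\n',         # empty label: dropped
    'img_4.jpg,Symbols,a,b\n',    # maxsplit=2 keeps commas inside the label
]
lines = mlt19_to_lmdb_lines(gt)
```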
================================================
FILE: src/parseq/tools/openvino_converter.py
================================================
#!/usr/bin/env python3
import math
import os
import os.path as osp
from argparse import ArgumentParser
from functools import partial

import mmcv
from PIL import Image

from mmocr.utils.fileio import list_to_file


def parse_args():
    parser = ArgumentParser(description='Generate training and validation set '
                                        'of OpenVINO annotations for Open '
                                        'Images by cropping box image.')
    parser.add_argument(
        'root_path', help='Root dir containing images and annotations')
    parser.add_argument(
        'n_proc', default=1, type=int, help='Number of processes to run')
    args = parser.parse_args()
    return args


def process_img(args, src_image_root, dst_image_root):
    # Dirty hack for multiprocessing
    img_idx, img_info, anns = args
    src_img = Image.open(osp.join(src_image_root, img_info['file_name']))
    labels = []
    for ann_idx, ann in enumerate(anns):
        attrs = ann['attributes']
        text_label = attrs['transcription']
        # Ignore illegible or non-English words
        if not attrs['legible'] or attrs['language'] != 'english':
            continue
        x, y, w, h = ann['bbox']
        x, y = max(0, math.floor(x)), max(0, math.floor(y))
        w, h = math.ceil(w), math.ceil(h)
        dst_img = src_img.crop((x, y, x + w, y + h))
        dst_img_name = f'img_{img_idx}_{ann_idx}.jpg'
        dst_img_path = osp.join(dst_image_root, dst_img_name)
        # Preserve JPEG quality
        dst_img.save(dst_img_path, qtables=src_img.quantization)
        labels.append(f'{osp.basename(dst_image_root)}/{dst_img_name}'
                      f' {text_label}')
    src_img.close()
    return labels


def convert_openimages(root_path,
                       dst_image_path,
                       dst_label_filename,
                       annotation_filename,
                       img_start_idx=0,
                       nproc=1):
    annotation_path = osp.join(root_path, annotation_filename)
    if not osp.exists(annotation_path):
        raise Exception(
            f'{annotation_path} does not exist, please check and try again.')
    src_image_root = root_path

    # outputs
    dst_label_file = osp.join(root_path, dst_label_filename)
    dst_image_root = osp.join(root_path, dst_image_path)
    os.makedirs(dst_image_root, exist_ok=True)

    annotation = mmcv.load(annotation_path)

    process_img_with_path = partial(
        process_img,
        src_image_root=src_image_root,
        dst_image_root=dst_image_root)
    tasks = []
    anns = {}
    for ann in annotation['annotations']:
        anns.setdefault(ann['image_id'], []).append(ann)
    for img_idx, img_info in enumerate(annotation['images']):
        tasks.append((img_idx + img_start_idx, img_info, anns[img_info['id']]))
    labels_list = mmcv.track_parallel_progress(
        process_img_with_path, tasks, keep_order=True, nproc=nproc)
    final_labels = []
    for label_list in labels_list:
        final_labels += label_list
    list_to_file(dst_label_file, final_labels)
    return len(annotation['images'])


def main():
    args = parse_args()
    root_path = args.root_path
    print('Processing training set...')
    num_train_imgs = 0
    for s in '125f':
        num_train_imgs = convert_openimages(
            root_path=root_path,
            dst_image_path=f'image_{s}',
            dst_label_filename=f'train_{s}_label.txt',
            annotation_filename=f'text_spotting_openimages_v5_train_{s}.json',
            img_start_idx=num_train_imgs,
            nproc=args.n_proc)
    print('Processing validation set...')
    convert_openimages(
        root_path=root_path,
        dst_image_path='image_val',
        dst_label_filename='val_label.txt',
        annotation_filename='text_spotting_openimages_v5_validation.json',
        img_start_idx=num_train_imgs,
        nproc=args.n_proc)
    print('Finish')


if __name__ == '__main__':
    main()
================================================
FILE: src/parseq/tools/test_abinet_lm_acc.py
================================================
#!/usr/bin/env python3
import argparse
import string
import sys

import torch
import torch.nn.functional as F
from torch import Tensor
from torch.nn.utils.rnn import pad_sequence
from tqdm import tqdm

from strhub.data.module import SceneTextDataModule
from strhub.models.abinet.system import ABINet

sys.path.insert(0, '.')
from hubconf import _get_config
from test import Result, print_results_table


class ABINetLM(ABINet):

    def _encode(self, labels):
        targets = [torch.arange(self.max_label_length + 1)]  # dummy target. used to set pad_sequence() length
        lengths = []
        for label in labels:
            targets.append(torch.as_tensor([self.tokenizer._stoi[c] for c in label]))
            lengths.append(len(label) + 1)
        targets = pad_sequence(targets, batch_first=True, padding_value=0)[1:]  # exclude dummy target
        lengths = torch.as_tensor(lengths, device=self.device)
        targets = F.one_hot(targets, len(self.tokenizer._stoi))[..., :len(self.tokenizer._stoi) - 2].float().to(self.device)
        return targets, lengths

    def forward(self, labels: Tensor, max_length: int = None) -> Tensor:
        targets, lengths = self._encode(labels)
        return self.model.language(targets, lengths)['logits']


def main():
    parser = argparse.ArgumentParser(description='Measure the word accuracy of ABINet LM using the ground truth as input')
    parser.add_argument('checkpoint', help='Official pretrained weights for ABINet-LV (best-train-abinet.pth)')
    parser.add_argument('--data_root', default='data')
    parser.add_argument('--batch_size', type=int, default=512)
    parser.add_argument('--num_workers', type=int, default=4)
    parser.add_argument('--new', action='store_true', default=False, help='Evaluate on new benchmark datasets')
    parser.add_argument('--device', default='cuda')
    args = parser.parse_args()

    # charset used by the original ABINet
    charset = string.ascii_lowercase + '1234567890'
    ckpt = torch.load(args.checkpoint)
    config = _get_config('abinet', charset_train=charset, charset_test=charset)
    model = ABINetLM(**config)
    model.model.load_state_dict(ckpt['model'])
    model = model.eval().to(args.device)
    model.freeze()  # disable autograd

    hp = model.hparams
    datamodule = SceneTextDataModule(args.data_root, '_unused_', hp.img_size, hp.max_label_length, hp.charset_train,
                                     hp.charset_test, args.batch_size, args.num_workers, False)

    test_set = SceneTextDataModule.TEST_BENCHMARK
    if args.new:
        test_set += SceneTextDataModule.TEST_NEW
    test_set = sorted(set(test_set))

    results = {}
    max_width = max(map(len, test_set))
    for name, dataloader in datamodule.test_dataloaders(test_set).items():
        total = 0
        correct = 0
        ned = 0
        confidence = 0
        label_length = 0
        for _, labels in tqdm(iter(dataloader), desc=f'{name:>{max_width}}'):
            res = model.test_step((labels, labels), -1)['output']
            total += res.num_samples
            correct += res.correct
            ned += res.ned
            confidence += res.confidence
            label_length += res.label_length
        accuracy = 100 * correct / total
        mean_ned = 100 * (1 - ned / total)
        mean_conf = 100 * confidence / total
        mean_label_length = label_length / total
        results[name] = Result(name, total, accuracy, mean_ned, mean_conf, mean_label_length)

    result_groups = {
        'Benchmark': SceneTextDataModule.TEST_BENCHMARK
    }
    if args.new:
        result_groups.update({'New': SceneTextDataModule.TEST_NEW})
    for group, subset in result_groups.items():
        print(f'{group} set:')
        print_results_table([results[s] for s in subset])
        print('\n')


if __name__ == '__main__':
    main()
================================================
FILE: src/parseq/tools/textocr_converter.py
================================================
#!/usr/bin/env python3
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import math
import os
import os.path as osp
from functools import partial

import mmcv
import numpy as np
from PIL import Image

from mmocr.utils.fileio import list_to_file


def parse_args():
    parser = argparse.ArgumentParser(
        description='Generate training and validation set of TextOCR '
        'by cropping box image.')
    parser.add_argument('root_path', help='Root dir path of TextOCR')
    parser.add_argument(
        'n_proc', default=1, type=int, help='Number of processes to run')
    parser.add_argument('--rectify_pose', action='store_true',
                        help='Fix pose of rotated text to make them horizontal')
    args = parser.parse_args()
    return args


def rectify_image_pose(image, top_left, points):
    # Points-based heuristics for determining text orientation w.r.t. the bounding box
    points = np.asarray(points).reshape(-1, 2)
    dist = ((points - np.asarray(top_left)) ** 2).sum(axis=1)
    left_midpoint = (points[0] + points[-1]) / 2
    right_corner_points = ((points - left_midpoint) ** 2).sum(axis=1).argsort()[-2:]
    right_midpoint = points[right_corner_points].sum(axis=0) / 2
    d_x, d_y = abs(right_midpoint - left_midpoint)
    if dist[0] + dist[-1] <= dist[right_corner_points].sum():
        if d_x >= d_y:
            rot = 0
        else:
            rot = 90
    else:
        if d_x >= d_y:
            rot = 180
        else:
            rot = -90
    if rot:
        image = image.rotate(rot, expand=True)
    return image


def process_img(args, src_image_root, dst_image_root):
    # Dirty hack for multiprocessing
    img_idx, img_info, anns, rectify_pose = args
    src_img = Image.open(osp.join(src_image_root, img_info['file_name']))
    labels = []
    for ann_idx, ann in enumerate(anns):
        text_label = ann['utf8_string']
        # Ignore illegible words (marked with a lone '.')
        if text_label == '.':
            continue
        x, y, w, h = ann['bbox']
        x, y = max(0, math.floor(x)), max(0, math.floor(y))
        w, h = math.ceil(w), math.ceil(h)
        dst_img = src_img.crop((x, y, x + w, y + h))
        if rectify_pose:
            dst_img = rectify_image_pose(dst_img, (x, y), ann['points'])
        dst_img_name = f'img_{img_idx}_{ann_idx}.jpg'
        dst_img_path = osp.join(dst_image_root, dst_img_name)
        # Preserve JPEG quality
        dst_img.save(dst_img_path, qtables=src_img.quantization)
        labels.append(f'{osp.basename(dst_image_root)}/{dst_img_name}'
                      f' {text_label}')
    src_img.close()
    return labels


def convert_textocr(root_path,
                    dst_image_path,
                    dst_label_filename,
                    annotation_filename,
                    img_start_idx=0,
                    nproc=1,
                    rectify_pose=False):
    annotation_path = osp.join(root_path, annotation_filename)
    if not osp.exists(annotation_path):
        raise Exception(
            f'{annotation_path} does not exist, please check and try again.')
    src_image_root = root_path

    # outputs
    dst_label_file = osp.join(root_path, dst_label_filename)
    dst_image_root = osp.join(root_path, dst_image_path)
    os.makedirs(dst_image_root, exist_ok=True)

    annotation = mmcv.load(annotation_path)

    process_img_with_path = partial(
        process_img,
        src_image_root=src_image_root,
        dst_image_root=dst_image_root)
    tasks = []
    for img_idx, img_info in enumerate(annotation['imgs'].values()):
        ann_ids = annotation['imgToAnns'][img_info['id']]
        anns = [annotation['anns'][ann_id] for ann_id in ann_ids]
        tasks.append((img_idx + img_start_idx, img_info, anns, rectify_pose))
    labels_list = mmcv.track_parallel_progress(
        process_img_with_path, tasks, keep_order=True, nproc=nproc)
    final_labels = []
    for label_list in labels_list:
        final_labels += label_list
    list_to_file(dst_label_file, final_labels)
    return len(annotation['imgs'])


def main():
    args = parse_args()
    root_path = args.root_path
    print('Processing training set...')
    num_train_imgs = convert_textocr(
        root_path=root_path,
        dst_image_path='image',
        dst_label_filename='train_label.txt',
        annotation_filename='TextOCR_0.1_train.json',
        nproc=args.n_proc,
        rectify_pose=args.rectify_pose)
    print('Processing validation set...')
    convert_textocr(
        root_path=root_path,
        dst_image_path='image',
        dst_label_filename='val_label.txt',
        annotation_filename='TextOCR_0.1_val.json',
        img_start_idx=num_train_imgs,
        nproc=args.n_proc,
        rectify_pose=args.rectify_pose)
    print('Finish')


if __name__ == '__main__':
    main()
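The decision logic inside `rectify_image_pose` can be exercised in isolation: it compares how close the first/last polygon points (the nominal left edge) sit to the crop's top-left corner, then uses the left-to-right midpoint direction to pick one of four rotations. A pure-Python re-derivation of that heuristic (no PIL or NumPy; `text_rotation` and the synthetic quads are illustrative, not part of the repo):

```python
def text_rotation(top_left, points):
    # Squared distance of each polygon point to the crop's top-left corner.
    dist = [(px - top_left[0]) ** 2 + (py - top_left[1]) ** 2 for px, py in points]
    # Midpoint of the nominal left edge (first and last polygon points).
    lmx = (points[0][0] + points[-1][0]) / 2
    lmy = (points[0][1] + points[-1][1]) / 2
    # The two points farthest from the left midpoint are taken as the right corners.
    far = sorted(range(len(points)),
                 key=lambda i: (points[i][0] - lmx) ** 2 + (points[i][1] - lmy) ** 2)[-2:]
    rmx = sum(points[i][0] for i in far) / 2
    rmy = sum(points[i][1] for i in far) / 2
    d_x, d_y = abs(rmx - lmx), abs(rmy - lmy)
    # Left edge near the top-left corner: text reads away from it (0 or 90 degrees);
    # otherwise it is flipped (180 or -90 degrees).
    if dist[0] + dist[-1] <= dist[far[0]] + dist[far[1]]:
        return 0 if d_x >= d_y else 90
    return 180 if d_x >= d_y else -90


# Horizontal, left-to-right text: left edge at the crop's top-left -> no rotation.
horizontal = [(0, 0), (10, 0), (10, 2), (0, 2)]
rot = text_rotation((0, 0), horizontal)
```

The same quad listed starting from the bottom-right (i.e. upside-down text) yields 180, which is what the converter then feeds to `Image.rotate`.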
================================================
FILE: src/parseq/train.py
================================================
#!/usr/bin/env python3
# Scene Text Recognition Model Hub
# Copyright 2022 Darwin Bautista
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pathlib import Path

from omegaconf import DictConfig, open_dict
import hydra
from hydra.core.hydra_config import HydraConfig
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint, StochasticWeightAveraging
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.strategies import DDPStrategy
from pytorch_lightning.utilities.model_summary import summarize

from strhub.data.module import SceneTextDataModule
from strhub.models.base import BaseSystem
from strhub.models.utils import get_pretrained_weights


@hydra.main(config_path='configs', config_name='main', version_base='1.2')
def main(config: DictConfig):
    trainer_strategy = None
    with open_dict(config):
        # Resolve absolute path to data.root_dir
        config.data.root_dir = hydra.utils.to_absolute_path(config.data.root_dir)
        # Special handling for GPU-affected config
        gpus = config.trainer.get('gpus', 0)
        if gpus:
            # Use mixed-precision training
            config.trainer.precision = 16
            if gpus > 1:
                # Use DDP
                config.trainer.strategy = 'ddp'
                # DDP optimizations
                trainer_strategy = DDPStrategy(find_unused_parameters=False, gradient_as_bucket_view=True)
                # Scale steps-based config
                config.trainer.val_check_interval //= gpus
                if config.trainer.get('max_steps', -1) > 0:
                    config.trainer.max_steps //= gpus

    # Special handling for PARseq
    if config.model.get('perm_mirrored', False):
        assert config.model.perm_num % 2 == 0, 'perm_num should be even if perm_mirrored = True'

    model: BaseSystem = hydra.utils.instantiate(config.model)
    # If specified, use pretrained weights to initialize the model
    if config.pretrained is not None:
        model.load_state_dict(get_pretrained_weights(config.pretrained))
    print(summarize(model, max_depth=1 if model.hparams.name.startswith('parseq') else 2))

    datamodule: SceneTextDataModule = hydra.utils.instantiate(config.data)

    checkpoint = ModelCheckpoint(monitor='val_accuracy', mode='max', save_top_k=3, save_last=True,
                                 filename='{epoch}-{step}-{val_accuracy:.4f}-{val_NED:.4f}')
    swa = StochasticWeightAveraging(swa_epoch_start=0.75)
    cwd = HydraConfig.get().runtime.output_dir if config.ckpt_path is None else \
        str(Path(config.ckpt_path).parents[1].absolute())
    trainer: Trainer = hydra.utils.instantiate(config.trainer, logger=TensorBoardLogger(cwd, '', '.'),
                                               strategy=trainer_strategy, enable_model_summary=False,
                                               callbacks=[checkpoint, swa])
    trainer.fit(model, datamodule=datamodule, ckpt_path=config.ckpt_path)


if __name__ == '__main__':
    main()
================================================
FILE: src/parseq/tune.py
================================================
#!/usr/bin/env python3
# Scene Text Recognition Model Hub
# Copyright 2022 Darwin Bautista
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import math
import os
import shutil
from pathlib import Path

from omegaconf import DictConfig, open_dict
import hydra
from hydra.core.hydra_config import HydraConfig
import numpy as np
from pytorch_lightning import Trainer, LightningModule
from pytorch_lightning.loggers import TensorBoardLogger
from ray import tune
from ray.tune import CLIReporter
from ray.tune.integration.pytorch_lightning import TuneReportCheckpointCallback
from ray.tune.ray_trial_executor import RayTrialExecutor
from ray.tune.schedulers import MedianStoppingRule
from ray.tune.suggest.ax import AxSearch

from strhub.data.module import SceneTextDataModule
from strhub.models.base import BaseSystem

log = logging.getLogger(__name__)


class MetricTracker(tune.Stopper):
    """Tracks the trend of the metric. Stops downward/stagnant trials. Assumes the metric is being maximized."""

    def __init__(self, metric, max_t, patience: int = 3, window: int = 3) -> None:
        super().__init__()
        self.metric = metric
        self.trial_history = {}
        self.max_t = max_t
        self.training_iteration = 0
        self.eps = 0.01  # sensitivity
        self.patience = patience  # number of consecutive downward/stagnant samples to trigger early stoppage.
        self.kernel = self.gaussian_pdf(np.arange(window) - window // 2, sigma=0.6)
        # Extra samples to keep in order to have better MAs + gradients for the middle p samples.
        self.buffer = 2 * (len(self.kernel) // 2) + 2

    @staticmethod
    def gaussian_pdf(x, sigma=1.):
        return np.exp(-(x / sigma)**2 / 2) / (sigma * np.sqrt(2 * np.pi))

    @staticmethod
    def moving_average(x, k):
        return np.convolve(x, k, 'valid') / k.sum()

    def __call__(self, trial_id, result):
        self.training_iteration = result['training_iteration']
        if np.isnan(result['loss']) or self.training_iteration >= self.max_t:
            try:
                del self.trial_history[trial_id]
            except KeyError:
                pass
            return True
        history = self.trial_history.get(trial_id, [])
        # FIFO queue of metric values.
        history = history[-(self.patience + self.buffer - 1):] + [result[self.metric]]
        # Only start checking once we have enough data. At least one non-zero sample is required.
        if len(history) == self.patience + self.buffer and sum(history) > 0:
            smooth_grad = np.gradient(self.moving_average(history, self.kernel))[1:-1]  # discard edge values.
            # Check if the trend is downward or stagnant
            if (smooth_grad < self.eps).all():
                log.info(f'Stopping trial = {trial_id}, hist = {history}, grad = {smooth_grad}')
                try:
                    del self.trial_history[trial_id]
                except KeyError:
                    pass
                return True
        self.trial_history[trial_id] = history
        return False

    def stop_all(self):
        return False


class TuneReportCheckpointPruneCallback(TuneReportCheckpointCallback):

    def _handle(self, trainer: Trainer, pl_module: LightningModule):
        self._checkpoint._handle(trainer, pl_module)
        # Prune older checkpoints
        for old in sorted(Path(tune.get_trial_dir()).glob('checkpoint_epoch=*-step=*'), key=os.path.getmtime)[:-1]:
            log.info(f'Deleting old checkpoint: {old}')
            shutil.rmtree(old)
        self._report._handle(trainer, pl_module)


def train(hparams, config, checkpoint_dir=None):
    with open_dict(config):
        config.model.lr = hparams['lr']
        # config.model.weight_decay = hparams['wd']
    model: BaseSystem = hydra.utils.instantiate(config.model)
    datamodule: SceneTextDataModule = hydra.utils.instantiate(config.data)
    tune_callback = TuneReportCheckpointPruneCallback({
        'loss': 'val_loss',
        'NED': 'val_NED',
        'accuracy': 'val_accuracy'
    })
    ckpt_path = None if checkpoint_dir is None else os.path.join(checkpoint_dir, 'checkpoint')
    trainer: Trainer = hydra.utils.instantiate(config.trainer, enable_progress_bar=False, enable_checkpointing=False,
                                               logger=TensorBoardLogger(save_dir=tune.get_trial_dir(), name='',
                                                                        version='.'),
                                               callbacks=[tune_callback])
    trainer.fit(model, datamodule=datamodule, ckpt_path=ckpt_path)


@hydra.main(config_path='configs', config_name='tune', version_base='1.2')
def main(config: DictConfig):
    # Special handling for PARseq
    if config.model.get('perm_mirrored', False):
        assert config.model.perm_num % 2 == 0, 'perm_num should be even if perm_mirrored = True'

    # Modify config
    with open_dict(config):
        # Use mixed-precision training
        if config.trainer.get('gpus', 0):
            config.trainer.precision = 16
        # Resolve absolute path to data.root_dir
        config.data.root_dir = hydra.utils.to_absolute_path(config.data.root_dir)

    hparams = {
        'lr': tune.loguniform(config.tune.lr.min, config.tune.lr.max),
        # 'wd': tune.loguniform(config.tune.wd.min, config.tune.wd.max),
    }

    steps_per_epoch = len(hydra.utils.instantiate(config.data).train_dataloader())
    val_steps = steps_per_epoch * config.trainer.max_epochs / config.trainer.val_check_interval
    max_t = round(0.75 * val_steps)
    warmup_t = round(config.model.warmup_pct * val_steps)
    scheduler = MedianStoppingRule(time_attr='training_iteration', grace_period=warmup_t)

    # Always start by evenly dividing the range in log scale.
    lr = hparams['lr']
    start = np.log10(lr.lower)
    stop = np.log10(lr.upper)
    num = math.ceil(stop - start) + 1
    initial_points = [{'lr': np.clip(x, lr.lower, lr.upper).item()} for x in reversed(np.logspace(start, stop, num))]
    search_alg = AxSearch(points_to_evaluate=initial_points)

    reporter = CLIReporter(
        parameter_columns=['lr'],
        metric_columns=['loss', 'accuracy', 'training_iteration'])

    out_dir = Path(HydraConfig.get().runtime.output_dir if config.tune.resume_dir is None else config.tune.resume_dir)
    analysis = tune.run(
        tune.with_parameters(train, config=config),
        name=out_dir.name,
        metric='NED',
        mode='max',
        stop=MetricTracker('NED', max_t),
        config=hparams,
        resources_per_trial={
            'cpu': 1,
            'gpu': config.tune.gpus_per_trial
        },
        num_samples=config.tune.num_samples,
        local_dir=str(out_dir.parent.absolute()),
        search_alg=search_alg,
        scheduler=scheduler,
        progress_reporter=reporter,
        resume=config.tune.resume_dir is not None,
        trial_executor=RayTrialExecutor(result_buffer_length=0)  # disable result buffering
    )

    print('Best hyperparameters found were: ', analysis.best_config)


if __name__ == '__main__':
    main()
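`MetricTracker` stops a trial when every gradient of the Gaussian-smoothed metric history stays below `eps`. That smoothing-and-slope check can be sketched without NumPy or Ray (the helper names and sample histories below are illustrative; `np.gradient` is approximated by central differences):

```python
import math


def gaussian_kernel(window, sigma=0.6):
    # Same shape as MetricTracker.gaussian_pdf evaluated at arange(window) - window // 2.
    xs = [i - window // 2 for i in range(window)]
    return [math.exp(-(x / sigma) ** 2 / 2) / (sigma * math.sqrt(2 * math.pi)) for x in xs]


def moving_average(history, kernel):
    # 'valid'-mode convolution normalized by the kernel sum, as in the tuner.
    k = list(reversed(kernel))  # convolution flips the kernel
    n, w, s = len(history), len(kernel), sum(kernel)
    return [sum(history[i + j] * k[j] for j in range(w)) / s for i in range(n - w + 1)]


def is_stagnant(history, window=3, eps=0.01):
    # Downward/stagnant if every interior central-difference slope of the
    # smoothed series is below eps (cf. MetricTracker.__call__).
    sm = moving_average(history, gaussian_kernel(window))
    grads = [(sm[i + 1] - sm[i - 1]) / 2 for i in range(1, len(sm) - 1)]
    return all(g < eps for g in grads)
```

A steadily rising accuracy history survives the check, while a flat (or falling) one triggers early stopping; the Gaussian kernel keeps single noisy samples from flipping the decision.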
================================================
FILE: test.py
================================================
import torch
import random
import numpy as np
import os

from PIL import Image
from tqdm import tqdm
from contextlib import nullcontext
from os.path import join as ospj
from torchvision.utils import save_image
from omegaconf import OmegaConf
from pytorch_lightning import seed_everything

from dataset.dataloader import get_dataloader
from util import *
from metrics import calc_fid, calc_lpips


def predict(cfgs, model, sampler, batch):
    context = nullcontext if cfgs.aae_enabled else torch.no_grad
    with context():
        batch, batch_uc_1 = prepare_batch(cfgs, batch)
        c, uc_1 = model.conditioner.get_unconditional_conditioning(
            batch,
            batch_uc=batch_uc_1,
            force_uc_zero_embeddings=cfgs.force_uc_zero_embeddings,
        )
        x = sampler.get_init_noise(cfgs, model, cond=c, batch=batch, uc=uc_1)
        samples_z = sampler(model, x, cond=c, batch=batch, uc=uc_1, init_step=0,
                            aae_enabled=cfgs.aae_enabled, detailed=cfgs.detailed)
        samples_x = model.decode_first_stage(samples_z)
        samples = torch.clamp((samples_x + 1.0) / 2.0, min=0.0, max=1.0)
        return samples, samples_z


def test(model, sampler, dataloader, cfgs):
    output_dir = cfgs.output_dir
    os.system(f"rm -rf {output_dir}")
    os.makedirs(output_dir, exist_ok=True)
    real_dir = ospj(output_dir, "real")
    fake_dir = ospj(output_dir, "fake")
    os.makedirs(real_dir, exist_ok=True)
    os.makedirs(fake_dir, exist_ok=True)

    temp_dir = cfgs.temp_dir
    os.system(f"rm -rf {temp_dir}")
    os.makedirs(ospj(temp_dir, "attn_map"), exist_ok=True)
    os.makedirs(ospj(temp_dir, "seg_map"), exist_ok=True)
    os.makedirs(ospj(temp_dir, "inters"), exist_ok=True)

    if cfgs.ocr_enabled:
        predictor = instantiate_from_config(cfgs.predictor_config)
        predictor.parseq = predictor.parseq.to(sampler.device)
        correct_num = 0
        total_num = 0

    for idx, batch in tqdm(enumerate(dataloader), total=len(dataloader)):
        if idx >= cfgs.max_iter:
            break
        name = batch["name"][0]
        results, results_z = predict(cfgs, model, sampler, batch)

        # run ocr
        if cfgs.ocr_enabled:
            r_bbox = batch["r_bbox"]
            gt_txt = batch["label"]
            results_crop = []
            for i, bbox in enumerate(r_bbox):
                r_top, r_bottom, r_left, r_right = bbox
                results_crop.append(results[i, :, r_top:r_bottom, r_left:r_right])
            pred_txt = predictor.img2txt(results_crop)
            correct_count = sum([int(pred_txt[i].lower() == gt_txt[i].lower()) for i in range(len(gt_txt))])
            print(f"Expected text: {batch['label']}")
            if correct_count < len(gt_txt):
                print(f"\033[1;31m OCR Result: {pred_txt} \033[0m")
            else:
                print(f"\033[1;32m OCR Result: {pred_txt} \033[0m")
            correct_num += correct_count
            total_num += len(gt_txt)

        # save results
        result = results.cpu().numpy().transpose(0, 2, 3, 1) * 255
        result = np.concatenate(result, axis=-2)
        outputs = []
        for key in ("image", "masked", "mask"):
            if key in batch:
                output = batch[key]
                if key != "mask":
                    output = (output + 1.0) / 2.0
                output = output.cpu().numpy().transpose(0, 2, 3, 1) * 255
                output = np.concatenate(output, axis=-2)
                if key == "mask":
                    output = np.tile(output, (1, 1, 3))
                outputs.append(output)
        outputs.append(result)

        real = Image.fromarray(outputs[0].astype(np.uint8))
        fake = Image.fromarray(outputs[-1].astype(np.uint8))
        real.save(ospj(output_dir, "real", f"{name}.png"))
        fake.save(ospj(output_dir, "fake", f"{name}.png"))

        output = np.concatenate(outputs, axis=0)
        output = Image.fromarray(output.astype(np.uint8))
        output.save(ospj(output_dir, f"{name}.png"))

    if cfgs.ocr_enabled:
        print(f"OCR test completed. Mean accuracy: {correct_num / total_num}")
    if cfgs.quan_test:
        calc_fid(fake_dir, real_dir)
        calc_lpips(fake_dir, real_dir)


if __name__ == "__main__":
    cfgs = OmegaConf.load("./configs/test.yaml")
    seed = random.randint(0, 2147483647)
    seed_everything(seed)
    model = init_model(cfgs)
    sampler = init_sampling(cfgs)
    dataloader = get_dataloader(cfgs, "val")
    test(model, sampler, dataloader, cfgs)
================================================
FILE: train.py
================================================
import os, sys
import torch
import random
import pytorch_lightning as pl

from omegaconf import OmegaConf
from dataset.dataloader import get_dataloader
from pytorch_lightning import seed_everything
from pytorch_lightning.callbacks import ModelCheckpoint
from torchvision.utils import save_image
from util import *


def train():
    sys.path.append(os.getcwd())

    # torch settings
    torch.multiprocessing.set_start_method('spawn')  # multiprocessing start method
    torch.set_float32_matmul_precision('medium')  # matrix multiplication precision

    config_path = 'configs/train.yaml'
    cfgs = OmegaConf.load(config_path)

    seed = random.randint(0, 2147483647)
    seed_everything(seed, workers=True)

    dataloader = get_dataloader(cfgs)
    model = init_model(cfgs)
    model.learning_rate = cfgs.base_learning_rate

    checkpoint_callback = ModelCheckpoint(dirpath=cfgs.save_ckpt_dir, every_n_epochs=cfgs.save_ckpt_freq)
    trainer = pl.Trainer(callbacks=[checkpoint_callback], **cfgs.lightning)
    trainer.fit(model=model, train_dataloaders=dataloader)


if __name__ == '__main__':
    train()
================================================
FILE: util.py
================================================
import copy

import torch
from omegaconf import OmegaConf

from sgm.util import instantiate_from_config
from sgm.modules.diffusionmodules.sampling import *


def init_model(cfgs):
    model_cfg = OmegaConf.load(cfgs.model_cfg_path)
    ckpt = cfgs.load_ckpt_path

    model = instantiate_from_config(model_cfg.model)
    model.init_from_ckpt(ckpt)

    if cfgs.type == "train":
        model.train()
    else:
        model.to(torch.device("cuda", index=cfgs.gpu))
        model.eval()
        model.freeze()

    return model


def init_sampling(cfgs):
    discretization_config = {
        "target": "sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization",
    }
    guider_config = {
        "target": "sgm.modules.diffusionmodules.guiders.VanillaCFG",
        "params": {"scale": cfgs.scale[0]},
    }
    sampler = EulerEDMSampler(
        num_steps=cfgs.steps,
        discretization_config=discretization_config,
        guider_config=guider_config,
        s_churn=0.0,
        s_tmin=0.0,
        s_tmax=999.0,
        s_noise=1.0,
        verbose=True,
        device=torch.device("cuda", index=cfgs.gpu)
    )
    return sampler


def deep_copy(batch):
    c_batch = {}
    for key in batch:
        if isinstance(batch[key], torch.Tensor):
            c_batch[key] = torch.clone(batch[key])
        elif isinstance(batch[key], (tuple, list)):
            # copy.copy works for both tuples and lists (list.copy would fail on tuples)
            c_batch[key] = copy.copy(batch[key])
        else:
            c_batch[key] = batch[key]
    return c_batch


def prepare_batch(cfgs, batch):
    for key in batch:
        if isinstance(batch[key], torch.Tensor):
            batch[key] = batch[key].to(torch.device("cuda", index=cfgs.gpu))
    batch_uc = deep_copy(batch)
    if "ntxt" in batch:
        batch_uc["txt"] = batch["ntxt"]
    else:
        batch_uc["txt"] = ["" for _ in range(len(batch["txt"]))]
    if "label" in batch:
        batch_uc["label"] = ["" for _ in range(len(batch["label"]))]
    return batch, batch_uc
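`deep_copy` clones tensors and copies sequences one level deep, which is why `prepare_batch` can blank out `batch_uc["txt"]` and `batch_uc["label"]` without disturbing the conditional batch. The aliasing behavior can be illustrated with plain lists standing in for tensors (a sketch; `shallow_batch_copy` is a hypothetical stand-in, and no torch is needed):

```python
def shallow_batch_copy(batch):
    # Mirrors util.deep_copy for non-tensor values: sequences get a fresh
    # top-level container; scalars and other objects are shared as-is.
    c_batch = {}
    for key, value in batch.items():
        if isinstance(value, (tuple, list)):
            c_batch[key] = list(value)
        else:
            c_batch[key] = value
    return c_batch


batch = {'txt': ['a photo of text'], 'label': ['hello'], 'seed': 42}
batch_uc = shallow_batch_copy(batch)
# Rebinding a key in the copy leaves the original untouched,
# exactly what prepare_batch relies on for the unconditional branch.
batch_uc['txt'] = ['' for _ in batch['txt']]
```

Note that the copy is shallow: the containers differ, but any mutable elements inside them would still be shared, which is why tensors get an explicit `torch.clone` in the real function.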