gitextract_9im7g5qx/

├── .gitignore
├── LICENSE
├── README.md
├── algo.py
├── configs/
│   ├── algo/
│   │   ├── ar.yaml
│   │   ├── d3pm.yaml
│   │   ├── distillation.yaml
│   │   ├── duo.yaml
│   │   ├── duo_base.yaml
│   │   ├── mdlm.yaml
│   │   ├── ot-finetune.yaml
│   │   └── sedd.yaml
│   ├── callbacks/
│   │   ├── checkpoint_every_n_steps.yaml
│   │   ├── checkpoint_monitor.yaml
│   │   ├── grad_record.yaml
│   │   └── learning_rate_monitor.yaml
│   ├── config.yaml
│   ├── data/
│   │   ├── ag_news.yaml
│   │   ├── cifar10.yaml
│   │   ├── fineweb-edu.yaml
│   │   ├── lambada.yaml
│   │   ├── lm1b-gpt2.yaml
│   │   ├── lm1b-streaming.yaml
│   │   ├── lm1b-wrap.yaml
│   │   ├── lm1b.yaml
│   │   ├── openwebtext-split.yaml
│   │   ├── openwebtext-streaming.yaml
│   │   ├── openwebtext.yaml
│   │   ├── ptb.yaml
│   │   ├── scientific_papers_arxiv.yaml
│   │   ├── scientific_papers_pubmed.yaml
│   │   ├── synthetic.yaml
│   │   ├── text8-crop.yaml
│   │   ├── text8.yaml
│   │   ├── wikitext103.yaml
│   │   └── wikitext2.yaml
│   ├── lr_scheduler/
│   │   ├── constant_warmup.yaml
│   │   ├── cosine_decay_warmup.yaml
│   │   └── step_scheduler.yaml
│   ├── model/
│   │   ├── medium.yaml
│   │   ├── small.yaml
│   │   ├── tiny-dimamba.yaml
│   │   ├── tiny.yaml
│   │   └── unet.yaml
│   ├── noise/
│   │   ├── cosine.yaml
│   │   └── log-linear.yaml
│   ├── prior/
│   │   └── none.yaml
│   └── strategy/
│       ├── ddp.yaml
│       └── fsdp.yaml
├── dataloader.py
├── discrete_diffusion_harness.py
├── integral/
│   ├── bert-base-uncased.pkl
│   └── gpt2.pkl
├── main.py
├── metrics.py
├── models/
│   ├── __init__.py
│   ├── dit.py
│   ├── ema.py
│   ├── unet.py
│   └── unit_test_attention.py
├── requirements.txt
├── scripts/
│   ├── distil_owt.sh
│   ├── eval_lm1b_duo.sh
│   ├── eval_owt_ar.sh
│   ├── eval_owt_duo.sh
│   ├── eval_owt_mdlm.sh
│   ├── eval_owt_sedd.sh
│   ├── fid_cifar10_duo_ancestral_cosine.sh
│   ├── fid_cifar10_duo_base_ancestral_cosine.sh
│   ├── fid_cifar10_mdlm_ancestral_cosine.sh
│   ├── gen_ppl_lm1b_ar.sh
│   ├── gen_ppl_lm1b_duo.sh
│   ├── gen_ppl_owt_ar.sh
│   ├── gen_ppl_owt_duo.sh
│   ├── gen_ppl_owt_mdlm.sh
│   ├── gen_ppl_owt_sedd.sh
│   ├── psi_samplers/
│   │   ├── cifar10/
│   │   │   ├── duo_constant_remdm.sh
│   │   │   ├── duo_max_capped_remdm.sh
│   │   │   ├── duo_max_rescale_eta.sh
│   │   │   ├── duo_psi_pc.sh
│   │   │   ├── mdlm_constant_remdm.sh
│   │   │   ├── mdlm_max_capped_remdm.sh
│   │   │   ├── mdlm_max_rescale_eta.sh
│   │   │   └── mdlm_psi_pc.sh
│   │   └── owt/
│   │       ├── duo_loop_remdm.sh
│   │       ├── duo_max_capped_remdm.sh
│   │       ├── duo_max_rescale_eta.sh
│   │       ├── mdlm_loop_remdm.sh
│   │       ├── mdlm_max_capped_remdm.sh
│   │       └── mdlm_max_rescale_eta.sh
│   ├── train_cifar10_duo_base_cosine.sh
│   ├── train_cifar10_duo_cosine.sh
│   ├── train_cifar10_mdlm_cosine.sh
│   ├── train_lm1b_ar.sh
│   ├── train_lm1b_ar_sentencepacking.sh
│   ├── train_lm1b_d3pm.sh
│   ├── train_lm1b_duo.sh
│   ├── train_lm1b_duo_sentencepacking.sh
│   ├── train_lm1b_mdlm.sh
│   ├── train_lm1b_mdlm_sentencepacking.sh
│   ├── train_owt_duo.sh
│   ├── train_owt_duo_finetune.sh
│   ├── train_owt_mdlm.sh
│   ├── train_owt_sedd.sh
│   ├── zero_shot_ar.sh
│   ├── zero_shot_duo.sh
│   ├── zero_shot_mdlm.sh
│   └── zero_shot_sedd.sh
├── trainer_base.py
└── utils.py