gitextract_v5pjsrh0/

├── .github/
│   └── workflows
├── .gitignore
├── Advanced_Usage.md
├── CHANGELOG.md
├── CITATION.cff
├── Dockerfile
├── LICENSE
├── MANIFEST.in
├── README.md
├── requirements.txt
├── scripts/
│   └── tortoise_tts.py
├── setup.cfg
├── setup.py
├── tortoise/
│   ├── __init__.py
│   ├── api.py
│   ├── api_fast.py
│   ├── data/
│   │   ├── got.txt
│   │   ├── layman.txt
│   │   ├── mel_norms.pth
│   │   ├── riding_hood.txt
│   │   ├── seal_copypasta.txt
│   │   └── tokenizer.json
│   ├── do_tts.py
│   ├── eval.py
│   ├── get_conditioning_latents.py
│   ├── is_this_from_tortoise.py
│   ├── models/
│   │   ├── __init__.py
│   │   ├── arch_util.py
│   │   ├── autoregressive.py
│   │   ├── classifier.py
│   │   ├── clvp.py
│   │   ├── cvvp.py
│   │   ├── diffusion_decoder.py
│   │   ├── hifigan_decoder.py
│   │   ├── random_latent_generator.py
│   │   ├── stream_generator.py
│   │   ├── transformer.py
│   │   ├── vocoder.py
│   │   └── xtransformers.py
│   ├── read.py
│   ├── read_fast.py
│   ├── socket_client.py
│   ├── socket_server.py
│   ├── tts_stream.py
│   ├── utils/
│   │   ├── __init__.py
│   │   ├── audio.py
│   │   ├── diffusion.py
│   │   ├── stft.py
│   │   ├── text.py
│   │   ├── tokenizer.py
│   │   ├── typical_sampling.py
│   │   └── wav2vec_alignment.py
│   └── voices/
│       └── cond_latent_example/
│           └── pat.pth
├── tortoise_tts.ipynb
├── tortoise_v2_examples.html
└── voice_customization_guide.md